r49218 - Code Review

From MediaWiki.org

Jump to: navigation, search
Repository:MediaWiki
Revision:r49217 | r49218 (on ViewVC) | r49219 >
Date:17:11, 5 April 2009
Author:vasilievvv
Status:ok
Tags:
Comment:Introduce list (non-associated array) support into abuse filter parser.
Modified paths:

Diff [purge]

Index: trunk/extensions/AbuseFilter/AbuseFilter.parser.php
===================================================================
--- trunk/extensions/AbuseFilter/AbuseFilter.parser.php	(revision 49217)
+++ trunk/extensions/AbuseFilter/AbuseFilter.parser.php	(revision 49218)
@@ -14,8 +14,13 @@
 * T_STRING - string, in "" or ''
 * T_KEYWORD - keyword
 * T_ID - identifier
+* T_STATEMENT_SEPARATOR - ;
+* T_SQUARE_BRACKETS - [ or ]
 
 Levels of parsing:
+* Entry - catches unexpected characters
+* Semicolon - ;
+* Set - :=
 * Conditionls (IF) - if-then-else-end, cond ? a :b
 * BoolOps (BO) - &, |, ^
 * CompOps (CO) - ==, !=, ===, !==, >, <, >=, <=
@@ -25,6 +30,7 @@
 * BoolNeg (BN) - ! operation
 * SpecialOperators (SO) - in and like
 * Unarys (U) - plus and minus in cases like -5 or -(2 * +2)
+* ListElement (LE) - list[number]
 * Braces (B) - ( and )
 * Functions (F)
 * Atom (A) - return value
@@ -40,6 +46,7 @@
 	const TFloat = 'T_FLOAT';
 	const TOp = 'T_OP';
 	const TBrace = 'T_BRACE';
+	const TSquareBracket = 'T_SQUARE_BRACKET';
 	const TComma = 'T_COMMA';
 	const TStatementSeparator = 'T_STATEMENT_SEPARATOR';
 	
@@ -61,6 +68,7 @@
 	const DNull   = 'null';
 	const DBool   = 'bool';
 	const DFloat  = 'float';
+	const DList   = 'list';
 	
 	var $type;
 	var $data;
@@ -79,6 +87,12 @@
 			return new AFPData( self::DFloat, $var );
 		elseif( is_bool( $var ) )
 			return new AFPData( self::DBool, $var );
+		elseif( is_array( $var ) ) {
+			$result = array();
+			foreach( $var as $item )
+				$result[] = self::newFromPHPVar( $item );
+			return new AFPData( self::DList, $result );
+		}
 		elseif( is_null( $var ) )
 			return new AFPData();
 		else
@@ -96,6 +110,24 @@
 		if( $target == self::DNull ) {
 			return new AFPData();
 		}
+
+		if( $orig->type == self::DList ) {
+			if( $target == self::DBool )
+				return new AFPData( self::DBool, (bool)count( $orig->data ) );
+			if( $target == self::DFloat ) {
+				return new AFPData( self::DFloat, doubleval( count( $orig->data  ) ) );
+			}
+			if( $target == self::DInt ) {
+				return new AFPData( self::DInt, intval( count( $orig->data ) ) );
+			}
+			if( $target == self::DString ) {
+				$lines = array();
+				foreach( $orig->data as $item )
+					$lines[] = $item->toString();
+				return new AFPData( self::DString, implode( "\n", $lines ) );
+			}
+		}
+
 		if( $target == self::DBool ) {
 			return new AFPData( self::DBool, (bool)$orig->data );
 		}
@@ -108,6 +140,9 @@
 		if( $target == self::DString ) {
 			return new AFPData( self::DString, strval( $orig->data ) );
 		}
+		if( $target == self::DList ) {
+			return new AFPData( self::DList, array( $orig ) );
+		}
 	}
 	
 	public static function boolInvert( $value ) {
@@ -119,6 +154,8 @@
 	}
 	
 	public static function keywordIn( $a, $b ) {
+		if( $b->type == self::DList )
+			return new AFPData( self::DBool, self::listContains( $a, $b ) );
 		$a = $a->toString();
 		$b = $b->toString();
 		
@@ -130,6 +167,8 @@
 	}
 	
 	public static function keywordContains( $a, $b ) {
+		if( $a->type == self::DList )
+			return new AFPData( self::DBool, self::listContains( $b, $a ) );
 		$a = $a->toString();
 		$b = $b->toString();
 		
@@ -140,6 +179,20 @@
 		return new AFPData( self::DBool, in_string( $b, $a ) );
 	}
 	
+	public static function listContains( $value, $list ) {
+		// Should use built-in PHP function somehow
+		foreach( $list->data as $item ) {
+			if( self::equals( $value, $item ) )
+				return true;
+		}
+		return false;
+	}
+	
+	public static function equals( $d1, $d2 ) {
+		return $d1->type != self::DList && $d2->type != self::DList &&
+			$d1->toString() === $d2->toString();
+	}
+	
 	public static function keywordLike( $str, $pattern ) {
 		$str = $str->toString();
 		$pattern = $pattern->toString();
@@ -184,13 +237,13 @@
 	
 	public static function compareOp( $a, $b, $op ) {
 		if( $op == '==' || $op == '=' )
-			return new AFPData( self::DBool, $a->toString() === $b->toString() );
+			return new AFPData( self::DBool, self::equals( $a, $b ) );
 		if( $op == '!=' )
-			return new AFPData( self::DBool, $a->toString() !== $b->toString() );
+			return new AFPData( self::DBool, !self::equals( $a, $b ) );
 		if( $op == '===' )
-			return new AFPData( self::DBool, $a->data == $b->data && $a->type == $b->type );
+			return new AFPData( self::DBool, $a->type == $b->type && self::equals( $a, $b ) );
 		if( $op == '!==' )
-			return new AFPData( self::DBool, $a->data !== $b->data || $a->type != $b->type );
+			return new AFPData( self::DBool, $a->type != $b->type || !self::equals( $a, $b ) );
 		$a = $a->toString();
 		$b = $b->toString();
 		if( $op == '>' )
@@ -241,7 +294,9 @@
 	
 	public static function sum( $a, $b ) {
 		if( $a->type == self::DString || $b->type == self::DString )
-			return new AFPData( self::DFloat, $a->toString() . $b->toString() );
+			return new AFPData( self::DString, $a->toString() . $b->toString() );
+		elseif( $a->type == self::DList && $b->type == self::DList )
+			return new AFPData( self::DList, array_merge( $a->toList(), $b->toList() ) );
 		else
 			return new AFPData( self::DFloat, $a->toFloat() + $b->toFloat() );
 	}
@@ -266,6 +321,10 @@
 	public function toInt() {
 		return self::castTypes( $this, self::DInt )->data;
 	}
+
+	public function toList() {
+		return self::castTypes( $this, self::DList )->data;
+	}
 }
 
 class AFPParserState {
@@ -497,6 +556,45 @@
 				$this->doLevelSet( $result );
 				$this->setUserVariable( $varname, $result );
 				return;
+			} elseif( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == '[' ) {
+				if( !$this->mVars->varIsSet( $varname ) ) {
+					throw new AFPUserVisibleException( 'unrecognisedvar',
+												$this->mCur->pos,
+												array( $var ) );
+				}
+				$list = $this->mVars->getVar( $varname );
+				if( $list->type != AFPData::DList )
+						throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, array() );
+				$list = $list->toList();
+				$this->move();
+				if( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']' ) {
+					$idx = 'new';
+				} else {
+					$this->setState( $prev ); $this->move();
+					$idx = new AFPData();
+					$this->doLevelSemicolon( $idx );
+					$idx = $idx->toInt();
+					if( !($this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']') )
+						throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos, 
+							array(']', $this->mCur->type, $this->mCur->value ) );
+					if( count( $list ) <= $idx ) {
+						throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos,
+							array( $idx, count( $result->data ) ) );
+					}
+				}
+				$this->move();
+				if( $this->mCur->type == AFPToken::TOp && $this->mCur->value == ':=' ) {
+					$this->move();
+					$this->doLevelSet( $result );
+					if( $idx == 'new' )
+						$list[] = $result;
+					else
+						$list[$idx] = $result;
+					$this->setUserVariable( $varname, new AFPData( AFPData::DList, $list ) );
+					return;
+				} else {
+					$this->setState( $prev );
+				}
 			} else {
 				$this->setState( $prev );
 			}
@@ -734,17 +832,40 @@
 		$op = $this->mCur->value;
 		if( $this->mCur->type == AFPToken::TOp && ( $op == "+" || $op == "-" ) ) {
 			$this->move();
-			$this->doLevelBraces( $result );
+			$this->doLevelListElements( $result );
 			wfProfileIn( __METHOD__ );
 			if( $op == '-' ) {
 				$result = AFPData::unaryMinus( $result );
 			}
 			wfProfileOut( __METHOD__ );
 		} else {
-			$this->doLevelBraces( $result );
+			$this->doLevelListElements( $result );
 		}
 	}
-	
+
+	protected function doLevelListElements( &$result ) {
+		$this->doLevelBraces( $result );
+		while( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == '[' ) {
+			$idx = new AFPData();
+			$this->doLevelSemicolon( $idx );
+			if( !($this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']') ) {
+				throw new AFPUserVisibleException( 'expectednotfound', $this->mCur->pos,
+							array(']', $this->mCur->type, $this->mCur->value ) );
+			}
+			$idx = $idx->toInt();
+			if( $result->type == AFPData::DList ) {
+				if( count( $result->data ) <= $idx ) {
+					throw new AFPUserVisibleException( 'outofbounds', $this->mCur->pos,
+						array( $idx, count( $result->data ) ) );
+				}
+				$result = $result->data[$idx];
+			} else {
+				throw new AFPUserVisibleException( 'notlist', $this->mCur->pos, array() );
+			}
+			$this->move();
+		}
+	}
+
 	protected function doLevelBraces( &$result ) {
 		if( $this->mCur->type == AFPToken::TBrace && $this->mCur->value == '(' ) {
 			if( $this->mShortCircuit ) {
@@ -850,11 +971,31 @@
 														$this->mCur->pos,
 														array($tok) );
 				break;
-			case AFPToken::TBrace:
-			if( $this->mCur->value == ')' )
-				return;        // Handled at the entry level
 			case AFPToken::TNone:
 				return; // Handled at entry level
+			case AFPToken::TBrace:
+				if( $this->mCur->value == ')' )
+					return;        // Handled at the entry level
+			case AFPToken::TSquareBracket:
+				if( $this->mCur->value == '[' ) {
+					$list = array();
+					for(;;) {
+						$this->move();
+						if( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']' )
+							break;
+						$item = new AFPData();
+						$this->doLevelSet( $item );
+						$list[] = $item;
+						if( $this->mCur->type == AFPToken::TSquareBracket && $this->mCur->value == ']' )
+							break;
+						if( $this->mCur->type != AFPToken::TComma )
+							throw new AFPUserVisibleException( 'expectednotfound',
+								$this->mCur->pos,
+								array(', or ]', $this->mCur->type, $this->mCur->value ) );
+					}
+					$result = new AFPData( AFPData::DList, $list );
+					break;
+				}
 			default:
 				throw new AFPUserVisibleException( 'unexpectedtoken',
 													$this->mCur->pos,
@@ -930,6 +1071,11 @@
 			return array( $code[$offset], AFPToken::TBrace, $code, $offset + 1 );
 		}
 		
+		// Square brackets
+		if( $code[$offset] == '[' or $code[$offset] == ']' ) {
+			return array( $code[$offset], AFPToken::TSquareBracket, $code, $offset + 1 );
+		}
+		
 		// Semicolons
 		if ($code[$offset] == ';') {
 			return array( ';', AFPToken::TStatementSeparator, $code, $offset + 1 );
@@ -1112,6 +1258,9 @@
 		if( count( $args ) < 1 )
 			throw new AFPUserVisibleException( 'notenoughargs', $this->mCur->pos,
 						array( 'len', 2, count($args) ) );
+		if( $args[0]->type == AFPData::DList ) {
+			return new AFPData( AFPData::DInt, count( $args[0]->data ) );
+		}
 		$s = $args[0]->toString();
 		return new AFPData( AFPData::DInt, mb_strlen( $s, 'utf-8' ) );
 	}
@@ -1148,9 +1297,21 @@
 		if( count( $args ) < 1 )
 			throw new AFPUserVisibleException( 'notenoughargs', $this->mCur->pos,
 						array( 'count', 1, count($args) ) );
-			
+
+		if( $args[0]->type == AFPData::DList && count( $args ) == 1 ) {
+			return new AFPData( AFPData::DInt, count( $args[0]->data ) );
+		} elseif( count( $args ) > 1 && $args[1]->type == AFPData::DList ) {
+			$needle = $args[0];
+			$haystack = $args[1]->toList();
+			$count = 0;
+			foreach( $haystack as $item )
+				if( AFPData::equals( $needle, $item ))
+					$count++;
+			return new AFPData( AFPData::DInt, $count );
+		}
+
 		$offset = -1;
-		
+
 		if (count($args) == 1) {
 			$count = count( explode( ",", $args[0]->toString() ) );
 		} else {
@@ -1224,7 +1385,15 @@
 			throw new AFPUserVisibleException( 'notenoughargs', $this->mCur->pos,
 						array( 'contains_any', 2, count($args) ) );
 		}
-		
+
+		if( $args[0]->type == AFPData::DList ) {
+			$list = array_shift( $args );
+			foreach( $args as $arg ) 
+				if( AFPData::listContains( $arg, $list ) )
+					return new AFPData( AFPData::DBool, true );
+			return new AFPData( AFPData::DBool, false );
+		}
+
 		$s = array_shift( $args );
 		$s = $s->toString();
 		
@@ -1402,7 +1571,7 @@
 			throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, array(__METHOD__) );
 		$val = $args[0];
 		
-		return new AFPData( AFPData::DString, $val->data );
+		return AFPData::castTypes( $val, AFPData::DString );
 	}
 	
 	protected function castInt( $args ) {
@@ -1410,7 +1579,7 @@
 			throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, array(__METHOD__) );
 		$val = $args[0];
 		
-		return new AFPData( AFPData::DInt, intval($val->data) );
+		return AFPData::castTypes( $val, AFPData::DInt );
 	}
 
 	protected function castFloat( $args ) {
@@ -1418,7 +1587,7 @@
 			throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, array(__METHOD__) );
 		$val = $args[0];
 		
-		return new AFPData( AFPData::DFloat, doubleval($val->data) );
+		return AFPData::castTypes( $val, AFPData::DFloat );
 	}
 	
 	protected function castBool( $args ) {
@@ -1426,7 +1595,7 @@
 			throw new AFPUserVisibleException( 'noparams', $this->mCur->pos, array(__METHOD__) );
 		$val = $args[0];
 		
-		return new AFPData( AFPData::DBool, (bool)($val->data) );
+		return AFPData::castTypes( $val, AFPData::DBool );
 	}
 	
 	public static function regexErrorHandler( $errno, $errstr, $errfile, $errline, $context ) {
Index: trunk/extensions/AbuseFilter/tests/wptest1.t
===================================================================
--- trunk/extensions/AbuseFilter/tests/wptest1.t	(revision 49217)
+++ trunk/extensions/AbuseFilter/tests/wptest1.t	(revision 49218)
@@ -1,5 +1,5 @@
 /* Filter 30 from English Wikipedia (large deletion from article by new editors) */

-user_groups_test := "*";

+user_groups_test := ["*"];

 new_size_test := 100;

 article_namespace_test := 0;

 edit_delta_test := -5000;

Index: trunk/extensions/AbuseFilter/tests/wptest2.t
===================================================================
--- trunk/extensions/AbuseFilter/tests/wptest2.t	(revision 49217)
+++ trunk/extensions/AbuseFilter/tests/wptest2.t	(revision 49218)
@@ -1,5 +1,5 @@
 /* Filter 61 from English Wikipedia (new user removing references) */

-user_groups_test := "*";

+user_groups_test := ["*"];

 new_size_test := 100;

 article_namespace_test := 0;

 edit_delta_test := -22;

Index: trunk/extensions/AbuseFilter/tests/wptest3.t
===================================================================
--- trunk/extensions/AbuseFilter/tests/wptest3.t	(revision 49217)
+++ trunk/extensions/AbuseFilter/tests/wptest3.t	(revision 49218)
@@ -1,5 +1,5 @@
 /* Filter 18 from English Wikipedia (test type edits from clicking on edit bar) */

-user_groups_test := "*";

+user_groups_test := ["*"];

 article_namespace_test := 0;

 added_lines_test := "Hello world! '''Bold text''' [http://www.example.com link title]";

 

Index: trunk/extensions/AbuseFilter/tests/arrays.r
===================================================================
--- trunk/extensions/AbuseFilter/tests/arrays.r	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/arrays.r	(revision 49218)
@@ -0,0 +1 @@
+MATCH

Index: trunk/extensions/AbuseFilter/tests/arrays.t
===================================================================
--- trunk/extensions/AbuseFilter/tests/arrays.t	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/arrays.t	(revision 49218)
@@ -0,0 +1,12 @@
+array1 := [ 'a', 'b', 'c', ];

+array2 := [];

+array2[] := 'd';

+array2[] := 'g';

+array2[] := 'f';

+array2[1] := 'e';

+

+array3 := array1 + array2;

+array4 := [ [ 1, 2, 3 ], [ 4, 5, 6 ] ];

+

+(string(array3) == "a\nb\nc\nd\ne\nf" & !('b' in array2) & array1 contains 'c' & [ false, !(1;0), null ][1] & length(array3) == 6 &

+	array4[1][1] == 5 )
\ No newline at end of file
Index: trunk/extensions/AbuseFilter/AbuseFilter.i18n.php
===================================================================
--- trunk/extensions/AbuseFilter/AbuseFilter.i18n.php	(revision 49217)
+++ trunk/extensions/AbuseFilter/AbuseFilter.i18n.php	(revision 49218)
@@ -338,6 +338,8 @@
 	Expected $3 {{PLURAL:$3|argument|arguments}}, got $4',
 	'abusefilter-exception-regexfailure' => 'Error in regular expression "$3" at character $1: "$2"',
 	'abusefilter-exception-overridebuiltin' => 'Illegal overriding of built-in variable "$2" at character $1.',
+	'abusefilter-exception-outofbounds' => 'Requesting non-existent list item $2 (list size = 3) at character $1.',
+	'abusefilter-exception-notlist' => 'Requesting array item of non-array at character $1.',
 
 	// Actions
 	'abusefilter-action-throttle' => 'Throttle',

Status & tagging log

Views
Toolbox