r49206 - Code Review

From MediaWiki.org

Jump to: navigation, search
Repository:MediaWiki
Revision:r49205 | r49206 (on ViewVC) | r49207 >
Date:11:47, 5 April 2009
Author:vasilievvv
Status:ok (Comments)
Tags:
Comment:Abuse filter:
* Introduce := operator for setting variables
* Throw an exception when user tries to override built-in variable
* Fix UTF-8 handling in fnmatch() fallback
* Copy three main abuse filters from enwiki to test suite
* Fix update.php integration
Modified paths:

Diff [purge]

Index: trunk/extensions/AbuseFilter/AbuseFilter.parser.php
===================================================================
--- trunk/extensions/AbuseFilter/AbuseFilter.parser.php	(revision 49205)
+++ trunk/extensions/AbuseFilter/AbuseFilter.parser.php	(revision 49206)
@@ -268,6 +268,16 @@
 	}
 }
 
+class AFPParserState {
+	var $pos, $token, $lastInput;
+
+	public function __construct( $token, $pos ) {
+		$this->token = $token;
+		$this->pos = $pos;
+		$this->lastInput = AbuseFilterParser::$lastHandledToken;
+	}
+}
+
 class AFPException extends MWException {}
 
 // Exceptions that we might conceivably want to report to ordinary users
@@ -325,6 +335,7 @@
 	'**', '*', 			// Multiplication/exponentiation
 	'/', '+', '-', '%', // Other arithmetic
 	'&', '|', '^', 		// Logic
+	':=',				// Setting
 	'?', ':', 			// Ternery
 	'<=','<', 			// Less than
 	'>=', '>', 			// Greater than
@@ -339,6 +350,8 @@
 	
 	static $funcCache = array();
 	
+	static $lastHandledToken = array();
+	
 	public function __construct() {
 		$this->resetState();
 	}
@@ -391,7 +404,18 @@
 		wfProfileOut( __METHOD__ );
 		return $this->mCur = $token;
 	}
-	
+
+	// getState() and setState() function allows parser state to be rollbacked to several tokens back
+	protected function getState() {
+		return new AFPParserState( $this->mCur, $this->mPos );
+	}
+
+	protected function setState( AFPParserState $state ) {
+		$this->mCur = $state->token;
+		$this->mPos = $state->pos;
+		self::$lastHandledToken = $state->lastInput;
+	}
+
 	protected function skipOverBraces() {
 		if( !($this->mCur->type == AFPToken::TBrace && $this->mCur->value == '(') || !$this->mShortCircuit ) {
 			return;
@@ -446,7 +470,7 @@
 	
 	/** Handles unexpected characters after the expression */
 	protected function doLevelEntry( &$result ) {
-		$this->doLevelSet( $result );
+		$this->doLevelSemicolon( $result );
 		
 		if( $this->mCur->type != AFPToken::TNone ) {
 			throw new AFPUserVisibleException( 'unexpectedatend', $this->mCur->pos, array($this->mCur->type) );
@@ -454,14 +478,32 @@
 	}
 	
 	/** Handles multiple expressions */
-	protected function doLevelSet( &$result ) {
+	protected function doLevelSemicolon( &$result ) {
 		do {
 			$this->move();
-			$lastPos = $this->mCur->pos;
-			$this->doLevelConditions( $result );
+			if( $this->mCur->type != AFPToken::TStatementSeparator )
+				$this->doLevelSet( $result );
 		} while ($this->mCur->type == AFPToken::TStatementSeparator);
 	}
 
+	protected function doLevelSet( &$result ) {
+		if( $this->mCur->type == AFPToken::TID ) {
+			$varname = $this->mCur->value;
+			$prev = $this->getState();
+			$this->move();
+
+			if( $this->mCur->type == AFPToken::TOp && $this->mCur->value == ':=' ) {
+				$this->move();
+				$this->doLevelSet( $result );
+				$this->setUserVariable( $varname, $result );
+				return;
+			} else {
+				$this->setState( $prev );
+			}
+		}
+		$this->doLevelConditions( $result );
+	}
+
 	protected function doLevelConditions( &$result ) {
 		if( $this->mCur->type == AFPToken::TKeyword && $this->mCur->value == 'if' ) {
 			$this->move();
@@ -708,7 +750,7 @@
 			if( $this->mShortCircuit ) {
 				$this->skipOverBraces();
 			} else {
-				$this->doLevelSet( $result );
+				$this->doLevelSemicolon( $result );
 			}
 			if( !($this->mCur->type == AFPToken::TBrace && $this->mCur->value == ')') )
 				throw new AFPUserVisibleException( 'expectednotfound',
@@ -741,7 +783,7 @@
 			$args = array();
 			do {
 				$r = new AFPData();
-				$this->doLevelSet( $r );
+				$this->doLevelSemicolon( $r );
 				$args[] = $r;
 			} while( $this->mCur->type == AFPToken::TComma );
 			
@@ -825,6 +867,8 @@
 		wfProfileOut( __METHOD__ );
 	}
 	
+	/* End of levels */
+
 	protected function getVarValue( $var ) {
 		wfProfileIn( __METHOD__ );
 		$var = strtolower($var);
@@ -842,21 +886,24 @@
 			return $val;
 		}
 	}
-	
-	/* End of levels */
-	
+
+	protected function setUserVariable( $name, $value ) {
+		$builderValues = AbuseFilter::getBuilderValues();
+		if( array_key_exists( $name, $builderValues['vars'] ) )
+			throw new AFPUserVisibleException( 'overridebuiltin', $this->mCur->pos, array( $name ) );
+		$this->mVars->setVar( $name, $value );
+	}
+
 	static function nextToken( $code, $offset ) {
 		$tok = '';
 		
-		static $lastInput = array();
-		
 		// Check for infinite loops
-		if ( $lastInput == array( $code, $offset ) ) {
+		if ( self::$lastHandledToken == array( $code, $offset ) ) {
 			// Should never happen
 			throw new AFPException( "Entered infinite loop. Offset $offset of $code" );
 		}
 		
-		$lastInput = array( $code, $offset );
+		self::$lastHandledToken = array( $code, $offset );
 			
 		// Spaces
 		$matches = array();
@@ -1345,7 +1392,7 @@
 		$varName = $args[0]->toString();
 		$value = $args[1];
 		
-		$this->mVars->setVar( $varName, $value );
+		$this->setUserVariable( $varName, $value );
 		
 		return $value;
 	}
@@ -1395,7 +1442,7 @@
 if(!function_exists('fnmatch')) {
 
     function fnmatch($pattern, $string) {
-        return preg_match("#^".strtr(preg_quote($pattern, '#'), array('\*' => '.*', '\?' => '.'))."$#i", $string);
+        return preg_match("#^".strtr(preg_quote($pattern, '#'), array('\*' => '.*', '\?' => '.'))."$#iu", $string);
     } // end
 
 } // end if
Index: trunk/extensions/AbuseFilter/tests/wptest1.r
===================================================================
--- trunk/extensions/AbuseFilter/tests/wptest1.r	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/wptest1.r	(revision 49206)
@@ -0,0 +1 @@
+MATCH

Index: trunk/extensions/AbuseFilter/tests/wptest2.r
===================================================================
--- trunk/extensions/AbuseFilter/tests/wptest2.r	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/wptest2.r	(revision 49206)
@@ -0,0 +1 @@
+MATCH

Index: trunk/extensions/AbuseFilter/tests/wptest3.r
===================================================================
--- trunk/extensions/AbuseFilter/tests/wptest3.r	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/wptest3.r	(revision 49206)
@@ -0,0 +1 @@
+MATCH

Index: trunk/extensions/AbuseFilter/tests/wptest1.t
===================================================================
--- trunk/extensions/AbuseFilter/tests/wptest1.t	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/wptest1.t	(revision 49206)
@@ -0,0 +1,9 @@
+/* Filter 30 from English Wikipedia (large deletion from article by new editors) */

+user_groups_test := "*";

+new_size_test := 100;

+article_namespace_test := 0;

+edit_delta_test := -5000;

+added_lines_test := '';

+

+!("autoconfirmed" in user_groups_test) & (new_size_test > 50) & (article_namespace_test == 0) &

+	(edit_delta_test < -2000) & !("#redirect" in lcase(added_lines_test))

Index: trunk/extensions/AbuseFilter/tests/wptest2.t
===================================================================
--- trunk/extensions/AbuseFilter/tests/wptest2.t	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/wptest2.t	(revision 49206)
@@ -0,0 +1,21 @@
+/* Filter 61 from English Wikipedia (new user removing references) */

+user_groups_test := "*";

+new_size_test := 100;

+article_namespace_test := 0;

+edit_delta_test := -22;

+added_lines_test := '<ref name="bah">test</ref> test2!';

+removed_lines_test := '<ref name="bah">test</ref><ref name="wah">test2</ref>';

+

+!("autoconfirmed" in user_groups_test)

+/* this edit_delta ignores large blankings that are treated by another filter */

+& edit_delta_test >= -1000

+& article_namespace_test == 0

+/* No added lines usually mean a blanking which is dealt with by other filter */

+& length(added_lines_test) != 0

+& !("#redirect" in lcase(added_lines_test))

+/*Counts of more reference tags are removed than added */

+& (rcount("(<ref>|<ref\sname|</ref>)",removed_lines_test) > rcount("(<ref>|<ref\sname|</ref>)",added_lines_test))

+/*Excludes changing to the named reference format and removing closing tags attached to formerly named refs. Unequality is to account for closing the first named tag */

+& !(rcount("<ref>",removed_lines_test) = rcount("<ref\sname",added_lines_test) | rcount("</ref>",removed_lines_test) <= rcount("<ref\sname",added_lines_test))

+/*Excludes removal of references to Wikipedia itself */

+& !(count("http://en.wikipedia.org",removed_lines_test) > count("http://en.wikipedia.org",added_lines_test))

Index: trunk/extensions/AbuseFilter/tests/wptest3.t
===================================================================
--- trunk/extensions/AbuseFilter/tests/wptest3.t	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/wptest3.t	(revision 49206)
@@ -0,0 +1,28 @@
+/* Filter 18 from English Wikipedia (test type edits from clicking on edit bar) */

+user_groups_test := "*";

+article_namespace_test := 0;

+added_lines_test := "Hello world! '''Bold text''' [http://www.example.com link title]";

+

+(article_namespace_test == 0) &

+!("autoconfirmed" in user_groups_test) &

+(contains_any(added_lines_test, 

+	"'''Bold text'''", 

+	"''Italic text''", 

+	"[[Link title]]", 

+	"[http://www.example.com link title]", 

+	"== Headline text ==", 

+	"[[File:Example.jpg]]", 

+	"[[Media:Example.ogg]]", 

+	"<math>Insert formula here</math>", 

+	"<nowiki>Insert non-formatted text here</nowiki>", 

+	"#REDIRECT [[Target page name]]", 

+	"<s>Strike-through text</s>", 

+	"<sup>Superscript text</sup>", 

+	"<sub>Subscript text</sub>", 

+	"<small>Small Text</small>", 

+	"<!-- Comment -->", 

+	"Image:Example.jpg|Caption", 

+	"<ref>Insert footnote text here</ref>",

+	"Ǣ ǣ ǖ ǘ ǚ ǜ Ă"

+))

+

Index: trunk/extensions/AbuseFilter/tests/vars.r
===================================================================
--- trunk/extensions/AbuseFilter/tests/vars.r	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/vars.r	(revision 49206)
@@ -0,0 +1 @@
+MATCH

Index: trunk/extensions/AbuseFilter/tests/vars.t
===================================================================
--- trunk/extensions/AbuseFilter/tests/vars.t	(revision 0)
+++ trunk/extensions/AbuseFilter/tests/vars.t	(revision 49206)
@@ -0,0 +1,5 @@
+/* Variables test */

+test_var1 := test_var2 := "aa";

+set( 'ResulT', set_var( 'TV3', "bb" ) );

+

+str_replace( test_var1, test_var2, tv3 ) == result;
\ No newline at end of file
Index: trunk/extensions/AbuseFilter/AbuseFilter.i18n.php
===================================================================
--- trunk/extensions/AbuseFilter/AbuseFilter.i18n.php	(revision 49205)
+++ trunk/extensions/AbuseFilter/AbuseFilter.i18n.php	(revision 49206)
@@ -337,6 +337,7 @@
 	'abusefilter-exception-notenoughargs' => 'Not enough arguments to function $2 called at character $1.
 	Expected $3 {{PLURAL:$3|argument|arguments}}, got $4',
 	'abusefilter-exception-regexfailure' => 'Error in regular expression "$3" at character $1: "$2"',
+	'abusefilter-exception-overridebuiltin' => 'Illegal overriding of built-in variable "$2" at character $1.',
 
 	// Actions
 	'abusefilter-action-throttle' => 'Throttle',
Index: trunk/extensions/AbuseFilter/AbuseFilter.hooks.php
===================================================================
--- trunk/extensions/AbuseFilter/AbuseFilter.hooks.php	(revision 49205)
+++ trunk/extensions/AbuseFilter/AbuseFilter.hooks.php	(revision 49206)
@@ -157,14 +157,12 @@
 		
 		// DB updates
 		if( $wgDBtype == 'mysql' ) {
-			$wgExtNewTables = array_merge( $wgExtNewTables,
-				    array(
-						  array( 'abuse_filter', "$dir/abusefilter.tables.sql" ),
-						  array( 'abuse_filter_history', "$dir/db_patches/patch-abuse_filter_history.sql" ),
-						  array( 'abuse_filter_history', 'afh_changed_fields', "$dir/db_patches/patch-afh_changed_fields.sql" ),
-						  array( 'abuse_filter', 'af_deleted', "$dir/db_patches/patch-af_deleted.sql" ),
-						  array( 'abuse_filter', 'af_actions', "$dir/db_patches/patch-af_actions.sql" ),
-					) );
+			$wgExtNewTables[] = array( 'abuse_filter', "$dir/abusefilter.tables.sql" );
+			$wgExtNewTables[] = array( 'abuse_filter_history', "$dir/db_patches/patch-abuse_filter_history.sql" );
+			$wgExtNewFields[] = array( 'abuse_filter_history', 'afh_changed_fields', "$dir/db_patches/patch-afh_changed_fields.sql" );
+			$wgExtNewFields[] = array( 'abuse_filter', 'af_deleted', "$dir/db_patches/patch-af_deleted.sql" );
+			$wgExtNewFields[] = array( 'abuse_filter', 'af_actions', "$dir/db_patches/patch-af_actions.sql" );
+			$wgExtNewFields[] = array( 'abuse_filter', 'af_global', "$dir/db_patches/patch-global_filters.sql" );
 		} else if ( $wgDBtype == 'postgres' ) {
 			$wgExtNewTables = array_merge( $wgExtNewTables,
 				    array(

Comments

#Comment by Werdna (Talk | contribs)   11:53, 5 April 2009

This looks okay to me. Thanks very much!

Status & tagging log

Views
Toolbox