r43098 - Code Review

From MediaWiki.org

Jump to: navigation, search
Repository:MediaWiki
Revision:r43097 | r43098 (on ViewVC) | r43099 >
Date:22:40, 2 November 2008
Author:siebrand
Status:ok
Tags:
Comment:(bug 16120) Prevent death on Spam Blacklist trigger using API. Patch by Brad Jorsch.

An API edit attempt with Spam Blacklist firing will now output something instead of crashing:

<?xml version="1.0"?><api><edit spamblacklist="http://blacklistme.example.com&quot;
result="Failure" /></api>
Modified paths:

Diff [purge]

Index: trunk/extensions/SpamBlacklist/SpamBlacklist.php
===================================================================
--- trunk/extensions/SpamBlacklist/SpamBlacklist.php	(revision 43097)
+++ trunk/extensions/SpamBlacklist/SpamBlacklist.php	(revision 43098)
@@ -10,8 +10,8 @@
 $wgExtensionCredits['other'][] = array(
 	'name'           => 'SpamBlacklist',
 	'author'         => 'Tim Starling',
-	'svn-date' => '$LastChangedDate$',
-	'svn-revision' => '$LastChangedRevision$',
+	'svn-date'       => '$LastChangedDate$',
+	'svn-revision'   => '$LastChangedRevision$',
 	'url'            => 'http://www.mediawiki.org/wiki/Extension:SpamBlacklist',
 	'description'    => 'Regex-based anti-spam tool',
 	'descriptionmsg' => 'spam-blacklist-desc',
@@ -24,7 +24,6 @@
 global $wgSpamBlacklistFiles;
 global $wgSpamBlacklistSettings;
 
-
 $wgSpamBlacklistFiles = false;
 $wgSpamBlacklistSettings = array();
 
@@ -39,9 +38,9 @@
 	$wgFilterCallback = 'wfSpamBlacklistFilter';
 }
 
-
 $wgHooks['EditFilter'][] = 'wfSpamBlacklistValidate';
 $wgHooks['ArticleSaveComplete'][] = 'wfSpamBlacklistArticleSave';
+$wgHooks['APIEditBeforeSave'][] = 'wfSpamBlacklistFilterAPIEditBeforeSave';
 
 /**
  * Internationalization messages
@@ -74,21 +73,45 @@
  */
 function wfSpamBlacklistFilter( &$title, $text, $section, &$hookErr, $editSummary ) {
 	$spamObj = wfSpamBlacklistObject();
-	return $spamObj->filter( $title, $text, $section, $editSummary );
+	$ret = $spamObj->filter( $title, $text, $section, $editSummary );
+	if ( $ret !== false ) EditPage::spamPage( $ret );
+	return ( $ret !== false );
 }
 
 /**
  * Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter
  */
 function wfSpamBlacklistFilterMerged( &$editPage, $text, &$hookErr, $editSummary ) {
+	global $wgTitle;
+	if( is_null( $wgTitle ) ) {
+		# API mode
+		# wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist
+		return true;
+	}
+
 	$spamObj = wfSpamBlacklistObject();
 	$title = $editPage->mArticle->getTitle();
 	$ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );
+	if ( $ret !== false ) $editPage->spamPage( $ret );
 	// Return convention for hooks is the inverse of $wgFilterCallback
-	return !$ret;
+	return ( $ret === false );
 }
 
 /**
+ * Hook function for APIEditBeforeSave
+ */
+function wfSpamBlacklistFilterAPIEditBeforeSave( &$editPage, $text, &$resultArr ) {
+	$spamObj = wfSpamBlacklistObject();
+	$title = $editPage->mArticle->getTitle();
+	$ret = $spamObj->filter( $title, $text, '', '', $editPage );
+	if ( $ret!==false ) {
+		$resultArr['spamblacklist'] = $ret;
+	}
+	// Return convention for hooks is the inverse of $wgFilterCallback
+	return ( $ret === false );
+}
+
+/**
  * Hook function for EditFilter
  * Confirm that a local blacklist page being saved is valid,
  * and toss back a warning to the user if it isn't.
Index: trunk/extensions/SpamBlacklist/SpamBlacklist_body.php
===================================================================
--- trunk/extensions/SpamBlacklist/SpamBlacklist_body.php	(revision 43097)
+++ trunk/extensions/SpamBlacklist/SpamBlacklist_body.php	(revision 43098)
@@ -25,7 +25,7 @@
 	 */
 	function isLocalSource( $title ) {
 		global $wgDBname;
-		
+
 		if( $title->getNamespace() == NS_MEDIAWIKI ) {
 			$sources = array(
 				"Spam-blacklist",
@@ -34,10 +34,10 @@
 				return true;
 			}
 		}
-		
+
 		$thisHttp = $title->getFullUrl( 'action=raw' );
 		$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';
-		
+
 		foreach( $this->files as $fileName ) {
 			if ( preg_match( '/^DB: (\w*) (.*)$/', $fileName, $matches ) ) {
 				if ( $wgDBname == $matches[1] ) {
@@ -52,17 +52,17 @@
 				return true;
 			}
 		}
-		
+
 		return false;
 	}
-	
+
 	/**
 	 * @deprecated back-compat
 	 */
 	function getRegexes() {
 		return $this->getBlacklists();
 	}
-	
+
 	/**
 	 * Fetch local and (possibly cached) remote blacklists.
 	 * Will be cached locally across multiple invocations.
@@ -76,7 +76,7 @@
 		}
 		return $this->regexes;
 	}
-	
+
 	/**
 	 * Fetch (possibly cached) remote blacklists.
 	 * @return array
@@ -103,19 +103,19 @@
 			wfProfileOut( $fname );
 			return $cachedRegexes;
 		}
-		
+
 		$regexes = $this->buildSharedBlacklists();
 		$wgMemc->set( "$wgDBname:spam_blacklist_regexes", $regexes, $this->expiryTime );
-		
+
 		return $regexes;
 	}
-	
+
 	function clearCache() {
 		global $wgMemc, $wgDBname;
 		$wgMemc->delete( "$wgDBname:spam_blacklist_regexes" );
 		wfDebugLog( 'SpamBlacklist', "Spam blacklist local cache cleared.\n" );
 	}
-	
+
 	function buildSharedBlacklists() {
 		$regexes = array();
 		# Load lists
@@ -129,7 +129,7 @@
 				$text = file_get_contents( $fileName );
 				wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
 			}
-			
+
 			// Build a separate batch of regexes from each source.
 			// While in theory we could squeeze a little efficiency
 			// out of combining multiple sources in one regex, if
@@ -138,20 +138,20 @@
 			$regexes = array_merge( $regexes,
 				SpamRegexBatch::regexesFromText( $text, $fileName ) );
 		}
-		
+
 		return $regexes;
 	}
-	
+
 	function getHttpText( $fileName ) {
 		global $wgDBname, $messageMemc;
-		
+
 		# HTTP request
 		# To keep requests to a minimum, we save results into $messageMemc, which is
 		# similar to $wgMemc except almost certain to exist. By default, it is stored
 		# in the database
 		#
 		# There are two keys, when the warning key expires, a random thread will refresh
-		# the real key. This reduces the chance of multiple requests under high traffic 
+		# the real key. This reduces the chance of multiple requests under high traffic
 		# conditions.
 		$key = "spam_blacklist_file:$fileName";
 		$warningKey = "$wgDBname:spamfilewarning:$fileName";
@@ -171,11 +171,11 @@
 		}
 		return $httpText;
 	}
-	
+
 	static function getLocalBlacklists() {
 		return SpamRegexBatch::regexesFromMessage( 'spam-blacklist' );
 	}
-	
+
 	static function getWhitelists() {
 		return SpamRegexBatch::regexesFromMessage( 'spam-whitelist' );
 	}
@@ -186,8 +186,7 @@
 	 * @param string $section Section number or name
 	 * @param EditSummary $editSummary Edit summary if one exists, some people use urls there too
 	 * @param EditPage $editPage EditPage if EditFilterMerged was called, null otherwise
-	 * @return True if the edit should not be allowed, false otherwise
-	 * If the return value is true, an error will have been sent to $wgOut
+	 * @return Matched text if the edit should not be allowed, false otherwise
 	 */
 	function filter( &$title, $text, $section, $editsummary = '', EditPage &$editPage = null ) {
 		global $wgArticle, $wgVersion, $wgOut, $wgParser, $wgUser;
@@ -226,14 +225,14 @@
 			$newLinks = array_keys( $out->getExternalLinks() );
 			$oldLinks = $this->getCurrentLinks( $title );
 			$addedLinks = array_diff( $newLinks, $oldLinks );
-			
+
 			// We add the edit summary if one exists
 			if ( !empty( $editsummary ) ) $addedLinks[] = $editsummary;
-			
+
 			wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) );
 			wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) );
 			wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) );
-			
+
 			$links = implode( "\n", $addedLinks );
 
 			# Strip whitelisted URLs from the match
@@ -263,12 +262,7 @@
 					wfDebugLog( 'SpamBlacklist', "Match!\n" );
 					$ip = wfGetIP();
 					wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: {$matches[0]}\n" );
-					if ( $editPage ) {
-						$editPage->spamPage( $matches[0] );
-					} else {
-						EditPage::spamPage( $matches[0] );
-					}
-					$retVal = true;
+					$retVal = $matches[0];
 					break;
 				}
 			}
@@ -279,7 +273,7 @@
 		wfProfileOut( $fname );
 		return $retVal;
 	}
-	
+
 	/**
 	 * Look up the links currently in the article, so we can
 	 * ignore them on a second run.
@@ -289,7 +283,7 @@
 	function getCurrentLinks( $title ) {
 		$dbr =& wfGetDB( DB_SLAVE );
 		$id = $title->getArticleId(); // should be zero queries
-		$res = $dbr->select( 'externallinks', array( 'el_to' ), 
+		$res = $dbr->select( 'externallinks', array( 'el_to' ),
 			array( 'el_from' => $id ), __METHOD__ );
 		$links = array();
 		while ( $row = $dbr->fetchObject( $res ) ) {
@@ -440,7 +434,7 @@
 		}
 		return $regexes;
 	}
-	
+
 	/**
 	 * Confirm that a set of regexes is either empty or valid.
 	 * @param array $lines set of regexes
@@ -453,14 +447,14 @@
 			wfSuppressWarnings();
 			$ok = preg_match( $regex, '' );
 			wfRestoreWarnings();
-			
+
 			if( $ok === false ) {
 				return false;
 			}
 		}
 		return true;
 	}
-	
+
 	/**
 	 * Strip comments and whitespace, then remove blanks
 	 * @private
@@ -472,7 +466,7 @@
 					preg_replace( '/#.*$/', '',
 						$lines ) ) );
 	}
-	
+
 	/**
 	 * Do a sanity check on the batch regex.
 	 * @param lines unsanitized input lines
@@ -496,7 +490,7 @@
 			return SpamRegexBatch::buildRegexes( $lines, 0 );
 		}
 	}
-	
+
 	/**
 	 * @param array $lines
 	 * @return array of input lines which produce invalid input, or empty array if no problems
@@ -504,7 +498,7 @@
 	 */
 	static function getBadLines( $lines ) {
 		$lines = SpamRegexBatch::stripLines( $lines );
-		
+
 		$badLines = array();
 		foreach( $lines as $line ) {
 			if( substr( $line, -1, 1 ) == "\\" ) {
@@ -512,13 +506,13 @@
 				$badLines[] = $line;
 			}
 		}
-		
+
 		$regexes = SpamRegexBatch::buildRegexes( $lines );
 		if( SpamRegexBatch::validateRegexes( $regexes ) ) {
 			// No other problems!
 			return $badLines;
 		}
-		
+
 		// Something failed in the batch, so check them one by one.
 		foreach( $lines as $line ) {
 			$regexes = SpamRegexBatch::buildRegexes( array( $line ) );
@@ -528,7 +522,7 @@
 		}
 		return $badLines;
 	}
-	
+
 	/**
 	 * Build a set of regular expressions from the given multiline input text,
 	 * with empty lines and comments stripped.
@@ -542,7 +536,7 @@
 		$lines = explode( "\n", $source );
 		return SpamRegexBatch::buildSafeRegexes( $lines, $fileName );
 	}
-	
+
 	/**
 	 * Build a set of regular expressions from a MediaWiki message.
 	 * Will be correctly empty if the message isn't present.
Views
Toolbox