Index: trunk/extensions/SpamBlacklist/SpamBlacklist.php
===================================================================
--- trunk/extensions/SpamBlacklist/SpamBlacklist.php (revision 43097)
+++ trunk/extensions/SpamBlacklist/SpamBlacklist.php (revision 43098)
@@ -10,8 +10,8 @@
$wgExtensionCredits['other'][] = array(
'name' => 'SpamBlacklist',
'author' => 'Tim Starling',
- 'svn-date' => '$LastChangedDate$',
- 'svn-revision' => '$LastChangedRevision$',
+ 'svn-date' => '$LastChangedDate$',
+ 'svn-revision' => '$LastChangedRevision$',
'url' => 'http://www.mediawiki.org/wiki/Extension:SpamBlacklist',
'description' => 'Regex-based anti-spam tool',
'descriptionmsg' => 'spam-blacklist-desc',
@@ -24,7 +24,6 @@
global $wgSpamBlacklistFiles;
global $wgSpamBlacklistSettings;
-
$wgSpamBlacklistFiles = false;
$wgSpamBlacklistSettings = array();
@@ -39,9 +38,9 @@
$wgFilterCallback = 'wfSpamBlacklistFilter';
}
-
$wgHooks['EditFilter'][] = 'wfSpamBlacklistValidate';
$wgHooks['ArticleSaveComplete'][] = 'wfSpamBlacklistArticleSave';
+$wgHooks['APIEditBeforeSave'][] = 'wfSpamBlacklistFilterAPIEditBeforeSave';
/**
* Internationalization messages
@@ -74,21 +73,45 @@
*/
function wfSpamBlacklistFilter( &$title, $text, $section, &$hookErr, $editSummary ) {
$spamObj = wfSpamBlacklistObject();
- return $spamObj->filter( $title, $text, $section, $editSummary );
+ $ret = $spamObj->filter( $title, $text, $section, $editSummary );
+ if ( $ret !== false ) EditPage::spamPage( $ret );
+ return ( $ret !== false );
}
/**
* Hook function for EditFilterMerged, replaces wfSpamBlacklistFilter
*/
function wfSpamBlacklistFilterMerged( &$editPage, $text, &$hookErr, $editSummary ) {
+ global $wgTitle;
+ if( is_null( $wgTitle ) ) {
+ # API mode
+ # wfSpamBlacklistFilterAPIEditBeforeSave already checked the blacklist
+ return true;
+ }
+
$spamObj = wfSpamBlacklistObject();
$title = $editPage->mArticle->getTitle();
$ret = $spamObj->filter( $title, $text, '', $editSummary, $editPage );
+ if ( $ret !== false ) $editPage->spamPage( $ret );
// Return convention for hooks is the inverse of $wgFilterCallback
- return !$ret;
+ return ( $ret === false );
}
/**
+ * Hook function for APIEditBeforeSave
+ */
+function wfSpamBlacklistFilterAPIEditBeforeSave( &$editPage, $text, &$resultArr ) {
+ $spamObj = wfSpamBlacklistObject();
+ $title = $editPage->mArticle->getTitle();
+ $ret = $spamObj->filter( $title, $text, '', '', $editPage );
+ if ( $ret!==false ) {
+ $resultArr['spamblacklist'] = $ret;
+ }
+ // Return convention for hooks is the inverse of $wgFilterCallback
+ return ( $ret === false );
+}
+
+/**
* Hook function for EditFilter
* Confirm that a local blacklist page being saved is valid,
* and toss back a warning to the user if it isn't.
Index: trunk/extensions/SpamBlacklist/SpamBlacklist_body.php
===================================================================
--- trunk/extensions/SpamBlacklist/SpamBlacklist_body.php (revision 43097)
+++ trunk/extensions/SpamBlacklist/SpamBlacklist_body.php (revision 43098)
@@ -25,7 +25,7 @@
*/
function isLocalSource( $title ) {
global $wgDBname;
-
+
if( $title->getNamespace() == NS_MEDIAWIKI ) {
$sources = array(
"Spam-blacklist",
@@ -34,10 +34,10 @@
return true;
}
}
-
+
$thisHttp = $title->getFullUrl( 'action=raw' );
$thisHttpRegex = '/^' . preg_quote( $thisHttp, '/' ) . '(?:&.*)?$/';
-
+
foreach( $this->files as $fileName ) {
if ( preg_match( '/^DB: (\w*) (.*)$/', $fileName, $matches ) ) {
if ( $wgDBname == $matches[1] ) {
@@ -52,17 +52,17 @@
return true;
}
}
-
+
return false;
}
-
+
/**
* @deprecated back-compat
*/
function getRegexes() {
return $this->getBlacklists();
}
-
+
/**
* Fetch local and (possibly cached) remote blacklists.
* Will be cached locally across multiple invocations.
@@ -76,7 +76,7 @@
}
return $this->regexes;
}
-
+
/**
* Fetch (possibly cached) remote blacklists.
* @return array
@@ -103,19 +103,19 @@
wfProfileOut( $fname );
return $cachedRegexes;
}
-
+
$regexes = $this->buildSharedBlacklists();
$wgMemc->set( "$wgDBname:spam_blacklist_regexes", $regexes, $this->expiryTime );
-
+
return $regexes;
}
-
+
function clearCache() {
global $wgMemc, $wgDBname;
$wgMemc->delete( "$wgDBname:spam_blacklist_regexes" );
wfDebugLog( 'SpamBlacklist', "Spam blacklist local cache cleared.\n" );
}
-
+
function buildSharedBlacklists() {
$regexes = array();
# Load lists
@@ -129,7 +129,7 @@
$text = file_get_contents( $fileName );
wfDebugLog( 'SpamBlacklist', "got from file $fileName\n" );
}
-
+
// Build a separate batch of regexes from each source.
// While in theory we could squeeze a little efficiency
// out of combining multiple sources in one regex, if
@@ -138,20 +138,20 @@
$regexes = array_merge( $regexes,
SpamRegexBatch::regexesFromText( $text, $fileName ) );
}
-
+
return $regexes;
}
-
+
function getHttpText( $fileName ) {
global $wgDBname, $messageMemc;
-
+
# HTTP request
# To keep requests to a minimum, we save results into $messageMemc, which is
# similar to $wgMemc except almost certain to exist. By default, it is stored
# in the database
#
# There are two keys, when the warning key expires, a random thread will refresh
- # the real key. This reduces the chance of multiple requests under high traffic
+ # the real key. This reduces the chance of multiple requests under high traffic
# conditions.
$key = "spam_blacklist_file:$fileName";
$warningKey = "$wgDBname:spamfilewarning:$fileName";
@@ -171,11 +171,11 @@
}
return $httpText;
}
-
+
static function getLocalBlacklists() {
return SpamRegexBatch::regexesFromMessage( 'spam-blacklist' );
}
-
+
static function getWhitelists() {
return SpamRegexBatch::regexesFromMessage( 'spam-whitelist' );
}
@@ -186,8 +186,7 @@
* @param string $section Section number or name
* @param EditSummary $editSummary Edit summary if one exists, some people use urls there too
* @param EditPage $editPage EditPage if EditFilterMerged was called, null otherwise
- * @return True if the edit should not be allowed, false otherwise
- * If the return value is true, an error will have been sent to $wgOut
+ * @return Matched text if the edit should not be allowed, false otherwise
*/
function filter( &$title, $text, $section, $editsummary = '', EditPage &$editPage = null ) {
global $wgArticle, $wgVersion, $wgOut, $wgParser, $wgUser;
@@ -226,14 +225,14 @@
$newLinks = array_keys( $out->getExternalLinks() );
$oldLinks = $this->getCurrentLinks( $title );
$addedLinks = array_diff( $newLinks, $oldLinks );
-
+
// We add the edit summary if one exists
if ( !empty( $editsummary ) ) $addedLinks[] = $editsummary;
-
+
wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', ', $oldLinks ) );
wfDebugLog( 'SpamBlacklist', "New URLs: " . implode( ', ', $newLinks ) );
wfDebugLog( 'SpamBlacklist', "Added URLs: " . implode( ', ', $addedLinks ) );
-
+
$links = implode( "\n", $addedLinks );
# Strip whitelisted URLs from the match
@@ -263,12 +262,7 @@
wfDebugLog( 'SpamBlacklist', "Match!\n" );
$ip = wfGetIP();
wfDebugLog( 'SpamBlacklistHit', "$ip caught submitting spam: {$matches[0]}\n" );
- if ( $editPage ) {
- $editPage->spamPage( $matches[0] );
- } else {
- EditPage::spamPage( $matches[0] );
- }
- $retVal = true;
+ $retVal = $matches[0];
break;
}
}
@@ -279,7 +273,7 @@
wfProfileOut( $fname );
return $retVal;
}
-
+
/**
* Look up the links currently in the article, so we can
* ignore them on a second run.
@@ -289,7 +283,7 @@
function getCurrentLinks( $title ) {
$dbr =& wfGetDB( DB_SLAVE );
$id = $title->getArticleId(); // should be zero queries
- $res = $dbr->select( 'externallinks', array( 'el_to' ),
+ $res = $dbr->select( 'externallinks', array( 'el_to' ),
array( 'el_from' => $id ), __METHOD__ );
$links = array();
while ( $row = $dbr->fetchObject( $res ) ) {
@@ -440,7 +434,7 @@
}
return $regexes;
}
-
+
/**
* Confirm that a set of regexes is either empty or valid.
* @param array $lines set of regexes
@@ -453,14 +447,14 @@
wfSuppressWarnings();
$ok = preg_match( $regex, '' );
wfRestoreWarnings();
-
+
if( $ok === false ) {
return false;
}
}
return true;
}
-
+
/**
* Strip comments and whitespace, then remove blanks
* @private
@@ -472,7 +466,7 @@
preg_replace( '/#.*$/', '',
$lines ) ) );
}
-
+
/**
* Do a sanity check on the batch regex.
* @param lines unsanitized input lines
@@ -496,7 +490,7 @@
return SpamRegexBatch::buildRegexes( $lines, 0 );
}
}
-
+
/**
* @param array $lines
* @return array of input lines which produce invalid input, or empty array if no problems
@@ -504,7 +498,7 @@
*/
static function getBadLines( $lines ) {
$lines = SpamRegexBatch::stripLines( $lines );
-
+
$badLines = array();
foreach( $lines as $line ) {
if( substr( $line, -1, 1 ) == "\\" ) {
@@ -512,13 +506,13 @@
$badLines[] = $line;
}
}
-
+
$regexes = SpamRegexBatch::buildRegexes( $lines );
if( SpamRegexBatch::validateRegexes( $regexes ) ) {
// No other problems!
return $badLines;
}
-
+
// Something failed in the batch, so check them one by one.
foreach( $lines as $line ) {
$regexes = SpamRegexBatch::buildRegexes( array( $line ) );
@@ -528,7 +522,7 @@
}
return $badLines;
}
-
+
/**
* Build a set of regular expressions from the given multiline input text,
* with empty lines and comments stripped.
@@ -542,7 +536,7 @@
$lines = explode( "\n", $source );
return SpamRegexBatch::buildSafeRegexes( $lines, $fileName );
}
-
+
/**
* Build a set of regular expressions from a MediaWiki message.
* Will be correctly empty if the message isn't present.