Extension:RigorousSearch/1.0

<?php
/*
 * SpecialRigorousSearch MediaWiki extension
 * by Johan the Ghost 1 Feb 2007
 *
 * Copyright (C) 2007 Johan the Ghost
 *
 * What it Is
 *
 * This extension implements a full-page search facility, by the tedious
 * method of individually searching the source of each page as stored in
 * the "page" / "text" tables -- *not* the FULLTEXT index kept in the
 * "searchindex" table for MySQL searches.
 *
 * This is VERY slow, and almost totally useless -- except that it allows
 * searching of the complete page source, not just the user-visible version
 * of the text stored in "searchindex".  So, for example, if you want to
 * search for hyperlinks to a particular web site, this will work, whereas
 * a MediaWiki search would not ("searchindex" includes link text, but not
 * the link URL).  You can also use it to search for particular markup tags.
 *
 * A useful application is to search for novice users making "http://" links
 * into the wiki instead of using regular wikilinks, which causes pages to
 * appear orphaned when they're not.
 *
 * Usage
 *
 * The extension creates a new special page, Special:RigorousSearch.
 * Because it uses a lot of resources, access is restricted to users with
 * "patrol" user rights.  (You can change this easily enough; search for
 * "patrol" below.)
 *
 * You can invoke this feature in multiple ways:
 *   * Go to Special:RigorousSearch, and fill in the search form.
 *   * Link to Special:RigorousSearch/mypattern to do an immediate
 *     search for "mypattern".  Due to URL processing, this won't work
 *     for patterns containing special characters, including multiple
 *     slashes (as in "http://...").
 *   * Link to
 *         Special:RigorousSearch?pattern=mypattern
 *     This also does an immediate search for "mypattern", but you can use
 *     "%2F" escapes for slashes, etc.
 *
 * Note that this is really slow.  You should only use it when necessary,
 * and you probably shouldn't use it on large wikis at all.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */

// Refuse to run when this file is requested directly rather than being
// loaded by MediaWiki.
if (!defined('MEDIAWIKI')) {
    die;
}

// The SpecialPage base class that the extension's class derives from.
require_once 'SpecialPage.php';

// Ask MediaWiki to call our setup function during initialisation.
$wgExtensionFunctions[] = 'wfRigorousSearch';

// Credits entry for this extension, filed under the "specialpage" group.
$wgExtensionCredits['specialpage'][] = array(
    'name'        => 'RigorousSearch',
    'author'      => 'Johan the Ghost',
    'url'         => 'http://www.mediawiki.org/wiki/Extension:RigorousSearch',
    'version'     => '1.0.0',
    'description' => 'Performs a search on full page text (including links etc.)',
);

/**
 * Extension setup function, registered in $wgExtensionFunctions.
 * Creates the special page object and adds it to MediaWiki's list of
 * special pages.
 */
function wfRigorousSearch() {
    SpecialPage::addPage( new SpecialRigorousSearch() );
}

/**
 * Special:RigorousSearch -- searches the stored source text of every page
 * in the selected namespaces for a literal pattern.
 */
class SpecialRigorousSearch extends SpecialPage {

    /**
     * Construct the extension and install it as a special page.
     */
    function SpecialRigorousSearch() {
        // Restrict access to users with "patrol" user rights.
        SpecialPage::SpecialPage('RigorousSearch', 'patrol');
    }

    /**
     * The special page handler function.
     *
     *    $param        the text specified after "/" in the page title,
     *                  if any; used as the pattern when the request has
     *                  no "pattern" value
     */
    function execute($param) {
        global $wgRequest, $wgOut;
        global $wgUser;

        // This function is so slow that we only let users with
        // "patrol" user rights do it.
        if (!$wgUser->isAllowed('patrol')) {
            $wgOut->permissionRequired('patrol');
            return;
        }

        // What are we searching for?  The request value wins over the
        // sub-page parameter.  Compare on string length rather than on
        // truthiness, so that a pattern of "0" is still searched for.
        $pattern = $wgRequest->getVal('pattern');
        if (strlen((string) $pattern) == 0) {
            $pattern = $param;
        }
        $havePattern = strlen((string) $pattern) > 0;

        // What namespaces are we searching?  If none are specified (eg.
        // this is the first invocation), then default to all.
        $spaces = SearchEngine::searchableNamespaces();
        $searchNs = $this->selectedNamespaces($wgRequest, $spaces);
        if (!$searchNs) {
            $searchNs = $spaces;
        }

        // Set up the output.
        $this->setHeaders();
        $wgOut->setPagetitle(wfMsg('rigoroussearch'));

        // If we have a search term, do the search and show the results.
        if ($havePattern) {
            $wgOut->addWikiText($this->searchResults((string) $pattern, $searchNs));
        }

        // Make the search form and output it (as HTML, otherwise the
        // form tags get suppressed).
        $wgOut->addHTML($this->searchForm($pattern, $spaces, $searchNs));
    }

    /**
     * Extract the selected namespaces settings from the request object,
     * returning a list of index numbers to search.
     *
     *    $request      the page request
     *    $spaces       the list of all searchable namespaces (id => name)
     *
     * Returns the namespace list pruned to just the selected ones,
     * or null if none are selected.
     */
    function selectedNamespaces(&$request, &$spaces) {
        $arr = array();
        foreach ($spaces as $ns => $name) {
            // A namespace is selected when the request carries "ns<id>".
            if ($request->getCheck('ns' . $ns)) {
                $arr[$ns] = $name;
            }
        }

        return count($arr) > 0 ? $arr : null;
    }

    /**
     * Perform a search for the given pattern, and return wiki markup
     * describing the results.
     *
     *    $pattern      the pattern to search for
     *    $spaces       the list of namespaces selected for searching
     */
    function searchResults($pattern, &$spaces) {
        $db = wfGetDB(DB_SLAVE);
        $out = '';

        // Confirm what we're searching for.
        // NOTE: we have to be careful about the nowiki tag; using it
        // in the normal way will break the code page in mediawiki.org,
        // so the tag is assembled from two pieces.  The pattern is
        // entity-escaped so it cannot close the nowiki section itself.
        $out .= "You rigorously searched for '''<" . "nowiki>" .
                htmlspecialchars($pattern) .
                "<" . "/nowiki>'''\n\n";

        // Perform the search, and get the match count and results list.
        $matches = 0;
        foreach ($spaces as $ns => $nsname) {
            $hits = $this->doSearch($db, $ns, $nsname, $pattern);
            $count = count($hits);

            // Output the results for this namespace, if any.
            if ($count != 0) {
                // Output the namespace header.  No leading space: a
                // wikitext line starting with a space is preformatted.
                if ($ns == 0) {
                    $head = "Article Namespace";
                } else {
                    $head = $nsname . " Namespace";
                }
                $out .= "'''" . $head . ":''' ";
                $out .= $this->matchCount($count) . ".\n";

                // Output the hit list as wikilinks.  The leading colon
                // makes category and image titles plain links.
                foreach ($hits as $hit) {
                    $out .= "* [[:" . $hit . "]]\n";
                }

                $out .= "\n\n\n";
                $matches += $count;
            }
        }

        // If we got no hits at all, say so.
        if ($matches == 0) {
            $out .= $this->matchCount($matches) . ".\n";
            $out .= "\n\n\n";
        }

        // Let's not bother with the TOC.
        $out .= "__NOTOC__\n";

        return $out;
    }

    /**
     * Perform a search for the given pattern in a specified namespace.
     *
     *    $db           Database handle
     *    $ns           Namespace ID to search
     *    $nsname       Name of the namespace (null for Main)
     *    $pattern      Pattern to search for
     *
     * Returns a list of the page titles which match; the list is empty
     * when nothing matches or the page query fails.
     */
    function doSearch(&$db, $ns, $nsname, $pattern) {
        $matchingPages = array();

        // Escape the pattern string.  escapeLike does normal MySQL escaping,
        // plus additional processing necessary for LIKE.
        $pattern = $db->escapeLike($pattern);

        // Select every page in the given namespace.  If we fail, return an
        // empty result (the caller counts and lists whatever we return).
        $pageCond = array('page_namespace' => $ns);
        $pageResult = $db->select('page', '*', $pageCond);
        if (!$pageResult) {
            return $matchingPages;
        }

        // Process each page we found.
        while ($pageRow = $db->fetchObject($pageResult)) {
            // Now select the revision data for the page's latest rev.
            // If we can't, pass on this page.
            $revCond = array('rev_id' => $pageRow->page_latest);
            $revRow = $db->selectRow('revision', 'rev_text_id', $revCond);
            if (!$revRow) {
                continue;
            }

            // Force the id to an integer before it goes into the SQL text.
            $text_id = intval($revRow->rev_text_id);

            // Now select the text for the revision, if it matches the pattern.
            $queryTxt = "SELECT old_text FROM " . $db->tableName('text') .
                        " WHERE old_id = " . $text_id .
                        " AND old_text LIKE '%" . $pattern . "%'";
            $textResult = $db->query($queryTxt);
            if (!$textResult) {
                continue;
            }

            // If it matches, list and count it.
            if ($db->numRows($textResult) > 0) {
                // Get the page title.
                $title = Title::makeTitle($ns, $pageRow->page_title);
                $link = $title->getFullText();

                // Add to the results.
                $matchingPages[] = $link;
            }

            $db->freeResult($textResult);
        }

        $db->freeResult($pageResult);

        return $matchingPages;
    }

    /**
     * Create and return the HTML markup for the search form.
     *
     *    $pattern          the default value for the pattern field
     *    $nameSpaces       the list of searchable namespaces
     *    $searchSpaces     the list of namespaces currently selected
     *
     * The field names match what execute() and selectedNamespaces() read
     * from the request: "pattern" for the text field and "ns<id>" for
     * each namespace checkbox.
     */
    function searchForm($pattern, &$nameSpaces, &$searchSpaces) {
        $out = '';

        // The form header, which links back to this page.  The form is
        // posted so that the action URL is used exactly as given.
        $title = SpecialPage::getTitleFor('RigorousSearch');
        $action = $title->escapeLocalURL();
        $out .= "<form method=\"post\" action=\"" . $action . "\">\n";

        // The search text field.
        $pattern = htmlspecialchars((string) $pattern, ENT_QUOTES);
        $out .= "<p>Search for <input type=\"text\" name=\"pattern\" value=\"" .
                $pattern . "\" size=\"40\" />\n";

        // The search button.
        $out .= "<input type=\"submit\" value=\"Search\" /></p>\n";

        // The table of namespace checkboxes, four to a row.
        if (count($nameSpaces) > 0) {
            $perRow = 4;
            $cell = 0;
            $out .= "<table>\n";
            foreach ($nameSpaces as $ns => $name) {
                if ($cell % $perRow == 0) {
                    $out .= "<tr>";
                }

                // The main namespace is labelled the same way as in the
                // search results.
                $label = ($ns == 0) ? "Article" : str_replace('_', ' ', $name);
                $checked = array_key_exists($ns, $searchSpaces)
                    ? " checked=\"checked\""
                    : "";
                $out .= "<td><label><input type=\"checkbox\" name=\"ns" .
                        intval($ns) . "\" value=\"1\"" . $checked . " /> " .
                        htmlspecialchars($label, ENT_QUOTES) . "</label></td>";

                $cell++;
                if ($cell % $perRow == 0) {
                    $out .= "</tr>\n";
                }
            }
            // Close a partially filled last row.
            if ($cell % $perRow != 0) {
                $out .= "</tr>\n";
            }
            $out .= "</table>\n";
        }

        $out .= "</form>\n";

        return $out;
    }

    /**
     * Make a message describing a match count.
     */
    function matchCount($num) {
        if ($num == 0) {
            return "No matches";
        }
        return $num . ($num == 1 ? " match" : " matches");
    }

}

?>