Extension:RigorousSearch/1.0
From MediaWiki.org
<?php
#
# SpecialRigorousSearch MediaWiki extension
#
# by Johan the Ghost  1 Feb 2007
#
# Copyright (C) 2007 Johan the Ghost
#
# What it is
# ==========
#
# This extension implements a full-page search facility, by the tedious
# method of individually searching the source of each page as stored in
# the "page" / "text" tables -- *not* the FULLTEXT index kept in the
# "searchindex" table for MySQL searches.
#
# This is VERY slow, and almost totally useless -- except that it allows
# searching of the complete page source, not just the user-visible version
# of the text stored in "searchindex". So, for example, if you want to
# search for hyperlinks to a particular web site, this will work, whereas
# a MediaWiki search would not ("searchindex" includes link text, but not
# the link URL). You can also use it to search for particular markup tags.
#
# A useful application is to search for novice users making "http://" links
# into the wiki instead of using regular wikilinks, which causes pages to
# appear orphaned when they're not.
#
# Usage
# =====
#
# The extension creates a new special page, Special:RigorousSearch.
# Because it uses a lot of resources, access is restricted to users with
# "patrol" user rights. (You can change this easily enough; search for
# "patrol" below.)
#
# You can invoke this feature in multiple ways:
#
#   * Go to [[Special:RigorousSearch]], and fill in the search form.
#
#   * Link to [[Special:RigorousSearch/mypattern]] to do an immediate
#     search for "mypattern". Due to URL processing, this won't work
#     for patterns containing special characters, including multiple
#     slashes (as in "http://...").
#
#   * Link to
#     [http://x/w/index.php?title=Special:RigorousSearch&pattern=mypattern]
#     This also does an immediate search for "mypattern", but you can use
#     "%2F" escapes for slashes, etc.
#
# Note that this is really slow. You should only use it when necessary,
# and you probably shouldn't use it on large wikis at all.
#
# History
# =======
# 2007-05-02: 1.0.1 by Bananeweizen, made compatible with MediaWiki 1.7.x
#
#
############################################################################
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html

// Refuse to run outside of MediaWiki.
if( !defined( 'MEDIAWIKI' ) ) {
    die();
}

require_once "SpecialPage.php";

// Register the setup function and the extension credits.
$wgExtensionFunctions[] = "wfRigorousSearch";
$wgExtensionCredits['specialpage'][] = array(
    'name' => 'RigorousSearch',
    'author' => 'Johan the Ghost',
    'url' => 'http://www.mediawiki.org/wiki/Extension:RigorousSearch',
    'version'=> '1.0.1',
    'description' => 'Performs a search on full page text (including links etc.)',
);

/*
 * Extension setup function (listed in $wgExtensionFunctions above).
 * Adds the 'rigoroussearch' message used as the page title, and
 * registers the special page.
 */
function wfRigorousSearch() {
    global $wgMessageCache;

    $wgMessageCache->addMessages( array( 'rigoroussearch' => 'Rigorous search' ));
    SpecialPage::addPage( new SpecialRigorousSearch );
}

/*
 * Special:RigorousSearch -- searches the raw source text of the latest
 * revision of every page in the selected namespaces.
 */
class SpecialRigorousSearch extends SpecialPage {

    /*
     * Construct the extension and install it as a special page.
     */
    function SpecialRigorousSearch() {
        // Restrict access to users with "patrol" user rights.
        SpecialPage::SpecialPage('RigorousSearch', 'patrol');
    }

    /*
     * The special page handler function. Receives the parameter
     * specified after "/", if any.
     *   $param    the subpage part of the title, used as the search
     *             pattern when the request has no "pattern" value
     */
    function execute($param) {
        global $wgRequest, $wgOut;
        global $wgUser;

        // This function is so slow that we only let users with
        // "patrol" user rights do it.
        if (!$wgUser->isAllowed('patrol')) {
            $wgOut->permissionRequired('patrol');
            return;
        }

        // What are we searching for? The request value takes precedence
        // over the subpage parameter. We compare against the empty string
        // rather than testing truthiness, so that a pattern such as "0"
        // is not silently discarded.
        $pattern = (string)$wgRequest->getVal('pattern');
        if ($pattern === '')
            $pattern = (string)$param;

        // What namespaces are we searching? If none are specified (eg.
        // this is the first invocation), then default to all.
        $spaces = SearchEngine::searchableNamespaces();
        $searchNs = $this->selectedNamespaces($wgRequest, $spaces);
        if (!$searchNs)
            $searchNs = $spaces;

        // Set up the output.
        $this->setHeaders();
        $wgOut->setPageTitle(wfMsg('rigoroussearch'));

        // If we have a search term, do the search and show the results.
        if ($pattern !== '')
            $wgOut->addWikiText($this->searchResults($pattern, $searchNs));

        // Make the search form and output it (as HTML, otherwise the
        // form tags get suppressed).
        $wgOut->addHTML($this->searchForm($pattern, $spaces, $searchNs));
    }

    /*
     * Extract the selected namespaces settings from the request object,
     * returning a list of index numbers to search. We are given the
     * page request and the list of all searchable namespaces.
     *   $request  the page request; checked for "ns<N>" parameters
     *   $spaces   all searchable namespaces, as (index => name)
     *
     * Returns the namespace list pruned to just the selected ones,
     * or null if none are selected.
     */
    function selectedNamespaces(&$request, &$spaces) {
        $arr = array();
        foreach ($spaces as $ns => $name) {
            if ($request->getCheck('ns' . $ns))
                $arr[$ns] = $name;
        }

        return count($arr) > 0 ? $arr : null;
    }

    /*
     * Perform a search for the given pattern, and return wiki markup
     * describing the results.
     *   $pattern  the pattern to search for
     *   $spaces   the list of namespaces selected for searching,
     *             as (index => name)
     */
    function searchResults($pattern, &$spaces) {
        $db = wfGetDB(DB_SLAVE);
        $out = '';

        // Confirm what we're searching for.
        // NOTE: we have to be careful about the nowiki tag; using it
        // in the normal way will break the code page in mediawiki.org.
        $out .= "<div id=\"contentSub\">You rigorously searched for" .
                " '''<code><" . "nowiki>" . htmlspecialchars($pattern) .
                "<" . "/nowiki></code>'''</div>\n";

        // Perform the search, and get the match count and results list.
        $matches = 0;
        foreach ($spaces as $ns => $nsname) {
            $hits = $this->doSearch($db, $ns, $nsname, $pattern);
            $count = count($hits);

            // Output the results for this namespace, if any.
            if ($count != 0) {
                // Output the namespace header.
                if ($ns == 0)
                    $head = "Article Namespace";
                else
                    $head = $nsname . " Namespace";
                $out .= "<big>'''" . $head . ":'''</big> ";
                $out .= $this->matchCount($count) . ".\n";

                // Output the hit list. The leading colon forces a plain
                // link: without it a Category hit would categorise this
                // page, and an Image hit would embed the image.
                foreach ($hits as $hit)
                    $out .= "* [[:" . $hit . "]]\n";
                $out .= "\n\n----\n";

                $matches += $count;
            }
        }

        // If we got no hits at all, say so.
        if ($matches == 0) {
            $out .= $this->matchCount($matches) . ".\n";
            $out .= "\n\n----\n";
        }

        // Let's not bother with the TOC.
        $out .= "__NOTOC__\n";

        return $out;
    }

    /*
     * Perform a search for the given pattern in a specified namespace.
     *   $db       Database handle
     *   $ns       Namespace ID to search
     *   $nsname   Name of the namespace (null for Main); currently unused
     *   $pattern  Pattern to search for
     *
     * Returns a list of the page titles which match; the list is empty
     * if nothing matches or the page query fails.
     */
    function doSearch(&$db, $ns, $nsname, $pattern) {
        $matchingPages = array();

        // Escape the pattern string. escapeLike does normal MySQL escaping,
        // plus additional processing necessary for LIKE.
        $pattern = $db->escapeLike($pattern);

        // Select every page in the given namespace; we only need the
        // title and the ID of the latest revision. If we fail, return an
        // empty result (NOT a non-empty placeholder, which the caller
        // would count and list as hits).
        $pageCond = array('page_namespace' => $ns);
        $pageResult = $db->select('page',
                                  array('page_title', 'page_latest'),
                                  $pageCond);
        if (!$pageResult)
            return $matchingPages;

        // Process each page we found.
        while ($pageRow = $db->fetchObject($pageResult)) {
            // Now select the revision data for the page's latest rev.
            // If we can't, pass on this page.
            $revCond = array('rev_id' => $pageRow->page_latest);
            $revRow = $db->selectRow('revision', 'rev_text_id', $revCond);
            if (!$revRow)
                continue;

            // Force the ID to an integer, since it goes into raw SQL below.
            $text_id = intval($revRow->rev_text_id);

            // Now select the text for the revision, if it matches the pattern.
            // NOTE(review): this matches against old_text exactly as stored;
            // confirm the wiki does not store text compressed or externally.
            $queryTxt = "SELECT old_text FROM " . $db->tableName('text') .
                        " WHERE old_id = " . $text_id .
                        " AND old_text LIKE '%" . $pattern . "%'";
            $textResult = $db->query($queryTxt);
            if (!$textResult)
                continue;

            // If it matches, list and count it.
            if ($db->numRows($textResult) > 0) {
                // Get the page title.
                $title = Title::makeTitle($ns, $pageRow->page_title);
                $link = $title->getFullText();

                // Add to the results.
                $matchingPages[] = $link;
            }
            $db->freeResult($textResult);
        }
        $db->freeResult($pageResult);

        return $matchingPages;
    }

    /*
     * Create and return the HTML markup for the search form.
     *   $pattern       the default value for the pattern field
     *   $nameSpaces    the list of searchable namespaces
     *   $searchSpaces  the list of namespaces currently selected
     */
    function searchForm($pattern, &$nameSpaces, &$searchSpaces) {
        $out = '';

        // The form header, which links back to this page.
        $title = Title::makeTitle(NS_SPECIAL,'RigorousSearch');
        $action = $title->escapeLocalURL();
        $out .= "<form method=\"get\" action=\"$action\">\n";

        // Submitting a GET form replaces any query string in the action
        // URL, so when the local URL has the form "index.php?title=..."
        // the page title would be lost. Carry it in a hidden field.
        $titleKey = htmlspecialchars($title->getPrefixedDBkey());
        $out .= "<input type=\"hidden\" name=\"title\" value=\"$titleKey\" />\n";

        // The search text field.
        $pattern = htmlspecialchars((string)$pattern);
        $out .= "<p>Search for <input type=\"text\" name=\"pattern\"" .
                " value=\"$pattern\" size=\"36\" />\n";

        // The search button.
        $out .= "<input type=\"submit\" name=\"searchx\" value=\"Search\" /></p>\n";

        // The table of namespace checkboxes, eight to a row.
        $out .= "<p><table><tr>\n";
        $i = 0;
        foreach ($nameSpaces as $ns => $name) {
            if ($i > 0 && $i % 8 == 0)
                $out .= "</tr><tr>\n";

            $checked = array_key_exists($ns, $searchSpaces)
                     ? ' checked="checked"'
                     : '';

            // The main namespace has no name of its own.
            if (!$name)
                $name = "Main";
            else
                $name = str_replace('_', ' ', $name);
            $name = htmlspecialchars($name);

            $out .= "<td><label><input type='checkbox' value=\"1\" name=\"" .
                    "ns{$ns}\"{$checked} />{$name}</label></td>\n";
            ++$i;
        }
        $out .= "</tr></table></p>\n";

        $out .= "</form>\n";

        return $out;
    }

    /*
     * Make a message describing a match count.
     *   $num  the number of matches
     *
     * Returns "No matches", "1 match", or "<n> matches".
     */
    function matchCount($num) {
        if ($num == 0)
            return "No matches";

        return $num . ($num == 1 ? " match" : " matches");
    }
}
