Extension:Google Sitemap/Sources

Introduction
Please see the Google Sitemap Extension.

GoogleSitemap.php
 'Special:GoogleSitemap',	'description' => 'Adds a special page to create a XML Google Sitemap file, along with some reporting.',	'url' => 'http://www.mediawiki.org/wiki/Google_Sitemap_Extension',	'author' => 'Fran&amp;#231;ois Boutines-Vignard',	'version' => '0.0.4' );

function wfExtensionSpecialGoogleSitemap {	global $wgMessageCache; $wgMessageCache->addMessages(array('googlesitemap' => 'Google Sitemap')); $wgAvailableRights[] = 'googlesitemap'; $wgGroupPermissions['bureaucrat']['googlesitemap'] = true; SpecialPage::addPage( new SpecialPage( 'GoogleSitemap', 'userrights') ); }

?>

SpecialGoogleSitemap.php
<?php
 * 1) Special:GoogleSitemap Mediawiki extension
 * 2) Version 0.0.5
 * 3) Copyright (c) 2006 François Boutines-Vignard.
 * 4) A special page to generate Google Sitemap XML files.
 * 5) see http://www.google.com/schemas/sitemap/0.84/sitemap.xsd for details.
 * 6) This program is free software; you can redistribute it and/or modify
 * 7) it under the terms of the GNU General Public License as published by
 * 8) the Free Software Foundation; either version 2 of the License, or
 * 9) (at your option) any later version.
 * 10) This program is distributed in the hope that it will be useful,
 * 11) but WITHOUT ANY WARRANTY; without even the implied warranty of
 * 12) MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * 13) GNU General Public License for more details.
 * 14) You should have received a copy of the GNU General Public License along
 * 15) with this program; if not, write to the Free Software Foundation, Inc.,
 * 16) 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 * 17) http://www.gnu.org/copyleft/gpl.html
 * 18) Revisions:
 * 19) 0.0.2: date format correction, lighter markup. (2006/09/15)
 * 20) 0.0.3: added 'priority' and 'changefreq' tags management in the 'Options' form. (2006/09/16)
 * 21) 0.0.4: Unicode support, gmdate format, exponential and quadratic priorities. (2006/09/17)
 * 22) 0.0.5: Possibility to sort by last page revision. (2006/09/19)
 * 1) http://www.gnu.org/copyleft/gpl.html
 * 2) Revisions:
 * 3) 0.0.2: date format correction, lighter markup. (2006/09/15)
 * 4) 0.0.3: added 'priority' and 'changefreq' tags management in the 'Options' form. (2006/09/16)
 * 5) 0.0.4: Unicode support, gmdate format, exponential and quadratic priorities. (2006/09/17)
 * 6) 0.0.5: Possibility to sort by last page revision. (2006/09/19)
 * 1) 0.0.5: Possibility to sort by last page revision. (2006/09/19)

require_once("QueryPage.php");

/** class GoogleSitemapPage extends QueryPage {       var $file_name = "sitemap.xml"; // relative to $wgSitename (must be writable) var $sitemaps_url = "https://www.google.com/webmasters/sitemaps/login"; /*        * see http://www.google.com/schemas/sitemap/0.84/sitemap.xsd for more details */       var $DEFAULT_SITEMAP_HEADER = "\n\n\n\n"; var $DEFAULT_PRIORITY = 0.5; var $DEFAULT_CHANGE_FREQ = "daily"; var $file_handle; var $file_exists; var $offset,$limit; var $count, $cursor_pos = 0; var $form_action; var $article_priorities = "constant"; var $estimate_change_freq = false; var $sorting_criterion = "POP"; function GoogleSitemapPage( $offset, $limit ) {               global $wgRequest; $request =& $wgRequest; $file_name=$request->getText( 'wpFileName' ); if( $file_name ) {                       $this->file_name=$file_name; }               $change_freq = $request->getCheck( 'wpChangeFreq' ); if( $change_freq ) {                       $this->estimate_change_freq = $change_freq ; }               $priority = $request->getText( 'wpPriorityType' ); if( $priority ) {                       $this->article_priorities = $priority; }		$sorting_criterion = $request->getText( 'wpSortCriterion' ); if( $sorting_criterion ) {			$this->sorting_criterion = $sorting_criterion; }               $this->offset = $offset; $this->limit = $limit; }
 * 'GoogleSitemapPage' class
 * The XML file is ordered by decreasing popularity order (ie. maximum number of hits).
 * User should have the 'bureaucrat' rights.
 * Ignores Mediawiki (and Mediawiki talk) namespace.
 * Redirect pages are ignored.
 * Accepts 'limit' and 'offset' parameters,
 * eg: Special:GoogleSitemap&limit=5000 to build a file of the 5000 first pages.
 * eg: Special:GoogleSitemap&limit=5000 to build a file of the 5000 first pages.

function utf8_write( $handle, $data ) {               fwrite( $handle, utf8_encode( $data ) ) ; }

function getName {               return "GoogleSitemap"; }

function isExpensive {               return false; }

function isSyndicated {               return false; }

function initialize {               global $wgExtensionCredits; $this->file_exists = file_exists ( $this->file_name ) ; $this->file_handle = fopen( $this->file_name, 'w' ) or die( "Cannot write to '$this->file_name.'" ); $this->utf8_write( $this->file_handle, $this->DEFAULT_SITEMAP_HEADER ); $this->doQuery( $this->offset, $this->limit ); }       function finalize {               $close_tag = "\n "; $this->utf8_write( $this->file_handle, $close_tag ) ; fclose( $this->file_handle ); }       function getPageHeader # has text {               global $wgServer, $wgScriptPath,$wgSitename;

$url = "$wgServer$wgScriptPath/$this->file_name"; $misc_estimate = $this->estimate_change_freq?" and estimated change frequencies":""; $misc_file_action = $this->file_exists?"rebuild":"created"; $default_text="The sitemaps_url\" title=\"Sitemaps login\"> Google Sitemap's  $url was $misc_file_action for the following $this->count pages (with $this->article_priorities priority$misc_estimate)  . \n"; #English

$info="";

if( $this->offset != 0 ) {                       $class="errorbox"; $info=" This selection misses the $this->offset most viewed pages of $wgSitename, however ... \n"; #English }               else {                       $class="successbox"; }               return "$info$default_text  \n".$this->addPageOptions; }       function addPageOptions {               return "                         form_action'>                          Options                           \n\n"; }

function getSQL {               $dbr =& wfGetDB( DB_SLAVE ); $page = $dbr->tableName( 'page' ); $revision = $dbr->tableName( 'revision' ); $criterion = $this->sorting_criterion=="REV"?"rev_timestamp":"page_counter"; return "SELECT 'Popularpages' AS type,                               page_id AS id,                                page_namespace AS namespace,                                page_title AS title,                                ( MAX( rev_timestamp ) ) AS last_modification,                                $criterion AS value                                FROM $page, $revision                                        WHERE ( page_namespace <> 8 AND page_namespace <> 9 )                                        AND page_is_redirect = 0                                        AND rev_page = page_id                                GROUP BY page_id"; }       function sortDescending {               return true; }       function preprocessResults( $db, $res ) {               $this->count = $db->numRows($res); }       function formatResult( $skin, $result ) {               global $wgLang, $wgContLang, $wgServer; $title = Title::makeTitle( $result->namespace, $result->title ); $link = $skin->makeKnownLinkObj( $title, htmlspecialchars( $wgContLang->convert( $title->getPrefixedText ) ) );

$url = $title->escapeLocalURL; $this->form_action=$title->escapeLocalURL( 'action=submit' ); // The date must conform to ISO 8601 (http://www.w3.org/TR/NOTE-datetime) // UTC (Coordinated Universal Time) is used, google currently ignores time however $last_modification = gmdate( "Y-m-d\TH:i:s\Z", wfTimestamp( TS_UNIX, $result->last_modification ) ); $this->addURL( $wgServer, $url, $last_modification, $result->id ); ++$this->cursor_pos; return "{$link} ($wgServer$url) "; }

function addURL( $base, $url, $last_modification, $page_id ) # parameters must be valid XML data {               $result="  \n    $base$url \n    ".$this->getPriority." \n    $last_modification \n    ".$this->getChangeFreq($page_id)." \n  \n"; $this->utf8_write( $this->file_handle, $result ); }       function getPriority  # must return valid XML data {               $x = $this->cursor_pos / $this->count;

switch( $this->article_priorities ) {                       case "constant"    : return $this->DEFAULT_PRIORITY; case "linear"     : return 1.0 - $x; case "quadratic"  : return pow( 1.0 - $x, 2.0 ) ; case "cubic"      : return 3.0 * pow( ( 1.0 - $x ), 2.0 ) - 2.0 * pow( ( 1.0 - $x ), 3.0 ); case "exponential" : return exp( -6 * $x ); # exp(-6) ~= 0,002479 case "smooth"     : return cos( $x * pi / 2.0 ); case "random"     : return mt_rand / mt_getrandmax; case "reverse"    : return $x; default: return $this->DEFAULT_PRIORITY; }       }        function getChangeFreq( $page_id ) # must return valid XML data {               if( $this->estimate_change_freq ) {                       $dbr =& wfGetDB( DB_SLAVE );

$revision = $dbr->tableName( 'revision' );

$sql = "SELECT                                        MIN(rev_timestamp) AS creation_timestamp,                                        COUNT(rev_timestamp) AS revision_count                                        FROM $revision WHERE rev_page = $page_id"; $res = $dbr->query( $sql ); $count = $dbr->numRows( $res ); if( $count < 1 ) {                               return $this->DEFAULT_CHANGE_FREQ; }                       else {                                      $item1 =( $dbr->fetchObject( $res ) ); $cur = time ; // now $first = wfTimestamp( TS_UNIX, $item1->creation_timestamp ); // there were $item1->revision_count revisions in ($cur - $first) seconds $diff = ($cur - $first) / $item1->revision_count ; switch( true ) {                                       # case $diff < 60: return "always"; // I suspect google to ignore these pages more often... case $diff < 3600: return "hourly"; case $diff < 24*3600: return "daily"; case $diff < 7*24*3600: return "weekly"; case $diff < 30.33*24*3600: return "monthly"; case $diff < 365.25*24*3600: return "yearly"; default: return $this->DEFAULT_CHANGE_FREQ; # return "never"; // for archived pages only }                       }                }                else {                       return $this->DEFAULT_CHANGE_FREQ; }       } }

/** function wfSpecialGoogleSitemap {       list( $limit, $offset ) = wfCheckLimits; $gsitemap = new GoogleSitemapPage( $offset, $limit ); $gsitemap->initialize; $gsitemap->finalize; } ?>
 * Entry point.