Extension:Google Sitemap/Sources

From MediaWiki.org
Jump to navigation Jump to search

Introduction[edit]

This is the source coding used for installing Google Sitemap Extension.

GoogleSitemap.php[edit]

<?php

$wgExtensionFunctions[] = 'wfExtensionSpecialGoogleSitemap';
$wgExtensionCredits['specialpage'][] = array (
	'name' => 'Special:GoogleSitemap',
	'description' => 'Adds a special page to create a XML Google Sitemap file, along with some reporting.',
	'url' => 'http://www.mediawiki.org/wiki/Extension:Google_Sitemap',
	'author' => 'Fran&ccedilois Boutines-Vignard',
	'version' => '0.0.5'
);

function wfExtensionSpecialGoogleSitemap() {
	global $wgMessageCache;
	$wgMessageCache->addMessages(array('googlesitemap' => 'Google Sitemap')); 
	
	$wgAvailableRights[] = 'googlesitemap';
	$wgGroupPermissions['bureaucrat']['googlesitemap'] = true;
	
	SpecialPage::addPage( new SpecialPage( 'GoogleSitemap' , 'userrights') );
}
?>

SpecialGoogleSitemap.php[edit]

<?php
#
# Special:GoogleSitemap MediaWiki extension
# Version 0.0.5
#
# Copyright © 2006 François Boutines-Vignard.
#
# A special page to generate Google Sitemap XML files.
# see http://www.google.com/schemas/sitemap/0.84/sitemap.xsd for details.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Revisions:
# 0.0.2: date format correction, lighter markup. (2006/09/15)
# 0.0.3: added 'priority' and 'changefreq' tags management in the 'Options' form. (2006/09/16)
# 0.0.4: Unicode support, gmdate format, exponential and quadratic priorities. (2006/09/17)
# 0.0.5: Possibility to sort by last page revision. (2006/09/19)

require_once("QueryPage.php");

/**
* 'GoogleSitemapPage' class
*
* The XML file is ordered by decreasing popularity order (ie. maximum number of hits).
* User should have the 'bureaucrat' rights.
* Ignores MediaWiki (and MediaWiki talk) namespace.
* Redirect pages are ignored.
* Accepts 'limit' and 'offset' parameters,
*       eg: Special:GoogleSitemap&limit=5000 to build a file of the 5000 first pages.
*/
class GoogleSitemapPage extends QueryPage {
        var $file_name = "sitemap.xml"; // relative to $wgSitename (must be writable)
        
        var $sitemaps_url = "https://www.google.com/webmasters/sitemaps/login";        
        
        /*
         * see http://www.google.com/schemas/sitemap/0.84/sitemap.xsd for more details
         */
        var $DEFAULT_SITEMAP_HEADER = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\n<urlset xmlns=\"http://www.google.com/schemas/sitemap/0.84\"\n\txmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n\txsi:schemaLocation=\"http://www.google.com/schemas/sitemap/0.84 http://www.google.com/schemas/sitemap/0.84/sitemap.xsd\">\n\n";
        var $DEFAULT_PRIORITY = 0.5;
        var $DEFAULT_CHANGE_FREQ = "daily";
        
        var $file_handle;
        var $file_exists;
        
        var $offset,$limit;
        var $count, $cursor_pos = 0;
        
        var $form_action;
        var $article_priorities = "constant";
        var $estimate_change_freq = false;
	var $sorting_criterion = "POP";
	
        function GoogleSitemapPage( $offset, $limit ) {
                global $wgRequest;
                $request =& $wgRequest;
                
                $file_name = $request->getText( 'wpFileName' );
                
                if( $file_name ) {
                        $this->file_name = $file_name;
                }
                
                $change_freq = $request->getCheck( 'wpChangeFreq' );
                
                if( $change_freq ) {
                        $this->estimate_change_freq = $change_freq ;
                }
                
                $priority = $request->getText( 'wpPriorityType' );
                
                if( $priority ) {
                        $this->article_priorities = $priority;
                }
		
		$sorting_criterion = $request->getText( 'wpSortCriterion' );
		
		if( $sorting_criterion ) {
			$this->sorting_criterion = $sorting_criterion;
		}
                
                $this->offset = $offset;
                $this->limit = $limit;
        }

        function utf8_write( $handle, $data ) {
                fwrite( $handle, utf8_encode( $data ) ) ;
        }

        function getName() {
                return "GoogleSitemap";
        }

        function isExpensive() {
                return false;
        }

        function isSyndicated() {
                return false;
        }

        function initialize() {
                global $wgExtensionCredits;
                
                $this->file_exists = file_exists ( $this->file_name ) ;
                
                $this->file_handle = fopen( $this->file_name, 'w' ) or die( "Cannot write to '$this->file_name.'" );
                
                $this->utf8_write( $this->file_handle, $this->DEFAULT_SITEMAP_HEADER ); 
                
                $this->doQuery( $this->offset, $this->limit );                         
        }
        
        function finalize() {
                $close_tag = "\n</urlset>";
                $this->utf8_write( $this->file_handle, $close_tag ) ;
                
                fclose( $this->file_handle );
        }
                        
        function getPageHeader() { // has text
                global $wgServer, $wgScriptPath, $wgSitename;

                $url = "$wgServer$wgScriptPath/$this->file_name";                    
                
                $misc_estimate = $this->estimate_change_freq?" and estimated change frequencies":"";
                $misc_file_action = $this->file_exists?"rebuild":"created";
                
                $default_text="The <a href=\"$this->sitemaps_url\" title=\"Sitemaps login\"><em>Google Sitemap's</em></a> <strong><a href=\"$url\" title=\"$wgSitename Sitemap\">$url</a></strong> was $misc_file_action for the following <strong>$this->count</strong> pages <small><em>(with $this->article_priorities priority$misc_estimate)</em></small>.<br />\n"; #English

                $info="";

                if( $this->offset != 0 ) {
                        $class="errorbox";
                        $info="<strong>This selection misses the $this->offset most viewed pages of $wgSitename, however</strong>...<br />\n"; #English
                } else {
                        $class = "successbox";
                }
                               
                return "<div class=\"$class\">$info$default_text</div><div class=\"visualClear\"></div>\n".$this->addPageOptions();
        }
        
        function addPageOptions() {
                return "
                        <div id='userloginForm'>                     
                         <form id='sitemaps' method='post' enctype='multipart/form-data' action='$this->form_action'>
                          <h2>Options</h2>
                          <table>
                           <tr>
                            <td>
                             <label for='wpFileName1'>File name</label>
                            </td>
                            <td>
                             <input tabindex='1' type='text' name='wpFileName' id='wpFileName1' title='file to overwrite' value='$this->file_name' disabled=true></input>
                            </td>
                           </tr>     
			   <tr>
                            <td>
                             <label for='wpSortCriterion1'>Sorting criterion</label>
                            </td>
			    <td>
			     <input type=radio name='wpSortCriterion' id='wpSortCriterion1' value='POP' checked='checked'>Popularity</input><br />
			     <input type=radio name='wpSortCriterion' id='wpSortCriterion1' value='REV'>Last revision</input>
			    </td>
			   </tr>
                           <tr>
                            <td>
                             <label for='wpChangeFreq1'>Estimate revision frequencies</label>
                            </td>
                            <td>
                             <input tabindex='2' type='checkbox' name='wpChangeFreq' id='wpChangeFreq1' title='daily, weekly, monthly...'></input>
                            </td>
                           </tr>                   
                           <tr>                    
                            <td>
                             <label for='wpPriorityType1'>Priority</label>
                            </td>
                            <td>
                             <select tabindex='3' name='wpPriorityType' id='wpPriorityType1' title='set relative priority based on page ranks'>
                                        <option>constant</option>
                                        <option>linear</option>
                                        <option>quadratic</option>
                                        <option>cubic</option>
                                        <option>exponential</option>
                                        <option>smooth</option>
                                        <option>random</option>
                                        <option>reverse</option>
                             </select>
                            </td>
                           </tr>
                           <tr>
                            <td>
                             <input tabindex='2' type='submit' value='Update Sitemap'></input>
                            </td>
                           </tr>
                          </table>
                         </form>
                        </div>
                        <div class=\"visualClear\"></div>
                        <br /><hr />\n\n";
        }

        function getSQL() {
                $dbr =& wfGetDB( DB_SLAVE );
                
                $page = $dbr->tableName( 'page' );
                $revision = $dbr->tableName( 'revision' );
		
		$criterion = $this->sorting_criterion=="REV"?"rev_timestamp":"page_counter";
		
                return "SELECT 'Popularpages' AS type,
                                page_id AS id,
                                page_namespace AS namespace,
                                page_title AS title,
                                ( MAX( rev_timestamp ) ) AS last_modification,
                                $criterion AS value
                                FROM $page, $revision
                                        WHERE ( page_namespace <> 8 AND page_namespace <> 9 )
                                        AND page_is_redirect = 0
                                        AND rev_page = page_id
                                GROUP BY page_id";
        }
        
        function sortDescending() {
                return true;
        }
        
        function preprocessResults( $db, $res ) {
                $this->count = $db->numRows($res);
        }
        
        function formatResult( $skin, $result ) {
                global $wgLang, $wgContLang, $wgServer;                
        
                $title = Title::makeTitle( $result->namespace, $result->title );
                $link = $skin->makeKnownLinkObj( $title, htmlspecialchars( $wgContLang->convert( $title->getPrefixedText() ) ) );

                $url = $title->escapeLocalURL();
                $this->form_action=$title->escapeLocalURL( 'action=submit' );
                
                // The date must conform to ISO 8601 (http://www.w3.org/TR/NOTE-datetime)
                // UTC (Coordinated Universal Time) is used, google currently ignores time however
                $last_modification = gmdate( "Y-m-d\TH:i:s\Z", wfTimestamp( TS_UNIX, $result->last_modification ) );
                
                $this->addURL( $wgServer, $url, $last_modification, $result->id );
                
                ++$this->cursor_pos;
                
                return "{$link} <small>($wgServer$url)</small>";
        }

        function addURL( $base, $url, $last_modification, $page_id ) { // parameters must be valid XML data
                $result="  <url>\n    <loc>$base$url</loc>\n    <priority>".$this->getPriority()."</priority>\n    <lastmod>$last_modification</lastmod>\n    <changefreq>".$this->getChangeFreq($page_id)."</changefreq>\n  </url>\n";
                $this->utf8_write( $this->file_handle, $result );
        }
        
        function getPriority() { // must return valid XML data
                $x = $this->cursor_pos / $this->count;

                switch( $this->article_priorities ) {
                        case "constant"    : return $this->DEFAULT_PRIORITY;
                        case "linear"      : return 1.0 - $x;
                        case "quadratic"   : return pow( 1.0 - $x, 2.0 ) ;
                        case "cubic"       : return 3.0 * pow( ( 1.0 - $x ), 2.0 ) - 2.0 * pow( ( 1.0 - $x ), 3.0 );
                        case "exponential" : return exp( -6 * $x ); # exp(-6) ~= 0,002479
                        case "smooth"      : return cos( $x * pi() / 2.0 );
                        case "random"      : return mt_rand() / mt_getrandmax();
                        case "reverse"     : return $x;
                        
                        default: return $this->DEFAULT_PRIORITY;
                }
        }
        
        function getChangeFreq( $page_id ) { // must return valid XML data
                if( $this->estimate_change_freq ) {
                        $dbr =& wfGetDB( DB_SLAVE );

                        $revision = $dbr->tableName( 'revision' );

                        $sql = "SELECT 
                                        MIN(rev_timestamp) AS creation_timestamp,
                                        COUNT(rev_timestamp) AS revision_count
                                        FROM $revision WHERE rev_page = $page_id";
                                
                        $res = $dbr->query( $sql );
                        $count = $dbr->numRows( $res );      
                        
                        if( $count < 1 ) {
                                return $this->DEFAULT_CHANGE_FREQ;
                        } else {       
                                $item1 =( $dbr->fetchObject( $res ) );
                                
                                $cur = time() ; // now
                                $first = wfTimestamp( TS_UNIX, $item1->creation_timestamp );
                                                                
                                // there were $item1->revision_count revisions in ($cur - $first) seconds 
                                $diff = ($cur - $first) / $item1->revision_count ;
                                
                                switch( true ) {
                                        # case $diff < 60: return "always"; // I suspect Google to ignore these pages more often...
                                        case $diff < 3600: return "hourly";
                                        case $diff < 24*3600: return "daily";
                                        case $diff < 7*24*3600: return "weekly";
                                        case $diff < 30.33*24*3600: return "monthly";
                                        case $diff < 365.25*24*3600: return "yearly";
                                        default: return $this->DEFAULT_CHANGE_FREQ;
                                        # return "never"; // for archived pages only                                                            
                                }
                        }
                } else {
                        return $this->DEFAULT_CHANGE_FREQ;
                }
        }
}

/**
 * Entry point.
 */
function wfSpecialGoogleSitemap() {
        list( $limit, $offset ) = wfCheckLimits();
               
        $gsitemap = new GoogleSitemapPage( $offset, $limit );
        
        $gsitemap->initialize();
        $gsitemap->finalize();
}