Extension:RPED/Alpha release

From MediaWiki.org
Jump to: navigation, search

Contents

[edit] Files

[edit] RPED.php

<?php
/**
 * Remote Page Existence Detection (RPED) extension by Tisane
 * URL: http://www.mediawiki.org/wiki/Extension:RemotePageExistenceDetection
 *
 * This program is free software. You can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version. You can also redistribute it and/or
 * modify it under the terms of the Creative Commons Attribution 3.0 license.
 *
 * This extension looks up all the wikilinks on a page that would otherwise be red and compares them
 * to a table of page titles to determine whether they exist on a remote wiki. If so, the wikilink
 * turns blue and links to the page on the remote wiki.
 */
 
 
/* Alert the user that this is not a valid entry point to MediaWiki if they try to access the
special pages file directly.*/
 
// Entry-point guard: when this file is requested directly (MEDIAWIKI is not
// defined), print installation instructions and stop with exit status 1.
// The hint uses require_once with $IP so that it matches the instructions
// printed by RPED.hooks.php and does not depend on the working directory.
if ( !defined( 'MEDIAWIKI' ) ) {
    echo <<<EOT
To install the RPED extension, put the following line in LocalSettings.php:
require_once( "\$IP/extensions/RPED/RPED.php" );
EOT;
    exit( 1 );
}
 
// Credits entry for this extension, appended to the 'specialpage' list.
// 'descriptionmsg' names the i18n message key holding the description.
$wgExtensionCredits['specialpage'][] = array(
    'name'           => 'Remote Page Existence Detection',
    'author'         => 'Tisane',
    'url'            => 'http://www.mediawiki.org/wiki/Extension:RemotePageExistenceDetection',
    'description'    => 'Remote Page Existence Detection',
    'descriptionmsg' => 'rped-desc',
    'version'        => '0.0.0',
);
 
// Permissions: declare the 'RPED' right, deny it to everyone by default and
// grant it only to members of the 'RPED' group.
$wgAvailableRights[] = 'RPED';
$wgGroupPermissions['*']['RPED']    = false;
$wgGroupPermissions['RPED']['RPED'] = true; // Only RPED users can operate RPED
//$wgSpecialPages['RPED'] = 'Remote Page Existence Detection';

// $dir already ends with a slash, so file names are appended to it directly
// (no second separator).
$dir = dirname( __FILE__ ) . '/';
$wgAutoloadClasses['ApiRPED'] = $dir . 'ApiRPED.php';
$wgAutoloadClasses['RPEDHooks'] = $dir . 'RPED.hooks.php';
$wgExtensionMessagesFiles['RPED'] = $dir . 'RPED.i18n.php';
$wgExtensionAliasesFiles['RPED'] = $dir . 'RPED.alias.php';

// Hook and API module registration. The two parser-function hooks are
// disabled; they are written with '::' so they are valid callables if enabled.
$wgHooks['LoadExtensionSchemaUpdates'][] = 'RPEDHooks::RPEDCreateTable';
$wgHooks['LinkBegin'][] = 'RPEDHooks::wikipediaLink';
//$wgHooks['ParserFirstCallInit'][] = 'RPEDHooks::efRPEDParserFunction_Setup';
//$wgHooks['LanguageGetMagic'][]       = 'RPEDHooks::efRPEDParserFunction_Magic';
$wgAPIModules['rped'] = 'ApiRPED';

[edit] RPED.hooks.php

<?php
// Not a valid entry point: when MEDIAWIKI is not defined, print the
// installation hint and terminate with exit status 1.
if ( defined( 'MEDIAWIKI' ) === false ) {
    echo 'To install the RPED extension, put the following line in LocalSettings.php:' . "\n"
        . 'require_once( "$IP/extensions/RPED/RPED.php" );';
    exit( 1 );
}
 
/**
 * Hook handlers and parser-function callbacks of the RPED extension.
 *
 * All lookups go against the 'rped_pages' table (columns rped_page_id and
 * rped_page_title), whose titles are stored with underscores.
 */
class RPEDHooks {

    /**
     * LoadExtensionSchemaUpdates handler: registers rpedtable.sql with the updater.
     *
     * The table name must be the one the SQL file creates so the updater can
     * tell whether the patch was already applied; every query in this
     * extension uses 'rped_pages'.
     * NOTE(review): rpedtable.sql is not visible here; confirm it creates 'rped_pages'.
     *
     * @return bool Always true.
     */
    public static function RPEDCreateTable() {
        global $wgExtNewTables;
        $wgExtNewTables[] = array(
            'rped_pages',
            dirname( __FILE__ ) . '/rpedtable.sql' );
        return true;
    }

    /**
     * LinkBegin handler: turns a would-be red link into a link to en.wikipedia.org
     * when the target title is listed in 'rped_pages'.
     *
     * @param $skin          Skin/linker object supplied by the hook (unused).
     * @param $target        Title object of the link target.
     * @param &$text         Link contents as raw HTML, or null for the default text.
     * @param &$customAttribs Attributes for the <a> tag; 'style' may be set here.
     * @param &$query        Query-string parameters of the link.
     * @param &$options      Link options; 'known' and 'broken' are inspected.
     * @param &$ret          Receives the finished link HTML when false is returned.
     * @return bool True to let MediaWiki build the link itself; false when $ret
     *              holds a complete remote link that must be used instead.
     */
    public static function wikipediaLink( $skin, $target, &$text
        , &$customAttribs, &$query, &$options, &$ret ) {
        global $wgLocalStyle, $wgRemoteStyle;

        // Pages known to exist locally only receive the local style (plain
        // view links only, i.e. no action= or curid= in the query).
        if ( in_array( 'known', $options, true ) ) {
            if ( !isset( $query['action'] ) && !isset( $query['curid'] ) ) {
                $customAttribs['style'] = $wgLocalStyle;
            }
            return true;
        }

        // Anything that is not a red link is left alone.
        if ( !in_array( 'broken', $options, true ) ) {
            return true;
        }

        // Titles are stored with underscores instead of spaces.
        $title = str_replace( ' ', '_', $target->getFullText() );
        $dbr = wfGetDB( DB_SLAVE );
        $row = $dbr->selectRow( 'rped_pages', 'rped_page_id',
            array( 'rped_page_title' => $title ), __METHOD__ );
        if ( !$row ) {
            // Not on the remote wiki either: keep the normal red link.
            return true;
        }

        if ( isset( $wgRemoteStyle ) && $wgRemoteStyle != '' ) {
            $customAttribs['style'] = $wgRemoteStyle;
        }
        $attribs = is_array( $customAttribs ) ? $customAttribs : array();
        $attribs['href'] = 'http://en.wikipedia.org/wiki/' . wfUrlencode( $title );

        // $text is already HTML and must not be escaped a second time; null
        // means "use the title", which does need escaping.
        $html = ( $text === null )
            ? htmlspecialchars( $target->getPrefixedText() )
            : $text;
        $ret = Xml::tags( 'a', $attribs, $html );

        // Returning false is what makes the linker use $ret.
        return false;
    }

    /**
     * ParserFirstCallInit handler: binds the "ifexistremotely" parser function
     * to efRPEDParserFunction_Render() of this class.
     *
     * @param $parser Parser object offering setFunctionHook().
     * @return bool Always true.
     */
    public static function efRPEDParserFunction_Setup( $parser ) {
        # The renderer is a static method of this class, so it is registered as
        # a class/method pair rather than as a global function name.
        $parser->setFunctionHook( 'ifexistremotely',
            array( __CLASS__, 'efRPEDParserFunction_Render' ) );
        return true;
    }

    /**
     * LanguageGetMagic handler: declares the "ifexistremotely" magic word.
     *
     * @param &$magicWords Magic word list to extend.
     * @param $langCode    Language code (unused).
     * @return bool Always true, so that other extensions' handlers still run.
     */
    public static function efRPEDParserFunction_Magic( &$magicWords, $langCode ) {
        # First element: case sensitivity (0 = insensitive, 1 = sensitive).
        # Remaining elements: synonyms of the parser function.
        $magicWords['ifexistremotely'] = array( 0, 'ifexistremotely' );
        return true;
    }

    /**
     * Renders {{#ifexistremotely:title|then|else}}.
     *
     * @param $parser Parser object (unused).
     * @param $param1 Page title to look up in 'rped_pages'.
     * @param $param2 Text returned when the title is listed.
     * @param $param3 Text returned when the title is not listed.
     * @return string The HTML-escaped branch text, or '' when that branch is null.
     */
    public static function efRPEDParserFunction_Render( $parser, $param1 = '', $param2='', $param3='') {
        # NOTE(review): RPEDConfig.php is not visible here and nothing it may
        # define is read below; confirm whether this include is still needed.
        require( "extensions/RPED/RPEDConfig.php" );

        # Titles are stored with underscores instead of spaces.
        $title = str_replace( ' ', '_', $param1 );
        $dbr = wfGetDB( DB_SLAVE );
        $row = $dbr->selectRow( 'rped_pages', 'rped_page_id',
            array( 'rped_page_title' => $title ), __METHOD__ );

        $branch = $row ? $param2 : $param3;
        if ( !isset( $branch ) ) {
            return '';
        }
        # htmlspecialchars() only touches ASCII markup characters, so UTF-8
        # text survives regardless of PHP's default charset.
        return htmlspecialchars( $branch );
    }
}

[edit] ApiRPED.php

<?php
// Not a valid entry point: stop silently when MEDIAWIKI is not defined.
if ( defined( 'MEDIAWIKI' ) === false ) {
    exit;
}
 
/**
 * API module "rped": adds page titles to, and removes them from, the
 * 'rped_pages' table. Both parameters take a pipe-separated list of titles.
 */
class ApiRPED extends ApiBase {
    public function __construct($main, $action) {
        parent :: __construct($main, $action);
    }

    /**
     * Processes the 'insert' and 'delete' parameters.
     *
     * Callers without the 'RPED' right get the restriction message and
     * nothing is changed. A title is inserted only when it is not yet listed
     * and deleted only when it is.
     */
    public function execute() {
        global $wgUser;

        if (!$wgUser->isAllowed( 'RPED' ) ){
            $this->displayRestrictionError();
            return;
        }

        $dbr = wfGetDB( DB_SLAVE );
        $dbw = wfGetDB( DB_MASTER );
        $params = $this->extractRequestParams(false);
        foreach ($params as $key => $value) {
            if ($key !== 'insert' && $key !== 'delete') {
                continue;
            }
            // The title list is rebuilt for each parameter, so 'delete'
            // values can never be mixed into the 'insert' values.
            foreach (self::splitTitles($value) as $title) {
                // selectRow() yields false when no row matches.
                $exists = (bool)$dbr->selectRow('rped_pages', 'rped_page_title',
                    array('rped_page_title' => $title), __METHOD__);
                if ($key === 'insert' && !$exists) {
                    $dbw->insert('rped_pages',
                        array('rped_page_title' => $title), __METHOD__);
                } elseif ($key === 'delete' && $exists) {
                    $dbw->delete('rped_pages',
                        array('rped_page_title' => $title), __METHOD__);
                }
            }
        }
        return;
    }

    /**
     * Splits a pipe-separated parameter value into distinct titles.
     *
     * Empty segments are dropped and spaces become underscores, matching the
     * form in which titles are stored.
     *
     * @param $value Raw parameter value, or null when the parameter is absent.
     * @return array List of titles; empty when $value holds none.
     */
    private static function splitTitles($value) {
        $titles = array();
        foreach (explode('|', (string)$value) as $segment) {
            if ($segment !== '') {
                $titles[] = str_replace(' ', '_', $segment);
            }
        }
        return array_values(array_unique($titles));
    }

    /** @return array Accepted parameters; both are optional (default null). */
    public function getAllowedParams(){
        return array(
            'insert' => null,
            'delete' => null
            );
    }

    /** @return array Help text for each parameter. */
    public function getParamDescription(){
        return array (
            'insert' => 'page name to insert',
            'delete' => 'page name to delete'
            );
    }

    /** @return array Lines of the module description shown in the API help. */
    public function getDescription(){
        return array (
        'This module is used to insert data into, and delete data from, ',
        'the RPED page name table.'
        );
    }

    /** @return string Class name followed by the version-control Id keyword. */
    public function getVersion() {
        return __CLASS__ . ': $Id$';
    }

    /** Prints the message shown to callers lacking the 'RPED' right. */
    public function displayRestrictionError(){
        echo("Access denied.");
    }
}

[edit] RPED.i18n.php

<?php
// Interface messages of the RPED extension, keyed by language code.
$messages = array();

/** English */
$messages['en'] = array(
        'rped' => 'Remote Page Existence Detection',
        // Referenced by 'descriptionmsg' in the extension credits.
        'rped-desc' => 'Checks red links against a table of page titles that exist on a remote wiki',
);

[edit] RPED.alias.php

<?php
// Special page aliases, keyed first by language code and then by page name.
$aliases = array(
    /** English */
    'en' => array(
        'RPED' => array( 'RPED' ),
    ),
);

[edit] RPEDCentralServer.php

<?php
/**
 * Remote Page Existence Detection (RPED) Central Server by Tisane
 * URL: http://www.mediawiki.org/wiki/Extension:RemotePageExistenceDetection
 *
 * This program is free software. You can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version. You can also redistribute it and/or
 * modify it under the terms of the Creative Commons Attribution 3.0 license.
 *
 * This code accepts requests for page title data syndication from wikis that have installed
 * the RPED extension.
 */
 
// Connection settings and the password that unlocks the subscriber listing.
// These are placeholders and must be replaced before deployment.
// NOTE(review): this script relies on the legacy mysql_* extension.
$yourHost='localhost';
$yourUsername='wikiuser2';
$yourPassword='password';
$yourCentralServerPassword='password';
$con = mysql_connect($yourHost,$yourUsername,$yourPassword);
if (!$con){
        die('Could not connect: ' . mysql_error());
}
mysql_select_db("page_title_db", $con);

// The query string has the form  first|second|third :
//   first  - the central server password, or the subscribing wiki's base URL
//   second - the wiki's syndication password
//   third  - 'unsubscribe' to cancel a subscription
// Missing fields default to the empty string.
$url = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';
$myInput = explode('|', $url) + array('', '', '');

if ($myInput[0] === $yourCentralServerPassword){
        // Provide all wikis and their passwords as url|password|url|password...
        $result = mysql_query("SELECT * FROM subscribers", $con);
        $fields = array();
        if ($result){
                while ($row = mysql_fetch_array($result)){
                        $fields[] = htmlentities($row['subscriber_url']);
                        $fields[] = htmlentities($row['subscriber_password']);
                }
        }
        echo implode('|', $fields);
}
elseif ($myInput[0]===''){
        echo("Welcome to the RPED Central Server. This system is only designed to receive automated
                page title data subscription requests from wikis with the RPED Extension installed. For
                more information, please see
                <A href=\"http://www.mediawiki.org/wiki/Extension:RemotePageExistenceDetection\">
                the RPED site</A>. If you wish to terminate your wiki's subscription to RPED page title
                syndication, please use the Remote Page Existence Detection special page on your wiki or
                contact me at
                <A href=\"http://www.mediawiki.org/w/index.php?title=User_talk:Tisane&action=edit&section=new\">
                my Mediawiki talk page</A>. If you do so, please be sure to leave contact information so
                that I can communicate with you if necessary. Thank you.");
}
elseif (!preg_match('#^https?://#i', $myInput[0])){
        // The first field is fetched below, so only web URLs are accepted;
        // anything else could make this server read local or internal resources.
        echo('Access denied: the wiki address must start with http:// or https://');
} else {
        // Ask the wiki itself to confirm that it issued this request.
        $remoteURL=$myInput[0].'/extensions/RPED/RPEDAPIReader.php?'.$myInput[1];
        sleep(2); // Give time for the remote server to update its database
        $response=file_get_contents($remoteURL);
        if ($response===false){
                echo('Access denied: the wiki could not be contacted.');
        }
        elseif (strncmp($response, 'Access granted', 14)===0){ // Access granted
                $sql = 'CREATE TABLE IF NOT EXISTS subscribers(
                        p_ID int NOT NULL AUTO_INCREMENT,
                        PRIMARY KEY(p_ID),
                        subscriber_url text(1000),
                        subscriber_password text(1000),
                        subscriber_status text(1000)
                )';
                mysql_query($sql,$con);

                // Both values come from the request and are escaped before
                // being placed into any statement.
                $escapedUrl=mysql_real_escape_string($myInput[0],$con);
                $escapedPassword=mysql_real_escape_string($myInput[1],$con);
                $deleteSql=sprintf("DELETE FROM subscribers WHERE subscriber_url='%s'",$escapedUrl);
                $insertSql=sprintf("INSERT INTO subscribers (subscriber_url,subscriber_password,subscriber_status) "
                        ."VALUES ('%s', '%s', 'new')",$escapedUrl,$escapedPassword);

                if ($myInput[2]=='unsubscribe'){
                        mysql_query($deleteSql,$con);
                        echo('<big>Your wiki has been successfully unsubscribed.</big>');
                } else {
                        $myQuery=sprintf("SELECT * FROM subscribers WHERE subscriber_url='%s'",$escapedUrl);
                        $result = mysql_query($myQuery,$con);
                        $matches=0;
                        if ($result){
                                $matches=mysql_num_rows  ( $result  );
                        }
                        if ($matches==0){
                                mysql_query($insertSql,$con);
                                echo('<big>Congratulations!</big>
                                        Your wiki has been successfully subscribed for page title data syndication. '
                                        .'The central server will begin populating your database with page titles '
                                        .'shortly. This may take several hours to complete. <span style="color:red">'
                                        .'\'\'\'Please do not submit another such request unless there is a good reason '
                                        .'for doing so\'\'\'</span> (e.g. your server goes down and you miss some '
                                        .'syndication content) since that will start the data population process all over '
                                        .'from the beginning.');
                        } else {
                                // Re-subscription: replace the old row so the
                                // status goes back to 'new'.
                                mysql_query($deleteSql,$con);
                                mysql_query($insertSql,$con);
                                echo("Your request has been received, and the central server will shortly begin "
                                        ."repopulating your database with all of the page titles. This may take several "
                                        ."hours to complete. <span style=\"color:red\"> '''Please do not submit another "
                                        ."such request unless there is a good reason for doing so'''</span> (e.g. your "
                                        ."server goes down and you miss some syndication content) since that will start "
                                        ."the data population process all over from the beginning.");
                        }
                }
        }
        else{
                // Access was denied. The reply comes from a remote host, so it
                // is escaped before being shown.
                echo htmlspecialchars($response);
        }
}

[edit] RPEDFileReader.pl

# RPEDFileReader.pl by Tisane, http://www.mediawiki.org/wiki/User:Tisane
#
# This script is free software that is available under the terms of the Creative Commons
# Attribution 3.0 license and the current version of the GNU General Public License.
#
# The purpose of this script is to read a text file (specifically, the list of page titles from
# Wikipedia's data dump) and add each page title to a database table.
 
use strict;
use Mysql;
use DBI;
 
# Database credentials (placeholders; adjust for your installation).
my $sql_login = 'wikiuser2';
my $sql_pass = 'password';
my $db_name = 'page_title_db';
my $db_host = 'localhost'; # or remote mysql server name
# if left blank, this defaults to localhost

my $conn_string = "DBI:mysql:$db_name";
if ($db_host) { $conn_string .= ":$db_host"; }
my $dbh = DBI->connect($conn_string,$sql_login,$sql_pass)
        or die("Error: cannot connect to database '".$db_name."': ".$DBI::errstr."\n");

# Create the title table together with its index. IF NOT EXISTS keeps a
# second run from failing on the table that is already there.
my $sql = "CREATE TABLE IF NOT EXISTS page_title_table(
                p_ID int NOT NULL AUTO_INCREMENT,
                PRIMARY KEY(p_ID),
                page_title VARCHAR(256),
                INDEX pageind (page_title)
        )";
$dbh->do($sql);

# Read the dump one title per line and insert each title. The statement is
# prepared once; the placeholder takes care of quoting.
# NOTE: running the script again inserts every title a second time.
my $filename='enwiki-20100116-all-titles-in-ns0';
open(my $titles, '<', $filename) or
        die("Error: cannot open file '".$filename."'\n");
my $insert = $dbh->prepare("INSERT INTO page_title_table (page_title) VALUES (?)");
my $lnum = 1;
while( my $line = <$titles> ){
        chomp($line);
        #print "$lnum: $line\n";
        $insert->execute($line);
        $lnum++;
}

close $titles;
$dbh->disconnect;

[edit] RPEDGetDeletedAndRestoredPageTitles.pl

# RPEDGetDeletedAndRestoredPageTitles.pl by Tisane, http://www.mediawiki.org/wiki/User:Tisane
#
# This script is free software that is available under the terms of the Creative Commons
# Attribution 3.0 license and the current version of the GNU General Public License.
#
# The purpose of this script is to query the enwiki API for deleted and restored pages from the
# logs. Each page title is added to a database table. The script loops indefinitely, but sleeps
# for a certain number of seconds between queries.
 
use strict;
use warnings;
use Mysql;
use DBI;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
 
# Database credentials and table names (placeholders; adjust for your installation).
my $sql_login = 'wikiuser2';
my $sql_pass = 'password';
my $db_name = 'page_title_db';
my $db_host = 'localhost';
my $table_name1 = 'page_title_update_table';
my $table_name2 = 'syndication_table';

# Connect, and stop with a readable message when the database is unreachable.
my $conn_string = "DBI:mysql:$db_name";
if ($db_host) { $conn_string .= ":$db_host"; }
my $dbh = DBI->connect($conn_string,$sql_login,$sql_pass)
        or die("Error: cannot connect to database '".$db_name."': ".$DBI::errstr."\n");
 
# Create both log tables with their indexes in a single statement each.
# IF NOT EXISTS makes a restart of the script a no-op instead of a series of
# "table already exists" / "duplicate key name" errors.
for my $table_name ($table_name1, $table_name2){
        my $sql = "CREATE TABLE IF NOT EXISTS ".$table_name."(
                        p_ID int NOT NULL AUTO_INCREMENT,
                        PRIMARY KEY(p_ID),
                        logid VARCHAR(256),
                        ns VARCHAR(256),
                        page_title VARCHAR(256),
                        rcid VARCHAR(256),
                        action VARCHAR(256),
                        timestamp VARCHAR(256),
                        INDEX logid_ind (logid),
                        INDEX ns_ind (ns),
                        INDEX page_title_ind (page_title),
                        INDEX rcid_ind (rcid),
                        INDEX action_ind (action),
                        INDEX timestamp_ind (timestamp)
                )";
        $dbh->do($sql);
}
 
# User-Agent sent with every API request. It is assembled as one line: a line
# break inside the value would end up inside the HTTP header.
my $agentName="User:Tisane (http://www.mediawiki.org/wiki/User:Tisane) grabbing some "
        ."page title data off Wikipedia using RPEDGetDeletedAndRestoredPageTitles.pl (alpha)";
my $browser = LWP::UserAgent->new();
$browser->agent($agentName);
 
# First timestamp to query from; $lestart is moved forward as entries arrive.
my $initialTimestamp = '20100305104000';
my $lestart = $initialTimestamp;

# Log ids seen in the current and in the previous poll: 501 slots each,
# all starting out as 0.
my @currentLineRecord = (0) x 501;
my @pastLineRecord    = (0) x 501;

# Each field of the API's text output starts right after its "preface" marker
# and ends where the following marker begins.
my $logidPreface     = '[logid] => ';
my $logidEOL         = '[pageid]';
my $nsPreface        = '[ns] => ';
my $nsEOL            = '[title]';
my $titlePreface     = '[title] => ';
my $titleEOL         = '[type]';
my $actionPreface    = '[action] => ';
my $actionEOL        = '[timestamp]';
my $timestampPreface = '[timestamp] => ';
my $timestampEOL     = ')';

# Number of log entries per request and seconds to wait between requests.
my $lelimit     = 500;
my $sleepNumber = 12;
 
# Remove the trailing spaces, tabs and newlines that surround a field value.
sub rped_trim_trailing {
        my ($value) = @_;
        $value =~ s/[ \t\n]+\z//;
        return $value;
}

# Find the first "$preface value $terminator" at or after $from in $text.
# Returns (trimmed value, offset where the value starts), or an empty list
# when either marker is missing.
sub rped_extract_field {
        my ($text, $preface, $terminator, $from) = @_;
        my $start = index($text, $preface, $from);
        return if $start == -1;
        $start += length($preface);
        my $end = index($text, $terminator, $start);
        return if $end == -1;
        return (rped_trim_trailing(substr($text, $start, $end - $start)), $start);
}

# Statements are prepared once; placeholders keep data taken from the API
# response out of the SQL text.
my $logidExistsQuery = $dbh->prepare("SELECT COUNT(*) FROM `".$table_name1."` WHERE `logid` = ?");
my @insertQueries = map {
        $dbh->prepare("INSERT INTO $_ (logid,ns,page_title,action,timestamp) VALUES (?,?,?,?,?)")
} ($table_name1, $table_name2);

# Markers of the fields that follow the log id, in the order they appear.
my @followingFields = (
        [$nsPreface, $nsEOL],
        [$titlePreface, $titleEOL],
        [$actionPreface, $actionEOL],
        [$timestampPreface, $timestampEOL],
);

# Poll the deletion log forever, storing every entry not seen before.
while (1){
        my $URL="http://en.wikipedia.org/w/api.php?action=query&list=logevents&letype=delete&lelimit="
        .$lelimit."&lestart=".$lestart."&leprop=title|timestamp|ids|type&ledir=newer&format=txt";
        my $request = HTTP::Request->new(GET => $URL);
        my $response = $browser->request($request);
        if ($response->is_error()) {
                # Try again later and leave the records of the previous poll
                # untouched; overwriting them here would make the next
                # successful poll store its first entries twice.
                printf "%s\n", $response->status_line;
                sleep $sleepNumber;
                next;
        }
        my $contents = $response->content();

        # Log ids handled by the previous poll.
        my %seenLastPoll = map { $_ => 1 } @pastLineRecord;

        my $currentPosition=0;
        my $lineCount=0;
        my $timestampName="";
        my $cleared=0;

        while (1){
                my ($logidName, $logidPosition)
                        = rped_extract_field($contents,$logidPreface,$logidEOL,$currentPosition);
                last unless defined $logidPosition;

                my $identical=0;
                # While still at the initial timestamp, ask the database whether
                # the entry is already stored; the first entry that is not
                # stored ends these checks for the rest of this response.
                if ($lestart eq $initialTimestamp && $cleared==0){
                        $logidExistsQuery->execute($logidName);
                        my ($storedCount) = $logidExistsQuery->fetchrow_array();
                        $logidExistsQuery->finish();
                        $identical = $storedCount || 0;
                        if ($identical==0){
                                $cleared=1;
                        }
                }
                if (exists $seenLastPoll{$logidName}){
                        $identical=1;
                }

                if ($identical!=0){
                        print "(Duplicate) ".$logidName."\n";
                        $currentLineRecord[$lineCount++]=$logidName;
                        $currentPosition=$logidPosition+1;
                        next;
                }

                # Read ns, title, action and timestamp, each searched for
                # after the start of the field before it.
                my @fields=($logidName);
                my $fieldPosition=$logidPosition;
                for my $markers (@followingFields){
                        my ($fieldValue, $valuePosition)
                                = rped_extract_field($contents,$markers->[0],$markers->[1],$fieldPosition);
                        last unless defined $valuePosition;
                        push @fields, $fieldValue;
                        $fieldPosition=$valuePosition;
                }
                if (@fields<5){
                        # The response ends in the middle of an entry. Nothing
                        # is stored for it and it is not remembered as seen.
                        warn "Incomplete log entry for logid ".$logidName."; skipping the rest of this response\n";
                        last;
                }

                print join(" ",@fields)." \n";
                for my $insertQuery (@insertQueries){
                        $insertQuery->execute(@fields);
                }
                $timestampName=$fields[4];
                $currentLineRecord[$lineCount++]=$logidName;
                $currentPosition=$fieldPosition;
        }

        # Continue from the newest stored entry; keep the old start when this
        # poll stored nothing.
        if ($timestampName ne ""){
                $lestart=$timestampName;
        }
        # This poll's log ids become the duplicate filter of the next poll.
        for (my $count=0; $count<=500; $count++){
                $pastLineRecord[$count]=$currentLineRecord[$count];
                $currentLineRecord[$count]='';
        }
        sleep $sleepNumber;
}

[edit] RPEDGetNew.pl

# RPEDGetNew.pl by Tisane, http://www.mediawiki.org/wiki/User:Tisane
#
# This script is free software that is available under the terms of the Creative Commons
# Attribution 3.0 license and the current version of the GNU General Public License.
#
# The purpose of this script is to query the enwiki API for new pages as they appear on
# RecentChanges. Each page title is added to a database table. The script loops indefinitely,
# but sleeps for a certain number of seconds between queries.
 
use strict;
use warnings;
use Mysql;
use DBI;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
 
# Database connection settings and the two tables this script writes to.
my $sql_login = 'wikiuser2';
my $sql_pass = 'password';
my $db_name = 'page_title_db';
my $db_host = 'localhost';
my $table_name1 = 'page_title_update_table_new';
my $table_name2 = 'syndication_table';
 
# Build the DSN; the host part is appended only when a host is configured.
my $conn_string = "DBI:mysql:$db_name";
if ($db_host) { $conn_string .= ":$db_host"; }
# DBI->connect returns undef on failure. Stop here with the driver's message
# instead of failing later with "Can't call method ... on an undefined value".
my $dbh = DBI->connect($conn_string,$sql_login,$sql_pass)
        or die "Could not connect to database '$db_name': $DBI::errstr\n";
 
# Create both tables, and one index per data column, the first time the script
# runs. A table that already exists is left alone, so restarting the script does
# not re-issue CREATE TABLE / CREATE INDEX statements that can only fail.
for my $table_name ($table_name1, $table_name2){
        # If this lookup itself fails, $alreadyThere is undef and the CREATE
        # statements below are attempted anyway.
        my ($alreadyThere) = $dbh->selectrow_array(
                "SELECT COUNT(*) FROM information_schema.tables
                        WHERE table_schema = DATABASE() AND table_name = ?",
                undef, $table_name);
        next if $alreadyThere;

        my $sql = "CREATE TABLE ".$table_name."(
                        p_ID int NOT NULL AUTO_INCREMENT,
                        PRIMARY KEY(p_ID),
                        logid VARCHAR(256),
                        ns VARCHAR(256),
                        page_title VARCHAR(256),
                        rcid VARCHAR(256),
                        action VARCHAR(256),
                        timestamp VARCHAR(256)
                )";
        $dbh->do($sql);

        # Each index is named <column>_ind.
        for my $column (qw(logid ns page_title rcid action timestamp)){
                $dbh->do("CREATE INDEX ".$column."_ind on ".$table_name." (".$column.")");
        }
}
 
# User-Agent sent with every API request. The string is assembled from two
# pieces so the header value stays on one line; a literal that spans two source
# lines would embed a newline and the indentation in the header.
my $agentName="User:Tisane (http://www.mediawiki.org/wiki/User:Tisane) grabbing some "
        ."page title data off Wikipedia using RPEDGetNew.pl (alpha)";
my $browser = LWP::UserAgent->new();
$browser->agent($agentName);
 
# $rcstart begins at $initialTimestamp; the poll loop compares the two.
my $initialTimestamp = '20100115000000';
my $rcstart          = $initialTimestamp;

# rcid bookkeeping for the current and the previous pass: 501 slots each,
# every slot starting at 0.
my @currentLineRecord = (0) x 501;
my @pastLineRecord    = (0) x 501;

# Literal markers that surround each field in the API's format=txt output:
# a "preface" precedes the value and an "EOL" marker follows it.
my ($nsPreface,        $nsEOL)        = ('[ns] => ',        '[title]');
my ($titlePreface,     $titleEOL)     = ('[title] => ',     '[rcid]');
my ($rcidPreface,      $rcidEOL)      = ('[rcid] => ',      '[pageid]');
my ($timestampPreface, $timestampEOL) = ('[timestamp] => ', ')');

# Records requested per query, and seconds to sleep between queries.
my $rclimit     = 500;
my $sleepNumber = 12;
 
# Extract one field from the API's format=txt output.
#   $text      - response body to search
#   $preface   - literal text that precedes the value, e.g. '[ns] => '
#   $endMarker - literal text that follows the value, e.g. '[title]'
#   $from      - offset at which to start searching
# Returns (value, offset of the value's first character), with trailing spaces,
# tabs and newlines removed from the value. Returns an empty list when either
# marker is missing, so the caller can stop instead of slicing from offset -1.
sub extractField {
        my ($text, $preface, $endMarker, $from) = @_;
        my $start = index($text, $preface, $from);
        return if $start == -1;
        $start += length($preface);
        my $end = index($text, $endMarker, $start);
        return if $end == -1;
        my $value = substr($text, $start, $end - $start);
        $value =~ s/[ \t\n]+\z//;
        return ($value, $start);
}

# The timeout never changes, so it is set once rather than on every pass.
$browser->timeout(500);

# Poll forever: fetch the newest page creations, store the ones not seen before
# in both tables, then sleep. $rcstart is not sent to the API; it only decides
# whether the per-record database check below is still needed.
while (1){
        my $URL="http://en.wikipedia.org/w/api.php?action=query&list=recentchanges&rctype=new&rclimit="
                .$rclimit."&rcprop=title|timestamp|ids&format=txt";
        my $response = $browser->request(HTTP::Request->new(GET => $URL));
        if ($response->is_error()) {
                # Nothing usable came back. Skip the pass WITHOUT rotating the rcid
                # records; rotating here would blank @pastLineRecord and make the
                # next successful fetch look entirely new.
                printf "%s\n", $response->status_line;
                sleep $sleepNumber;
                next;
        }
        my $contents = $response->content();

        my $currentPosition=0;  # offset in $contents where the next search starts
        my $lineCount=0;        # records parsed so far in this pass
        my $timestampName="";   # timestamp of the last record stored in this pass

        while (1){
                # Fields are read in the order they appear: ns, title, rcid. A missing
                # marker means the response is exhausted (or truncated), so stop.
                my ($nsName, $nsPosition)
                        = extractField($contents, $nsPreface, $nsEOL, $currentPosition) or last;
                my ($titleName, $titlePosition)
                        = extractField($contents, $titlePreface, $titleEOL, $nsPosition) or last;
                my ($rcidName, $rcidPosition)
                        = extractField($contents, $rcidPreface, $rcidEOL, $titlePosition) or last;

                my $identical=0;
                if ($rcstart eq $initialTimestamp){
                        # Nothing has been stored since startup, so rows left by an earlier
                        # run may overlap this fetch: ask the database.
                        my ($alreadyStored) = $dbh->selectrow_array(
                                "SELECT COUNT(*) FROM `".$table_name1."` WHERE `rcid`=?",
                                undef, $rcidName);
                        $identical=$alreadyStored if defined $alreadyStored;
                }
                # Anything seen in the previous pass is a duplicate as well.
                if (grep { $rcidName eq $_ } @pastLineRecord[0..500]){
                        $identical=1;
                }

                if ($identical==0){
                        my ($timestampValue, $timestampPosition)
                                = extractField($contents, $timestampPreface, $timestampEOL, $rcidPosition)
                                or last;
                        # Assign the pass-level variable. Declaring a new "my" here would
                        # shadow it and $rcstart would never advance.
                        $timestampName=$timestampValue;
                        $currentPosition=$timestampPosition;
                        print $nsName." ".$titleName." ".$rcidName." ".$timestampName." \n";

                        # Bound parameters: the values come from a remote server and must
                        # never be spliced into the SQL text.
                        for my $table ($table_name1, $table_name2){
                                $dbh->do("INSERT INTO $table (ns,page_title,action,rcid,timestamp) VALUES (?,?,?,?,?)",
                                        undef, $nsName, $titleName, 'new', $rcidName, $timestampName);
                        }
                }
                else{
                        print "(Duplicate) ".$rcidName."\n";
                        $currentPosition=$rcidPosition+1;
                }
                # Remember the rcid only once the record has been fully handled.
                $currentLineRecord[$lineCount]=$rcidName;
                $lineCount++;
        }

        if ($timestampName ne ""){
                $rcstart=$timestampName;
        }
        # This pass becomes the "previous pass" for the next one.
        @pastLineRecord[0..500]=@currentLineRecord[0..500];
        @currentLineRecord[0..500]=('') x 501;
        print "\n";
        sleep $sleepNumber;
}

[edit] RPEDSyndicate.pl

# RPEDSyndicate.pl by Tisane, http://www.mediawiki.org/wiki/User:Tisane
#
# This script is free software that is available under the terms of the Creative Commons
# Attribution 3.0 license and the current version of the GNU General Public License.
#
# The purpose of this script is to send the contents of the syndication table out to all the
# subscribing wikis. It is part of the RPED extension project; for more details, see
# http://www.mediawiki.org/wiki/Extension:RemotePageExistenceDetection .
 
use strict;
use warnings;
use Mysql;
use Thread;
use DBI;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
 
# Database connection settings and table names.
my $sql_login = 'wikiuser2';
my $sql_pass = 'password';
my $db_name = 'page_title_db';
my $db_host = 'localhost';
my $table_name = 'syndication_table';
my $page_table_name='syndication_page_title_table';
my $sleepNumber=10;
# User-Agent sent to subscribers. The string is assembled from two pieces so the
# header value stays on one line; a literal that spans two source lines would
# embed a newline and the indentation in the header.
my $agentName="User:Tisane (http://www.mediawiki.org/wiki/User:Tisane) syndicating content to "
        ."subscribers of the RPED extension via the RPED central server.";
my $browser = LWP::UserAgent->new();
$browser->agent($agentName);
 
### Scalars that receive one fetched row: subscriber columns, then page columns
my ($url, $password, $status);
my ($logid_bound, $page_title_bound, $action_bound, $rcid_bound);

### Per-subscriber and per-page arrays; each starts out holding the single
### placeholder element 'Hello'
my (@subscriberURL, @subscriberPassword, @subscriberStatus);
my (@logid, @page_title, @action, @rcid);
for my $list (\@subscriberURL, \@subscriberPassword, \@subscriberStatus,
        \@logid, \@page_title, \@action, \@rcid){
        @$list = ('Hello');
}
 
# Build the DSN; the host part is appended only when a host is configured.
my $conn_string = "DBI:mysql:$db_name";
if ($db_host) { $conn_string .= ":$db_host"; }
# DBI->connect returns undef on failure. Stop here with the driver's message
# instead of failing later with "Can't call method ... on an undefined value".
my $dbh = DBI->connect($conn_string,$sql_login,$sql_pass)
        or die "Could not connect to database '$db_name': $DBI::errstr\n";
 
# Push one batch of title changes to every subscriber.
#   $payload         - '|'-separated "ins=Title" / "del=Title" entries
#   $recordCount     - number of entries in $payload (for the progress message)
#   $subscriberCount - number of usable entries in @subscriberURL / @subscriberPassword
# Returns how many subscribers replied with a body starting 'Access granted'.
sub sendBatch {
        my ($payload, $recordCount, $subscriberCount) = @_;
        my $successRate=0;
        print "Sending ".$recordCount." records to ".$subscriberCount." subscribers...\n";
        $browser->timeout(500);
        for my $subscriberIndex (0..$subscriberCount-1){
                print "Trying ".$subscriberURL[$subscriberIndex]." ... ";
                my $fullURL=$subscriberURL[$subscriberIndex].'/extensions/RPED/RPEDAPIReader.php?'
                        .$subscriberPassword[$subscriberIndex].'|'.$payload;
                my $response = $browser->request(HTTP::Request->new(GET => $fullURL));
                if ($response->is_error()) {printf "%s\n", $response->status_line;}
                my $contents = $response->content();
                if (substr($contents,0,14) eq 'Access granted'){
                        $successRate++;
                }
                print $contents."\n"; # Shows the subscriber's reply, including failures
        }
        print $successRate." wikis were successfully updated.\n";
        return $successRate;
}

# Syndication loop: read the subscriber list and the pending rows, send the rows
# to every subscriber in batches, delete the rows that were read, then sleep.
while(1){
        ### Load the subscriber list
        my $sth = $dbh->prepare( "
                                SELECT subscriber_url, subscriber_password, subscriber_status
                                FROM subscribers
                          " );
        $sth->execute(  );
        my $subscriberNumber=0;
        while ( my @row = $sth->fetchrow_array ) {
                ($subscriberURL[$subscriberNumber], $subscriberPassword[$subscriberNumber],
                        $subscriberStatus[$subscriberNumber]) = @row;
                $subscriberNumber++;
        }

        ### Load the pending title changes
        $sth = $dbh->prepare( "
                                SELECT logid, page_title, action, rcid
                                FROM ".$table_name );
        $sth->execute(  );
        my $pageNumber=0;
        while ( my @row = $sth->fetchrow_array ) {
                ($logid[$pageNumber], $page_title[$pageNumber],
                        $action[$pageNumber], $rcid[$pageNumber]) = @row;
                $pageNumber++;
        }

        if ($pageNumber==0){
                print "(Nothing to send)\n";
        } elsif ($subscriberNumber==0){
                # Rows are kept until there is somebody to send them to.
                print "(No subscribers)\n";
        } else {
                my $dataPortionOfURL='';
                my $recordsInBatch=0;
                for my $count (0..$pageNumber-1){
                        # 'new' and 'restore' add a title on the subscriber; any other
                        # action removes it.
                        my $verb = ($action[$count] eq 'new' || $action[$count] eq 'restore')
                                ? 'ins=' : 'del=';
                        $dataPortionOfURL.='|' if $recordsInBatch>0;
                        $dataPortionOfURL.=$verb.$page_title[$count];
                        $recordsInBatch++;
                        # Flush once the payload passes 3000 characters so the request URL
                        # does not keep growing.
                        if (length($dataPortionOfURL)>3000){
                                sendBatch($dataPortionOfURL, $recordsInBatch, $subscriberNumber);
                                $dataPortionOfURL='';
                                $recordsInBatch=0;
                        }
                }
                # Send the remainder; skipped when the last record already triggered a
                # flush, so subscribers never receive an empty payload.
                if ($recordsInBatch>0){
                        sendBatch($dataPortionOfURL, $recordsInBatch, $subscriberNumber);
                }

                # Remove exactly the rows read above, matching on logid and rcid. NULL
                # columns need IS NULL; real values are bound, not spliced into the SQL.
                # NOTE(review): rows are deleted even if no subscriber accepted them,
                # as before - confirm that dropping undelivered rows is intended.
                for my $count (0..$pageNumber-1){
                        my (@conditions, @values);
                        for my $column (['logid', $logid[$count]], ['rcid', $rcid[$count]]){
                                my ($name, $value) = @$column;
                                if (defined($value)){
                                        push @conditions, $name."=?";
                                        push @values, $value;
                                } else {
                                        push @conditions, $name." is NULL";
                                }
                        }
                        $dbh->do("DELETE FROM ".$table_name." WHERE ".join(" AND ", @conditions),
                                undef, @values);
                }
        }

        # NOTE(review): $sleepNumber (10) is configured but this pause is a literal 6;
        # left unchanged because the intended interval is unclear.
        sleep 6;
}
 
# sub sub1 { 
        # my @InboundParameters = @_;
        # my @massPageTitle=('Hello');
        # my $newWikiUrl=InboundParameters[0];
        # my $newWikiPassword=InboundParameters[1];
        # my $myOtherQuery="UPDATE subscribers SET subscriber_status='old' WHERE subscriber_url='"
                #.InboundParameters[0]."'";
        # $dbh->prepare($myOtherQuery);
        # $dbh->do($myOtherQuery);
        # print "Loading database for ".$newWikiUrl." ...\n";
 
        # my $ath = $dbh->prepare( "
                                # SELECT page_title
                                # FROM ".$page_table_name."
                          # " );
        # $ath->execute(  );
 
        ## Associate Perl variables with each output column
        # $ath->bind_col( 1, \$anotherPageTitle );
 
        ## Fetch the data from the result set
        # my $anotherCount=0;
        # while ( $ath->fetch ) {
                # $massPageTitle[$anotherCount]=$anotherPageTitle;
                # $anotherCount++;
        # }
        # $fullURL='';
        # $dataPortionOfURL='';
        # for (my $nextCount=0; $nextCount<$anotherCount; $nextCount++){
                # $dataPortionOfURL.=$massPageTitle[$nextCount];
                # if ($nextCount%1001==0){
                        # $fullURL=$newWikiUrl.'/extensions/RPED/RPEDAPIReader.php?'.$newWikiPassword.'|'
                                #.$dataPortionOfURL;
                        # $browser->timeout(500);
                        # my $request = HTTP::Request->new(GET => $fullURL);
                        # my $response = $browser->request($request);
                        # if ($response->is_error()) {printf "%s\n", $response->status_line;}
                        # my $contents = $response->content();
                        # if (substr($contents,0,14) ne 'Access granted'){
                                # print $contents;
                                # return;
                        # }
 
                # }
        # }
# }
Personal tools
Namespaces

Variants
Actions
Navigation
Support
Download
Development
Communication
Print/export
Toolbox