User:Leucosticte/RCQ

-- Queue of recent-changes events pulled from the remote wiki.
-- Columns prefixed mbrcq_rc_ come from the API recentchanges list, columns
-- prefixed mbrcq_push_ are filled in from the push step, and the remaining
-- columns come from the other API modules named in each column comment.
CREATE TABLE mb_rc_queue (
    -- Primary key
    mbrcq_id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,
    -- anon (from api rc)
    mbrcq_anon tinyint UNSIGNED NOT NULL DEFAULT 0,
    -- content (from api rev)
    mbrcq_text mediumblob NOT NULL,
    -- redirect (from api rc)
    mbrcq_redirect tinyint UNSIGNED NOT NULL DEFAULT 0,
    -- sha1 (from api rev)
    mbrcq_sha1 varbinary(32) NOT NULL DEFAULT '',
    -- tags (from api rc)
    mbrcq_tags VARCHAR(255) BINARY NOT NULL DEFAULT '',
    -- user (from api us). This is the user who is being created, not creating
    mbrcq_user_text VARCHAR(255) BINARY NOT NULL,
    -- userid (from api us). This is the user who is being created, not creating
    mbrcq_user INT UNSIGNED NOT NULL DEFAULT 0,
    -- bot
    mbrcq_rc_bot tinyint UNSIGNED NOT NULL DEFAULT 0,
    -- comment
    mbrcq_rc_comment VARCHAR(255) BINARY NOT NULL DEFAULT '',
    -- id
    mbrcq_rc_id INT UNSIGNED NOT NULL DEFAULT 0,
    -- logaction
    mbrcq_rc_log_action varbinary(255) NULL DEFAULT NULL,
    -- logid
    mbrcq_rc_logid INT UNSIGNED NOT NULL,
    -- logtype
    mbrcq_rc_logtype varbinary(32) NOT NULL DEFAULT '',
    -- minor
    mbrcq_rc_minor tinyint UNSIGNED NOT NULL DEFAULT 0,
    -- new
    mbrcq_rc_new tinyint UNSIGNED NOT NULL DEFAULT 0,
    -- newlen
    mbrcq_rc_new_len INT,
    -- ns
    mbrcq_rc_namespace INT NOT NULL DEFAULT 0,
    -- oldlen
    mbrcq_rc_old_len INT,
    -- pageid
    mbrcq_rc_cur_id INT UNSIGNED NOT NULL DEFAULT 0,
    -- patrolled
    mbrcq_rc_patrolled tinyint UNSIGNED NOT NULL DEFAULT 0,
    -- revid
    mbrcq_rc_thisoldid INT UNSIGNED NOT NULL DEFAULT 0,
    -- revoldid
    -- NOTE(review): "lastoldidid" looks like a typo for "lastoldid", but the
    -- pull script inserts into this exact column name, so it is kept as is.
    mbrcq_rc_lastoldidid INT UNSIGNED NOT NULL DEFAULT 0,
    -- timestamp
    mbrcq_rc_timestamp varbinary(14) NOT NULL DEFAULT '',
    -- title (512, because it is prefixed by the namespace)
    mbrcq_rc_title VARCHAR(512) BINARY NOT NULL DEFAULT '',
    -- type
    mbrcq_rc_type varbinary(255) NULL DEFAULT NULL,
    -- userid
    mbrcq_rc_user INT UNSIGNED NOT NULL,
    -- user
    mbrcq_rc_user_text VARCHAR(255) BINARY NOT NULL,
    -- contentmodel (obtained from push)
    mbrcq_push_contentmodel varbinary(32) DEFAULT NULL,
    -- newrevid (from push)
    mbrcq_push_newrevid INT UNSIGNED NOT NULL DEFAULT 0,
    -- newtimestamp (from push)
    mbrcq_push_newtimestamp varbinary(14) NOT NULL DEFAULT '',
    -- oldrevid (from push)
    mbrcq_push_oldrevid INT UNSIGNED NOT NULL DEFAULT 0,
    -- pageid (from push)
    mbrcq_push_page_id INT UNSIGNED NOT NULL DEFAULT 0,
    -- result (from push)
    mbrcq_push_result VARCHAR(255) BINARY NOT NULL DEFAULT '',
    -- title (from push) (512, because it is prefixed by the namespace)
    mbrcq_push_title VARCHAR(512) BINARY NOT NULL DEFAULT '',
    -- user (from push)
    mbrcq_push_user INT UNSIGNED NOT NULL
);

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */

// Command-line options:
//   "q" (queue)  Required. Three options: -qrc, -qrev, -qus
//   "r" (repeat) Optional. Three options: -ro (onetime, the default),
//                -rd (continuous, using defaults), or -r<microseconds>
//                (continuous, sleeping that many microseconds between passes)
//   "s"          Optional. Starting timestamp, 14 digits (YYYYMMDDHHMMSS)
$usage = 'Usage: php mirrorpullbot.php -q<rc|rev|us> '
    . '[-r<o|d|microseconds>] [-s<starttimestamp>]' . "\n";
$options = getopt( 'q:r:s:' );
$allowableOptions['q'] = array(
    'rc',
    'rev',
    'us'
);
$allowableOptions['r'] = array(
    'o',
    'd',
);

// getopt() returns false on failure, and an array value when an option is
// given more than once; neither can be interpreted, so show the usage.
if ( !is_array( $options ) ) {
    die( $usage );
}
foreach ( $options as $optionValue ) {
    if ( !is_string( $optionValue ) ) {
        die( $usage );
    }
}

if ( !isset( $options['q'] ) ) {
    die( $usage );
}
if ( !isset( $options['r'] ) ) {
    $options['r'] = 'o'; // Default to onetime
}
if ( !in_array( $options['q'], $allowableOptions['q'], true ) ) {
    die( $usage );
}
if ( !in_array( $options['r'], $allowableOptions['r'], true ) ) {
    // Microseconds option. Only plain digits are accepted: usleep() needs a
    // non-negative integer, which is_numeric() alone would not guarantee.
    if ( !ctype_digit( $options['r'] ) ) {
        echo "You did not select an acceptable option for r\n";
        die( $usage );
    }
    $sleepMicroseconds = (int)$options['r'];
}
if ( isset( $options['s'] ) ) {
    // Digits only, so exponent or signed forms such as "1e13" are rejected.
    if ( !ctype_digit( $options['s'] ) ) {
        die( "Starting timestamp supposed to be an integer\n" );
    }
    if ( $options['s'] < 10000000000000 || $options['s'] > 30000000000000 ) {
        die( "Error: Timestamp must be after C.E. 1000 and before C.E. 3000\n" );
    }
    $startingTimestamp = $options['s'];
}

// Make sure the credentials file is present before doing any work;
// it is only checked for existence here, not loaded.
$passwordFile = 'inclubot_passwords.php';
file_exists( $passwordFile ) or die( "File $passwordFile does not exist\n" );

// Get the defaults
$defaultsFile = 'inclubot_defaults.php';
if ( !file_exists( $defaultsFile ) ) {
    die( "File $defaultsFile does not exist\n" );
}
// Include the very file that was just checked, rather than a second
// hard-coded copy of its name.
include( $defaultsFile );
if ( $options['r'] == 'd' ) {
    // Without a configured interval the sleep between passes would silently
    // become zero, so refuse to run continuously in that case.
    if ( !isset( $defaultMicroseconds['pull'][$options['q']] ) ) {
        die( "No default pull interval for queue {$options['q']} in $defaultsFile\n" );
    }
    $sleepMicroseconds = $defaultMicroseconds['pull'][$options['q']];
}

// Prepare failure log file (opened in append mode so earlier entries are kept)
$failures = fopen( $failureLogFile, 'a' );
if ( $failures === false ) {
    // Failed statements are written to this handle later on; without it they
    // would be lost, so stop here instead.
    die( "Could not open failure log file $failureLogFile\n" );
}

// Connect to database
$con = new mysqli( $host, $dbUser, $dbPass );
// "new mysqli" always yields an object, so a failed connection has to be
// detected through connect_error rather than by testing the object itself.
if ( $con->connect_error ) {
    die( 'Could not connect: ' . $con->connect_error );
}

// Create database and select it
// Identifiers cannot be bound as parameters, so the name is backtick-quoted
// (with embedded backticks doubled) before being placed in the statement.
$quotedDbName = '`' . str_replace( '`', '``', $dbName ) . '`';
if ( !$con->query( "CREATE DATABASE IF NOT EXISTS $quotedDbName" ) ) {
    die( "Could not create database $dbName: " . $con->error . "\n" );
}
if ( !$con->select_db( $dbName ) ) {
    die( "Could not select database $dbName: " . $con->error . "\n" );
}

// Collect the names of the tables that already exist in the database.
$existenceArr = array();
$existenceResult = $con->prepare(
    'SHOW TABLES FROM `' . str_replace( '`', '``', $dbName ) . '`' );
// prepare() returns false on failure, so check before calling methods on it.
if ( !$existenceResult ) {
    die( "Could not show tables from $dbName" );
}
$existenceResult->execute();
$existenceResult->bind_result( $existenceRow );
while ( $existenceResult->fetch() ) {
    $existenceArr[] = $existenceRow;
}
$existenceResult->close();

// Create every table listed in $tables (table name => SQL file) that is missing.
foreach ( $tables as $table => $sqlFile ) {
    echo "Checking table $table...";
    if ( in_array( $table, $existenceArr ) ) {
        echo "table exists\n";
    } else {
        echo "not found; creating...";
        if ( !file_exists( $sqlFile ) ) {
            die( "Error: file $sqlFile missing!\n" );
        }
        $sql = file_get_contents( $sqlFile );
        $conResult = $con->query( $sql );
        if ( !$conResult ) {
            die( "failed!\n" );
        }
        echo "done.\n";
    }
}

/* Load the bot framework and the credentials, then point a client at the remote wiki. */
include 'botclasses.php';
include 'inclubot_passwords.php';
$wiki = new wikipedia();
$wiki->url = $remoteWikiUrl;

// Authenticate with the pull account
$wiki->login( $pullUser, $pullPass );

// Main loop. Runs a single pass for -ro, otherwise repeats forever, sleeping
// $sleepMicroseconds between passes.
//
// Continuation state for the 'rc' queue is kept outside the loop so that each
// pass resumes where the previous one stopped:
//   $rcContinue        continuation token returned by the API ('' if none)
//   $lastSeenTimestamp timestamp (14 digits) of the last event processed
//   $lastSeenRcId      rcid of the last event processed
$passes = 0;
$rcContinue = '';
$lastSeenTimestamp = '';
$lastSeenRcId = 0;
while ( $options['r'] != 'o' || !$passes ) {
    $passes++;
    $events = array();
    switch ( $options['q'] ) {
        case 'rc':
            $rcStart = '';
            $rcContinueParam = '';
            $skipTimestamp = '';
            $skipRcId = 0;
            if ( $passes == 1 ) {
                // First pass: resume from the cursor stored by an earlier run, if any.
                $continueResult = $con->query(
                    "SELECT mbc_value FROM mb_cursor WHERE mbc_key='rccontinue'" );
                $continueRow = $continueResult ? $continueResult->fetch_assoc() : null;
                if ( $continueRow && (string)$continueRow['mbc_value'] !== '' ) {
                    $rcContinue = (string)$continueRow['mbc_value'];
                }
            }
            if ( $rcContinue !== '' ) {
                $rcContinueParam = '&rccontinue=' . urlencode( $rcContinue );
            } elseif ( $lastSeenTimestamp !== '' ) {
                // The previous pass reached the end of the feed. Start again at
                // the last timestamp seen and skip what was already stored.
                $rcStart = "&rcstart=$lastSeenTimestamp";
                $skipTimestamp = $lastSeenTimestamp;
                $skipRcId = $lastSeenRcId;
            } else {
                // No cursor yet: use the -s timestamp if given, else the default.
                if ( !isset( $startingTimestamp ) ) {
                    $startingTimestamp = $defaultStart['rc'];
                }
                $rcStart = "&rcstart=$startingTimestamp";
            }
            $ret = $wiki->query( '?action=query&list=recentchanges'
                . "$rcStart&rcdir=newer&rcprop=user|userid|comment|timestamp|"
                . 'patrolled|title|ids|sizes|redirect|loginfo|flags|tags&rclimit=500'
                . "$rcContinueParam&format=php", true );
            #var_dump ( $ret );
            if ( !is_array( $ret ) || !isset( $ret['query']['recentchanges'] ) ) {
                echo "Error: Did not retrieve any recent changes from query\n";
                break;
            }
            $events = $ret['query']['recentchanges'];
            if ( isset( $ret['query-continue']['recentchanges']['rccontinue'] ) ) {
                $rcContinue = $ret['query-continue']['recentchanges']['rccontinue'];
            } elseif ( isset( $ret['continue']['rccontinue'] ) ) {
                // Newer API continuation format
                $rcContinue = $ret['continue']['rccontinue'];
            } elseif ( $events ) {
                // End of the feed: nothing to continue from on the next pass.
                $rcContinue = '';
            }
            $table = 'mb_rc_queue';
            // Database column => key in the API result
            $fields = array(
                'mbrcq_rc_id' => 'rcid',
                'mbrcq_anon' => 'anon',
                'mbrcq_rc_bot' => 'bot',
                'mbrcq_rc_comment' => 'comment',
                'mbrcq_rc_log_action' => 'logaction',
                'mbrcq_rc_logid' => 'logid',
                'mbrcq_rc_logtype' => 'logtype',
                'mbrcq_rc_minor' => 'minor',
                'mbrcq_rc_new' => 'new',
                'mbrcq_rc_new_len' => 'newlen',
                'mbrcq_rc_namespace' => 'ns',
                'mbrcq_rc_old_len' => 'oldlen',
                'mbrcq_rc_cur_id' => 'pageid',
                'mbrcq_rc_patrolled' => 'patrolled',
                'mbrcq_rc_thisoldid' => 'revid',
                'mbrcq_rc_lastoldidid' => 'revoldid',
                'mbrcq_redirect' => 'redirect',
                'mbrcq_rc_timestamp' => 'timestamp',
                'mbrcq_rc_title' => 'title',
                'mbrcq_rc_type' => 'type',
                'mbrcq_rc_user' => 'userid',
                'mbrcq_rc_user_text' => 'user',
                'mbrcq_user' => 'addeduserid', // This isn't actually in the API result
                'mbrcq_user_text' => 'addeduser' // This isn't actually in the API result
            );
            // Values that are quoted and escaped as SQL strings
            $stringFields = array(
                'title',
                'type',
                'action',
                'user',
                'comment',
                'tags',
                'logaction',
                'logtype',
                'addeduser',
            );
            // Flags: stored as 1 when the key is present in the event, else 0
            $booleanFields = array(
                'anon',
                'bot',
                'minor',
                'new',
                'patrolled',
                'redirect',
            );
            // SQL fragments used when the event lacks the key
            $defaultFields = array(
                'rcid' => 0,
                'anon' => 0,
                'bot' => 0,
                'comment' => "''",
                'logaction' => 'NULL',
                'logid' => 0,
                'logtype' => "''",
                'minor' => 0,
                'new' => 0,
                'newlen' => 0,
                'ns' => 0,
                'oldlen' => 0,
                'pageid' => 0,
                'patrolled' => 0,
                'revid' => 0,
                'revoldid' => 0,
                'redirect' => 0,
                'timestamp' => "''",
                'title' => "''",
                'type' => 'NULL',
                'user' => "''",
                'userid' => 0,
                'addeduser' => "''",
                'addeduserid' => 0,
            );
            break;
        case 'us':
            $table = 'mb_rc_queue';
            $where = "mbrcq_rc_log_action='create2' AND mbrcq_user=0";
            $ret = $con->query( "SELECT mbrcq_rc_title FROM mb_rc_queue "
                . "WHERE $where AND mbrcq_push_result='' LIMIT 500" );
            if ( !$ret ) {
                die( "No $where items\n" );
            }
            $userTitle = array();
            while ( $value = $ret->fetch_assoc() ) {
                $userTitle[] = $value['mbrcq_rc_title'];
            }
            if ( !$userTitle ) {
                echo "No $where items\n";
                break;
            }
            // Names may contain spaces, '&' and other characters that are not
            // safe in a URL, so encode each one before joining with '|'.
            $queryChunk = implode( '|', array_map( 'urlencode', $userTitle ) );
            $ret = $wiki->query( '?action=query&list=users&ususers='
                . $queryChunk . '&format=php', true );
            if ( !is_array( $ret ) || !isset( $ret['query']['users'] ) ) {
                echo "Error: Did not retrieve any user IDs from query\n";
                break;
            }
            $userEvents = $ret['query']['users'];
            foreach ( $userEvents as $thisEvent ) {
                if ( !isset( $thisEvent['userid'] ) || !isset( $thisEvent['name'] ) ) {
                    // No ID was returned for this name, so there is nothing to store.
                    // NOTE(review): such rows keep mbrcq_user=0 and are selected
                    // again on every pass.
                    continue;
                }
                $name = "'" . $con->real_escape_string( $thisEvent['name'] ) . "'";
                $pageTitle = "'User:" . $con->real_escape_string( $thisEvent['name'] ) . "'";
                $query = 'UPDATE mb_rc_queue SET '
                    . 'mbrcq_user=' . (int)$thisEvent['userid']
                    . ', mbrcq_user_text=' . $name
                    . " WHERE mbrcq_rc_log_action='create2' AND mbrcq_rc_title="
                    . $pageTitle;
                echo $query . "\n";
                $status = $con->query( $query );
                if ( $status ) {
                    echo "Success\n";
                } else {
                    echo "Failure\n";
                    // Note this failure in the failure log file
                    fwrite( $failures, $query . "\n" );
                }
            }
            break;
        default:
            // 'rev' is accepted on the command line but has no handler here.
            echo "Queue {$options['q']} is not implemented\n";
            break;
    }

    // Iterate over the retrieved recent-changes events and store them with a
    // single multi-row INSERT.
    if ( $options['q'] == 'rc' && $events ) {
        $dbFields = array_keys( $fields );
        $userRow = array_values( $fields );
        $undesirables = array( '-', ':', 'T', 'Z' );
        $tuples = array();
        foreach ( $events as $thisLogevent ) {
            // Get rid of dashes, colons, Ts and Zs in timestamp
            if ( isset( $thisLogevent['timestamp'] ) ) {
                $thisLogevent['timestamp'] = str_replace(
                    $undesirables, '', $thisLogevent['timestamp'] );
            }
            $eventTimestamp = isset( $thisLogevent['timestamp'] )
                ? (string)$thisLogevent['timestamp'] : '';
            $eventRcId = isset( $thisLogevent['rcid'] ) ? (int)$thisLogevent['rcid'] : 0;
            // When the pass restarted at the last timestamp seen, drop the
            // events of that same second that were already stored.
            if ( $skipTimestamp !== '' && $eventTimestamp === $skipTimestamp
                && $eventRcId <= $skipRcId
            ) {
                continue;
            }
            // Default values for addeduserid and addeduser
            if ( isset( $thisLogevent['userid'] ) ) {
                $thisLogevent['addeduserid'] = $thisLogevent['userid'];
            }
            if ( isset( $thisLogevent['user'] ) ) {
                $thisLogevent['addeduser'] = $thisLogevent['user'];
            }
            // Make those different if it's a create2: the created account is
            // named by the title (minus its namespace prefix), and its ID is
            // left at 0 until the 'us' queue fills it in.
            if ( isset( $thisLogevent['logaction'] )
                && $thisLogevent['logaction'] == 'create2'
            ) {
                $thisLogevent['addeduserid'] = 0;
                $title = isset( $thisLogevent['title'] ) ? $thisLogevent['title'] : '';
                $strposTitle = strpos( $title, ':' );
                $thisLogevent['addeduser'] = ( $strposTitle === false )
                    ? $title
                    : substr( $title, $strposTitle + 1 );
            }
            // Iterate over those database fields
            $values = array();
            foreach ( $userRow as $thisRowItem ) {
                if ( in_array( $thisRowItem, $booleanFields ) ) {
                    // If it's a boolean field, 1 if it's there, 0 if not
                    $values[] = isset( $thisLogevent[$thisRowItem] ) ? '1' : '0';
                    continue;
                }
                if ( !isset( $thisLogevent[$thisRowItem] ) ) {
                    $values[] = $defaultFields[$thisRowItem];
                    continue;
                }
                $value = $thisLogevent[$thisRowItem];
                // If it's an array (e.g. tag array), implode it
                if ( is_array( $value ) ) {
                    $value = implode( '', $value );
                }
                $value = (string)$value;
                // String fields are always quoted and escaped. Everything else
                // goes in bare only when it is a plain integer; the data comes
                // from a remote server and must not be trusted to be numeric.
                if ( in_array( $thisRowItem, $stringFields )
                    || !preg_match( '/^-?\d+$/', $value )
                ) {
                    $value = "'" . $con->real_escape_string( $value ) . "'";
                }
                $values[] = $value;
            }
            $tuples[] = '( ' . implode( ', ', $values ) . ')';
            if ( $eventTimestamp !== '' ) {
                $lastSeenTimestamp = $eventTimestamp;
                $lastSeenRcId = $eventRcId;
            }
        }
        if ( $tuples ) {
            $row = 'insert into ' . $table . ' ( ' . implode( ', ', $dbFields )
                . ' ) values ' . implode( ', ', $tuples ) . ';';
            echo $row . "\n";
            $queryResult = $con->query( $row );
            if ( $queryResult ) {
                echo "Success\n";
                // NOTE(review): when the API returns no continuation token the
                // stored cursor is left as it is; persisting a "caught up"
                // position across runs still needs to be designed.
                if ( $rcContinue !== '' ) {
                    $cursorValue = "'" . $con->real_escape_string( $rcContinue ) . "'";
                    // Check cursor existence; if it doesn't, then create one
                    $exist = $con->query(
                        "SELECT mbc_key FROM mb_cursor WHERE mbc_key='rccontinue'" );
                    if ( $exist && $exist->num_rows > 0 ) {
                        $query = "UPDATE mb_cursor SET mbc_value=$cursorValue "
                            . "WHERE mbc_key='rccontinue'";
                    } else {
                        $query = "INSERT INTO mb_cursor (mbc_key, mbc_value) "
                            . " values ('rccontinue', $cursorValue)";
                    }
                    echo $query . "\n";
                    if ( !$con->query( $query ) ) {
                        echo "Failure\n";
                        fwrite( $failures, $query . "\n" );
                    }
                }
            } else {
                echo "Failure\n";
                // Note this failure in the failure log file
                fwrite( $failures, $row . "\n" );
            }
        }
    }
    if ( $options['r'] != 'o' ) {
        echo "Sleeping $sleepMicroseconds microseconds...";
        usleep( $sleepMicroseconds );
        echo "done sleeping.\n";
    }
}