Manual:Chris G's botclasses/AllPagesBot.php

From mediawiki.org

This is a bot that uses Chris G's botclasses to retrieve a list of all page titles on the wiki and store that list in two text files, one for the File: namespace, and another for all the other namespaces. Customize the URLs, login info, and namespace variables to suit your needs.

<?php
/* AllPagesBot
 * By Leucosticte, https://www.mediawiki.org/wiki/User:Leucosticte
 * GNU Public License 2.0
 *
 * This bot retrieves a list of all page titles on the wiki and stores that list in two text files,
 * one for the File: namespace, and another for all the other namespaces.
 */

/* Setup my classes. */
include( 'botclasses.php' );
$wiki      = new wikipedia;
// Use HTTPS so the login credentials below are not sent in cleartext.
$wiki->url = "https://en.wikipedia.org/w/api.php";

/* All the login stuff. */
$user = 'REMOVED';
$pass = 'REMOVED';
$wiki->login( $user,$pass );

/* Namespaces that go into the "everything else" list. */
$namespaces = range( 0, 15 ); // Default namespaces
// Extra namespaces
#$namespaces[] = 500;
#$namespaces[] = 501;
$namespaces = array_filter( $namespaces, "notFile" ); // Filter out the File: namespace

/* Output files: one for the File: namespace, one for all the others. */
$pageTitlesFile = 'PageTitles.txt';
$pageTitlesNs6File = 'PageTitlesNs6.txt';

// Stop early if an output file cannot be opened; otherwise every fwrite()
// in iterate() would be handed false instead of a file handle.
$pageTitles = fopen ( $pageTitlesFile, 'w' );
if ( $pageTitles === false ) {
    die ( "Could not open $pageTitlesFile for writing\n" );
}
$pageTitlesNs6 = fopen ( $pageTitlesNs6File, 'w' );
if ( $pageTitlesNs6 === false ) {
    fclose ( $pageTitles );
    die ( "Could not open $pageTitlesNs6File for writing\n" );
}

iterate ( $wiki, $namespaces, $pageTitles ); // Everything but File: namespace
iterate ( $wiki, array ( 6), $pageTitlesNs6 ); // Only the File: namespace

/**
 * array_filter() callback that keeps every namespace except File: (number 6).
 *
 * The comparison is deliberately loose, so a numeric string such as '6' is
 * rejected just like the integer 6.
 *
 * @param mixed $var Namespace number to test.
 * @return bool False for the File: namespace, true for anything else.
 */
function notFile ( $var ) {
    $fileNamespace = 6;
    if ( $var == $fileNamespace ) {
        return false;
    }
    return true;
}

/**
 * Retrieve every page title in the given namespaces and write them to a file.
 *
 * For each namespace the "allpages" list is requested in batches of up to 500
 * titles. The continuation data returned with each batch is sent back on the
 * next request until the API stops returning any. Each title is written to
 * $pageTitles on its own line.
 *
 * Both continuation layouts are accepted: a top-level 'continue' array and the
 * older 'query-continue' => 'allpages' array (whose key may be 'apfrom' or
 * 'apcontinue'). Whatever parameters the API hands back are returned verbatim.
 *
 * @param object   $wiki       Bot object exposing query( $queryString ); it is
 *                             expected to return the decoded API response array.
 * @param array    $namespaces Namespace numbers to list.
 * @param resource $pageTitles Writable file handle that receives the titles.
 */
function iterate ( $wiki, $namespaces, $pageTitles ) {
    foreach ( $namespaces as $namespace ) {
        // Continuation parameters from the previous response; empty on the first request.
        $continue = array();
        while ( true ) {
            $query = "?action=query&format=php&list=allpages&aplimit=500&apnamespace=$namespace";
            if ( $continue ) {
                // http_build_query() URL-encodes the values, so titles containing
                // '&', '+', '#' or spaces cannot corrupt the query string.
                $query .= '&' . http_build_query ( $continue, '', '&' );
            }
            $ret = $wiki->query ( $query );

            // An error response (or a failed request) carries no page list.
            if ( isset ( $ret['query']['allpages'] ) && is_array ( $ret['query']['allpages'] ) ) {
                foreach ( $ret['query']['allpages'] as $thisPage ) {
                    fwrite ( $pageTitles, $thisPage['title'] . "\n" );
                }
            }

            if ( isset ( $ret['continue'] ) && is_array ( $ret['continue'] ) ) {
                $continue = $ret['continue'];
            } elseif ( isset ( $ret['query-continue']['allpages'] )
                && is_array ( $ret['query-continue']['allpages'] )
            ) {
                $continue = $ret['query-continue']['allpages'];
            } else {
                break; // No continuation data: this namespace is finished.
            }

            // An empty continuation block would repeat the first request forever.
            if ( !$continue ) {
                break;
            }
        }
    }
}
// Close both output files now that every title has been written.
fclose ( $pageTitles );
fclose ( $pageTitlesNs6 );