Extension:RPED/RPEDFileReader.pl

From MediaWiki.org
Jump to: navigation, search

[edit] RPEDFileReader.pl

# RPEDFileReader.pl by Tisane, http://www.mediawiki.org/wiki/User:Tisane
#
# This script is free software that is available under the terms of the Creative Commons
# Attribution 3.0 license and the current version of the GNU General Public License.
#
# The purpose of this script is to read a text file (specifically, the list of page titles from
# Wikipedia's data dump) and add each page title to a database table.
 
use strict;
use warnings;
use Mysql;
use DBI;
 
# ---------------------------------------------------------------------------
# Main script: connect to MySQL, create the title table and its index, then
# insert one row per line of the title dump file.
# ---------------------------------------------------------------------------

# Connection settings.
my $sql_login = 'wikiuser2';
my $sql_pass = 'password';
my $db_name = 'page_title_db';
my $db_host = 'localhost'; # or remote MySQL server name;
                           # if left blank, no host is added to the DSN

# Build the DSN in the key=value form documented by DBD::mysql and connect.
# Fail immediately with the driver's own message if the connection cannot be
# made, rather than crashing later on an undefined handle.
my $conn_string = "DBI:mysql:database=$db_name";
if ($db_host) { $conn_string .= ";host=$db_host"; }
my $dbh = DBI->connect($conn_string, $sql_login, $sql_pass)
        or die("Error: cannot connect to database '".$db_name."': ".$DBI::errstr."\n");

# Create the destination table and an index on the title column.
# These two statements are deliberately best-effort: when the script is re-run
# against an existing table they fail, DBI reports the failure as a warning
# (PrintError is on by default), and loading continues.
my $sql = "CREATE TABLE page_title_table(
                p_ID int NOT NULL AUTO_INCREMENT,
                PRIMARY KEY(p_ID),
                page_title VARCHAR(256)
        )";
$dbh->do($sql);
$sql="CREATE INDEX pageind on page_title_table (page_title)";
$dbh->do($sql);

# Open the title list with a lexical handle and an explicit read mode so the
# file name can never be interpreted as a mode or a pipe.
my $filename='enwiki-20100116-all-titles-in-ns0';
open(my $titles_fh, '<', $filename) or
        die("Error: cannot open file '".$filename."': $!\n");

# Prepare the INSERT once; the placeholder lets the driver do the quoting, so
# no SQL text is assembled from the file contents.
my $insert = $dbh->prepare("INSERT INTO page_title_table (page_title) VALUES (?)")
        or die("Error: cannot prepare insert statement: ".$dbh->errstr."\n");

# $lnum is the 1-based number of the input line being processed; it is used to
# point at the offending line when an insert fails.
my $lnum = 1;
while( my $line = <$titles_fh> ){
        chomp($line);
        #print "$lnum: $line\n";
        $insert->execute($line)
                or warn("Warning: could not insert title from line $lnum of '$filename'\n");
        $lnum++;
}

close($titles_fh);
$dbh->disconnect;
Personal tools
Namespaces

Variants
Actions
Navigation
Support
Download
Development
Communication
Print/export
Toolbox