# Extension:RPED/RPEDFileReader.pl
# From MediaWiki.org
# RPEDFileReader.pl
# RPEDFileReader.pl by Tisane, http://www.mediawiki.org/wiki/User:Tisane
#
# This script is free software that is available under the terms of the Creative Commons
# Attribution 3.0 license and the current version of the GNU General Public License.
#
# The purpose of this script is to read a text file (specifically, the list of page titles from
# Wikipedia's data dump) and add each page title to a database table.
#
# What it does, in order:
#   1. Connects to the MySQL database configured below (dies if the connection fails).
#   2. Issues CREATE TABLE for page_title_table and CREATE INDEX for pageind.
#   3. Reads the titles file line by line and inserts each line as one row.

use strict;
use warnings;
use Mysql;    # legacy module; nothing below calls it, kept because the original loads it
use DBI;

my $sql_login = 'wikiuser2';
my $sql_pass  = 'password';
my $db_name   = 'page_title_db';
my $db_host   = 'localhost';    # or remote mysql server name
                                # if left blank, this defaults to localhost

# Build the DSN; the host part is appended only when $db_host is non-empty.
my $conn_string = "DBI:mysql:$db_name";
if ($db_host) {
    $conn_string .= ":$db_host";
}

# PrintError/RaiseError are spelled out so the error policy is visible:
# a failed statement prints a warning and the script carries on.  Only a
# failed connection is fatal, because nothing below can work without $dbh.
my $dbh = DBI->connect(
    $conn_string, $sql_login, $sql_pass,
    { PrintError => 1, RaiseError => 0 },
) or die "Error: cannot connect to '$conn_string': $DBI::errstr\n";

# Create the table and its index.  The return values are deliberately not
# checked: if either statement fails, DBI warns and the inserts below are
# still attempted.
$dbh->do(
    "CREATE TABLE page_title_table( "
  . "p_ID int NOT NULL AUTO_INCREMENT, "
  . "PRIMARY KEY(p_ID), "
  . "page_title VARCHAR(256) )"
);
$dbh->do("CREATE INDEX pageind on page_title_table (page_title)");

my $filename = 'enwiki-20100116-all-titles-in-ns0';
open(my $titles_fh, '<', $filename)
    or die("Error: cannot open file '$filename': $!\n");

# Prepare the INSERT once and bind the title for each row.  The placeholder
# takes care of quoting, so the per-line $dbh->quote() call and the repeated
# prepare/do of a freshly built SQL string are no longer needed.
my $insert_sth = $dbh->prepare(
    'INSERT INTO page_title_table (page_title) VALUES (?)'
) or die "Error: cannot prepare insert statement: " . $dbh->errstr . "\n";

while (my $line = <$titles_fh>) {
    chomp($line);
    #print "$.: $line\n";    # $. is the current input line number
    $insert_sth->execute($line);
}

close($titles_fh);
$dbh->disconnect;
