Wik2dict

I want to have some Wiki information on my hard drive and use it through the DICT interface, so I wrote a little conversion program in Python.

Warning: Might work for you, or it might not. Worked for me. :)

Oh, for the moment, copy the program from the wiki source of this page, not from the rendered text you see now :)


#!/usr/bin/python


# *  This program is free software; you can redistribute it and/or modify  *
# *  it under the terms of the GNU General Public License as published by  *
# *  the Free Software Foundation; either version 2 of the License, or     *
# *  (at your option) any later version.                                   *

import commands
import os
import re
from sets import Set

import MySQLdb
import dictdlib

# MySQL account used for the Python connection and for the `mysql`
# command-line client invoked below.
user = "root"
# SQL dump of the wiki's `cur` table that is loaded into the scratch database.
wik_datafile = "20040623_cur_table.sql"
#dictfilename = "WikiWoordenboek-NL"

con = MySQLdb.connect("localhost", user)

# create table and dump sql file into mysql
try:
    con.query("CREATE DATABASE wikitest")
except MySQLdb._mysql.ProgrammingError:
    # Deliberate best effort: presumably raised when `wikitest` already
    # exists from an earlier run, in which case it is simply reused.
    pass

# The dump is fed to the mysql client through shell redirection.  The
# returned (status, output) pair is discarded, so a failed import is not
# detected at this point.
commands.getstatusoutput("mysql -u " + user + " wikitest < " + wik_datafile)

def process_index(index):
    """Return the headword to use for a page title.

    This is currently an identity mapping: the title is handed back
    unchanged.
    """
    headword = index
    return headword

# Matches one HTML comment; DOTALL lets a comment span several lines and the
# non-greedy body keeps text between two separate comments intact.
_HTML_COMMENT_RE = re.compile(r"<!--.*?-->", re.DOTALL)


def _internal_links(s):
    """Rewrite wiki links ``[[target]]`` as ``{target}``."""
    # NOTE(review): braces are presumably chosen because DICT clients treat
    # {word} as a cross-reference -- confirm against the DICT tooling in use.
    return s.replace("[[", "{").replace("]]", "}")


def process_entry(s):
    """Convert the wiki text of one page into the text stored in the dict file.

    Two transformations are applied, in this order:

    1. internal links ``[[...]]`` are rewritten to ``{...}``;
    2. HTML comments (``<!-- ... -->``) are removed.

    Args:
        s: raw wiki markup of the page.

    Returns:
        The transformed text.
    """
    s = _internal_links(s)
    # remove HTML comments
    return _HTML_COMMENT_RE.sub("", s)

cursor = con.cursor()
cursor.execute("USE wikitest")

# The text of the 'Wikititlesuffix' page is used as the dictionary's short
# name and as the start of its long info string.
# fetchone() returns None when the page is missing, so a dump without this
# page fails here with a TypeError.
cursor.execute("SELECT cur_text FROM cur WHERE cur_title = 'Wikititlesuffix'")
dictname = cursor.fetchone()[0]
info = dictname + ", GNU Free Documentation License"

# The dictionary URL is cut out of the text of the 'Printsubtitle' page.
cursor.execute("SELECT cur_text FROM cur WHERE cur_title = 'Printsubtitle'")
url = cursor.fetchone()[0]
# Keep the span from the first "http" up to and including the first "org";
# str.index raises ValueError when either marker is absent.
url = url[url.index("http"):url.index("org") + 3]

# Base name of the generated .dict/.index files: the name with spaces
# replaced by dashes.
dictfilename = dictname.replace(" ", "-")

# Collect redirect pages: every page whose text starts with "#redirect".
# The doubled backslash keeps the SQL text sent to MySQL unchanged
# ('\#redirect') without using an invalid escape in the Python literal.
cursor.execute("SELECT cur_title, cur_text FROM cur WHERE SUBSTRING(cur_text, 1, 9) = '\\#redirect'")

# redirs maps the redirect target (the page text after the first 10
# characters, stripped of whitespace and of the [[ ]] link brackets) to the
# title of the page that redirects to it.  A target with several redirect
# pages keeps only the last one fetched.
redirs = {}
for title, text in cursor.fetchall():
    target = text[10:].strip().replace("[[", "").replace("]]", "")
    redirs[target] = title
#def redirects(): #redirects

# Pages from namespaces 8 and 10.
cursor.execute("SELECT cur_title, cur_text FROM cur WHERE cur_namespace = 8 OR cur_namespace = 10")

# NOTE(review): the key is built exactly like a redirect target (first 10
# characters of the text dropped, brackets removed).  This looks copied from
# the redirect handling -- confirm that it is intended for message pages.
messages = {}
for title, text in cursor.fetchall():
    key = text[10:].strip().replace("[[", "").replace("]]", "")
    messages[key] = title

# Debug output; the parenthesised form prints the same on Python 2.
print(messages)

# Remove the compressed dictionary and the index left by a previous run
# before new ones are written.
for suffix in (".dict.dz", ".index"):
    if os.path.exists(dictfilename + suffix):
        os.remove(dictfilename + suffix)

# create dict file
# NOTE: the name `dict` shadows the builtin; it is kept because the rest of
# the script refers to the database object by this name.
dict = dictdlib.DictDB(dictfilename, mode='write', quiet=0)

# Add one dictionary entry per page of namespace 0, in title order.
cursor.execute("SELECT cur_namespace, cur_title, cur_text FROM cur WHERE cur_namespace = 0 ORDER BY cur_title")

# Titles of redirect pages, collected once so the membership test inside the
# loop does not rescan redirs.values() for every row.
redirect_titles = Set(redirs.values())

for _ in range(cursor.rowcount):
    f = cursor.fetchone()  # (cur_namespace, cur_title, cur_text)

    index = process_index(f[1])
    if index in redirect_titles:
        # Redirect pages get no entry of their own; their title is added as
        # an extra headword of the page found under that key in redirs.
        continue

    indices = Set([index])
    if index in redirs:
        indices.add(redirs[index])
    # Also index every headword with underscores replaced by spaces.
    # Iterate over a copy because the set is modified inside the loop.
    for e in list(indices):
        indices.add(e.replace("_", " "))

    entry = process_entry(f[2])
    # The stored definition starts with the title on a line of its own.
    dict.addentry(index + "\n" + entry, indices)

# Attach the dictionary metadata gathered from the wiki.
dict.setlonginfo(info)
dict.setshortname(dictname)
dict.seturl(url)

# Must be called (not merely referenced) so dictdlib finalises its output.
dict.finish()

# Compress the .dict file with the external dictzip tool; its exit status
# and output are discarded.
commands.getstatusoutput("dictzip " + dictfilename + ".dict")