User:Alterego/ExtensionMatrix/Source code
This is the source code for User:Alterego/ExtensionMatrix
#!/usr/bin/env python
# MediaWiki extension matrix bot
# Copyright (C) 2009 Brian Mingus
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Bot account credentials handed to site.login() further down.
# Both are blank in the published source; fill them in before running.
username, password = '', ''
from re import sub
from sys import path
import datetime
path.append('/usr/local/mwclient')
path.append('/usr/local/mwclient/simplejson')
import client as mwclient
# Open a session against mediawiki.org, authenticate with the bot
# credentials, and grab the category whose members are the pages to scan.
wiki_host = 'www.mediawiki.org'
script_path = '/w/'
site = mwclient.Site(wiki_host, path=script_path)
site.login(username, password)
all_extensions = site.categories["All extensions"]
# Map each extension name to the raw wikitext of its {{Extension}} infobox.
# A name whose page could not be fetched or parsed keeps the empty string,
# so poorly formatted extensions can still be identified afterwards.
extensions = {}
for this_extension in all_extensions:
    title = this_extension.name
    if ':' not in title:
        # No namespace prefix, so there is no extension name to extract.
        continue
    # Split on the first colon only: the rest of the title may itself
    # contain colons and must not be truncated.
    extension_name = title.split(':', 1)[1]
    # Keep empty vals around to create a list of poorly formatted extensions
    extensions[extension_name] = ''
    try:
        # Extract the wikitext. Normally wouldn't be this simple but
        # the extensions are well formatted, each ending with
        # \n}}. Could recursively look for sub templates to be more
        # sure we're at the end.
        wikitext = site.Pages['Extension:' + extension_name].edit()
        template_start = wikitext.find('{{Extension')
        if template_start == -1:
            template_start = wikitext.find('{{extension')
        if template_start == -1:
            # No infobox on the page; leave the empty placeholder in place.
            continue
        terminator = wikitext.find('\n}}', template_start)
        if terminator == -1:
            # Unterminated template: treat it as unparseable rather than
            # storing a meaningless two-character slice.
            continue
        # +3 keeps the closing '\n}}' as part of the stored template.
        extensions[extension_name] = wikitext[template_start:terminator + 3]
    except Exception:
        # If someone did something stupid, not worth breaking the bot.
        # Only Exception is caught so Ctrl-C / SystemExit still stop the run.
        continue
# These had no parseable template
borked_extensions = []
# With just a little work we can turn the template into a dictionary
# and then do some cleanup processing of its parameters. This bot
# is definitely relying on the fact that the template ends with \n}}
matrix_entries = []
# Sorted so the generated page is in a stable, alphabetical order instead
# of whatever order the dictionary happens to iterate in.
for extension in sorted(extensions):
    # Progress output; the parenthesised form behaves the same on
    # Python 2 and Python 3 for a single argument.
    print(extension)
    # Don't allow subpage extensions
    if '/' in extension:
        continue
    template = extensions[extension]
    if not template:
        # Nothing was extracted for this page; remember it and move on.
        borked_extensions.append(extension)
        continue
    extension_dict = {}
    hooks, tags, types = [], [], []
    # Some people like to have funky spacing. Double up just in case
    template = (template.replace(' |', '|').replace(' |', '|')
                .replace('| ', '|').replace('| ', '|'))
    # This hacks off {{Extension and }}, and has the convenient side effect
    # of nuking |templatemode= when it shows up on the first line
    lines = template.split('\n')[1:-1]
    # Can't allow newlines - saw way too many crazy template values. In
    # order for this to be sane a line must start with a pipe; anything
    # else (including continuation lines of multi-line values) is dropped.
    params = [line for line in lines if line.startswith('|')]
    for param in params:
        if '=' not in param:
            continue  # Not a key=value line. Not my fault.
        key, value = param.split('=', 1)
        key = key.replace('|', '').strip()
        value = value.strip()
        if not value:
            continue
        if 'name' in key:
            # Sometimes the name field doesn't contain the actual name of
            # the extension
            value = extension
        # Numbered parameters (any key containing 'hook', 'tag' or 'type')
        # are pooled into one list each and joined further down.
        if 'hook' in key:
            hooks.append(value)
            continue
        if 'tag' in key:
            tags.append(value)
            continue
        if 'type' in key:
            types.append(value)
            continue
        # Have a look at LocalisationUpdate for nested templateness that is
        # just not ok: drop values with unbalanced template braces.
        if '{{' in value and '}}' not in value:
            continue
        if '}}' in value and '{{' not in value:
            continue
        # These always turn out to be copy/paste jobs from the prototype
        # template
        if '<!-' in value or '-->' in value:
            continue
        extension_dict[key] = value
    # Collapse each pooled list into a single sorted, <br/>-separated value.
    for pooled_key, pooled_values in (('hooks', hooks),
                                      ('tags', tags),
                                      ('types', types)):
        if pooled_values:
            extension_dict[pooled_key] = '<br/>'.join(sorted(pooled_values))
    # Sometimes the name isn't specified at all
    if 'name' not in extension_dict:
        extension_dict['name'] = extension
    # Don't allow empty templates, or templates with just one parameter
    if len(extension_dict) <= 1:
        continue
    entry = ['{{ExtensionMatrix\n']
    for key in sorted(extension_dict):
        value = extension_dict[key]
        # Pooled hooks/tags/types skipped the comment filter above, so a
        # stray HTML comment can still show up here; drop the parameter.
        if '<!-' in value or '-->' in value:
            continue
        # Build this line of the template
        entry.append('|' + key + '=' + value + '\n')
    entry.append('}}\n')
    matrix_entries.append(''.join(entry))
# Later code expects the accumulated matrix as one string.
extension_matrix = ''.join(matrix_entries)
# Wrap the generated entries in the header/footer templates, prepend a
# timestamp and a coverage summary, and publish the result to the wiki.
# NOTE(review): the timestamp is the machine's local time with a
# hard-coded ' MST' label - confirm the bot really runs in that zone.
updated = 'Last updated: ' + \
    datetime.datetime.now().strftime("%Y-%m-%d %H:%M") + ' MST<br/>'
# Count entries by their opening line. Splitting on the separator between
# entries would report 1 even when no extension was listed at all.
listed_count = extension_matrix.count('{{ExtensionMatrix\n')
# NOTE(review): the members counted here come from the "All extensions"
# category, while the text links to Category:Extensions - confirm intent.
num_listed = 'Listing ' + \
    str(listed_count) + \
    ' out of ' + \
    str(len(extensions)) + \
    ' members of [[:Category:Extensions]]<br/>'
extension_matrix = '{{ExtensionMatrixHeader}}\n' + \
    updated + \
    num_listed + \
    extension_matrix + '\n{{ExtensionMatrixFooter}}\n'
page = site.Pages["User:Alterego/ExtensionMatrix"]
page.save(extension_matrix)