User:Alterego/ExtensionMatrix/Source code

From mediawiki.org

This is the source code for User:Alterego/ExtensionMatrix

#!/usr/bin/env python
# MediaWiki extension matrix bot
# Copyright (C) 2009 Brian Mingus
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Bot account credentials passed to site.login(); both are blank in the
# published source and must be filled in before the bot is run.
username, password = '', ''

from re import sub
from sys import path
import datetime
path.append('/usr/local/mwclient')
path.append('/usr/local/mwclient/simplejson')
import client as mwclient

# Open an mwclient session against www.mediawiki.org (script path '/w/')
# and log in with the credentials defined at the top of the file.
site = mwclient.Site('www.mediawiki.org', path='/w/')
site.login(username,password)

# Handle on the "All extensions" category; the loop below iterates over it
# to visit every member page.
all_extensions = site.categories["All extensions"]

# A dictionary containing the extension name and its template.
# Maps the page title without its namespace prefix to the raw
# {{Extension ...}} wikitext, or to '' when no template could be extracted.
extensions = {}

for this_extension in all_extensions:
    # Split on the first colon only so that titles which themselves contain
    # a colon are not truncated (which would fetch the wrong page).
    name_parts = this_extension.name.split(':', 1)
    if len(name_parts) != 2:
        # No namespace prefix at all - nothing sensible to look up
        continue
    extension_name = name_parts[1]

    # Keep empty vals around to create a list of poorly formatted extensions
    extensions[extension_name] = ''

    try:
        # Extract the wikitext. Normally wouldn't be this simple but
        # the extensions are well formatted, each ending with
        # \n}}. Could recursively look for sub templates to be more
        # sure we're at the end.
        wikitext = site.Pages['Extension:' + extension_name].edit()

        template_start = wikitext.find('{{Extension')
        if template_start == -1:
            template_start = wikitext.find('{{extension')
        if template_start == -1:
            # No infobox on the page; leave the empty placeholder in place
            continue

        # Search from the template start and check for "not found"
        # explicitly, instead of doing arithmetic on a -1 result.
        template_end = wikitext.find('\n}}', template_start)
        if template_end == -1:
            continue

        # +3 keeps the closing '\n}}' as part of the template text
        extensions[extension_name] = wikitext[template_start:template_end + 3]

    except Exception:
        # If someone did something stupid, not worth breaking the bot.
        # Exception (rather than a bare except) still lets Ctrl-C and
        # SystemExit stop the run.
        continue

# These had no parseable template (their entry in `extensions` is empty)
borked_extensions = []

# With just a little work we can turn the template into a dictionary
# and then do some cleanup processing of its parameters. This bot
# is definitely relying on the fact that the template ends with \n}}
#
# The result, extension_matrix, is the concatenation of one
# {{ExtensionMatrix ...}} template call per usable extension. Extensions
# are processed in sorted order so repeated runs produce the same page text.

matrix_rows = []
for extension in sorted(extensions):
    print(extension)

    # Don't allow subpage extensions
    if '/' in extension:
        continue

    template = extensions[extension]
    if not template:
        borked_extensions.append(extension)
        continue

    # Some people like to have funky spacing. Collapse any run of spaces
    # on either side of a pipe in one pass.
    template = sub(r' *\| *', '|', template)

    # This hacks off {{Extension and }}, and has the convenient side effect
    # of nuking |templatemode= when it shows up on the first line
    template_lines = template.split('\n')[1:-1]

    extension_dict = {}
    hooks, tags, types = [], [], []

    for line in template_lines:
        # Can't allow newlines - saw way too many crazy template values. In
        # order for this to be sane the template must have a pipe as the
        # first non whitespace char on each line
        line = line.strip()
        if not line.startswith('|'):
            continue

        # Can't split into key=value? Not my fault.
        if '=' not in line:
            continue
        key, value = line.split('=', 1)
        key = key.replace('|', '').strip()
        value = value.strip()

        if not value:
            continue

        if 'name' in key:
            # Sometimes the name field doesn't contain the actual name of the extension
            value = extension

        # Numbered parameters (hook1, hook2, tags, type1, ...) are pooled
        # and emitted later as a single combined parameter each.
        if 'hook' in key:
            hooks.append(value)
            continue
        if 'tag' in key:
            tags.append(value)
            continue
        if 'type' in key:
            types.append(value)
            continue

        # Have a look at LocalisationUpdate for nested templateness that is just not ok.
        # Unbalanced braces mean the value spilled over several lines.
        if ('{{' in value) != ('}}' in value):
            continue
        # These always turn out to be copy/paste jobs from the prototype template
        if '<!-' in value or '-->' in value:
            continue
        extension_dict[key] = value

    for combined_key, pooled in (('hooks', hooks), ('tags', tags), ('types', types)):
        if pooled:
            extension_dict[combined_key] = '<br/>'.join(sorted(pooled))

    # Sometimes the name isn't specified at all
    if 'name' not in extension_dict:
        extension_dict['name'] = extension

    # Don't allow templates with just one parameter (i.e. only the name)
    if len(extension_dict) < 2:
        continue

    row = ['{{ExtensionMatrix\n']
    for key in sorted(extension_dict):
        value = extension_dict[key]
        # The pooled hooks/tags/types values skipped the comment filter
        # above, so screen every value once more before emitting it.
        if '<!-' in value or '-->' in value:
            continue
        # Build this line of the template
        row.append('|' + key + '=' + value + '\n')
    row.append('}}\n')

    matrix_rows.append(''.join(row))

extension_matrix = ''.join(matrix_rows)


# Timestamp line shown at the top of the generated page.
# NOTE(review): datetime.now() is the host's local time, but the label is
# hard-coded to "MST" - confirm the bot always runs on a Mountain Time host.
updated = 'Last updated: ' + \
          datetime.datetime.now().strftime("%Y-%m-%d %H:%M") + ' MST<br/>'

# Every generated row starts with '{{ExtensionMatrix\n', so counting that
# marker gives the exact number of rows - including 0 when nothing was
# generated (splitting on the row boundary would report 1 for an empty matrix).
# NOTE(review): the members are read from "All extensions" but the text
# links to Category:Extensions - confirm which category is intended.
num_listed = 'Listing ' + \
             str(extension_matrix.count('{{ExtensionMatrix\n')) + \
             ' out of ' + \
             str(len(extensions)) + \
             ' members of [[:Category:Extensions]]<br/>'

# Wrap the rows in the header/footer templates to form the full page text
extension_matrix = '{{ExtensionMatrixHeader}}\n' + \
                   updated + \
                   num_listed + \
                   extension_matrix + '\n{{ExtensionMatrixFooter}}\n'

# Publish the result, overwriting the bot's output page
page = site.Pages["User:Alterego/ExtensionMatrix"]
page.save(extension_matrix)