User:Alterego/ExtensionMatrix/Source code
From MediaWiki.org
This is the source code for User:Alterego/ExtensionMatrix
# Bot that rebuilds the "Extension Matrix" pages on www.mediawiki.org.
#
# Steps, in order:
#   1. download the wikitext of every member of the "All extensions" category
#      and cut out its {{Extension ...}} infobox template;
#   2. turn each template into a dict of cleaned-up parameters;
#   3. normalise dates and MediaWiki version strings;
#   4. ask the wiki when each extension page / talk page was last touched and
#      when the extension page was first created;
#   5. group the extensions by version, status and type, and save one subpage
#      per group plus a main index page.
#
# The code avoids dict.has_key(), xrange() and in-place sorting of
# dict.keys() so that it does not depend on Python 2-only behaviour.

import datetime
import re
from re import sub
from sys import path

from dateutil.parser import parse

path.append('/usr/local/mwclient')
path.append('/usr/local/mwclient/simplejson')
import client as mwclient

username = ''
password = ''

# Minor versions of MediaWiki 1.x that are looked for in the 'mediawiki'
# template parameter.
# TODO: this will break if mw goes past 1.19 or into 2.x
MW_MINOR_VERSIONS = range(2, 20)
# How many entries each "most recently ..." list on the main page shows.
RECENT_LIMIT = 200
# A type needs at least this many extensions to get its own subpage.
MIN_EXTENSIONS_PER_TYPE = 5

months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

site = mwclient.Site('www.mediawiki.org', path='/w/')
site.login(username, password)
all_extensions = site.categories["All extensions"]

extensions, extensions_dicts, extensions_by_type, extensions_by_status = {}, {}, {}, {}
extensions_by_mw_version, extensions_by_creation_date = {}, {}
recently_edited, recently_discussed, recently_updated, recently_created = [], [], [], []


def BuildTemplate(extension_dict):
    """Convert an extension dict back into {{ExtensionMatrix}} template text.

    Each key/value pair becomes one '|key=value' line. Values containing an
    HTML comment marker ('<!-' or '-->') are left out.
    """
    lines = ['{{ExtensionMatrix\n']
    for key in extension_dict:
        value = extension_dict[key]
        # This guy giving me a hard time for some reason
        if '<!-' in value or '-->' in value:
            continue
        lines.append('|' + key + '=' + value + '\n')
    lines.append('}}\n')
    return ''.join(lines)


def _format_date(day, month, year):
    """Return 'D Mon YYYY' (month is 1-based), the wikitable-sortable form."""
    return str(day) + ' ' + months[month - 1] + ' ' + str(year)


def _format_struct_time(timestamp):
    """Format a time.struct_time-like object (tm_mday/tm_mon/tm_year)."""
    return _format_date(timestamp.tm_mday, timestamp.tm_mon, timestamp.tm_year)


def _date_sort_key(date_text, extension):
    """Return (year, month, day, extension) for sorting by date."""
    parsed = parse(date_text)
    return (parsed.year, parsed.month, parsed.day, extension)


def _mentions_version(version_text, minor):
    """Tell whether version_text mentions MediaWiki '1.<minor>'.

    The negative lookahead keeps '1.2' from matching inside '1.20'.
    """
    return re.search(r'1\.%d(?!\d)' % minor, version_text) is not None


def _parse_template(extension, template):
    """Turn the raw infobox text of one extension into a parameter dict.

    Only lines whose first character is a pipe are considered; the first and
    last line of the template ('{{Extension...' and '}}') are dropped.
    'hook', 'tag' and 'type' parameters are collected, sorted and joined with
    '<br/>' under the keys 'hooks', 'tags' and 'types'. The 'name' entry is
    always the page name passed in as `extension`.
    """
    # Some people like to have funky spacing. Double up just in case
    template = template.replace(' |', '|').replace(' |', '|').replace('| ', '|').replace('| ', '|')
    # This hacks off {{Extension and }}, and has the convenient side effect
    # of nuking |templatemode= when it shows up on the first line.
    # Multi-line values are not supported: a parameter line must start
    # with a pipe.
    param_lines = [line for line in template.split('\n')[1:-1]
                   if line.startswith('|')]

    extension_dict = {}
    hooks, tags, types = [], [], []
    for line in param_lines:
        if '=' not in line:
            continue
        key, value = line.split('=', 1)
        key = key.replace('|', '').strip()
        value = value.strip()
        if not value:
            continue
        if key == 'name':
            # Sometimes the name field doesn't contain the actual name
            # of the extension
            value = extension
        if 'hook' in key:
            hooks.append(value)
            continue
        if 'tag' in key:
            tags.append(value)
            continue
        if 'type' in key:
            types.append(value)
            continue
        # Have a look at LocalisationUpdate for nested templateness that is
        # just not ok: skip values with unbalanced template braces.
        if ('{{' in value) != ('}}' in value):
            continue
        # These always turn out to be copy/paste jobs from the prototype
        # template
        if '<!-' in value or '-->' in value:
            continue
        extension_dict[key] = value.replace('<ref>', ' ').replace('</ref>', ' ')

    for plural, values in (('hooks', hooks), ('tags', tags), ('types', types)):
        if values:
            extension_dict[plural] = '<br/>'.join(sorted(values))

    # Sometimes the name isn't specified at all
    if 'name' not in extension_dict:
        extension_dict['name'] = extension
    return extension_dict


def _save_matrix(title, names):
    """Save a header + one template per extension + footer to page `title`."""
    body = ''.join(BuildTemplate(extensions_dicts[name]) for name in names)
    site.Pages[title].save(
        '{{ExtensionMatrixHeader}}' + body + '{{ExtensionMatrixFooter}}')


def _group_link(group, names):
    """Save the subpage for one group and return its link for the index."""
    _save_matrix(prefix + '/' + group, names)
    return '[[' + prefix + '/' + group + '|' + group + ']] (' + str(len(names)) + '), '


def _recent_section(title, entries, date_key, namespace):
    """Return an index section listing the first RECENT_LIMIT entries.

    `entries` are (year, month, day, extension) tuples, newest first. Slicing
    (rather than indexing 0..RECENT_LIMIT-1) copes with shorter lists.
    """
    section = '== ' + str(RECENT_LIMIT) + ' ' + title + ' ==\n* '
    for entry in entries[:RECENT_LIMIT]:
        name = entry[3]
        section += ('[[' + namespace + ':' + name + '|' + name + ']] (' +
                    extensions_dicts[name][date_key] + '), ')
    return section + '\n'


##########################################
# Download the template for each extension
##########################################
for this_extension in all_extensions:
    page_title = this_extension.name
    if ':' not in page_title:
        continue
    # Split on the first colon only so names containing a colon survive.
    extension_name = page_title.split(':', 1)[1]
    # Keep empty vals around to create a list of poorly formatted extensions
    extensions[extension_name] = ''
    try:
        wikitext = site.Pages['Extension:' + extension_name].edit()
    except Exception:
        # If someone did something stupid, not worth breaking the bot
        continue
    # Extract the wikitext. Normally wouldn't be this simple but
    # the extensions are well formatted, each ending with
    # \n}}. Could recursively look for sub templates to be more
    # sure we're at the end.
    template_start = wikitext.find('{{Extension')
    if template_start == -1:
        template_start = wikitext.find('{{extension')
    if template_start == -1:
        continue
    template_length = wikitext[template_start:].find('\n}}')
    if template_length == -1:
        # No closing '\n}}': leave the empty placeholder in place.
        continue
    extensions[extension_name] = wikitext[template_start:template_start + template_length + 3]

# With just a little work we can turn the template into a dictionary
# and then do some cleanup processing of its parameters. This bot
# is definitely relying on the fact that the template ends with \n}}
for extension in list(extensions):
    # Don't allow subpage extensions
    if '/' in extension:
        continue
    extension_dict = _parse_template(extension, extensions[extension])
    # Don't allow templates with nothing but the name parameter
    if len(extension_dict) <= 1:
        continue
    extensions_dicts[extension] = extension_dict

# sorted list of the full matrix for later use
sorted_matrix = sorted(extensions_dicts)

# convert all parseable dates into a common wikitable-sortable format;
# unparseable 'update' values are dropped
for extension in extensions_dicts:
    this_extension = extensions_dicts[extension]
    if 'update' in this_extension:
        try:
            this_date = parse(this_extension['update'])
            this_extension['update'] = _format_date(
                this_date.day, this_date.month, this_date.year)
        except Exception:
            del this_extension['update']

# figure out what versions of mediawiki this extension works on
# this just looks for a string match of the version. i personally
# don't trust the +,>=,etc.. sign people like to use, for example, 1.12+.
# that generally means that they tested it on 1.12, but not the
# versions that came afterwards. really sketchy is >=1.6. yeah right.
for extension in extensions_dicts:
    this_extension = extensions_dicts[extension]
    if 'mediawiki' in this_extension:
        supported_versions = []
        version_text = this_extension['mediawiki']
        for minor in MW_MINOR_VERSIONS:
            if _mentions_version(version_text, minor):
                this_version = '1.' + str(minor)
                supported_versions.append(this_version)
                extensions_by_mw_version.setdefault(this_version, []).append(extension)
        this_extension['mediawiki'] = ', '.join(supported_versions)

##########################################
# Get the last day that each extension and its talk page were edited
# NEW: Also get the creation date of the extension
##########################################
for extension in extensions_dicts:
    this_extension = site.Pages["Extension:" + extension]
    if this_extension.exists:  # should never fail!
        extensions_dicts[extension]['lastupdated'] = _format_struct_time(
            this_extension.touched)
        # The last item of the revision list is taken as the first edit.
        first_edit_timestamp = list(this_extension.revisions())[-1]['timestamp']
        extensions_dicts[extension]['created'] = _format_struct_time(
            first_edit_timestamp)
    this_extension = site.Pages["Extension_talk:" + extension]
    if this_extension.exists:
        extensions_dicts[extension]['lastupdatedtalk'] = _format_struct_time(
            this_extension.touched)

##########################################
# Create lists the most recently edited, discussed, updated and created extensions
# A bit redundant with above code, but its more clear to break it out
# Key to sorting by date is a tuple with (year,month,day). easy peasy.
##########################################
for extension in extensions_dicts:
    this_extension = extensions_dicts[extension]
    for date_key, target in (('lastupdated', recently_edited),
                             ('lastupdatedtalk', recently_discussed),
                             ('update', recently_updated),
                             ('created', recently_created)):
        if date_key in this_extension:
            target.append(_date_sort_key(this_extension[date_key], extension))

# Newest first.
recently_edited.sort(reverse=True)
recently_discussed.sort(reverse=True)
recently_updated.sort(reverse=True)
recently_created.sort(reverse=True)

##########################################
# extensions by type
##########################################
for extension in extensions_dicts:
    this_extension = extensions_dicts[extension]
    if 'types' in this_extension:
        for this_type in this_extension['types'].split('<br/>'):
            this_type = this_type.lower()
            if '--' in this_type:
                # Strip a trailing HTML comment from the type value.
                this_type = this_type.split('<!--')[0]
            extensions_by_type.setdefault(this_type, []).append(extension)
    else:
        extensions_by_type.setdefault('notype', []).append(extension)

# Drop rarely used types. Iterate over a snapshot of the keys because the
# dict is modified inside the loop.
for this_type in list(extensions_by_type):
    if len(extensions_by_type[this_type]) < MIN_EXTENSIONS_PER_TYPE:
        del extensions_by_type[this_type]

##########################################
# extensions by status
##########################################
extensions_by_status = {}
for extension in extensions_dicts:
    this_extension = extensions_dicts[extension]
    if 'status' in this_extension:
        this_status = this_extension['status'].lower()
        # Make sure this is a single word status - sanity check
        if len(this_status.split(' ')) == 1:
            extensions_by_status.setdefault(this_status, []).append(extension)

##########################################
# Create main extension matrix output page
##########################################
prefix = 'Extension Matrix'
updated = 'Last updated: ' + \
          datetime.datetime.now().strftime("%Y-%m-%d %H:%M") + ' MST. '
num_listed = 'Listing ' + str(len(extensions_dicts)) + \
             ' out of ' + str(len(extensions)) + \
             ' members of [[:Category:Extensions]]<br/>'
extension_matrix = updated + num_listed + '\n'
extension_matrix += '== Entire Extension Matrix ==\n'
extension_matrix += '* [[' + prefix + '/AllExtensions|View all extensions]] (very large!)\n'

# Create the entire extension matrix
_save_matrix(prefix + "/AllExtensions", sorted_matrix)

# One subpage for each version of mediawiki that has extensions which mention it
extension_matrix += '== By explicitly supported MediaWiki version ==\n* '
for minor in MW_MINOR_VERSIONS:
    version = '1.' + str(minor)
    if version in extensions_by_mw_version:
        extension_matrix += _group_link(version, extensions_by_mw_version[version])
extension_matrix += '\n'

# One subpage for each type of status
extension_matrix += '== By status of extension ==\n*'
for this_status in sorted(extensions_by_status):
    extension_matrix += _group_link(this_status, extensions_by_status[this_status])
extension_matrix += '\n'

# One subpage for each extension type
extension_matrix += '== By type of extension ==\n* '
type_keys = sorted(extensions_by_type)
# [1:] gets rid of weird 'Alterego/ExtensionMatrix' type
# NOTE(review): this unconditionally drops whichever type sorts first; if the
# weird type is ever absent a legitimate type is dropped instead - confirm.
for this_type in type_keys[1:]:
    extension_matrix += _group_link(this_type, extensions_by_type[this_type])
extension_matrix += '\n'

# Most recently created extensions
extension_matrix += _recent_section(
    'most recently created extensions', recently_created, 'created', 'Extension')

# Most recently edited extension pages
extension_matrix += _recent_section(
    'most recently edited extension pages', recently_edited, 'lastupdated', 'Extension')

# Most recently edited extension talk pages
extension_matrix += _recent_section(
    'most recently edited extension talk pages', recently_discussed,
    'lastupdatedtalk', 'Extension_talk')

# Most recently updated extensions (by the template's 'update' parameter)
extension_matrix += _recent_section(
    'most recently updated extensions', recently_updated, 'update', 'Extension')

page = site.Pages[prefix]
page.save(extension_matrix)