Canonical interwiki prefixes/PMWTableToWikiTable.php

This script takes the PMWTable generated by Chris G's botclasses/ParseMirroredWikiIndexBot.php and converts it to a wikitable. You can then put that in your MediaWiki:Interwiki-whitelist for use by InterwikiMap.

<?php
/**
 * This script takes the PMWTable generated by ParseMirroredWikiIndex and converts it to a wiki
 * table. You can then put that in your MediaWiki:Interwiki-whitelist for use by InterwikiMap.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */

// Substrings that mark a generated prefix as already "wiki-like".
// The prefix must have one of these in it, or "wiki" will be appended to the end of it.
$goodPrefixes = array(
    'wiki',
    'pedia'
);

// These will be stripped out of wiki names when generating the wiki prefix
$forbiddenChars = array(
    ' ',
    ':',
    '&',
    '='
);

// Skip these prefixes; they won't make it through the spam blacklist
$blacklist = array(
    'biosites.orgwiki'
);

// Database connection settings
$host = 'localhost';
$dbUser = 'root';
$dbPass = 'REMOVED';
$dbName = 'parse_mirrored_wikiindex_bot';
// Map of table name => SQL file name
$tables = array(
    'parsed_mirrored_wikiindex' => 'parsed-mirrored-wikiindex.sql',
);

// "new mysqli" always yields an object, so a failed connection has to be detected through
// connect_error (or, when mysqli is configured to throw, through mysqli_sql_exception).
try {
    $db = new mysqli( $host, $dbUser, $dbPass, $dbName );
} catch ( mysqli_sql_exception $e ) {
    die( 'Could not connect: ' . $e->getMessage() );
}
if ( $db->connect_error ) {
    die( 'Could not connect: ' . $db->connect_error );
}
if ( !$db->select_db( $dbName ) ) {
    die( 'Could not select database: ' . $db->error );
}

// These statuses should all be lowercase. They are the wikis that will not be screened out
// (private, cannot connect, inactive, and dead wikis are screened out)
$goodStatuses = array(
    'vibrant',
    'active',
    'new',
    'in preparation',
    'dormant',
    'needslove',
    'spammed',
    'goalreached',
    '' // Wikis with no status are included
);

// These are the serialized data fields pulled from WikiIndex; presently, only the URL and
// name are used
$fields = array(
    'wikiindex page title',
    'name',
    'URL',
    'logo',
    'wide logo',
    'iw_url',
    'recentchanges URL',
    'wikinode URL',
    'status',
    'language',
    'editmode',
    'engine',
    'license',
    'maintopic',
    'backupurl',
    'backupdate',
    'pages',
    'statistics URL',
    'wikiFactor',
    'wikiFactor URL',
);

// These are the Special:RecentChanges that will be str_replace'd with $1. If I knew regex, I'd use
// regex, because probably some wiki software has url conventions that require it.
//
// Two kinds of entry live in this array:
//  - plain values (integer keys): the value is the substring to be replaced with '$1';
//  - 'search' => 'replacement' pairs (string keys): engine-specific rewrites.
// Order and duplicates are kept exactly as they were; the consumer walks the whole list.
$RCPossibilities = array(
    'Посебно:СкорашњеИзмене',
    'Посебно:Скорашње_измене',
    'Sipesol:Nupela senis',
    'Extra:Neuste_Änderunge',
    'Extra:Änderunge',
    'Spèciâl:Dèrriérs_changements',
    'Spèciâl:DèrriérsChangements',
    'Speçiale:Ûrtime modiffiche',
    'Sapaq:NaqhaHukchasqa',
    'Wiki:Koartlyn feroare',
    'Wiki:Koarts feroare',
    'Spezial:Letzte_Änderungen',
    'Spécial:Modifications_récentes',
    'Spécial:Modifications_recentes',
    'Spécial:ModificationsRécentes',
    'Spécial:ModificationsRecentes',
    'Especial:Zaguers_cambeos',
    'Especial:Cambeos_recients',
    'Spesiaal:Onlangse_wysigings',
    'Spesiaal:Onlangsewysigings',
    'Specialnje:Aktualne_změny',
    'พิเศษ:ปรับปรุงล่าสุด',
    'Especial:TrocamientosFreskos',
    'Ippiziari:UlthimiMudìfigghi',
    'Spezial:Toletzt ännert',
    'Spezial:Neeste Ännern',
    'Spezial:Toletzt ännert',
    'Spezial:Neeste Ännern',
    'විශේෂ:මෑත_වෙනස්වීම්',
    'Xüsusi:SonDəyişikliklər',
    '特殊:最近更改',
    'Arbednek:Chanjyow_a-dhiwedhes',
    'Служебная:Свежие_правки',
    'Schbädsjaal:Lädsdâ_Änârungâ',
    'Special:Modificationes_recente',
    'Serstakt:Seinastu broytingar',
    'Specialus:Naujausi_keitimai',
    'Spesial:Siste_endringar',
    '特殊:最近更改',
    'Арнайы:Жуықтағы_өзгерістер',
    'Spesial:Siste_endringer',
    'Спэцыяльныя:Апошнія_зьмены',
    'ارنايى:جۋىقتاعى_وزگەرىستەر',
    'ବିଶେଷ:ନଗଦବଦଳ',
    'Speciale:NdryshimeSëFundmi',
    'Special:Nov_changes',
    'Specialis:Nuper mutata',
    'Specialis:Mutationes recentes',
    'Speciális:Friss_változtatások',
    'Speciaal:Lètste_verangeringe',
    'Špeciálne:PoslednéÚpravy',
    'Башка:УлхкомбаньПолафнематне',
    'বিশেষ:শেহতীয়া_সালসলনি',
    'Цастәи:АрҽеираҾыцқәа',
    'Erenoamáš:Varas_rievdadusat',
    'Pàtàkì:ÀwọnÀtúnṣeTuntun',
    'Aptaca:NoeltafBetakseem',
    'Especial:Cambios_recentes',
    'Maalum:MabadalikoyaKaribuni',
    'Сæрмагонд:ФæстагИвдтытæ',
    'Specjalna:Ostatnie_zmiany',
    'Specjalna:OZ',
    'ހާއްޞަ:އެންމެ ފަހުގެ ބަދަލްތައް',
    'Istimiwa:Paubahan_pahanyarnya',
    'Posebno:Nedavne_promjene',
    'Özel:SonDeğişiklikler',
    'Kerfissíða:Nýlegar_breytingar',
    'Speċjali:TibdilRiċenti',
    'പ്രത്യേകം:സമീപകാലമാറ്റങ്ങൾ',
    'Speciální:Poslední_změny',
    'Speciální:Posledni_zmeny',
    'Special:CambiamentRecent',
    'Speciâl:UltinsCambiaments',
    'Maasus:BitkiDiişikmäklär',
    '特別:最近修改',
    'विशेष:अलीकडील_बदल',
    'ځانګړی:اوسني_بدلونونه',
    'Махсус:Соңгы_үзгәртүләр',
    'Istimewa:Perubahan_terbaru',
    'Istimewa:PerubahanTerbaru',
    'Istimewa:RC',
    'Istimewa:PT',
    'מיוחד:שינויים_אחרונים',
    'Speciaal:RecenteWijzigingen',
    'ພິເສດ:ການດັດແກ້ຫຼ້າສຸດ',
    'Սպասարկող:Վերջինփոփոխությունները',
    'Posebno:NedavneIzmjene',
    'વિશેષ:તાજાફેરફારો',
    'Jagleel:Coppite yu mujj',
    '특수:최근바뀜',
    'ܕܝܠܢܝܐ:ܫܘܚܠܦ̈ܐ_ܚܕ̈ܬܐ',
    'Espesiál:Mudansa_foufoun_sira',
    'Специјална:СкорешниПромени',
    'Husus:AnyarRobah',
    'خاص:اخر_تعديلات',
    'Xısusi:VurnayışêPeyêni',
    'באַזונדער:לעצטע_ענדערונגען',
    'Specialine:TantoižedToižetused',
    'Speciale:UltimeModifiche',
    'სპეციალური:ბოლოცვლილებები',
    'Arnawlı:Aqırg\'ı o\'zgerisler',
    'Espesyal:Bag-ongGiusab',
    'Spiciali:UltimeModifiche',
    'Especial:Mudanças_recentes',
    'Especial:Recentes',
    'Especial:Mudanças_recentes',
    'Espesyal:ChanjmanResan',
    'Especial:CambiosRecientes',
    'Especial:Cambios_recientes',
    'ప్రత్యేక:ఇటీవలిమార్పులు',
    'Manokana:Fanovàna_farany',
    'विशेषम्:नवीनतम_परिवर्तन',
    'ویژه:تغییرات_اخیر',
    'خاص:أحدث_التغييرات',
    '特別:最近の更新',
    '特別:最近更新したページ',
    'Đặc_biệt:Thay_đổi_gần_đây',
    'Башка тевень:ЧыяконьПолавтомат',
    'Toiminnot:Tuoreet_muutokset',
    'Arnaýı:Jwıqtağı_özgerister',
    'حاص:نوکین تغییرات',
    'Posebno:ZadnjeSpremembe',
    'Special:Schimbări_recente',
    'Spezial:Rezent_Ännerungen',
    'Berezi:AzkenAldaketak',
    'Xususi:Ән_нујә_дәгишон',
    'Taybet:Guherandinên_dawî',
    'Posebno:Nedavne_izmjene',
    'Specialaĵo:Lastaj_ŝanĝoj',
    'Especial:Darrièrs_cambiaments',
    'Especial:DarrièrsCambiaments',
    'Especial:Darrièras_Modificacions',
    'ពិសេស:បំលាស់ប្ដូរថ្មីៗ',
    'Ειδικό:ΠρόσφατεςΑλλαγές',
    'Eri:Viimased_muudatused',
    'Speciel:Seneste_ændringer',
    'Papa_nui:NāLoliHou',
    'Papa_nui:NaLoliHou',
    'Special:最近更改',
    'Лӱмын_ыштыме:Пытартыш_тӧрлатымаш-влак',
    'Spesyal:BakaseywanKenki',
    'Спеціальна:Нові_редагування',
    'Natatangi:Mga_huling_binago',
    'Natatangi:HulingBinago',
    'Специални:Последни_промени',
    'Specala:RecentaChanji',
    'تایبەت:دوایین_گۆڕانکارییەکان',
    'Especial:Canvis_recents',
    'Khas:Perubahan_terkini',
    'Special:Senaste_ändringar',
    'Шпеціална:Послїднї_зміны',
    'Speciale:ÙltimiCanbiamenti',
    'Spesiaal:Leste_wiezigingen',
    'Kusuih:Neuubah_baro',
    'Specialne:Aktualne_změny',
    'خاص:تازيون تبديليون',
    'Special:RecentChanges',
    'Patikos:Votükamsnulik',
    'Dibar:KemmoùDiwezhañ',
    'do=recent' => 'id=$1', // AwkiAwki
    'index.php?page=RecentChanges' => 'index.php?page=$1', // Bitweaver
    #'_Recent', // EditMe; disabled because of conflicts with SeedWiki
    'AllRecentChanges', // PmWiki
    'WebChanges', // TKWiki
    '.cgi?RecentChanges' => '.cgi?$1', // UseModWiki
    'space/changes' => '$1', // Wikispaces
);

// Deal with these ones that have an English Special: followed by a foreign language RecentChanges.
// foreach iterates over a snapshot of the array, so appending inside the loop is safe and the
// newly added "Special:..." variants are not themselves re-processed.
foreach ( $RCPossibilities as $RCPossibility ) {
    $exploded = explode( ':', $RCPossibility );
    if ( isset( $exploded[1] ) ) {
        $RCPossibilities[] = 'Special:' . $exploded[1];
    }
}

// Retrieve the interwiki map from the MediaWiki API (siteinfo / interwikimap) as JSON.
$wgInterwikiMapUserAgent = 'User-Agent: LeucosticteBot (http://mediawiki.org/wiki/User:LeucosticteBot)';
$opts = array(
    'http' => array(
        'method' => "GET",
        'header' => $wgInterwikiMapUserAgent
    )
);
$wgInterwikiMapApiArgs = '?action=query&meta=siteinfo&siprop=interwikimap&format=json';
// NOTE(review): Meta-Wiki is normally served from meta.wikimedia.org; confirm that this host
// still answers (or redirects) before relying on it.
$url = 'https://meta.wikipedia.org/w/api.php';
$url .= $wgInterwikiMapApiArgs;
$streamContext = stream_context_create( $opts );
$contents = file_get_contents( $url, false, $streamContext );
if ( !$contents ) {
    die( "Retrieval from $url failed\n" );
}
$apiPull = json_decode( $contents, true );
if ( !$apiPull ) {
    die( "json decode of $url failed\n" );
}
// An API error response is valid JSON but has no query/interwikimap section; stop with a clear
// message instead of indexing into a missing key.
if ( !isset( $apiPull['query']['interwikimap'] ) || !is_array( $apiPull['query']['interwikimap'] ) ) {
    die( "Response from $url contained no interwiki map\n" );
}
$apiPull = $apiPull['query']['interwikimap'];

// prefix => url lookup; initialised so it is defined even when the map is empty.
$apiResult = array();
foreach ( $apiPull as $apiPullElement ) {
    $apiResult[$apiPullElement["prefix"]] = $apiPullElement["url"];
}

// Opening of the generated page: a section heading followed by the start of the wikitable.
$wikitable = implode( '', array(
    '',
    "\n\n==Current interwiki map==\n\n",
    '{| class="plainlinks"',
    "\n",
) );

// Gather stored data from database table and turn every usable wiki into one wikitable row
// ("| prefix || url"), then write the finished table to PMWOutput.txt.
$res = $db->query( "SELECT * FROM parsed_mirrored_wikiindex" );
if ( !$res ) {
    die( 'Query failed: ' . $db->error . "\n" );
}

while ( $row = $res->fetch_assoc() ) {
    // NOTE(review): unserialize() must only ever see data this project wrote itself; it is not
    // safe on untrusted input.
    $unserialized = unserialize( $row['pmw_data'] );
    if ( !is_array( $unserialized ) ) {
        // Corrupt or empty payload: nothing to work with.
        continue;
    }
    $unserialized['wikiindex page title'] = $row['pmw_wikiindex_page_title'];

    // Get the RC URL and convert it to the iw_url
    $unserialized['iw_url'] = '';
    if ( !isset( $unserialized['recentchanges URL'] ) ) {
        continue;
    }
    $rcUrl = strtolower( $unserialized['recentchanges URL'] );
    $unserialized['recentchanges URL'] = $rcUrl;

    foreach ( $RCPossibilities as $key => $RCPossibility ) {
        $RCPossibility = strtolower( $RCPossibility );
        if ( is_int( $key ) ) {
            // Plain entry: the value is the page name to swap for $1.
            $needle = $RCPossibility;
            $replacement = '$1';
        } else {
            // 'search' => 'replacement' entry. The URL has been lowercased, so the search
            // string has to be lowercased as well or it can never match.
            $needle = strtolower( $key );
            $replacement = $RCPossibility;
        }
        // Only rewrite on an actual hit (offset 0 is a hit too); otherwise an earlier
        // successful rewrite would be overwritten by a rule that does not apply.
        if ( $needle !== '' && strpos( $rcUrl, $needle ) !== false ) {
            $unserialized['iw_url'] = str_replace( $needle, $replacement, $rcUrl );
        }
    }

    // No rule matched, so there is no $1 URL to publish for this wiki.
    if ( $unserialized['iw_url'] === '' ) {
        continue;
    }

    if ( !isset( $unserialized['iw_prefix'] ) ) {
        $unserialized['iw_prefix'] = '';
    }
    if ( !isset( $unserialized['status'] ) ) {
        $unserialized['status'] = '';
    }

    // If it's in meta-wiki's map, use that prefix
    foreach ( $apiPull as $apiPullElement ) {
        if ( $unserialized['iw_url'] == $apiPullElement['url'] ) {
            $unserialized['iw_prefix'] = $apiPullElement['prefix'];
        }
    }

    // If it's not in meta-wiki's map, but has an active status, then convert the wiki name
    // or wikiindex page title to a prefix
    if ( !$unserialized['iw_prefix']
        && in_array( strtolower( $unserialized['status'] ), $goodStatuses, true )
    ) {
        if ( isset( $unserialized['name'] ) && $unserialized['name'] !== '' ) {
            $name = $unserialized['name'];
        } else {
            $name = $unserialized['wikiindex page title'];
        }
        $name = strtolower( str_replace( $forbiddenChars, '', $name ) );

        $approvedPrefix = false;
        foreach ( $goodPrefixes as $goodPrefix ) {
            // "!== false" so that a name starting with the marker (offset 0) counts.
            if ( strpos( $name, $goodPrefix ) !== false ) {
                $approvedPrefix = true;
                break;
            }
        }
        if ( !$approvedPrefix ) {
            $name .= 'wiki';
        }

        // The blacklist holds finished prefixes, so it is checked after the prefix is built.
        if ( !in_array( $name, $blacklist, true ) ) {
            $unserialized['iw_prefix'] = $name;
        }
    }

    if ( $unserialized['iw_prefix'] ) {
        $wikitable .= '| ' . $unserialized['iw_prefix'] . ' || ' . $unserialized['iw_url'] . "\n"
            . '|-' . "\n";
    }
}
$res->free();

$wikitable .= "|}\n";

$file = fopen( 'PMWOutput.txt', 'w' );
if ( !$file ) {
    die( "Could not open PMWOutput.txt for writing\n" );
}
fwrite( $file, $wikitable );
fclose( $file );