Extension:Webservice/webservice.php
From MediaWiki.org
<?php
# Example MediaWiki extension
# with MediaWiki's extension mechanism it is possible to define
# new tags of the form
# <TAGNAME> some text </TAGNAME>
# the function registered by the extension gets the text between the
# tags as input and can transform it into arbitrary HTML code.
# Note: The output is not interpreted as WikiText but directly
# included in the HTML output. So Wiki markup is not supported.
# To activate the extension, include it from your LocalSettings.php
# with: include("extensions/YourExtensionName.php");

if( !defined( 'MEDIAWIKI' ) ) {
    die();
}

$wgExtensionFunctions[] = "wfWebserviceExtension";
$wgHooks['LanguageGetMagic'][] = 'wfWebserviceExtensionMagic';
$wgExtensionCredits['other'][] = array(
    'name' => 'Webservice',
    'author' => 'Chris Reigrut',
    'version' => '0.3.0',
    'url' => 'http://www.mediawiki.org/wiki/Extension:Webservice',
    'description' => 'Pull data from an external webservice into a wiki page'
);

/**
 * Extension setup callback (registered in $wgExtensionFunctions).
 *
 * Registers both entry points with the global parser: the <webservice> tag
 * hook and the {{#webservice:...}} parser function hook.
 */
function wfWebserviceExtension() {
    global $wgParser;
    # register the extension with the WikiText parser
    $wgParser->setHook( "webservice", "renderWebserviceTag" );
    $wgParser->setFunctionHook( "webservice", "renderWebserviceParserFunction" );
}

/**
 * LanguageGetMagic hook handler: declares the "webservice" magic word.
 *
 * @param array  $magicWords Magic word table, modified in place
 * @param string $langCode   Language code (unused here)
 * @return bool Always true so that other hook handlers keep running
 */
function wfWebserviceExtensionMagic( &$magicWords, $langCode ) {
    # Add the magic word
    # The first array element is the case-sensitivity flag; 0 means the word is not case sensitive
    # All remaining elements are synonyms for our parser function
    $magicWords['webservice'] = array( 0, 'webservice' );
    # unless we return true, other parser functions extensions won't get loaded.
    return true;
}

/**
 * Parser function entry point.
 *
 * Disables parser caching for the page, expands the template against the
 * webservice response and then lets the calling parser expand any embedded
 * variables/templates in the result.
 *
 * @param Parser $parser   The calling parser
 * @param string $url      Webservice URL to fetch
 * @param string $wikitext Template wikitext containing %xpath% placeholders
 * @param string $options  Space separated option keywords (IE, TIDY-XML, CACHE:n, ...)
 * @return string Wikitext after variable replacement
 */
function renderWebserviceParserFunction( &$parser, $url = '', $wikitext = '', $options = '' ) {
    $parser->disableCache();
    $wikitext = renderWebservice($url, $wikitext, $options);

    //Needed after upgrade to 1.13.2 to continue to process embedded functions/templates
    return $parser->replaceVariables($wikitext);
}

/**
 * Tag hook entry point: the callback function for converting the input text
 * to HTML output.
 *
 * NOTE(review): the URL is read from the tag's "racf" attribute -- confirm
 * that this is the intended attribute name.
 *
 * @param string $input  Text between the opening and closing tag (the template wikitext)
 * @param array  $argv   Tag attributes; "racf" and "options" are read
 * @param Parser $parser The calling parser
 * @return string Rendered HTML
 */
function renderWebserviceTag( $input, $argv, &$parser ) {
    // Missing attributes fall back to '' instead of raising an undefined-index notice
    $url = isset($argv['racf']) ? $argv['racf'] : '';
    $options = isset($argv['options']) ? $argv['options'] : '';

    $wikitext = renderWebservice($url, $input, $options);

    # Parse the wikitext with a local parser and return it
    $localParser = new Parser();
    $localParser->disableCache();
    return $localParser->parse($wikitext, $parser->mTitle, $parser->mOptions, false)->getText();
}

/**
 * Fetch XML from a webservice (or from the cache) and substitute it into a
 * wikitext template.
 *
 * Every %xpath% placeholder in $wikitext is replaced by the node value of the
 * first node the XPath expression selects; %% is replaced by a literal %.
 * On any failure an error <span> is returned and the cache entry is removed.
 *
 * @param string $url      Webservice URL
 * @param string $wikitext Template wikitext containing %xpath% placeholders
 * @param string $options  Space separated option keywords
 * @return string Wikitext with placeholders substituted, or an error <span>
 */
function renderWebservice($url, $wikitext, $options) {
    global $parserMemc;

    $cache = $parserMemc; // Cache XML in the parser cache
    $output = '';
    $error = '';
    // Stays 0 on a cache hit, so the error messages below never read an undefined variable
    $errno = 0;

    // Tidied responses are cached under a separate key per tidy mode
    if (option_tidy_xhtml($options)) {
        $cacheKey = wfMemcKey('ws', $url, 'XHTML');
    } else if (option_tidy_xml($options)) {
        $cacheKey = wfMemcKey('ws', $url, 'XML');
    } else {
        $cacheKey = wfMemcKey('ws', $url);
    }

    $xml = $cache->get($cacheKey);

    if ($xml == null) {
        // Nothing usable in the cache: fetch from the webservice
        $ch = getCurlConnection($url, $options);
        $xml = curl_exec($ch);
        $errno = curl_errno($ch);
        curl_close($ch);

        if ($errno == 0 && $xml !== false) {
            // Tidy document, if requested
            if (stripos($options, 'TIDY') !== false) {
                $xml = tidy($xml, $options);
            }

            // tidy() returns null on a fatal error; leave it null so it is reported below
            if ($xml !== null) {
                // Hack to remove a default namespace, which causes parsing problems
                $xml = str_replace("xmlns=", "defaultns=", $xml);

                // Only cache when a CACHE:<seconds> option was given (no default cache period)
                $cachePeriod = option_cache($options);
                if ($cachePeriod != null) {
                    $cache->set($cacheKey, $xml, (int)$cachePeriod);
                }
            }
        } else {
            $error = "Webservice error {$errno}: Unable to contact webservice";
        }
    }

    if ($error == '') {
        if ($xml != null) {
            if ($wikitext == '') {
                $output .= "<pre>No wikitext specified! XML is:\n$xml</pre>";
            } else {
                // load xml document
                $dom = new DomDocument();
                if (@$dom->loadXML($xml)) {
                    // create DOMXPath object
                    $xpath = new Domxpath($dom);

                    // Find all instances of text delimited by % signs
                    preg_match_all('/%[^%]*%/', $wikitext, $matches, PREG_OFFSET_CAPTURE);

                    // Walk the matches back to front so earlier offsets stay valid while replacing
                    foreach (array_reverse($matches[0]) as $match) {
                        if (strlen($match[0]) == 2) {
                            // Found %% in the wikitext, so replace it with %
                            $wikitext = substr_replace($wikitext, '%', $match[1], 2);
                        } else {
                            // Found some xpath between % signs in the wikitext, so replace
                            // the whole thing with what the xpath points to
                            $xpathquery = substr($match[0], 1, -1);
                            $xpathresult = @$xpath->query($xpathquery);
                            $xpathresulttext = '';

                            if ($xpathresult !== false && $xpathresult !== null) {
                                if ($xpathresult->length > 0) {
                                    $xpathresulttext = $xpathresult->item(0)->nodeValue;
                                } else {
                                    // Xpath was OK, but returned no results
                                    $xpathresulttext = "<nowiki>Webservice warning: No value </nowiki><span class=\"error\" style='display:none'><nowiki>for $xpathquery</nowiki></span>";
                                    // Clear out cache in case this is an error with the back-end service
                                    $cache->delete($cacheKey);
                                }
                            } else {
                                $xpathresulttext = "<nowiki>Webservice warning: Bad xpath </nowiki><span class=\"error\" style='display:none'><nowiki>for $xpathquery</nowiki></span>";
                            }

                            $wikitext = substr_replace($wikitext, $xpathresulttext, $match[1], strlen($match[0]));
                        }
                    }

                    $output .= $wikitext;
                } else {
                    $error = "Webservice error {$errno}: Invalid XML";
                }
            }
        } else {
            $error = "Webservice error {$errno}: XML is null!";
        }
    }

    if ($error != '') {
        $output = "<span class=\"error\">$error</span>";
        // Never keep a cached response that led to an error
        $cache->delete($cacheKey);
    }

    return $output;
}

/**
 * Return a CURL connection object with all of the appropriate settings.
 *
 * @param string $url     URL to connect to
 * @param string $options Space separated option keywords (IE and COOKIE:<value> are honoured here)
 * @return resource|object|false Handle as returned by curl_init()
 */
function getCurlConnection($url, $options) {
    global $wgHTTPProxy;

    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);

    // Handle https connections
    // NOTE(review): this turns off peer certificate verification for https URLs,
    // which is a security risk -- confirm whether callers still depend on it.
    if (stripos($url, 'HTTPS:') !== false) {
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    }

    // Masquerade as IE, if requested
    if (options_ie($options)) {
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)');
    }

    // Add a cookie, if specified
    $cookie = option_cookie($options);
    if ($cookie != null) {
        curl_setopt($ch, CURLOPT_COOKIE, $cookie);
    }

    // Add proxy information, if needed
    if ($wgHTTPProxy !== false && isProxiedURL($url)) {
        curl_setopt($ch, CURLOPT_PROXY, $wgHTTPProxy);
    }

    return $ch;
}

//Determine whether or not the given URL should go through the proxy, based on the array of
//proxy exceptions. Might want to bake this into Mediawiki itself later.
/**
 * Decide whether a URL has to be fetched through the proxy.
 *
 * @param string $url URL to test against the global $wgHTTPProxyExceptions patterns
 * @return bool false when the URL matches one of the exception regexes, true otherwise
 */
function isProxiedURL($url) {
    global $wgHTTPProxyExceptions;

    if (is_array($wgHTTPProxyExceptions)) {
        foreach ($wgHTTPProxyExceptions as $ex) {
            if (preg_match($ex, $url)) {
                // URL matches a proxy exception, so it's not proxied
                return false;
            }
        }
    }

    // URL didn't match any of the proxy exceptions, so it is proxied
    return true;
}

/**
 * Call tidy via the PHP5 internal interface. In order to make this more generic,
 * we should probably also support external and possibly pre-PHP5 tidy in the future.
 *
 * @param string $xml     Markup to clean up
 * @param string $options Space separated option keywords (ENCODING:<name>, TIDY-XHTML, TIDY-XML)
 * @return string|null Tidied markup, or null when tidy could not parse the
 *                     input or reported a fatal error
 */
function tidy($xml, $options) {
    $tidyxml = null;

    //Don't wrap text--it causes potential parsing problems later
    //Mediawiki expects UTF-8, so set that, too
    $tidyConf = array(
        'wrap' => FALSE,
        'output-encoding' => 'utf8',
    );

    // Set the input encoding, if specified (otherwise, use UTF-8)
    $inputEncoding = option_encoding($options);
    if ($inputEncoding != null) {
        $tidyConf['input-encoding'] = $inputEncoding;
    } else {
        $tidyConf['input-encoding'] = 'utf8';
    }

    // Process TIDY options
    if (option_tidy_xhtml($options)) {
        $tidyConf['output-xhtml'] = TRUE;
        $tidyConf['numeric-entities'] = TRUE;
    }
    if (option_tidy_xml($options)) {
        $tidyConf['output-xml'] = TRUE;
        $tidyConf['numeric-entities'] = TRUE;
    }

    $tidy = tidy_parse_string( $xml, $tidyConf, $tidyConf['input-encoding'] );
    if ($tidy === false) {
        // Parsing failed outright; report it the same way as a fatal tidy status
        return null;
    }

    tidy_clean_repair($tidy);

    if( tidy_get_status($tidy) != 2 ) {
        // 2 is magic number for fatal error
        // http://www.php.net/manual/en/function.tidy-get-status.php
        $tidyxml = tidy_get_output($tidy);
    }

    return $tidyxml;
}

/** True when the IE keyword is present in the options string. */
function options_ie($options) {
    return bool_regex('/\bIE\b/', $options);
}

/** True when the TIDY-XHTML keyword is present in the options string. */
function option_tidy_xhtml($options) {
    return bool_regex('/\bTIDY-XHTML\b/', $options);
}

/** True when the TIDY-XML keyword is present in the options string. */
function option_tidy_xml($options) {
    return bool_regex('/\bTIDY-XML\b/', $options);
}

/** Value following COOKIE: in the options string, or null when the keyword is absent. */
function option_cookie($options) {
    return regex('/\bCOOKIE:(\S*)/', $options);
}

/** Digits following CACHE: in the options string, or null when the keyword is absent. */
function option_cache($options) {
    return regex('/\bCACHE:(\d*)/', $options);
}

/** Value following ENCODING: in the options string, or null when the keyword is absent. */
function option_encoding($options) {
    return regex('/\bENCODING:(\S*)/', $options);
}

/**
 * Test whether a pattern yields a non-empty match in the options string.
 *
 * @param string $regex   PCRE pattern
 * @param string $options Options string to search
 * @return bool
 */
function bool_regex($regex, $options) {
    return regex($regex, $options) != null;
}

/**
 * Run a pattern against the options string and return the interesting part.
 *
 * @param string      $regex   PCRE pattern, optionally with one capture group
 * @param string|null $options Options string to search (null is treated as '')
 * @return string|null First capture group if the pattern has one, otherwise
 *                     the whole match; null when nothing matched
 */
function regex($regex, $options) {
    $result = null;

    // Cast so that a missing (null) options value is searched as an empty string
    if (preg_match($regex, (string)$options, $matches)) {
        $result = (count($matches) > 1) ? $matches[1] : $matches[0];
    }

    return $result;
}
