Extension:Webservice/webservice.php

From MediaWiki.org
Jump to navigation Jump to search
<?php
# Example MediaWiki extension
# With MediaWiki's extension mechanism it is possible to define
# new tags of the form
# <TAGNAME> some text </TAGNAME>
# the function registered by the extension gets the text between the
# tags as input and can transform it into arbitrary HTML code.
# Note: The output is not interpreted as WikiText but directly
#       included in the HTML output. So Wiki markup is not supported.
# To activate the extension, include it from your LocalSettings.php
# with: include("extensions/YourExtensionName.php");

# Refuse to run when this file is requested directly instead of being
# loaded with the MEDIAWIKI constant already defined.
if( !defined( 'MEDIAWIKI' ) ) {
        die();
}

# Queue the setup callback and the magic-word hook; both functions are defined below.
$wgExtensionFunctions[] = "wfWebserviceExtension";
$wgHooks['LanguageGetMagic'][]  = 'wfWebserviceExtensionMagic';

# Descriptive metadata about this extension (name, author, version, home page).
$wgExtensionCredits['other'][] = array(
        'name' => 'Webservice',
        'author' => 'Chris Reigrut',
        'version' => '0.3.0',
        'url' => 'http://www.mediawiki.org/wiki/Extension:Webservice',
        'description' => 'Pull data from an external webservice into a wiki page'
);

/**
 * Setup callback queued in $wgExtensionFunctions.
 *
 * Attaches both entry points of this extension to the global parser:
 * the "webservice" tag and the "webservice" parser function.
 */
function wfWebserviceExtension() {
    global $wgParser;

    # Tag form: handled by renderWebserviceTag()
    $wgParser->setHook( 'webservice', 'renderWebserviceTag' );

    # Parser-function form: handled by renderWebserviceParserFunction()
    $wgParser->setFunctionHook( 'webservice', 'renderWebserviceParserFunction' );
}

/**
 * LanguageGetMagic hook handler: registers the "webservice" magic word.
 *
 * @param array  $magicWords Magic word table, modified in place
 * @param string $langCode   Language code (not used here)
 * @return bool Always true so that other hook handlers keep running
 */
function wfWebserviceExtensionMagic( &$magicWords, $langCode ) {
        # First element is the case-sensitivity flag (0 = not case sensitive);
        # every following element is a synonym for the parser function.
        $caseSensitivity = 0;
        $magicWords['webservice'] = array( $caseSensitivity, 'webservice' );

        # Returning anything but true would stop other parser function
        # extensions from being loaded.
        return true;
}

/**
 * Parser-function entry point.
 *
 * @param Parser $parser   Parser invoking the function
 * @param string $url      Address of the webservice to query
 * @param string $wikitext Template text containing %xpath% placeholders
 * @param string $options  Option string understood by renderWebservice()
 * @return string Wikitext with placeholders filled in and variables expanded
 */
function renderWebserviceParserFunction( &$parser, $url = '', $wikitext = '', $options = '' ) {
	// Turn off caching on the calling parser before producing any output
	$parser->disableCache();

	$filled = renderWebservice($url, $wikitext, $options);

	// Needed after upgrade to 1.13.2 to continue to process embedded functions/templates
	return $parser->replaceVariables($filled);
}

/**
 * Tag entry point: converts the text between the webservice tags to HTML.
 *
 * @param string $input  Text between the opening and closing tag (the wikitext template)
 * @param array  $argv   Tag attributes; 'racf' is passed on as the service URL
 *                       and 'options' as the option string
 * @param Parser $parser Parser that encountered the tag
 * @return string HTML produced by parsing the filled-in wikitext
 */
function renderWebserviceTag( $input, $argv, &$parser ) {
	// Attributes are optional in the markup, so fall back to empty strings
	// instead of reading missing array keys.
	// NOTE(review): the URL is read from an attribute named 'racf' - confirm
	// this is the intended attribute name.
	$url = isset($argv['racf']) ? $argv['racf'] : '';
	$options = isset($argv['options']) ? $argv['options'] : '';

	$wikitext = renderWebservice($url, $input, $options);

	# Parse the wikitext with a local parser and return it
	$localParser = new Parser();
	$localParser->disableCache();
	return $localParser->parse($wikitext, $parser->mTitle, $parser->mOptions, false)->getText();
}

/**
 * Fetch XML from a webservice (or from the parser cache) and use it to fill
 * the placeholders of a wikitext template.
 *
 * Every %...% span in $wikitext is treated as an XPath expression and replaced
 * by the value of the first node it selects; %% yields a literal percent sign.
 *
 * @param string $url      Address of the webservice
 * @param string $wikitext Template text; when empty, the raw XML is shown instead
 * @param string $options  Option string (TIDY..., CACHE:n, IE, COOKIE:..., ENCODING:...)
 * @return string The filled-in wikitext, or an error <span> when something failed
 */
function renderWebservice($url, $wikitext, $options) {
	global $parserMemc;
	$cache = $parserMemc; // Cache XML in the parser cache

	$output = '';
	$error = '';
	// Only a real request changes this; it must still be defined on a cache
	// hit because the error messages further down interpolate it.
	$errno = 0;

	// Tidied variants of the same URL are cached under separate keys
	if (option_tidy_xhtml($options)) {
		$cacheKey = wfMemcKey('ws', $url, 'XHTML');
	} else if (option_tidy_xml($options)) {
		$cacheKey = wfMemcKey('ws', $url, 'XML');
	} else {
		$cacheKey = wfMemcKey('ws', $url);
	}

	$xml = $cache->get($cacheKey);
	if ($xml == null) {
		// Nothing usable in the cache: ask the webservice
		$ch = getCurlConnection($url, $options);
		$xml = curl_exec($ch);
		$errno = curl_errno($ch);
		curl_close($ch);

		if ($errno == 0 && $xml !== false) {
			// Tidy document, if requested
			if (stripos($options, 'TIDY') !== false) {
				$xml = tidy($xml, $options);
			}

			// Hack to remove a default namespace, which causes parsing problems
			$xml = str_replace("xmlns=", "defaultns=", $xml);

			// The response is stored only when a CACHE: option was given
			$cachePeriod = option_cache($options);
			if ($cachePeriod != null) {
				$cache->set($cacheKey, $xml, $cachePeriod);
			}
		} else {
			$error = "Webservice error {$errno}: Unable to contact webservice";
		}
	}

	if ($error == '') {
		if ($xml == null) {
			$error = "Webservice error {$errno}: XML is null!";
		} else if ($wikitext == '') {
			$output .= "<pre>No wikitext specified!  XML is:\n$xml</pre>";
		} else {
			$dom = new DOMDocument();

			if (@$dom->loadXML($xml)) {
				$xpath = new DOMXPath($dom);

				// Find all instances of text delimited by % signs. They are
				// processed last-to-first so that the recorded offsets of
				// earlier matches stay valid while the text is being edited.
				preg_match_all('/%[^%]*%/', $wikitext, $matches, PREG_OFFSET_CAPTURE);
				foreach (array_reverse($matches[0]) as $match) {
					list($token, $offset) = $match;

					if (strlen($token) == 2) {
						// Found %% in the wikitext, so replace it with %
						$wikitext = substr_replace($wikitext, '%', $offset, 2);
						continue;
					}

					// Found some xpath between % signs, so replace the whole
					// thing with what the xpath points to
					$xpathquery = substr($token, 1, -1);
					$xpathresult = @$xpath->query($xpathquery);

					if (!($xpathresult instanceof DOMNodeList)) {
						$xpathresulttext = "<nowiki>Webservice warning: Bad xpath </nowiki><span class=\"error\" style='display:none'><nowiki>for $xpathquery</nowiki></span>";
					} else if ($xpathresult->length > 0) {
						$xpathresulttext = $xpathresult->item(0)->nodeValue;
					} else {
						// Xpath was OK, but returned no results
						$xpathresulttext = "<nowiki>Webservice warning: No value </nowiki><span class=\"error\" style='display:none'><nowiki>for $xpathquery</nowiki></span>";
						// Clear out cache in case this is an error with the back-end service
						$cache->delete($cacheKey);
					}

					$wikitext = substr_replace($wikitext, $xpathresulttext, $offset, strlen($token));
				}
				$output .= $wikitext;
			} else {
				$error = "Webservice error {$errno}: Invalid XML";
			}
		}
	}

	if ($error != '') {
		// Errors replace any partial output and evict the cached entry
		$output = "<span class=\"error\">$error</span>";
		$cache->delete($cacheKey);
	}
	return $output;
}

/**
 * Build a cURL handle for the given URL with all of the appropriate settings.
 *
 * @param string $url     Address the handle will request
 * @param string $options Option string; the IE and COOKIE: options are honoured here
 * @return resource cURL handle, ready to be passed to curl_exec()
 */
function getCurlConnection($url, $options) {
	global $wgHTTPProxy;

	$handle = curl_init($url);

	// Return the body from curl_exec() and give up connecting after 5 seconds
	curl_setopt_array($handle, array(
		CURLOPT_RETURNTRANSFER => 1,
		CURLOPT_CONNECTTIMEOUT => 5,
	));

	// Handle https connections
	// NOTE(review): this turns off certificate verification for any URL that
	// contains "https:" - confirm this is still wanted.
	$mentionsHttps = stripos($url, 'HTTPS:') !== false;
	if ($mentionsHttps) {
		curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, FALSE);
	}

	// Masquerade as IE, if requested
	if (options_ie($options)) {
		curl_setopt($handle, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)');
	}

	// Add a cookie, if specified
	$cookie = option_cookie($options);
	if ($cookie != null) {
		curl_setopt($handle, CURLOPT_COOKIE, $cookie);
	}

	// Add proxy information, if needed
	if ($wgHTTPProxy !== false && isProxiedURL($url)) {
		curl_setopt($handle, CURLOPT_PROXY, $wgHTTPProxy);
	}

	return $handle;
}

/**
 * Decide whether a URL should go through the proxy.
 *
 * $wgHTTPProxyExceptions, when it is an array, holds regular expressions;
 * a URL matching any of them bypasses the proxy. Might want to bake this
 * into Mediawiki itself later.
 *
 * @param string $url URL about to be requested
 * @return bool false when the URL matches an exception, true otherwise
 */
function isProxiedURL($url) {
	global $wgHTTPProxyExceptions;

	// Anything that is not an array is treated as "no exceptions configured"
	$patterns = is_array($wgHTTPProxyExceptions) ? $wgHTTPProxyExceptions : array();

	foreach ($patterns as $pattern) {
		if (preg_match($pattern, $url)) {
			// URL matches a proxy exception, so it's not proxied
			return false;
		}
	}

	// URL didn't match any of the proxy exceptions, so it is proxied
	return true;
}

/**
 * Clean up a document via the PHP5 internal tidy interface. In order to make
 * this more generic, we should probably also support external and possibly
 * pre-PHP5 tidy in the future.
 *
 * @param string $xml     Document text to repair
 * @param string $options Option string; ENCODING:, TIDY-XHTML and TIDY-XML are read here
 * @return string|null Repaired document, or null when tidy could not parse
 *                     the input or reported a fatal error
 */
function tidy($xml, $options) {
	// Set the input encoding, if specified (otherwise, use UTF-8)
	$inputEncoding = option_encoding($options);
	if ($inputEncoding == null) {
		$inputEncoding = 'utf8';
	}

	// Don't wrap text--it causes potential parsing problems later
	// Mediawiki expects UTF-8, so set that, too
	$tidyConf = array(
		'wrap' => FALSE,
		'output-encoding' => 'utf8',
		'input-encoding' => $inputEncoding,
	);

	// Process TIDY options
	if (option_tidy_xhtml($options)) {
		$tidyConf['output-xhtml'] = TRUE;
		$tidyConf['numeric-entities'] = TRUE;
	}

	if (option_tidy_xml($options)) {
		$tidyConf['output-xml'] = TRUE;
		$tidyConf['numeric-entities'] = TRUE;
	}

	$tidy = tidy_parse_string($xml, $tidyConf, $inputEncoding);
	if ($tidy === false) {
		// Parsing failed outright; there is no document to repair
		return null;
	}

	tidy_clean_repair($tidy);
	if (tidy_get_status($tidy) == 2) {
		// 2 is magic number for fatal error
		// http://www.php.net/manual/en/function.tidy-get-status.php
		return null;
	}

	return tidy_get_output($tidy);
}

/**
 * Tell whether the option string contains the standalone word IE.
 *
 * @param string $options Option string
 * @return bool
 */
function options_ie($options) {
	$flag = regex('/\bIE\b/', $options);
	return $flag != null;
}

/**
 * Tell whether the option string contains the word TIDY-XHTML.
 *
 * @param string $options Option string
 * @return bool
 */
function option_tidy_xhtml($options) {
	$flag = regex('/\bTIDY-XHTML\b/', $options);
	return $flag != null;
}

/**
 * Tell whether the option string contains the word TIDY-XML.
 *
 * @param string $options Option string
 * @return bool
 */
function option_tidy_xml($options) {
	$flag = regex('/\bTIDY-XML\b/', $options);
	return $flag != null;
}

/**
 * Extract the value of the COOKIE: option.
 *
 * @param string $options Option string
 * @return string|null The run of non-whitespace characters following
 *                     "COOKIE:", or null when the option is absent
 */
function option_cookie($options) {
	$cookie = regex('/\bCOOKIE:(\S*)/', $options);
	return $cookie;
}

/**
 * Extract the value of the CACHE: option.
 *
 * @param string $options Option string
 * @return string|null The digits following "CACHE:" (as a string), or null
 *                     when the option is absent
 */
function option_cache($options) {
	$period = regex('/\bCACHE:(\d*)/', $options);
	return $period;
}

/**
 * Extract the value of the ENCODING: option.
 *
 * @param string $options Option string
 * @return string|null The run of non-whitespace characters following
 *                     "ENCODING:", or null when the option is absent
 */
function option_encoding($options) {
	$encoding = regex('/\bENCODING:(\S*)/', $options);
	return $encoding;
}

/**
 * Boolean wrapper around regex().
 *
 * @param string $regex   Pattern to look for
 * @param string $options Option string to search
 * @return bool Result of loosely comparing the regex() return value against null
 */
function bool_regex($regex, $options) {
	return regex($regex, $options) != null;
}

/**
 * Run a pattern against the option string and return what it found.
 *
 * @param string $regex   Pattern to apply
 * @param string $options Option string to search
 * @return string|null The first capture group when the match produced one,
 *                     otherwise the whole match; null when nothing matched
 */
function regex($regex, $options) {
	if (!preg_match($regex, $options, $found)) {
		return null;
	}

	// More than one entry means a capture group is available at index 1
	return count($found) > 1 ? $found[1] : $found[0];
}