Extension:Webservice/webservice.php

From MediaWiki.org
Jump to: navigation, search
<?php
# Example MediaWiki extension
# With MediaWiki's extension mechanism it is possible to define
# new tags of the form
# <TAGNAME> some text </TAGNAME>
# The function registered by the extension gets the text between the
# tags as input and can transform it into arbitrary HTML code.
# Note: The output is not interpreted as wikitext but directly
#       included in the HTML output, so wiki markup is not supported.
# To activate the extension, include it from your LocalSettings.php
# with: include("extensions/YourExtensionName.php");

# Stop immediately unless the MEDIAWIKI constant is defined -- presumably
# this keeps the file from doing anything when it is not loaded by MediaWiki.
if ( defined( 'MEDIAWIKI' ) === false ) {
        exit();
}
 
# Descriptive metadata for this extension, filed under the 'other' category.
$wgExtensionCredits['other'][] = array(
        'name'        => 'Webservice',
        'author'      => 'Chris Reigrut',
        'version'     => '0.3.0',
        'url'         => 'http://www.mediawiki.org/wiki/Extension:Webservice',
        'description' => 'Pull data from an external webservice into a wiki page',
);

# Register the magic-word handler and queue the setup function that attaches
# the tag hook and the parser-function hook.
$wgHooks['LanguageGetMagic'][] = 'wfWebserviceExtensionMagic';
$wgExtensionFunctions[] = 'wfWebserviceExtension';
 
# Setup callback (queued through $wgExtensionFunctions): attaches the
# "webservice" tag handler and the "webservice" parser function to the
# global parser object.
function wfWebserviceExtension() {
        global $wgParser;

        // Tag form, handled by renderWebserviceTag()
        $wgParser->setHook( 'webservice', 'renderWebserviceTag' );

        // Parser-function form, handled by renderWebserviceParserFunction()
        $wgParser->setFunctionHook( 'webservice', 'renderWebserviceParserFunction' );
}
 
# LanguageGetMagic hook handler: declares the "webservice" magic word.
#
# $magicWords - magic word table, modified in place
# $langCode   - language code supplied by the hook (not used here)
#
# Always returns true.
function wfWebserviceExtensionMagic( &$magicWords, $langCode ) {
        # Element 0 is the case-sensitivity flag (0 = not case sensitive);
        # every following element is a name for our parser function.
        $definition = array( 0, 'webservice' );
        $magicWords['webservice'] = $definition;

        # Must be true, otherwise other parser function extensions won't get loaded.
        return true;
}
 
// Parser-function callback: renders the webservice result and hands it back
// to the parser for further expansion.
//
// $parser   - the parser invoking the function
// $url      - address of the webservice
// $wikitext - template text whose %xpath% placeholders get filled in
// $options  - option string understood by renderWebservice()
function renderWebserviceParserFunction( &$parser, $url = '', $wikitext = '', $options = '' ) {
        // Switch off caching on the calling parser before rendering
        $parser->disableCache();

        $rendered = renderWebservice($url, $wikitext, $options);

        //Needed after upgrade to 1.13.2 to continue to process embedded functions/templates
        return $parser->replaceVariables($rendered);
}
 
# Callback for the <webservice> tag: fills the tag body (treated as a wikitext
# template) with values from the webservice response and returns the HTML
# produced by parsing that wikitext.
#
# $input  - text between the opening and closing tag
# $argv   - tag attributes; 'racf' carries the webservice URL and 'options'
#           the option string.  Either may be omitted.
# $parser - the parser that is rendering the page
function renderWebserviceTag( $input, $argv, &$parser ) {
        // Attributes left out by the editor must not raise undefined-index
        // notices or pass null into the option regexes; default them to ''.
        $url = isset($argv['racf']) ? $argv['racf'] : '';
        $options = isset($argv['options']) ? $argv['options'] : '';

        // Disable caching on the parser that renders the page, exactly as
        // renderWebserviceParserFunction() does.
        // NOTE(review): the original called disableCache() on the freshly created
        // local parser below, whose output object is discarded apart from its
        // text -- presumably that never affected caching of the page; confirm
        // against the MediaWiki version in use.
        $parser->disableCache();

        $wikitext = renderWebservice($url, $input, $options);

        # Parse the wikitext with a local parser and return it
        $localParser = new Parser();
        return $localParser->parse($wikitext, $parser->mTitle, $parser->mOptions, false)->getText();
}
 
// Fetch the XML behind $url (from the cache when present) and substitute the
// %xpath% placeholders in $wikitext with values taken from that XML.
//
// $url      - address of the webservice
// $wikitext - template text; "%<xpath>%" is replaced by the value of the first
//             node the XPath selects, "%%" by a literal percent sign
// $options  - option string; TIDY-XHTML, TIDY-XML and CACHE:<n> are read here,
//             the rest is passed on to getCurlConnection() and tidy()
//
// Returns the filled-in wikitext, a <pre> dump of the XML when $wikitext is
// empty, or a <span class="error"> message when something went wrong.
function renderWebservice($url, $wikitext, $options) {
        global $parserMemc;
        $cache = $parserMemc; // Cache XML in the parser cache

        $output = '';
        $error = '';
        // Set up front: on a cache hit no request is made, yet the error
        // messages below still interpolate $errno.
        $errno = 0;

        // Tidied variants get their own cache entries
        if (option_tidy_xhtml($options)) {
                $cacheKey = wfMemcKey('ws',$url, 'XHTML');
        } else if (option_tidy_xml($options)) {
                $cacheKey = wfMemcKey('ws',$url, 'XML');
        } else {
                $cacheKey = wfMemcKey('ws',$url);
        }

        $xml = $cache->get($cacheKey);
        if ($xml == null) {
                // Nothing usable in the cache: ask the webservice
                $ch = getCurlConnection($url, $options);

                $xml = curl_exec ( $ch );
                $errno = curl_errno($ch);
                curl_close($ch);

                if ($errno == 0 && $xml!==false) {
                        // Tidy document, if requested
                        if (stripos($options,'TIDY')!== false) {
                                $xml = tidy($xml, $options);
                        }

                        // Hack to remove a default namespace, which causes parsing problems
                        $xml = str_replace("xmlns=", "defaultns=", $xml);

                        // Only cache when a CACHE:<n> option was given; there is no
                        // default cache period.
                        $cachePeriod = option_cache($options);
                        if ($cachePeriod != null) {
                                $cache->set( $cacheKey, $xml, $cachePeriod);
                        }
                } else {
                        $error = "Webservice error {$errno}: Unable to contact webservice";
                }
        }

        if ($error == '') {
                if ($xml != null) {
                        if ($wikitext == '') {
                                $output .= "<pre>No wikitext specified!  XML is:\n$xml</pre>";
                        } else {
                                // load xml document
                                $dom = new DomDocument();

                                if (@$dom->loadXML($xml)) {
                                        // create DOMXPath object
                                        $xpath = new Domxpath($dom);

                                        // Find all instances of text delimited by % signs.  They are
                                        // processed last-to-first so that the captured offsets of the
                                        // earlier matches stay valid while the text is being edited.
                                        preg_match_all('/%[^%]*%/', $wikitext, $matches, PREG_OFFSET_CAPTURE);
                                        foreach(array_reverse($matches[0]) as $match) {
                                                if (strlen($match[0])==2) {
                                                        // Found %% in the wikitext, so replace it with %
                                                        $wikitext = substr_replace($wikitext, '%', $match[1], 2);
                                                } else {
                                                        // Found some xpath between % signs in the wikitext, so replace the whole thing with what the xpath points to
                                                        $xpathquery = substr($match[0],1,-1);
                                                        @$xpathresult = $xpath->query($xpathquery);
                                                        $xpathresulttext = '';
                                                        if ($xpathresult != NULL) {
                                                                if ($xpathresult->length > 0) {
                                                                        $xpathresulttext = $xpathresult->item(0)->nodeValue;
                                                                } else {
                                                                        // Xpath was OK, but returned no results
                                                                        $xpathresulttext = "<nowiki>Webservice warning: No value </nowiki><span class=\"error\" style='display:none'><nowiki>for $xpathquery</nowiki></span>";
                                                                        // Clear out cache in case this is an error with the back-end service
                                                                        $cache->delete($cacheKey);
                                                                }
                                                        } else {
                                                                $xpathresulttext = "<nowiki>Webservice warning: Bad xpath </nowiki><span class=\"error\" style='display:none'><nowiki>for $xpathquery</nowiki></span>";
                                                        }
                                                        $wikitext = substr_replace($wikitext, $xpathresulttext , $match[1], strlen($match[0]));
                                                }
                                        }
                                        $output .= $wikitext;
                                } else {
                                        $error = "Webservice error {$errno}: Invalid XML";
                                }
                        }
                } else {
                        $error = "Webservice error {$errno}: XML is null!";
                }
        }

        if ($error != '') {
                // An error replaces any output and evicts the cache entry
                $output = "<span class=\"error\">$error</span>";
                $cache->delete($cacheKey);
        }
        return $output;
}
 
// Return a CURL connection object with all of the appropriate settings
//
// $url     - address to fetch
// $options - option string; IE and COOKIE:<value> are honoured here
//
// Returns the configured cURL handle; the request itself is not executed here.
function getCurlConnection($url, $options) {
        global $wgHTTPProxy;

        $ch = curl_init($url);

        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);

        // The URL is supplied by whoever edits the page, so confine cURL to
        // web protocols; otherwise schemes such as file:// would be fetched
        // as well.  The constants do not exist on old PHP/libcurl builds,
        // hence the defined() guard.
        if (defined('CURLOPT_PROTOCOLS') && defined('CURLPROTO_HTTP') && defined('CURLPROTO_HTTPS')) {
                curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        }

        // Handle https connections
        // NOTE(review): certificate verification is switched off here, so the
        // identity of the remote server is not checked.  Left unchanged because
        // existing pages may rely on it -- consider making it configurable.
        if (stripos($url, 'HTTPS:') !== false) {
                curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
        }

        // Masquerade as IE, if requested
        if (options_ie($options)) {
                curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)');
        }

        // Add a cookie, if specified
        $cookie = option_cookie($options);
        if ($cookie != null) {
                curl_setopt($ch, CURLOPT_COOKIE, $cookie);
        }

        // Add proxy information, if needed
        if ($wgHTTPProxy !== false && isProxiedURL($url)) {
                curl_setopt($ch, CURLOPT_PROXY, $wgHTTPProxy);
        }

        return $ch;
}
 
// Decide whether a request for $url has to go through the proxy.
// $wgHTTPProxyExceptions, when it is an array, holds regular expressions; a
// URL matching any of them bypasses the proxy.  Might want to bake this into
// Mediawiki itself later.
//
// Returns false for an excepted URL, true otherwise.
function isProxiedURL($url) {
        global $wgHTTPProxyExceptions;

        // No exception list configured: everything is proxied
        if (!is_array($wgHTTPProxyExceptions)) {
                return true;
        }

        foreach ($wgHTTPProxyExceptions as $pattern) {
                if (preg_match($pattern, $url)) {
                        // First matching exception wins
                        return false;
                }
        }

        return true;
}
 
// Call tidy via the PHP5 internal interface.  In order to make this more generic,
// we should probably also support external and possibly pre-PHP5 tidy in the future.
//
// $xml     - markup to clean up
// $options - option string; ENCODING:<name>, TIDY-XHTML and TIDY-XML are read here
//
// Returns the repaired markup, or null when tidy could not parse the input or
// reported a fatal error.
function tidy($xml, $options) {
        // Set the input encoding, if specified (otherwise, use UTF-8)
        $inputEncoding = option_encoding($options);
        if ($inputEncoding == null) {
                $inputEncoding = 'utf8';
        }

        //Don't wrap text--it causes potential parsing problems later
        //Mediawiki expects UTF-8, so set that, too
        $tidyConf = array(
                'wrap' => FALSE,
                'output-encoding' => 'utf8',
                'input-encoding' => $inputEncoding,
        );

        // Process TIDY options
        if (option_tidy_xhtml($options)) {
                $tidyConf['output-xhtml'] = TRUE;
                $tidyConf['numeric-entities'] = TRUE;
        }

        if (option_tidy_xml($options)) {
                $tidyConf['output-xml'] = TRUE;
                $tidyConf['numeric-entities'] = TRUE;
        }

        $tidy = tidy_parse_string( $xml, $tidyConf, $inputEncoding );
        if (!$tidy) {
                // tidy_parse_string() yields false on failure (the encoding comes
                // from the page's option string and may be bogus); report that the
                // same way as a fatal tidy status instead of passing false on.
                return null;
        }

        tidy_clean_repair($tidy);
        if( tidy_get_status($tidy) == 2 ) {
                // 2 is magic number for fatal error
                // http://www.php.net/manual/en/function.tidy-get-status.php
                return null;
        }

        return tidy_get_output($tidy);
}
 
// True when the option string contains the standalone keyword IE
function options_ie($options) {
        return preg_match('/\bIE\b/', $options) === 1;
}
 
// True when the option string contains the keyword TIDY-XHTML
function option_tidy_xhtml($options) {
        return preg_match('/\bTIDY-XHTML\b/', $options) === 1;
}
 
// True when the option string contains the keyword TIDY-XML
function option_tidy_xml($options) {
        return preg_match('/\bTIDY-XML\b/', $options) === 1;
}
 
// Extract the value following "COOKIE:" (everything up to the next whitespace).
// Returns null when the option string has no COOKIE: entry.
function option_cookie($options) {
        $found = array();
        if (!preg_match('/\bCOOKIE:(\S*)/', $options, $found)) {
                return null;
        }
        return $found[1];
}
 
// Extract the digits following "CACHE:" as a string.
// Returns null when the option string has no CACHE: entry.
function option_cache($options) {
        $found = array();
        if (!preg_match('/\bCACHE:(\d*)/', $options, $found)) {
                return null;
        }
        return $found[1];
}
 
// Extract the value following "ENCODING:" (everything up to the next whitespace).
// Returns null when the option string has no ENCODING: entry.
function option_encoding($options) {
        $found = array();
        if (!preg_match('/\bENCODING:(\S*)/', $options, $found)) {
                return null;
        }
        return $found[1];
}
 
// Boolean wrapper around regex(): true when its result is not loosely equal
// to null.
function bool_regex($regex, $options) {
        return regex($regex, $options) != null;
}
 
// Match $regex against $options.
// Returns the first capture group when the match produced one, the whole
// match otherwise, and null when nothing matched.
function regex($regex, $options) {
        $matches = array();
        if (!preg_match($regex, $options, $matches)) {
                return null;
        }

        return count($matches) > 1 ? $matches[1] : $matches[0];
}
Personal tools
Namespaces

Variants
Actions
Navigation
Support
Download
Development
Communication
Print/export
Toolbox