Extension:SyntaxHighlight GeSHi remote

What can this extension do?
This extension extends Extension:SyntaxHighlight_GeSHi to support highlighting remote files. It allows embedding the whole file or extracting parts of it based on line numbers and special markers inside the remote file.

Unlike similar extensions, this one disallows the inclusion of local files to avoid security issues. Only URLs with a non-empty scheme other than 'file' are allowed.

Usage
The provided patch adds four parameters to the syntaxhighlight tag from Extension:SyntaxHighlight_GeSHi.
 * url
 * Specifies that the content of the syntaxhighlight tag is a URL pointing to a source file


 * filterlines
 * Specifies line numbers or line ranges, separated by commas, which should be extracted from the source file, e.g. filterlines="3-6,10,20-34"


 * filtersections
 * Specifies section titles which should be extracted from the source file. A section starts in the source file at a special token (see the sectionmarker attribute below) followed by the section title. A section ends when a new section starts or when a sectionmarker followed only by white-space is found. For example, with the default sectionmarker a line containing "//> main" starts the section "main" and a line containing only "//>" ends it. Multiple section titles are separated by commas, and each title can be a Perl-compatible regular expression.


 * sectionmarker
 * Specifies a Perl-compatible regular expression, including its delimiters, which matches a section marker in the source file. The default is |\s*//>| which matches a C-style line comment (//) followed directly by a > sign.

Examples
Suppose the following file is accessible via the URL http://mydomain.org/source.cpp

Then,

Limitations

 * No nesting of sections
 * Section markers must be the same inside one source file
 * The MediaWiki cache will prevent modified remote content from showing up on the wiki page. You can work around this in one of the following ways:
 * Use http://www.mediawiki.org/wiki/Extension:MagicNoCache and add the magic word __NOCACHE__ to your page
 * Create a link which purges the cache for a given page by appending ?action=purge to the page URL

Download instructions
Please save the code found below in a file (for example ~/SyntaxHighlight_GeSHi_remote.patch).

Installation
These installation instructions assume that you have already installed Extension:SyntaxHighlight_GeSHi.

Open a terminal and cd into $IP/extensions/SyntaxHighlight_GeSHi. Note: $IP stands for the root directory of your MediaWiki installation, the same directory that holds LocalSettings.php. Then apply the patch to your SyntaxHighlight_GeSHi installation, for example with: patch -p0 < ~/SyntaxHighlight_GeSHi_remote.patch

Code
using GeSHi - Generic Syntax Highlighter',

+	'syntaxhighlight-desc'        => 'Provides syntax highlighting   using GeSHi - Generic Syntax Highlighter, patched for remote file support',

'syntaxhighlight-specify'     => 'You need to specify a language like this:',

'syntaxhighlight-supported'   => 'Supported languages for syntax highlighting:',

'syntaxhighlight-err-loading' => '(error loading supported language list)',

'syntaxhighlight-err-language' => 'Invalid language.',

'geshi.css'                   => '/* CSS placed here will be applied to GeSHi syntax highlighting */',

+	'syntaxhighlight-err-urlscheme' => 'Invalid URL scheme: $1',

+	'syntaxhighlight-err-sectionmarker' => 'sectionmarker $1 must not contain an empty regular expression',

+	'syntaxhighlight-err-sectionmarker-missing' => 'The attribute sectionmarker is missing',

);

/** Message documentation (Message documentation)

Index: SyntaxHighlight_GeSHi.php

=
======================================================

--- SyntaxHighlight_GeSHi.php	(revision 74418)

+++ SyntaxHighlight_GeSHi.php	(working copy)

@@ -43,7 +43,7 @@

$wgExtensionCredits['parserhook']['SyntaxHighlight_GeSHi'] = array(

'path'          => __FILE__,

'name'          => 'SyntaxHighlight',

-	'author'        => array( 'Brion Vibber', 'Tim Starling', 'Rob Church', 'Niklas Laxström' ),

+	'author'        => array( 'Brion Vibber', 'Tim Starling', 'Rob Church', 'Niklas Laxström', 'patched by Sascha for URL support' ),

'descriptionmsg' => 'syntaxhighlight-desc',

'url'           => 'http://www.mediawiki.org/wiki/Extension:SyntaxHighlight_GeSHi',

);

Index: SyntaxHighlight_GeSHi.class.php

=
======================================================

--- SyntaxHighlight_GeSHi.class.php	(revision 74418)

+++ SyntaxHighlight_GeSHi.class.php	(working copy)

@@ -46,7 +46,42 @@

wfProfileOut( __METHOD__ );

return $error;

}

-		$geshi = self::prepare( $text, $lang );

+

+		// validate attributes for remote file support

+		$isurl = false;

+		if( isset( $args['url'] ) ) {

+			$urlscheme = parse_url( $text, PHP_URL_SCHEME );

+			if ( empty($urlscheme) || strcasecmp( $urlscheme, "file" ) == 0 ) {

+				$error = self::formatError( htmlspecialchars( wfMsgForContent( 'syntaxhighlight-err-urlscheme', $urlscheme ) ) );

+				wfProfileOut( __METHOD__ );

+				return $error;

+			}

+			$isurl = true;

+		}

+		$sectionmarker = "|\s*//>|";

+		if( isset( $args['sectionmarker'] ) ) {

+			$sectionmarker = $args['sectionmarker'];

+			if (strlen($sectionmarker) < 3) {

+				$error = self::formatError( htmlspecialchars( wfMsgForContent( 'syntaxhighlight-err-sectionmarker', $sectionmarker ) ) );

+				wfProfileOut( __METHOD__ );

+				return $error;

+			}

+		}

+		$filtersections = "";

+		if ( isset( $args['filtersections'] ) ) {

+			$filtersections = $args['filtersections'];

+		}

+		if ( empty($filtersections) == false && empty($sectionmarker) ) {

+			$error = self::formatError( htmlspecialchars( wfMsgForContent( 'syntaxhighlight-err-sectionmarker-missing' ) ) );

+			wfProfileOut( __METHOD__ );

+			return $error;

+		}

+		$filterlines = "";

+		if ( isset( $args['filterlines'] ) ) {

+			$filterlines = $args['filterlines'];

+		}

+

+		$geshi = self::prepare( $text, $lang, $isurl, $sectionmarker, $filtersections, $filterlines );

if( !$geshi instanceof GeSHi ) {

$error = self::formatError( htmlspecialchars( wfMsgForContent( 'syntaxhighlight-err-language' ) ) );

wfProfileOut( __METHOD__ );

@@ -203,6 +238,98 @@

}

/**

+	 * Read a remote file and filter its contents. Filtering is done by including lines

+	 * between certain markers or having a specific line number.

+	 *

+	 * @param string $url The URL of the remote file

+	 * @param string $section_marker A regular expression matching the beginning of a section

+	 * @param string $filter_sections A string containing section names which should be included, delimited by a comma

+	 * @param string $filter_lines A string containing line numbers or ranges which should be included, delimited by a comma

+	 */

+	private static function parseRemoteFile( $url, $section_marker, $filter_sections, $filter_lines ) {

+		$remotefile = fopen($url, 'r');

+		if ($remotefile === false) {

+			$output = "Error: could not open ". $url;

+		}

+		else {

+			$section_array = array_filter(explode(",", $filter_sections), 'strlen');

+			$lines_array = array_filter(explode(",", $filter_lines), 'strlen');

+			// empty entries are dropped: an empty section title would otherwise match every marker line

+			// build an array containing ranges of line numbers ((3,6),(10,10),(20,34),...)

+			$linerange_array = array();

+			foreach ($lines_array as $linefilter) {

+				$range = explode("-", $linefilter);

+				if (count($range) == 1) {

+					array_push($linerange_array, array($range[0], $range[0]));

+				}

+				else {

+					array_push($linerange_array, array($range[0], $range[1]));

+				}

+			}

+

+			// build an array of regular expressions (marker, optional white-space, title) for the required sections

+			$section_regexp_array = array();

+			foreach($section_array as $section) {

+				$section_regexp = substr($section_marker, 0, strlen($section_marker)-1). "\s*". $section. substr($section_marker, 0, 1);

+				array_push($section_regexp_array, $section_regexp);

+			}

+

+			// matches any section marker, even one without a title (which signals the end of a named section)

+			$section_unknown_regexp = substr($section_marker, 0, strlen($section_marker)-1). ".*" . substr($section_marker, 0, 1);

+

+			$section_started = false;

+			$linecount = 0;

+			$output = "";

+			while (!feof($remotefile)) {

+				$linecount += 1;

+				$line = fgets($remotefile);

+

+				if ($section_started == true) {

+					// a required section was opened by an earlier line

+					if (preg_match($section_unknown_regexp, $line)) {

+						// this marker line either ends the current required section or starts another

+						// section; fall through so it can still open a required section below

+						$section_started = false;

+					}

+					else {

+						$output .= $line;

+						continue;

+					}

+				}

+

+				$added = false;

+				// check if the line should be included because its number lies in a requested range

+				foreach($linerange_array as $linerange) {

+					if ($linecount >= $linerange[0] && $linecount <= $linerange[1]) {

+						$output .= $line;

+						$added = true;

+						// additionally check if this line signals a section start

+						foreach($section_regexp_array as $section_regexp) {

+							if (preg_match($section_regexp, $line)) {

+								$section_started = true;

+								break;

+							}

+						}

+						break;

+					}

+				}

+

+				if($added == false) {

+					// line ranges did not match; a marker line is not emitted itself, it only opens the section

+					foreach($section_regexp_array as $section_regexp) {

+						if (preg_match($section_regexp, $line)) {

+							$section_started = true;

+							break;

+						}

+					}

+				}

+			}

+			fclose($remotefile);

+		}

+		return $output;

+	}

+

+	/**

* Initialise a GeSHi object to format some code, performing

* common setup for all our uses of it

*

@@ -210,9 +337,20 @@

* @param string $lang

* @return GeSHi

*/

-	private static function prepare( $text, $lang ) {

+	private static function prepare( $input, $lang, $url, $section_marker, $filter_sections, $filter_lines ) {

self::initialise;

-		$geshi = new GeSHi( $text, $lang );

+		if ($url) {

+			if (strlen($filter_sections) > 0 || strlen($filter_lines) > 0) {

+				$output = self::parseRemoteFile( $input, $section_marker, $filter_sections, $filter_lines );

+			}

+			else {

+				$output = file_get_contents($input);

+			}

+		}

+		else {

+			$output = $input;

+		}

+		$geshi = new GeSHi( $output, $lang );

if( $geshi->error == GESHI_ERROR_NO_SUCH_LANG ) {

return null;

}

@@ -336,4 +474,4 @@

return self::hSpecialVersion_GeSHi( $extensionTypes );

}

-}

\ No newline at end of file

+}