Extension:Wiki2LaTeX/Development/w2lParser.php

<?php

/* * File:   w2lParser.php * Created: 2007-03-02 * Version: 0.7 * * Purpose: * Contains the parser, which transforms Mediawiki-articles to LaTeX * * License: * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */

// This file must only be loaded from within MediaWiki. When it is requested
// directly, print the installation hint and stop with exit status 1.
if ( !defined('MEDIAWIKI') ) {
	echo 'To install Wiki2LaTeX, put the following line in LocalSettings.php: ',
		'require_once( $IP."/extensions/path_to_Wiki2LaTeX_files/wiki2latex.php" );';
	exit( 1 );
}

// Source kinds (presumably the $mode argument of Wiki2LaTeXParser::parse() -- confirm).
define('W2L_STRING', 0);
define('W2L_FILE', 1);

// Kinds of curly-brace constructs (presumably used by the curly-brace processor -- confirm).
define('W2L_TEMPLATE', 0);
define('W2L_PARSERFUNCTION', 1);
define('W2L_TRANSCLUSION', 2);
define('W2L_VARIABLE', 3);
define('W2L_COREPARSERFUNCTION', 4);

class Wiki2LaTeXParser {
/**
 * Initialise every piece of parser state to its empty default.
 *
 * No parsing rules are registered here; that happens lazily in
 * initParsing() the first time parse() runs.
 */
function __construct() {
	$this->initiated   = false;
	$this->doProfiling = true;
	$this->ProfileLog  = array();
	$this->parsing     = '';
	$this->config      = array();
	$this->tags        = array();
	$this->fragments   = array();
	$this->elements    = array();
	$this->rawtex_counter = 0;
	$this->marks_counter  = 0;
	$this->nowikiMarks    = array();
	$this->nowikiCounter  = 0;
	$this->rawtex_replace = array();

	// Some default settings.
	$this->config["headings_toplevel"] = 'section';
	$this->config["use_hyperref"]      = true;
	// The key used to be misspelled ("leave_noinlcude"), so the default never
	// reached preprocessString(), which reads "leave_noinclude".
	$this->config["leave_noinclude"]   = false;

	$this->tag_source   = array();
	$this->tag_replace  = array();
	$this->tags_replace = array();
	$this->preReplace   = array();

	// NEVER set one of these values via another way than by addSimpleReplace
	$this->replace_search   = array();
	$this->replace_replace  = array();
	// Case-insensitive counterparts, also filled by addSimpleReplace().
	$this->ireplace_search  = array();
	$this->ireplace_replace = array();

	// NEVER set one of these values via another way than by addRegExp
	$this->regexp_search  = array();
	$this->regexp_replace = array();

	$this->hooks     = array();
	$this->error_msg = array();
	$this->is_error  = false;

	$this->curlyBraceDebugCounter = 0;
	$this->curlyBraceLength       = 0;

	// Event-system
	$this->event_functions = array();
	$this->event_blacklist = array();
	$this->events          = array();

	$this->mw_vars       = array();
	$this->content_cache = array();

	// Parserfunctions...
	$this->pFunctions  = array(); // takes custom ones (#switch)
	$this->cpFunctions = array(); // takes those without #
	$this->mask_chars  = array();

	// Entity table filled by addChar() and read by processHtmlEntities().
	$this->htmlEntities = array();

	// Block-level parser state read by doBlockLevels() and its helpers.
	$this->mInPre       = false;
	$this->mLastSection = '';
	$this->mDTopen      = false;

	$this->files_used        = false;
	$this->files             = array();
	$this->required_packages = array();
	$this->latex_headcode    = array();
}

/* Public Functions */

/**
 * Store several configuration values at once.
 *
 * @param array $cArray Map of configuration key => value.
 * @return bool Always true.
 */
public function setConfig($cArray) {
	foreach ( $cArray as $name => $setting ) {
		$this->setVal($name, $setting);
	}

	return true;
}

/**
 * Store a single configuration value, overwriting any previous one.
 *
 * @param string $key   Configuration key.
 * @param mixed  $value Value to store.
 * @return bool Always true.
 */
public function setVal($key, $value) {
	$this->config[$key] = $value;

	return true;
}

/**
 * Read a configuration value.
 *
 * Several keys queried by this class ('process_curly_braces',
 * 'typographic_quotes_detect', 'use_paralist', 'documentclass', ...) have
 * no default, so a missing key is a normal case and must not raise an
 * "undefined index" notice.
 *
 * @param string $key Configuration key.
 * @return mixed The stored value, or null when the key was never set.
 */
public function getVal($key) {
	return isset($this->config[$key]) ? $this->config[$key] : null;
}

/**
 * Register a plain string replacement rule.
 *
 * @param string $search         Text to look for.
 * @param string $replace        Text to put in its place.
 * @param int    $case_sensitive 0 files the rule in the case-insensitive
 *                               list, anything else in the regular list.
 */
public function addSimpleReplace($search, $replace, $case_sensitive = 1) {
	if ( $case_sensitive != 0 ) {
		$this->replace_search[]  = $search;
		$this->replace_replace[] = $replace;
		return;
	}

	$this->ireplace_search[]  = $search;
	$this->ireplace_replace[] = $replace;
}

/**
 * Register the callback that handles an extension tag.
 *
 * The tag name is also appended to the element list that
 * extractParserExtensions() hands to extractTagsAndParams().
 *
 * @param string   $tag      Tag name.
 * @param callable $callback Handler for the tag.
 */
public function addTagCallback($tag, $callback) {
	$this->elements[]  = $tag;
	$this->tags[$tag]  = $callback;
}

/**
 * Register (or replace) the callback for a parser function.
 *
 * @param string   $tag      Parser function name, used as the lookup key.
 * @param callable $callback Handler for the parser function.
 */
public function addParserFunction($tag, $callback) {
	$registry =& $this->pFunctions;
	$registry[$tag] = $callback;
}

/**
 * Register a core parser function.
 *
 * Delegates to addParserFunction(), so core functions share the same
 * registry as custom ones.
 *
 * @param string   $tag      Parser function name.
 * @param callable $callback Handler for the parser function.
 */
public function addCoreParserFunction($tag, $callback) {
	$this->addParserFunction(
		$tag,
		$callback
	);
}

/**
 * Register a regular-expression replacement rule.
 *
 * Pattern and replacement are appended to two parallel lists, so they end
 * up at the same index.
 *
 * @param string $search  Regular expression.
 * @param string $replace Replacement.
 */
public function addRegExp($search, $replace) {
	$this->regexp_replace[] = $replace;
	$this->regexp_search[]  = $search;
}

/**
 * Attach a handler to an event and mark it as active.
 *
 * @param string   $event Event name.
 * @param callable $func  Handler; function name or array(class|object, method).
 */
public function registerEventHandler($event, $func) {
	$eventName = (string) $event;
	$this->events[$eventName][] = $func;

	// Handlers are switched on and off through their textual representation.
	$handlerKey = $this->getEventHandlerRepr($func);
	$this->event_functions[$handlerKey] = true;
}

/**
 * Switch an event handler off without unregistering it.
 *
 * @param string $repr Handler representation as listed by getEventHandlers().
 */
function deactivateEventHandler($repr) {
	$switches =& $this->event_functions;
	$switches[$repr] = false;
}

/**
 * List the textual representations of all registered event handlers,
 * whether currently active or deactivated.
 *
 * The declaration was missing its parameter list, which is a parse error.
 *
 * @return array List of handler representations.
 */
function getEventHandlers() {
	return array_keys($this->event_functions);
}

/**
 * Build the textual representation of an event handler.
 *
 * @param mixed $fnc Function name, or array(class name|object, method name).
 * @return mixed "Class::method" for array callbacks; otherwise $fnc unchanged.
 */
private function getEventHandlerRepr($fnc) {
	if ( !is_array($fnc) ) {
		return $fnc;
	}

	// For object callbacks the class of the instance is used.
	$owner = is_object($fnc[0]) ? get_class($fnc[0]) : $fnc[0];

	return $owner.'::'.$fnc[1];
}

/**
 * Run all active handlers of an event over a string.
 *
 * Each handler receives the current string and this parser, and its return
 * value becomes the input of the next handler.
 *
 * @param string $event Event name.
 * @param string $str   Text handed to the handlers.
 * @return string Text after all active handlers ran; $str unchanged when
 *                the event has no handlers.
 */
private function execEvent($event, $str) {
	$eventName = (string) $event;

	// Most events have no handler at all; avoid the undefined-index notice.
	if ( !isset($this->events[$eventName]) || !is_array($this->events[$eventName]) ) {
		return $str;
	}

	// Call-time pass-by-reference (&$this) is a fatal error since PHP 5.4.
	// Passing a reference inside the argument array serves handlers declared
	// with either a by-value or a by-reference parser parameter.
	$parser = $this;

	foreach ( $this->events[$eventName] as $fnc ) {
		$repr = $this->getEventHandlerRepr($fnc);
		if ( !empty($this->event_functions[$repr]) ) {
			$str = call_user_func_array($fnc, array($str, &$parser));
		}
	}

	return $str;
}

/**
 * Parse a fragment with internalParse(), wrapped in profiling calls.
 *
 * @param string $str Wiki text fragment.
 * @return string Result of internalParse().
 */
public function recursiveTagParse( $str = '' ) {
	$this->profileIn(__METHOD__);
	$parsed = $this->internalParse($str);
	$this->profileOut(__METHOD__);

	return $parsed;
}

/**
 * Parse a given wiki string to LaTeX.
 *
 * Fixes over the previous version: the default of $mode referred to the
 * undefined constant W2L_STR (the defined name is W2L_STRING), and
 * initParsing was referenced without being called.
 *
 * @param string $text   Wiki text.
 * @param mixed  &$title Title of the page being parsed; stored by reference
 *                       in $this->mTitle.
 * @param int    $mode   Not read by this method.
 * @return string The converted text.
 */
public function parse($text, &$title, $mode = W2L_STRING) {
	$this->profileIn(__METHOD__);
	$time_start = microtime(true);

	if ( $this->initiated == false ) {
		$this->initParsing();
	}
	$this->mTitle =& $title;

	$text = "\n".trim($text)."\n";

	// NOTE(review): the result of the W2L_AFTER_INIT handlers is discarded,
	// unlike every other event here -- confirm that this is intended.
	$this->execEvent('W2L_AFTER_INIT', $text);
	$text = $this->execEvent('W2L_BEFORE_CUT', $text);
	$text = $this->preprocessString($text);
	$text = $this->execEvent('W2L_AFTER_CUT', $text);

	// Comments are stripped in preprocessString(); only the events remain here.
	$text = $this->execEvent('W2L_BEFORE_STRIP', $text);
	$text = $this->execEvent('W2L_AFTER_STRIP', $text);

	switch ( $this->getVal('process_curly_braces') ) {
		case '0':
			// remove everything between curly braces
			$text = preg_replace('/\{\{(.*?)\}\}/sm', '', $text);
			break;
		case '2':
			// process them
			$text = $this->processCurlyBraces($text);
			break;
		case '1':
		default:
			// do nothing
			break;
	}

	$text = $this->extractParserExtensions($text);
	$text = $this->extractPre($text);

	$text = $this->execEvent("W2L_AFTER_EXTRACT_TAGS", $text);

	$text = $this->internalParse($text);
	$text = trim($text);

	// Some tidying
	$text = str_replace("\n\n\n", "\n\n", $text);
	$text = trim(str_replace("\n\n\n", "\n\n", $text));

	// Put the extracted pieces back in. This approach cannot tell chapter
	// and section apart, which is neglected at the moment anyway.
	$text = $this->replacePre($text);
	$text = $this->replaceParserExtensions($text);
	$text = $this->replaceNoWikiMarkers($text);
	$text = $this->deMask($text);

	$text = trim($text);
	$text = str_replace("\n\n\n", "\n\n", $text);
	$text = $this->execEvent("W2L_FINISH", $text);

	$time_end = microtime(true);
	$this->parse_time = $time_end - $time_start;
	$this->profileOut(__METHOD__);

	return $text;
}

/**
 * Register a character that processHtmlEntities() converts to LaTeX.
 *
 * The 'utf_hex' and 'utf_dec' slots used to hold each other's value; they
 * now carry the form their name says. processHtmlEntities() replaces both
 * forms whenever 'utf_hex' is non-empty, so its result is unchanged.
 *
 * @param string    $html    Named entity text.
 * @param string    $latex   LaTeX replacement.
 * @param int|false $utf_dec Code point; false when the numeric forms
 *                           should not be registered.
 * @return bool Always true.
 */
function addChar($html, $latex, $utf_dec = false) {
	if ( $utf_dec === false ) {
		$ent_dec = '';
		$ent_hex = '';
	} else {
		$ent_dec = '&#'.$utf_dec.';';
		$ent_hex = '&#x'.dechex($utf_dec).';';
	}

	$this->htmlEntities[] = array(
		'html'    => $html,
		'utf_hex' => $ent_hex,
		'utf_dec' => $ent_dec,
		'latex'   => $latex,
		'xetex'   => '' // Future
	);

	return true;
}

/**
 * Replace every registered HTML entity by its LaTeX counterpart.
 *
 * The '&' of each entity is first swapped for $this->Et, the stand-in this
 * method expects in place of a literal ampersand.
 *
 * @param string $str Text to process.
 * @return string Text with the known entities replaced.
 */
function processHtmlEntities( $str ) {
	foreach ( $this->htmlEntities as $entry ) {
		$latex    = $entry['latex'];
		$named    = str_replace('&', $this->Et, $entry['html']);
		$numericA = str_replace('&', $this->Et, $entry['utf_hex']);
		$numericB = str_replace('&', $this->Et, $entry['utf_dec']);

		$str = strtr($str, array($named => $latex));

		// Numeric forms exist only for characters registered with a code point.
		if ( $numericA != '' ) {
			$str = strtr($str, array($numericA => $latex));
			$str = strtr($str, array($numericB => $latex));
		}
	}

	return $str;
}

/**
 * Convert a string as is, without handling comments, extension tags etc.
 *
 * The steps run in a fixed order that follows, as closely as possible, the
 * way MediaWiki itself parses a string: tables first, then headings,
 * quotes, links, HTML, quotation marks, masking, and block levels.
 *
 * @param string $str Text to convert.
 * @return string Converted text.
 */
function internalParse($str) {
	$this->profileIn(__METHOD__);

	$str = $this->execEvent('W2L_BEFORE_MASK', $str);
	$str = $this->maskLatexCommandChars($str);
	$str = $this->execEvent('W2L_BEFORE_TABLES', $str);

	// Each step takes the text and returns the transformed text.
	$pipeline = array(
		'doTableStuff',
		'doHeadings',
		'doQuotes',
		'doInternalLinks',
		'doExternalLinks',
		'doHTML',
		'doQuotationMarks',
		'maskLatexSpecialChars',
		'processHtmlEntities',
		'maskLaTeX',
		'doBlocklevels',
		'maskMwSpecialChars',
		'doSimpleReplace',
	);
	foreach ( $pipeline as $step ) {
		$str = $this->$step($str);
	}

	$str = $this->execEvent('W2L_INTERNAL_FINISH', $str);
	$this->profileOut(__METHOD__);

	return $str;
}

/**
 * Export the collected profiling log.
 *
 * @param string $export_as 'array' for the raw log, 'xml' for one XML
 *                          element per log entry.
 * @return array|string|false The log in the requested form; false when
 *                            profiling is off or the format is unknown.
 */
public function getPerformanceProfile($export_as = 'xml') {
	if ( !$this->doProfiling ) {
		return false;
	}

	if ( $export_as == 'array' ) {
		return $this->ProfileLog;
	}

	if ( $export_as == 'xml' ) {
		$rows = array();
		foreach ( $this->ProfileLog as $entry ) {
			$rows[] = '<'.$entry['type'].' fname="'.$entry['function'].'" time="'.$entry['time'].'" />'."\n";
		}
		return implode('', $rows);
	}

	return false;
}

/**
 * Duration of the most recent parse() call.
 *
 * The declaration was missing its parameter list (a parse error), and the
 * property is only set by parse(), so it may not exist yet.
 *
 * @return float|null Seconds as measured with microtime(true); null when
 *                    parse() has not run yet.
 */
public function getParseTime() {
	return isset($this->parse_time) ? $this->parse_time : null;
}

/**
 * Convert quotation marks and apostrophes.
 *
 * Without 'typographic_quotes_detect' every " and ' is turned into \dq{}
 * and \rq{}. With it, quotes next to typical opening or closing contexts
 * are first turned into numeric entities (written with $this->Et in place
 * of the ampersand); whatever is left is treated as above. The detection
 * rules will need language packs eventually.
 *
 * Fix: the rule for "'," used to emit "8217;" without the '#', unlike
 * every other rule in the table.
 *
 * @param string $str Text to process.
 * @return string Processed text.
 */
private function doQuotationMarks($str) {
	$fName = __METHOD__;
	$this->profileIn($fName);

	$quotes = array(
		'"' => '\dq{}',
		"'" => '\rq{}',
	);

	if ( $this->getVal('typographic_quotes_detect') == false ) {
		$str = strtr($str, $quotes);
	} else {
		$sq_open  = $this->Et.'#8218;';
		$sq_close = $this->Et.'#8217;';
		$dq_open  = $this->Et.'#8222;';
		$dq_close = $this->Et.'#8221;';

		$sq_detect = array();
		$dq_detect = array();

		// Single quote preceded by one of these opens a quotation
		// ("\n" = at the beginning of a line).
		foreach ( array("\n", ' ', '{', '(') as $before ) {
			$sq_detect[$before."'"] = $before.$sq_open;
		}
		// Single quote followed by one of these closes a quotation
		// ("\n" = at the end of a line).
		foreach ( array(' ', '.', ',', ';', '?', '!', '}', '\\', '<', '-', '(', ')', ':', "\n") as $after ) {
			$sq_detect["'".$after] = $sq_close.$after;
		}
		// Single quote right after a closing brace or parenthesis also closes.
		foreach ( array('}', ')') as $before ) {
			$sq_detect[$before."'"] = $before.$sq_close;
		}

		// Double quotes, same scheme.
		foreach ( array("\n", ' ', '(') as $before ) {
			$dq_detect[$before.'"'] = $before.$dq_open;
		}
		foreach ( array(' ', '.', ',', ';', '?', '!', '-', '}', ')', '\\', '<', '(', ':', "\n") as $after ) {
			$dq_detect['"'.$after] = $dq_close.$after;
		}

		// Replace quotes
		$str = strtr($str, $sq_detect);
		$str = strtr($str, $dq_detect);
		$str = strtr($str, $quotes);
	}

	$this->profileOut($fName);
	return $str;
}

/* Internal parsing functions */

/**
 * One-time setup: pull tags, events, parser functions and configuration
 * from the $w2l* globals, register the core parser functions and the
 * built-in replacement rules, and load the character table.
 *
 * Fixes: the declaration lacked its parameter list, uniqueString was
 * referenced without being called, and the "already initiated" early
 * return happened after profileIn(), leaving the profiler unbalanced.
 */
public function initParsing() {
	global $w2lTags;
	global $w2lEvents;
	global $w2lParserFunctions;
	global $w2lConfig;

	if ( $this->initiated == true ) {
		return;
	}

	$fName = __METHOD__;
	$this->profileIn($fName);

	$this->unique = $this->uniqueString();

	// The casts turn an unset global into an empty list.
	foreach ( (array) $w2lTags as $key => $value ) {
		$this->addTagCallback($key, $value);
	}

	foreach ( (array) $w2lEvents as $key => $handlers ) {
		foreach ( (array) $handlers as $handler ) {
			$this->registerEventHandler($key, $handler);
		}
	}

	foreach ( (array) $w2lParserFunctions as $key => $value ) {
		$this->addParserFunction($key, $value);
	}

	foreach ( (array) $w2lConfig as $key => $value ) {
		$this->setVal($key, $value);
	}

	// Core parser functions, handled by CoreParserFunctions methods.
	// formatnum, grammar, plural and defaultsort are deliberately not registered.
	$coreFunctions = array(
		'int'              => 'intFunction',
		'ns'               => 'ns',
		'urlencode'        => 'urlencode',
		'lcfirst'          => 'lcfirst',
		'ucfirst'          => 'ucfirst',
		'lc'               => 'lc',
		'uc'               => 'uc',
		'localurl'         => 'localurl',
		'localurle'        => 'localurle',
		'fullurl'          => 'fullurl',
		'fullurle'         => 'fullurle',
		'numberofpages'    => 'numberofpages',
		'numberofusers'    => 'numberofusers',
		'numberofarticles' => 'numberofarticles',
		'numberoffiles'    => 'numberoffiles',
		'numberofadmins'   => 'numberofadmins',
		'language'         => 'language',
		'padleft'          => 'padleft',
		'padright'         => 'padright',
		'anchorencode'     => 'anchorencode',
		'special'          => 'special',
		'pagesinnamespace' => 'pagesinnamespace',
	);
	foreach ( $coreFunctions as $name => $method ) {
		$this->addCoreParserFunction( $name, array( 'CoreParserFunctions', $method ) );
	}

	// And here we add some replace-rules.
	// To be honest: these rules are in the wrong place. Most of them should
	// come from outside or from 'language packs'.
	$this->addSimpleReplace(" - ", " -- ");
	$this->addSimpleReplace(" -\n", " --\n");
	$this->addSimpleReplace("\n- ", "\n-- ");
	$this->addSimpleReplace("...", "{\\dots}");

	include('w2lChars.php');

	$this->initiated = true;
	$this->profileOut($fName);
	return;
}

/**
 * Replace blocks of space-indented lines by markers.
 *
 * Consecutive lines starting with a blank form one block. Each block is
 * swapped for a marker in the text, and its verbatim environment is stored
 * in $this->preReplace for replacePre(). A block is only closed by a
 * following line that does not start with a blank.
 *
 * Fixes: empty lines no longer trigger an offset notice, the per-block
 * buffers are initialised before being appended to, and "\end" is written
 * with an escaped backslash (in double quotes "\e" is the ESC character
 * since PHP 5.4).
 *
 * @param string $str Text to scan.
 * @return string Text with the blocks replaced by markers.
 */
function extractPre($str) {
	$fName = __METHOD__;
	$this->profileIn($fName);

	$in_block      = false;
	$block_counter = 0;
	$rplBlock      = array(); // original lines of each block
	$preBlock      = array(); // LaTeX version of each block

	foreach ( explode("\n", $str) as $line ) {
		if ( substr($line, 0, 1) === ' ' ) {
			if ( !$in_block ) {
				++$block_counter;
				$preBlock[$block_counter] = "\\begin{verbatim}\n";
				$rplBlock[$block_counter] = '';
			}

			$preBlock[$block_counter] .= substr($line, 1)."\n";
			$rplBlock[$block_counter] .= $line."\n";
			$in_block = true;
			continue;
		}

		// First line after a block: close it and swap it for its marker.
		if ( $in_block ) {
			$preBlock[$block_counter] .= "\\end{verbatim}\n";
			$marker = $this->getMark('pre', $block_counter);
			$str = str_replace($rplBlock[$block_counter], $marker, $str);
			$this->preReplace[$marker] = $preBlock[$block_counter];
		}
		$in_block = false;
	}

	$this->profileOut($fName);
	return $str;
}

/**
 * Put the blocks stored by extractPre() back in place of their markers.
 *
 * @param string $str Text containing markers.
 * @return string Text with the markers replaced.
 */
function replacePre($str) {
	$fName = __METHOD__;
	$this->profileIn($fName);

	$markers = array_keys($this->preReplace);
	$blocks  = array_values($this->preReplace);
	$result  = str_replace($markers, $blocks, $str);

	$this->profileOut($fName);
	return $result;
}

/**
 * Swap every nowiki section for a marker (see noWikiMarker()).
 *
 * Fix: the pattern had lost its opening tag and only contained the closing
 * "<\/nowiki>", so it matched from a blank instead of from the start of
 * the section.
 *
 * @param string $str Text to scan.
 * @return string Text with nowiki sections replaced by markers.
 */
function matchNoWiki($str) {
	$str = preg_replace_callback('/<nowiki>(.*)<\/nowiki>/smU', array($this, 'noWikiMarker'), $str);
	return $str;
}

/**
 * preg_replace_callback() handler of matchNoWiki().
 *
 * Masks the captured content and stores it under a fresh marker.
 *
 * @param array $match Regex match; index 1 holds the section content.
 * @return string The marker standing in for the section.
 */
function noWikiMarker($match) {
	$this->nowikiCounter += 1;
	$marker = $this->getMark('nowiki', $this->nowikiCounter);

	$masked = $this->maskMwSpecialChars(
		$this->maskLatexSpecialChars(
			$this->maskLatexCommandChars($match[1])
		)
	);
	$this->nowikiMarks[$marker] = $masked;

	return $marker;
}

/**
 * Replace the markers created by noWikiMarker() with the stored content.
 *
 * @param string $str Text containing markers.
 * @return string Text with the markers replaced.
 */
function replaceNoWikiMarkers($str) {
	$restored = strtr($str, $this->nowikiMarks);

	return $restored;
}

/**
 * First pass over raw wiki text: protect nowiki sections, strip comments,
 * and resolve noinclude / includeonly sections.
 *
 * Both switches flip after use: 'leave_noinclude' is reset to false once
 * it was honoured, and 'insert_includeonly' is set to true once
 * includeonly sections were dropped.
 *
 * Fix: the four patterns had lost their opening tags and only contained
 * the closing ones.
 *
 * @param string $str Raw wiki text.
 * @return string Preprocessed text, after the W2L_PREPROCESS handlers ran.
 */
public function preprocessString($str) {
	$str = $this->matchNoWiki($str);
	$str = $this->stripComments($str);

	if ( $this->getVal('leave_noinclude') ) {
		// Keep the content, drop only the tags.
		$str = preg_replace('/<noinclude>(.*)<\/noinclude>/smU', "$1", $str);
		$this->setVal('leave_noinclude', false);
	} else {
		$str = preg_replace('/<noinclude>.*<\/noinclude>/smU', '', $str);
	}

	if ( $this->getVal('insert_includeonly') ) {
		// Keep the content, drop only the tags.
		$str = preg_replace('/<includeonly>(.*)<\/includeonly>/smU', "$1", $str);
	} else {
		$str = preg_replace('/<includeonly>(.*)<\/includeonly>/smU', '', $str);
		$this->setVal('insert_includeonly', true);
	}

	$str = $this->execEvent('W2L_PREPROCESS', $str);
	return $str;
}

/**
 * Handle block-level elements line by line: build lists from lines that
 * start with * # : ; and leave all other lines untouched.
 *
 * Derived from MediaWiki's Parser::doBlockLevels(). This copy had lost the
 * text between a '<' and the next '>' in two places; both are restored
 * from the surrounding code:
 *  - the pre matching ($preCloseMatch / $preOpenMatch / the mInPre test),
 *  - the closing tag built from $this->mLastSection at the end.
 * NOTE(review): please compare these two spots against the upstream file.
 *
 * Also fixed: string offsets use [] instead of {}, the dead paragraph
 * branch with its unbalanced wfProfileIn() on an undefined variable is
 * gone, and unset state properties no longer raise notices.
 *
 * @param string $str Text to process.
 * @return string Text with list markup converted.
 */
private function doBlockLevels( $str = '' ) {
	$fName = __METHOD__;
	$this->profileIn($fName);

	$textLines = explode( "\n", $str );

	$lastPrefix = $output = '';
	$pref = $pref2 = '';
	$this->mDTopen = false;
	$prefixLength = 0;

	foreach ( $textLines as $oLine ) {
		$lastPrefixLength = strlen( $lastPrefix );
		$preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
		$preOpenMatch  = preg_match( '/<pre/i', $oLine );

		if ( empty($this->mInPre) ) {
			# Multiple prefixes may abut each other for nested lists.
			$prefixLength = strspn( $oLine, '*#:;' );
			$pref = substr( $oLine, 0, $prefixLength );

			# ';' and ':' belong to the same kind of list.
			$pref2 = str_replace( ';', ':', $pref );
			$t = substr( $oLine, $prefixLength );
			$this->mInPre = !empty($preOpenMatch);
		} else {
			# Don't interpret any other prefixes in preformatted text
			$prefixLength = 0;
			$pref = $pref2 = '';
			$t = $oLine;
		}

		# List generation
		if ( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
			# Same as the last item, so no need to deal with nesting or opening stuff
			$output .= $this->nextItem( substr( $pref, -1 ) );

			if ( substr( $pref, -1 ) == ';' ) {
				# The one nasty exception: definition lists work like this:
				# ; title : definition text
				# So we check for : in the remainder text to split up the
				# title and definition, without b0rking links.
				$term = $t2 = '';
				if ( $this->findColonNoLinks($t, $term, $t2) !== false ) {
					$t = $t2;
					$output .= $term. $this->nextItem( ':' );
				}
			}
		} elseif ( $prefixLength || $lastPrefixLength ) {
			# Either open or close a level...
			$commonPrefixLength = $this->getCommon( $pref, $lastPrefix );

			while ( $commonPrefixLength < $lastPrefixLength ) {
				$output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
				--$lastPrefixLength;
			}
			if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
				$output .= $this->nextItem( $pref[$commonPrefixLength-1] );
			}
			while ( $prefixLength > $commonPrefixLength ) {
				$char = substr( $pref, $commonPrefixLength, 1 );
				$output .= $this->openList( $char );

				if ( ';' == $char ) {
					# FIXME: This is dupe of code above
					$term = $t2 = '';
					if ( $this->findColonNoLinks($t, $term, $t2) !== false ) {
						$t = $t2;
						$output .= $term. $this->nextItem( ':' );
					}
				}
				++$commonPrefixLength;
			}
			$lastPrefix = $pref2;
		}

		// somewhere above we forget to get out of pre block (bug 785)
		if ( $preCloseMatch && $this->mInPre ) {
			$this->mInPre = false;
		}

		$output .= $t."\n";
	}

	# Close every list that is still open after the last line.
	while ( $prefixLength ) {
		$output .= $this->closeList( $pref2[$prefixLength-1] );
		--$prefixLength;
	}
	if ( isset($this->mLastSection) && '' != $this->mLastSection ) {
		$output .= '</' . $this->mLastSection . '>';
		$this->mLastSection = '';
	}

	$this->profileOut($fName);
	return $output;
}

/**
 * Markup that starts the next item of the list type given by $char.
 *
 * NOTE(review): every string literal of the ':' / ';' branch is empty in
 * this copy. They look like HTML definition-list tags that were lost when
 * the file was extracted; they are kept exactly as found. Confirm against
 * the upstream file.
 *
 * @param string $char List prefix character (* # : ;).
 * @return string Item markup.
 */
/* private */ function nextItem( $char ) {
	if ( '*' == $char || '#' == $char ) {
		return '\item ';
	} else if ( ':' == $char || ';' == $char ) {
		$close = '';
		if ( $this->mDTopen ) {
			$close = '';
		}
		// A ';' item opens a term, a ':' item ends it.
		$this->mDTopen = ( ';' == $char );
		return $close. '';
	}
	return '';
}

/**
 * Close the section named by $this->mLastSection, if any, and leave
 * preformatted mode.
 *
 * Fix: the declaration lacked its parameter list and the closing-tag
 * expression had lost its beginning ("'</' . $this->").
 *
 * @return string Closing markup, or '' when no section is open.
 */
/* private */ function closeParagraph() {
	$result = '';
	if ( isset($this->mLastSection) && '' != $this->mLastSection ) {
		$result = '</' . $this->mLastSection . ">\n";
	}
	$this->mInPre = false;
	$this->mLastSection = '';
	return $result;
}

/**
 * Markup that opens a list of the type given by $char, including its
 * first item. With 'use_paralist' the compact environments are used.
 *
 * Fix: closeParagraph was referenced without being called.
 * NOTE(review): the ':' and ';' literals are empty in this copy, see
 * nextItem(); kept as found.
 *
 * @param string $char List prefix character (* # : ;).
 * @return string Opening markup; '' for an unknown character.
 */
/* private */ function openList( $char ) {
	if ( $this->getVal('use_paralist') ) {
		$list_ul_env = 'compactitem';
		$list_ol_env = 'compactenum';
	} else {
		$list_ul_env = 'itemize';
		$list_ol_env = 'enumerate';
	}
	$result = $this->closeParagraph();

	if ( '*' == $char ) {
		$result .= '\begin{'.$list_ul_env.'}'."\n".'\item ';
	} else if ( '#' == $char ) {
		$result .= '\begin{'.$list_ol_env.'}'."\n".'\item ';
	} else if ( ':' == $char ) {
		$result .= '';
	} else if ( ';' == $char ) {
		$result .= '';
		$this->mDTopen = true;
	} else {
		$result = '';
	}

	return $result;
}

/**
 * Markup that closes a list of the type given by $char.
 *
 * NOTE(review): the ':' literals ('' and '</dl>') are kept exactly as
 * found; they are probably incomplete, see nextItem().
 *
 * @param string $char List prefix character (* # :).
 * @return string Closing markup followed by a newline; '' for any other
 *                character.
 */
/* private */ function closeList( $char ) {
	if ( $this->getVal('use_paralist') ) {
		$list_ul_env = 'compactitem';
		$list_ol_env = 'compactenum';
	} else {
		$list_ul_env = 'itemize';
		$list_ol_env = 'enumerate';
	}

	if ( '*' == $char ) {
		$text = '\end{'.$list_ul_env.'}';
	} else if ( '#' == $char ) {
		$text = '\end{'.$list_ol_env.'}';
	} else if ( ':' == $char ) {
		if ( $this->mDTopen ) {
			$this->mDTopen = false;
			$text = '';
		} else {
			$text = '</dl>';
		}
	} else {
		return '';
	}
	return $text."\n";
}

/**
 * Length of the common prefix of two strings.
 *
 * Fix: string offsets use [] instead of the removed {} syntax.
 *
 * @param string $st1 First string.
 * @param string $st2 Second string.
 * @return int Number of leading characters both strings share.
 */
/* private */ function getCommon( $st1, $st2 ) {
	$shorter = min( strlen( $st1 ), strlen( $st2 ) );

	for ( $i = 0; $i < $shorter; ++$i ) {
		if ( $st1[$i] != $st2[$i] ) {
			break;
		}
	}
	return $i;
}

/**
 * Split up a string on the first ':', unless a tag starts before it.
 *
 * This is a reduced version of the MediaWiki method: it does not look
 * inside tags. When a '<' occurs before the first ':', no split is done.
 *
 * Fixes: that case used to fall off the end of the function and return
 * null. Callers test "!== false", so they took null for a hit and replaced
 * the line by the (empty) $after, losing its text. The stray
 * wfProfileOut() without a matching wfProfileIn() is removed as well.
 *
 * @param string $str     the string to split
 * @param string &$before set to everything before the ':'
 * @param string &$after  set to everything after the ':'
 * @return int|false the position of the ':', or false if no split was done
 */
function findColonNoLinks($str, &$before, &$after) {
	$pos = strpos( $str, ':' );
	if ( $pos === false ) {
		// Nothing to find!
		return false;
	}

	$lt = strpos( $str, '<' );
	if ( $lt !== false && $lt < $pos ) {
		// A tag starts before the colon; leave the string alone.
		return false;
	}

	// Easy; no tag nesting to worry about
	$before = substr( $str, 0, $pos );
	$after = substr( $str, $pos+1 );
	return $pos;
}

/**
 * Convert wiki headings (= ... = up to ====== ... ======) to LaTeX
 * sectioning commands.
 *
 * Works in two passes, following MediaWiki: headings first become <hN>
 * elements, longest marker first, and those are then handed to
 * processHeadings(). \part is not supported here.
 *
 * @param string $str Text to process.
 * @return string Text with headings converted.
 */
private function doHeadings( $str = '' ) {
	$this->profileIn(__METHOD__);

	foreach ( range(6, 1) as $i ) {
		$h = str_repeat( '=', $i );
		$pattern     = "/^{$h}(.+){$h}\\s*$/m";
		$replacement = "<h{$i}>\\1</h{$i}>\\2";
		$str = preg_replace( $pattern, $replacement, $str );
	}

	$converted = preg_replace_callback(
		'^\<h([1-6])\>(.+)\</h([1-6])\>^',
		array($this, 'processHeadings'),
		$str
	);

	$this->profileOut(__METHOD__);
	return $converted;
}

/**
 * preg_replace_callback() handler of doHeadings(): build the LaTeX
 * sectioning command for one heading.
 *
 * Leading asterisks in the heading text select a variant:
 *   ***  command from the second table, starred
 *   **   command from the second table
 *   *    standard command, starred
 * The star is emitted as a mask marker and resolved later by deMask().
 * For the document classes 'report' and 'book' the level moves up by one.
 * Beware: using chapter removes support for \subparagraph.
 *
 * @param array $matches Index 1 holds the heading level, index 2 the text.
 * @return string The LaTeX command.
 */
private function processHeadings($matches) {
	$heading = trim($matches[2]);
	$level   = trim($matches[1]);

	if ( in_array( $this->getVal("documentclass"), array('report' ,'book')) ) {
		--$level;
	}

	$standard = array('part', 'chapter', 'section', 'subsection',  'subsubsection', 'paragraph', 'subparagraph');
	$koma     = array('addpar', 'addchap', 'addsec', 'subsection', 'subsubsection', 'paragraph', 'subparagraph');

	$asteriks = $this->getMark('Asteriks');
	$this->mask($asteriks, '*');

	// Work out table, star and number of prefix characters to drop.
	if ( substr($heading, 0, 3) == '***' ) {
		$commands = $koma;
		$star     = $asteriks;
		$drop     = 3;
	} elseif ( substr($heading, 0, 2) == '**' ) {
		$commands = $koma;
		$star     = '';
		$drop     = 2;
	} elseif ( substr($heading, 0, 1) == '*' ) {
		$commands = $standard;
		$star     = $asteriks;
		$drop     = 1;
	} else {
		$commands = $standard;
		$star     = '';
		$drop     = 0;
	}

	if ( $drop > 0 ) {
		$heading = substr($heading, $drop);
	}

	return '\\'.$commands[$level].$star.'{'.$heading.'}';
}

/**
 * Remember that the marker $key stands for $value until deMask() runs.
 *
 * @param string $key   Marker text.
 * @param string $value Text the marker is replaced with.
 */
function mask($key, $value) {
	$table =& $this->mask_chars;
	$table[$key] = $value;
}

/**
 * Replace all markers registered with mask() by their values.
 *
 * @param string $str Text containing markers.
 * @return string Text with the markers resolved.
 */
function deMask($str) {
	$markers = array_keys($this->mask_chars);
	$values  = array_values($this->mask_chars);

	return str_replace($markers, $values, $str);
}

/**
 * Convert everything within double square brackets via internalLinkHelper().
 *
 * @param string $str Text to process.
 * @return string Text with internal links converted.
 */
private function doInternalLinks( $str = '' ) {
	$fName = __METHOD__;
	$this->profileIn($fName);

	$converted = preg_replace_callback(
		'/\[\[(.*?)\]\]/',
		array($this, 'internalLinkHelper'),
		$str
	);

	$this->profileOut($fName);
	return $converted;
}

/**
 * Translate a namespace name or number into a namespace index.
 *
 * Tries, in this order: a numeric value, the localised namespace names of
 * the content language (case-insensitive, blanks as underscores), and the
 * canonical namespace names.
 *
 * Fixes: getNamespaces was referenced without being called, and the static
 * call "Namespace::getCanonicalIndex()" cannot be parsed by PHP >= 5.3,
 * where "namespace" is a reserved word. The class is now addressed by
 * name; MWNamespace is preferred when it exists.
 * NOTE(review): confirm the class name against the targeted MediaWiki version.
 *
 * @param string $part1 Namespace name or number.
 * @return int|array The namespace index, or array('found' => false) when
 *                   the namespace is unknown.
 */
private function translateNamespace($part1 = '') {
	global $wgContLang;

	if ( intval( $part1 ) || $part1 == "0" ) {
		return intval( $part1 );
	}

	$namespaces = array_map('strtolower', $wgContLang->getNamespaces());
	$nss = array_flip($namespaces);

	$param = str_replace( ' ', '_', strtolower( $part1 ) );
	if ( array_key_exists($param, $nss) ) {
		return $nss[$param];
	}

	$nsClass = class_exists('MWNamespace') ? 'MWNamespace' : 'Namespace';
	$index = call_user_func( array($nsClass, 'getCanonicalIndex'), strtolower( $param ) );
	if ( !is_null( $index ) ) {
		return $index;
	}

	return array( 'found' => false );
}

/**
 * preg_replace_callback() handler of doInternalLinks(): convert one
 * internal link.
 *
 *  - category links vanish,
 *  - media links are returned as written,
 *  - image links become a centred, resized \includegraphics with caption,
 *  - every other link is reduced to its label, or its target when it has
 *    no label.
 *
 * Fixes:
 *  - loadFromFile / exists / getPath were referenced without being called,
 *    and the file object was used before the check for a missing file;
 *  - the image path and caption had been lost from the returned LaTeX
 *    (both were computed but never used); they are back as the arguments
 *    of \includegraphics and \textit.
 *    NOTE(review): confirm the exact markup against the upstream file;
 *  - "\end" is written with an escaped backslash ("\e" is ESC since PHP 5.4);
 *  - $caption is defined even when the link has no caption;
 *  - the leading-colon test no longer uses the removed {} offset syntax.
 *
 * @param array $matches Regex match; index 1 holds the link content.
 * @return string LaTeX or plain text for the link.
 */
private function internalLinkHelper($matches) {
	$link  = trim($matches[1]);
	$links = explode("|", $link, 2);

	if ( substr_count($links[0], ':') == 0 ) {
		$ns_id = NS_MAIN;
	} else {
		$namespace = explode(':', $links[0], 2);
		$ns_id = $this->translateNamespace($namespace[0]);
	}

	if ( is_array($ns_id) ) {
		// Unknown namespace: report it and treat the link like a plain one.
		$this->reportError( wfMsg('w2l_parser_no_namespace', $links[0]), __METHOD__ );
	} else {
		switch ( $ns_id ) {
			case NS_CATEGORY:
				return '';
			case NS_MEDIA:
				// this is just a link to the mediawiki-page
				return $link;
			case NS_IMAGE:
				return $this->imageLinkToLatex($link);
		}
	}

	if ( substr($link, 0, 1) == ':' ) {
		// Leading colon: a link to a category page
		$link = substr($link, 1);
		$test = explode(':', $link, 2);
		if ( $this->translateNamespace($test[0]) != NS_CATEGORY ) {
			// Whatever that was. Seems like an error!
			$link = ':'.$link;
		}
	}

	// Prefer the label after the first pipe, if there is one.
	if ( substr_count($link, '|') >= 1 ) {
		return empty($links[1]) ? $links[0] : $links[1];
	}
	return $link;
}

/**
 * Build the LaTeX for an image link.
 *
 * Options after the first pipe: "<n>cm" sets the width (default 10cm);
 * pixel sizes, thumb/frame and alignment keywords are ignored; anything
 * else becomes the caption (the last such part wins).
 *
 * @param string $link Link content, including the namespace prefix.
 * @return string LaTeX code, or $link when the image does not exist.
 */
private function imageLinkToLatex($link) {
	$parts = explode("|", $link);
	$imagename = array_shift($parts);
	// still need to remove the Namespace:
	$tmp_name = explode(':', $imagename, 2);
	$imagename = $tmp_name[1];

	$imgwidth = "10cm";
	$caption  = '';
	foreach ( $parts as $part ) {
		if ( preg_match("/\d+px/", $part) ) {
			continue;
		}
		if ( preg_match("/(\d+cm)/", $part, $widthmatch) ) {
			$imgwidth = $widthmatch[1];
			continue;
		}
		if ( preg_match("/thumb|thumbnail|frame/", $part) ) {
			continue;
		}
		if ( preg_match("/left|right|center|none/", $part) ) {
			continue;
		}
		$caption = trim($part);
	}

	$title = Title::makeTitleSafe( NS_IMAGE, $imagename );
	$file  = $title ? Image::newFromTitle( $title ) : null;
	if ( !$file ) {
		return $link;
	}
	$file->loadFromFile();
	if ( !$file->exists() ) {
		// does not exist!!!
		return $link;
	}

	// Underscores are masked so that later passes do not touch the path.
	$imagepath  = str_replace('\\', '/', $file->getPath());
	$underscore = $this->getMark('underscore');
	$imagepath  = str_replace('_', $underscore, $imagepath);
	$this->mask($underscore, '_');

	$this->addPackageDependency('graphicx');

	return "\\begin{center} \\resizebox{".$imgwidth."}{!}{\\includegraphics{".$imagepath."}}\\\\ \\textit{".$caption."}\\end{center}\n";
}

/**
 * Convert everything within single square brackets via externalLinkHelper().
 *
 * @param string $str Text to process.
 * @return string Text with external links converted.
 */
private function doExternalLinks( $str ) {
	$fName = __METHOD__;
	$this->profileIn($fName);

	$str = preg_replace_callback('/\[(.*?)\]/', array($this, 'externalLinkHelper'), $str);

	$this->profileOut($fName);
	return $str;
}

/**
 * preg_replace_callback() handler of doExternalLinks().
 *
 * A bracket that starts with a web address becomes its label when it has
 * one ("[url label]"), otherwise \url{...}. Any other bracket is returned
 * unchanged.
 *
 * Generalised: https:// addresses are recognised as well; previously only
 * http:// was, so https links were left as bracketed source text.
 *
 * @param array $matches Regex match; index 1 holds the bracket content.
 * @return string Replacement text.
 */
private function externalLinkHelper($matches) {
	$match = trim($matches[1]);

	if ( !preg_match('#^https?://#', $match) ) {
		return "[".$match."]";
	}

	if ( strpos($match, ' ') === false ) {
		// URL only
		return '\url{'.$match.'}';
	}

	// With text: the URL comes first, the label is everything after it.
	$link = explode(' ', $match, 2);
	return $link[1];
}

/**
 * Cut all registered extension tags out of the text and run their
 * callbacks.
 *
 * Each tag is replaced by a marker; the callback result is stored under
 * that marker in $this->tag_replace for replaceParserExtensions().
 * Callbacks are called with (content, attributes, parser, 'latex').
 *
 * Fixes: "array" and uniqueString lacked their parentheses, and the
 * call-time pass-by-reference arguments (fatal since PHP 5.4) are gone.
 * A tag without a usable callback is now replaced by an empty string
 * instead of calling an undefined callback.
 *
 * @param string $str Text to process.
 * @return string Text with the tags replaced by markers.
 */
private function extractParserExtensions( $str = '' ) {
	$fName = __METHOD__;
	$this->profileIn($fName);

	$matches = array();
	$unique  = 'W2l-'.$this->uniqueString();

	$str = $this->extractTagsAndParams($this->elements, $str, $matches, $unique);

	// A reference inside the argument array serves callbacks declared with
	// either a by-value or a by-reference parser parameter.
	$parser = $this;

	foreach ( $matches as $key => $match ) {
		$tag   = $match[0];
		$input = $match[1];
		$argv  = $match[2];

		// Tags are matched case-insensitively; fall back to the lower-case name.
		if ( !isset($this->tags[$tag]) && isset($this->tags[strtolower($tag)]) ) {
			$tag = strtolower($tag);
		}

		if ( isset($this->tags[$tag]) && is_callable($this->tags[$tag]) ) {
			$rpl = call_user_func_array($this->tags[$tag], array($input, $argv, &$parser, 'latex'));
		} else {
			$rpl = '';
		}
		$this->tag_replace["$key"] = $rpl;
	}

	$this->profileOut($fName);
	return $str;
}

/**
 * Replace the markers created by extractParserExtensions() with the
 * output of the tag callbacks.
 *
 * @param string $str Text containing markers.
 * @return string Text with the markers replaced.
 */
private function replaceParserExtensions( $str ) {
	$fName = __METHOD__;
	$this->profileIn($fName);

	$markers = array_keys($this->tag_replace);
	$outputs = array_values($this->tag_replace);
	$result  = str_replace($markers, $outputs, $str);

	$this->profileOut($fName);
	return $result;
}

/**
 * Replace the given tags (and HTML comments) by markers.
 *
 * Fixes: "array" lacked its parentheses, and tag names are now quoted
 * before they are put into the regular expression.
 *
 * @param array  $elements    Tag names to extract.
 * @param string $text        Text to scan.
 * @param array  &$matches    Filled with marker => array(element, content,
 *                            decoded attributes, original text).
 * @param string $uniq_prefix Not read by this method; markers come from getMark().
 * @return string The text with every extracted tag replaced by its marker.
 */
private function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
	static $n = 1;
	$stripped = '';
	$matches = array();

	$quoted = array();
	foreach ( $elements as $name ) {
		$quoted[] = preg_quote( $name, '/' );
	}
	$taglist = implode( '|', $quoted );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( '' != $text ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			// No further tag.
			break;
		}
		if ( count( $p ) > 5 ) {
			// comment
			$element    = $p[4];
			$attributes = '';
			$close      = '';
			$inside     = $p[5];
		} else {
			// tag
			$element    = $p[1];
			$attributes = $p[2];
			$close      = $p[3];
			$inside     = $p[4];
		}

		$marker = $this->getMark($element, $n++);
		$stripped .= $marker;

		if ( $close === '/>' ) {
			// Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element == '!--' ) {
				$end = '/(-->)/';
			} else {
				$end = "/(<\\/".preg_quote( $element, '/' )."\\s*>)/i";
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				$tail = $q[1];
				$text = $q[2];
			}
		}

		$matches[$marker] = array(
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail"
		);
	}
	return $stripped;
}

/**
 * Converts runs of apostrophes (wiki italic/bold markup) in one line of text
 * into <i> and <b> tags.
 *
 * @param string $text A single line of text.
 * @return string The line with the apostrophe markup replaced by tags.
 */
private function doQuotes( $text ) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
    if ( count( $arr ) == 1 ) {
        // No markup in this line.
        $this->profileOut($fName);
        return $text;
    }

    # First, do some preliminary work. This may shift some apostrophes from
    # being mark-up to being text. It also counts the number of occurrences
    # of bold and italics mark-ups. Odd indexes of $arr hold the apostrophes.
    $numbold = 0;
    $numitalics = 0;
    $count = count( $arr );
    for ( $i = 1; $i < $count; $i += 2 ) {
        $len = strlen( $arr[$i] );
        if ( $len == 4 ) {
            # Four apostrophes: the first is text, the other three are bold.
            $arr[$i-1] .= "'";
            $arr[$i] = "'''";
        } else if ( $len > 5 ) {
            # More than five: all are text except for the last five.
            $arr[$i-1] .= str_repeat( "'", $len - 5 );
            $arr[$i] = "'''''";
        }
        # Count the mark-ups; five apostrophes count as one of each.
        $len = strlen( $arr[$i] );
        if ( $len == 2 ) {
            $numitalics++;
        } else if ( $len == 3 ) {
            $numbold++;
        } else if ( $len == 5 ) {
            $numitalics++;
            $numbold++;
        }
    }

    # If there is an odd number of both bold and italics, it is likely
    # that one of the bold ones was meant to be an apostrophe followed
    # by italics. Which one we cannot know for certain, but it is more
    # likely to be one that has a single-letter word before it.
    if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
        $firstsingleletterword = -1;
        $firstmultiletterword = -1;
        $firstspace = -1;
        for ( $i = 1; $i < $count; $i += 2 ) {
            if ( strlen( $arr[$i] ) != 3 ) {
                continue;
            }
            $x1 = substr( $arr[$i-1], -1 );
            $x2 = substr( $arr[$i-1], -2, 1 );
            if ( $x1 == ' ' ) {
                if ( $firstspace == -1 ) $firstspace = $i;
            } else if ( $x2 == ' ' ) {
                if ( $firstsingleletterword == -1 ) $firstsingleletterword = $i;
            } else {
                if ( $firstmultiletterword == -1 ) $firstmultiletterword = $i;
            }
        }

        # Prefer a single-letter word, then a multi-letter word, then the
        # first one that has neither. All three can be -1, for example when
        # the line holds only one quintuple apostrophe.
        $pick = -1;
        if ( $firstsingleletterword > -1 ) {
            $pick = $firstsingleletterword;
        } else if ( $firstmultiletterword > -1 ) {
            $pick = $firstmultiletterword;
        } else if ( $firstspace > -1 ) {
            $pick = $firstspace;
        }
        if ( $pick > -1 ) {
            $arr[$pick] = "''";
            $arr[$pick-1] .= "'";
        }
    }

    # Now convert the apostrophes to tags. $state records which tags are
    # open and in which order; 'both' means five apostrophes were seen and
    # the text is buffered until the nesting order is known.
    $output = '';
    $buffer = '';
    $state = '';
    $i = 0;
    foreach ( $arr as $r ) {
        if ( ( $i % 2 ) == 0 ) {
            if ( $state == 'both' ) {
                $buffer .= $r;
            } else {
                $output .= $r;
            }
        } else {
            $len = strlen( $r );
            if ( $len == 2 ) {
                if ( $state == 'i' ) { $output .= '</i>'; $state = ''; }
                else if ( $state == 'bi' ) { $output .= '</i>'; $state = 'b'; }
                else if ( $state == 'ib' ) { $output .= '</b></i><b>'; $state = 'b'; }
                else if ( $state == 'both' ) { $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
                else { $output .= '<i>'; $state .= 'i'; } # $state can be 'b' or ''
            } else if ( $len == 3 ) {
                if ( $state == 'b' ) { $output .= '</b>'; $state = ''; }
                else if ( $state == 'bi' ) { $output .= '</i></b><i>'; $state = 'i'; }
                else if ( $state == 'ib' ) { $output .= '</b>'; $state = 'i'; }
                else if ( $state == 'both' ) { $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
                else { $output .= '<b>'; $state .= 'b'; } # $state can be 'i' or ''
            } else if ( $len == 5 ) {
                if ( $state == 'b' ) { $output .= '</b><i>'; $state = 'i'; }
                else if ( $state == 'i' ) { $output .= '</i><b>'; $state = 'b'; }
                else if ( $state == 'bi' ) { $output .= '</i></b>'; $state = ''; }
                else if ( $state == 'ib' ) { $output .= '</b></i>'; $state = ''; }
                else if ( $state == 'both' ) { $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
                else { $buffer = ''; $state = 'both'; } # ($state == '')
            }
        }
        $i++;
    }

    # Now close all remaining tags. Notice that the order is important.
    if ( $state == 'b' || $state == 'ib' ) $output .= '</b>';
    if ( $state == 'i' || $state == 'bi' || $state == 'ib' ) $output .= '</i>';
    if ( $state == 'bi' ) $output .= '</b>';
    if ( $state == 'both' ) $output .= '<b><i>'.$buffer.'</i></b>';

    $this->profileOut($fName);
    return $output;
}

/**
 * Applies the registered plain-string replacements to $str: first the
 * case-sensitive list, then the case-insensitive one.
 *
 * @param string $str Text to process.
 * @return string Text after both replacement passes.
 */
private function doSimpleReplace( $str ) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $str = str_replace($this->replace_search, $this->replace_replace, $str);
    // Skip the second pass when no case-insensitive entries are set.
    if ( !empty($this->ireplace_search) ) {
        $str = str_ireplace($this->ireplace_search, $this->ireplace_replace, $str);
    }

    $this->profileOut($fName);
    return $str;
}

/**
 * Runs all registered regular-expression replacements over $str.
 *
 * @param string $str Text to process.
 * @return string Result of preg_replace() with $this->regexp_search and
 *                $this->regexp_replace.
 */
private function doRegExp( $str ) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $str = preg_replace($this->regexp_search, $this->regexp_replace, $str);

    $this->profileOut($fName);
    return $str;
}

/**
 * Converts wiki tables in $str to LaTeX.
 *
 * Fires the "W2L_TABLES" event first and then runs the built-in table
 * converter over the text, as there might be some tables left.
 *
 * @param string $str Text possibly containing wiki tables.
 * @return string Text returned by externalTableHelper().
 */
private function doTableStuff( $str ) {
    $this->profileIn(__METHOD__);

    // NOTE(review): the return value of execEvent() is ignored, so handlers
    // presumably receive $str by reference -- confirm against execEvent().
    $this->execEvent("W2L_TABLES", $str);

    // Still parsing tables, as there might be some left.
    $str = $this->externalTableHelper($str);

    $this->profileOut(__METHOD__);
    return $str;
}

/**
 * Restores pre, math, and other extensions removed by strip.
 *
 * Always call unstripNoWiki() after this one.
 *
 * @param string $text  Text containing markers.
 * @param array  $state Strip state; its 'general' entry maps markers to text.
 * @return string $text with the 'general' markers replaced, or unchanged
 *                when $state has no 'general' entry.
 * @private
 */
private function unstrip( $text, &$state ) {
    if ( !isset( $state['general'] ) ) {
        return $text;
    }

    wfProfileIn( __METHOD__ );
    # TODO: good candidate for FSS
    $text = strtr( $text, $state['general'] );
    wfProfileOut( __METHOD__ );
    return $text;
}

/**
 * Restores the markers of the 'nowiki' strip state.
 *
 * Always call this after unstrip() to preserve the order.
 *
 * @param string $text  Text containing markers.
 * @param array  $state Strip state; its 'nowiki' entry maps markers to text.
 * @return string $text with the 'nowiki' markers replaced, or unchanged
 *                when $state has no 'nowiki' entry.
 * @private
 */
private function unstripNoWiki( $text, &$state ) {
    if ( !isset( $state['nowiki'] ) ) {
        return $text;
    }

    wfProfileIn( __METHOD__ );
    # TODO: good candidate for FSS
    $text = strtr( $text, $state['nowiki'] );
    wfProfileOut( __METHOD__ );

    return $text;
}

/**
 * Runs unstrip() and then unstripNoWiki() on $text using $this->mStripState,
 * and registers the definition of the "Y" column type as LaTeX head code.
 *
 * @param string $text Text containing markers.
 * @return string The unstripped text.
 */
private function unstripForHTML( $text ) {
    $general = $this->unstrip( $text, $this->mStripState );
    $restored = $this->unstripNoWiki( $general, $this->mStripState );

    $this->addLatexHeadCode('\\newcolumntype{Y}{>{\\raggedright\arraybackslash}X}');

    return $restored;
}

/**
 * externalTableHelper is really a transplanted version of
 * Parser::doTableStuff from MediaWiki. Translates wiki tables to LaTeX
 * tabularx tables.
 *
 * Table attributes are ignored except "latexfmt" (the column specification)
 * and "latexwidth" (the table width, default \linewidth). An extra column
 * type Y is introduced for left-aligned text that can wrap.
 * Example: {| latexfmt="|l|X|Y|l|"
 *
 * @param string $t Text containing wiki tables.
 * @return string Text with the table lines replaced by LaTeX.
 */
private function externalTableHelper($t) {
    $correct = array("\n\{|" => "\n{|", "|\}\n"=> "|}\n");
    $t = str_replace(array_keys($correct), array_values($correct), $t);
    $t = trim($t);
    $t = explode( "\n", $t );

    $ltd = array();           # Is current cell TD or TH?
    $tr = array();            # Is currently a tr tag open?
    $ltr = array();           # tr attributes
    $has_opened_tr = array(); # Did this table open a row?
    $firstCellOfRow = true;
    $ltx_caption = '';
    $in_table = 0;

    foreach ( $t as $k => $x ) {
        $x = trim( $x );
        $fc = substr( $x, 0, 1 );

        if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
            // Start of table: extract the LaTeX hints from the attributes
            // and emit the environment header.
            $attributes = $this->unstripForHTML( $matches[2] );
            $attributes_test = $this->parseAttrString($attributes);

            $latexformat = '';
            if ( array_key_exists('latexfmt', $attributes_test) ) {
                $latexformat = str_replace("\\", "", $attributes_test['latexfmt']);
            }
            if ( array_key_exists('latexwidth', $attributes_test) ) {
                $latexwidth = str_replace('\(\backslash{}\)', '\\', $attributes_test['latexwidth']);
            } else {
                $latexwidth = '\linewidth';
            }

            // tabularx takes the width and the column specification as
            // mandatory arguments. A nested table is wrapped in a group.
            $header = '\begin{tabularx}{' . $latexwidth . '}{' . $latexformat . '}\hline';
            $t[$k] = ( $in_table == 0 ) ? $header : '{' . $header;

            $in_table++;
            array_push( $ltd, '' );
            array_push( $tr, false );
            array_push( $ltr, '' );
            array_push( $has_opened_tr, false );
            $this->addPackageDependency('tabularx');
            $firstCellOfRow = true;
        }
        else if ( ('|}' == substr( $x, 0, 2 )) || ('|\}' == substr( $x, 0, 3 )) ) {
            // End of table: pop the stacks and emit the LaTeX ending.
            array_pop( $ltd );
            if ( !array_pop( $has_opened_tr ) && $k > 0 ) {
                $t[$k-1] = $t[$k-1] . '\\\\ \hline';
            }
            if ( array_pop( $tr ) && $k > 0 ) {
                $t[$k-1] = $t[$k-1] . '\\\\ \hline';
            }
            array_pop( $ltr );

            // Single quotes: "\e" is an escape sequence in double-quoted
            // strings since PHP 5.4 and would corrupt "\end".
            if ( $in_table > 1 ) {
                $t[$k] = '\end{tabularx}}' . trim($ltx_caption);
            } else {
                $t[$k] = '\end{tabularx}' . "\n" . trim($ltx_caption);
            }
            $in_table--;
            $ltx_caption = '';
        }
        else if ( '|-' == substr( $x, 0, 2 ) ) {
            # Row separator; allows for |---
            // NOTE(review): the original tested strpos($x, '') == 1 to add a
            // second \hline. The needle literal is empty in this copy of the
            // file, so that test can never succeed; restore the intended
            // needle once it is known.
            $add_hline = '';

            $x = substr( $x, 1 );
            while ( $x != '' && substr( $x, 0, 1 ) == '-' ) {
                $x = substr( $x, 1 );
            }
            array_pop( $ltd );
            array_pop( $has_opened_tr );
            array_push( $has_opened_tr, true );

            if ( array_pop( $tr ) && $k > 0 ) {
                $t[$k-1] = $t[$k-1] . '\\\\ \hline' . $add_hline;
            }
            array_pop( $ltr );
            $t[$k] = '';
            array_push( $tr, false );
            array_push( $ltd, '' );

            $attributes = $this->unstripForHTML( $x );
            array_push( $ltr, Sanitizer::fixTagAttributes( $attributes, 'tr' ) );
            $firstCellOfRow = true;
        }
        else if ( '|' == $fc || '!' == $fc || '|+' == substr( $x, 0, 2 ) ) {
            # $x is a table row or a caption
            if ( '|+' == substr( $x, 0, 2 ) ) {
                $fc = '+';
                $x = substr( $x, 1 );
            }
            $after = substr( $x, 1 );
            if ( $fc == '!' ) {
                $after = str_replace( '!!', '||', $after );
            }

            // Split up multiple cells on the same line.
            // FIXME: This can result in improper nesting of tags processed
            // by earlier parser steps, but should avoid splitting up eg
            // attribute values containing literal "||".
            $after = wfExplodeMarkup( '||', $after );
            $t[$k] = '';

            # Loop through each table cell
            foreach ( $after as $theline ) {
                if ( $fc == '+' ) {
                    // Captions are collected and appended after the table;
                    // they must not touch the row/cell stacks.
                    $ltx_caption .= $theline;
                    continue;
                }

                $z = '';
                array_pop( $ltr );
                if ( !array_pop( $tr ) ) {
                    $z = "\n";
                }
                array_push( $tr, true );
                array_push( $ltr, '' );
                array_pop( $has_opened_tr );
                array_push( $has_opened_tr, true );

                // Heading cells and normal cells are equal in LaTeX.
                array_pop( $ltd );
                $l = $firstCellOfRow ? '' : ' & ';
                array_push( $ltd, $l );

                # Cell parameters
                $y = explode( '|', $theline, 2 );
                # Note that a '|' inside an invalid link should not
                # be mistaken as delimiting cell parameters
                if ( strpos( $y[0], '[[' ) !== false ) {
                    $y = array( $theline );
                }

                $multi_col = false;
                $content = $y[0];
                if ( count( $y ) > 1 ) {
                    $attributes = $this->unstripForHTML( $y[0] );
                    $multi_col = $this->checkColspan($attributes);
                    $content = $y[1];
                }

                if ( $multi_col !== false ) {
                    $addSep = $firstCellOfRow ? '' : '&';
                    $cell = $z . $addSep . '\multicolumn{' . $multi_col['colspan'] . '}{'
                        . $multi_col['latexfmt'] . '}{' . $content . '}';
                } else if ( $fc == '!' ) {
                    // Heading cell highlighting. Also used for cells whose
                    // attributes carry no colspan.
                    $cell = $z . $l . '\textbf{' . $content . '}';
                } else {
                    $cell = $z . $l . $content;
                }

                $firstCellOfRow = false;
                $t[$k] .= $cell;
            }
        }
    }

    return implode( "\n", $t );
}

/**
 * Reads the colspan of a table cell from its attribute string.
 *
 * @param string $str Attribute string of the cell.
 * @return array|false array('colspan' => ..., 'latexfmt' => ...) with
 *                     'latexfmt' defaulting to '|l|', or false when the
 *                     attributes contain no colspan.
 */
function checkColspan($str) {
    $attr = $this->parseAttrString($str);
    if ( !array_key_exists('colspan', $attr) ) {
        return false;
    }

    $result = array();
    $result['colspan'] = $attr['colspan'];
    if ( array_key_exists('latexfmt', $attr) ) {
        $result['latexfmt'] = $attr['latexfmt'];
    } else {
        $result['latexfmt'] = '|l|';
    }
    return $result;
}

/**
 * Strips out MediaWiki comments, which are in fact HTML comments.
 *
 * A comment standing alone on its line is removed together with the line;
 * an unterminated comment is left in place.
 *
 * @param string $text Text to clean.
 * @return string Text without terminated HTML comments.
 */
private function stripComments( $text = '' ) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    // This approach is from MediaWiki.
    while ( ($start = strpos($text, '<!--')) !== false ) {
        $end = strpos($text, '-->', $start + 4);
        if ( $end === false ) {
            # Unterminated comment; bail out
            break;
        }

        $end += 3;

        # Trim space and newline if the comment is both
        # preceded and followed by a newline
        $spaceStart = max($start - 1, 0);
        $spaceLen = $end - $spaceStart;
        while ( substr($text, $spaceStart, 1) === ' ' && $spaceStart > 0 ) {
            $spaceStart--;
            $spaceLen++;
        }
        while ( substr($text, $spaceStart + $spaceLen, 1) === ' ' ) {
            $spaceLen++;
        }
        if ( substr($text, $spaceStart, 1) === "\n"
            && substr($text, $spaceStart + $spaceLen, 1) === "\n" ) {
            # Remove the comment, leading and trailing
            # spaces, and leave only one newline.
            $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
        } else {
            # Remove just the comment.
            $text = substr_replace($text, '', $start, $end - $start);
        }
    }

    $this->profileOut($fName);
    return $text;
}

/**
 * Translates a few HTML tags into their LaTeX counterparts.
 *
 * First step only; matching is case-insensitive and purely textual.
 *
 * @param string $str Text containing HTML tags.
 * @return string Text with the supported tags replaced.
 */
private function doHTML($str) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $replacing = array(
        '<center>'  => '\begin{center}',
        '</center>' => '\end{center}',
        "<i>"       => '\textit{',
        "</i>"      => '}',
        "<b>"       => '\textbf{',
        "</b>"      => '}',
        "<tt>"      => '\texttt{',
        "</tt>"     => '}'
    );
    $str = str_ireplace(array_keys($replacing), array_values($replacing), $str);

    $this->profileOut($fName);
    return $str;
}

/* Toolkit functions */

/**
 * Builds a random hexadecimal string from two mt_rand() values.
 *
 * @return string
 */
private function uniqueString() {
    return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
}

/* Profiling and debugging functions */

/**
 * Appends an "in" entry for $fName to the profile log when profiling is on.
 *
 * @param string $fName Name of the function being entered.
 */
private function profileIn($fName) {
    if ( $this->doProfiling ) {
        $time = microtime();
        $this->ProfileLog[] = array("function"=>$fName, "time"=>$time, "type" => "in");
    }
    return;
}

/**
 * Appends an "out" entry for $fName to the profile log when profiling is on.
 *
 * @param string $fName Name of the function being left.
 */
private function profileOut($fName) {
    if ( $this->doProfiling ) {
        $time = microtime();
        $this->ProfileLog[] = array("function"=>$fName, "time"=>$time, "type" => "out");
    }
    return;
}

/**
 * Appends a free-text message to the profile log when profiling is on.
 *
 * The entry uses the key "action" where profileIn()/profileOut() use "type".
 *
 * @param string $msg Message to log.
 */
private function profileMsg($msg) {
    if ( $this->doProfiling ) {
        $time = microtime();
        $this->ProfileLog[] = array("function"=>$msg, "time"=>$time, "action" => "msg");
    }
    return;
}

/**
 * Replaces the words "LaTeX", "TeX" and "LaTeX 2e" by their logo commands.
 *
 * @param string $str Text to process.
 * @return string Text with the logo commands inserted.
 */
public function maskLaTeX($str) {
    $profileKey = __METHOD__;
    $this->profileIn($profileKey);

    $masked = strtr($str, array(
        'LaTeX'    => '\LaTeX{}',
        'TeX'      => '\TeX{}',
        'LaTeX 2e' => '\LaTeXe{}'
    ));

    $this->profileOut($profileKey);
    return $masked;
}

/**
 * Masks the characters that are important for LaTeX commands: { } \ &
 *
 * The ampersand is replaced by a marker registered through mask() for '\&';
 * the other characters are replaced by their escaped LaTeX forms directly.
 *
 * @param string $str Text to mask.
 * @return string The masked text.
 */
public function maskLatexCommandChars($str) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $this->Et = $this->getMark("Et");
    $this->mask($this->Et, '\&');

    $chars = array(
        '\\' => "\(\backslash{}\)",
        "{" => "\{",
        "}" => "\}",
        '&' => $this->Et,
    );
    $str = strtr($str, $chars);

    $this->profileOut($fName);
    return $str;
}

/**
 * Masks characters that have a special meaning in MediaWiki markup.
 *
 * Only '#' and '*' are replaced.
 *
 * @param string $str Text to mask.
 * @return string The masked text.
 */
public function maskMwSpecialChars($str) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $chars = array(
        '#' => "\#",
        "*" => "\(\ast{}\)",
    );
    $str = strtr($str, $chars);

    $this->profileOut($fName);
    return $str;
}

/**
 * Escapes the LaTeX special characters '_', '%' and '$' with a backslash.
 *
 * @param string $str Text to mask.
 * @return string The masked text.
 */
public function maskLatexSpecialChars($str) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $chars = array(
        '_' => '\_',
        '%' => '\%',
        '$' => '\$'
    );
    $str = strtr($str, $chars);

    $this->profileOut($fName);
    return $str;
}

/**
 * Builds a marker string that can stand in for text which must be
 * protected from further processing (links, for example).
 *
 * The internal counter is incremented on every call, also when an explicit
 * $number is given.
 *
 * @param string $tag    Name embedded in the marker.
 * @param int    $number Number embedded in the marker; -1 (default) uses
 *                       the internal counter.
 * @return string The marker.
 */
public function getMark($tag, $number = -1) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    ++$this->marks_counter;
    if ( $number == -1 ) {
        $number = $this->marks_counter;
    }
    $marker = '((UNIQ-W2L-'.$this->unique.'-'.$tag.'-'.sprintf('%08X', $number).'-QINU))';

    $this->profileOut($fName);
    return $marker;
}

/**
 * Processes all templates, variables and parser functions in $str.
 *
 * The text is split into brace-delimited parts by split_str(); every part
 * starting with "{{" is expanded through doCurlyBraces(). Finally the
 * masked forms \{\{\{ and \}\}\} are turned back into {{{ and }}}.
 *
 * @param string $str Text to process.
 * @return string The text with all curly-brace constructs expanded.
 */
public function processCurlyBraces($str) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    if ( $this->initiated == false ) {
        $this->initParsing();
    }

    // Debug statistics.
    ++$this->curlyBraceDebugCounter;
    $this->curlyBraceLength = $this->curlyBraceLength + strlen($str);

    $parts = $this->split_str($str);
    if ( !is_array($parts) ) {
        $parts = array();
    }

    $new_str = '';
    foreach ( $parts as $part ) {
        if ( substr($part, 0, 2) == '{{' ) {
            // Same shape as a preg match: [0] full text, [1] inner text.
            $match = array($part, substr($part, 2, -2));
            $part = $this->doCurlyBraces($match);
        }
        $new_str .= $part;
    }

    $chars = array('\{\{\{' => '{{{', '\}\}\}' => '}}}');
    $new_str = strtr($new_str, $chars);

    $this->profileOut($fName);
    return $new_str;
}

/**
 * Expands a single {{...}} construct.
 *
 * @param array $matches [0] the complete construct, [1] its inner text.
 * @return string The trimmed expansion; '' for an unknown identifier type.
 */
private function doCurlyBraces($matches) {
    $match = trim($matches[1]);

    // Everything before the first pipe is the identifier.
    if ( strpos($match, '|') !== false ) {
        $pieces = explode('|', $match, 2);
        $identifier = $pieces[0];
        $args = $pieces[1];
    } else {
        $identifier = $match;
        $args = '';
    }
    // checkIdentifier() classifies the trimmed name, so use the same form.
    $identifier = trim($identifier);

    $tmp = '';
    $type = $this->checkIdentifier($identifier);
    switch ( $type ) {
        case W2L_TEMPLATE:
        case W2L_TRANSCLUSION:
            // processArgString() expects a string; no arguments means an
            // empty argument list.
            $args = ( $args === '' ) ? array() : $this->processArgString($args);

            if ( $type == W2L_TEMPLATE ) {
                $tmp = $this->getContentByTitle($identifier, NS_TEMPLATE);
            } else {
                // Strip the leading colon of {{:Title}}.
                $tmp = $this->getContentByTitle(substr($identifier, 1));
            }
            $tmp = $this->preprocessString($tmp);
            $tmp = $this->processTemplateVariables($tmp, $args);
            $tmp = $this->processCurlyBraces($tmp);
            break;

        case W2L_PARSERFUNCTION:
        case W2L_COREPARSERFUNCTION:
            $fnc = explode(':', $identifier, 2);
            $expr = isset($fnc[1]) ? $fnc[1] : '';
            // "#if" style functions are registered without the hash.
            $function = ( $type == W2L_PARSERFUNCTION ) ? substr($fnc[0], 1) : $fnc[0];
            $mark = $this->getMark('pipe');

            // NOTE(review): template variables with a default are removed
            // here; the replacement string is empty in this copy of the
            // file -- confirm that this is intended.
            $args = preg_replace('/\{\{\{(.*)\|(.*)\}\}\}/U', '', $args);
            $args = $this->processCurlyBraces($args);
            // Protect the pipe of [[target|label]] links while splitting.
            $args = preg_replace('/\[\[(.*)\|(.*)\]\]/U', '$1'.$mark.'$2', $args);

            $new_args = array();
            foreach ( explode('|', $args) as $value ) {
                $new_args[] = str_replace($mark, '|', $value);
            }
            $tmp = $this->processParserFunction($function, $expr, $new_args);
            break;

        case W2L_VARIABLE:
            $tmp = $this->mw_vars[$identifier];
            break;

        default:
            $this->reportError( wfMsg('w2l_parser_no_default_value'), __METHOD__);
            break;
    }

    return trim($tmp);
}

/**
 * Splits a template argument string ("a|b|key=value") into an array.
 *
 * Named arguments are stored under their trimmed key with a trimmed value.
 * Unnamed arguments are numbered from 1, counting unnamed arguments only
 * (as MediaWiki does), and are stored untrimmed.
 *
 * @param string $str Argument string without the template name.
 * @return array Argument name or position => value.
 */
private function processArgString($str) {
    $args = array();
    if ( !is_string($str) ) {
        // Nothing to split.
        return $args;
    }

    $position = 0;
    foreach ( explode('|', $str) as $keyvaluepair ) {
        if ( strpos($keyvaluepair, '=') !== false ) {
            $pair = explode('=', $keyvaluepair, 2);
            $args[trim($pair[0])] = trim($pair[1]);
        } else {
            ++$position;
            $args[$position] = $keyvaluepair;
        }
    }
    return $args;
}

/**
 * Replaces the {{{name}}} and {{{name|default}}} variables in a template
 * text by the given arguments.
 *
 * Variables that stay unresolved are masked as \{\{\{ ... \}\}\}.
 *
 * @param string $str  Template text.
 * @param array  $args Argument name or position => value.
 * @return string The template text with its variables filled in.
 */
private function processTemplateVariables($str, $args = array()) {
    $this->templateVars = $args;
    $str = preg_replace_callback('/\{\{\{(.*?)\}\}\}/sm', array($this, 'doTemplateVariables'), $str);
    $chars = array('{{{'=>'\{\{\{', '}}}' => '\}\}\}');
    $str = strtr($str, $chars);
    unset($this->templateVars);
    return $str;
}

/**
 * preg callback of processTemplateVariables(): resolves one variable.
 *
 * @param array $match [0] the complete {{{...}}}, [1] its inner text.
 * @return string The argument value; the default when the argument is
 *                missing or empty; the unchanged variable when there is
 *                neither a value nor a default.
 */
private function doTemplateVariables($match) {
    if ( strpos($match[1], '|') !== false ) {
        $parts = explode('|', $match[1], 2);
        $name = $parts[0];
        $fallback = $parts[1];
    } else {
        $name = $match[1];
        $fallback = $match[0];
    }

    $content = isset($this->templateVars[$name]) ? $this->templateVars[$name] : '';
    // Only a missing or empty argument falls back; "0" is a valid value.
    if ( $content === '' || $content === null ) {
        return $fallback;
    }
    return $content;
}

/**
 * Calls the registered callback of a parser function.
 *
 * The callback receives this parser, the trimmed expression and then every
 * trimmed argument.
 *
 * @param string $fnc  Name under which the function is registered.
 * @param string $expr Text following the colon of the function call.
 * @param array  $args Remaining pipe-separated arguments.
 * @return mixed The callback's result; '' when it returns an array or when
 *               no callable is registered under $fnc.
 */
private function processParserFunction($fnc, $expr, $args) {
    if ( !isset($this->pFunctions[$fnc]) || !is_callable($this->pFunctions[$fnc]) ) {
        $this->reportError('Unknown parser function: ' . $fnc, __METHOD__);
        return '';
    }

    // The parser is passed by reference for callbacks that declare it so.
    $params = array(&$this, trim($expr));
    foreach ( $args as $value ) {
        $params[] = trim($value);
    }

    $content = call_user_func_array($this->pFunctions[$fnc], $params);
    if ( is_array($content) ) {
        return '';
    }
    return $content;
}

/**
 * Splits $str into consecutive parts so that every top-level brace group
 * ("{ ... }" with balanced nesting) forms a part of its own.
 *
 * Table delimiters ("\n{|" and "|}\n") are hidden behind markers while
 * splitting so that their braces are not counted.
 *
 * @param string $str Text to split.
 * @return array The parts in their original order; some may be empty.
 */
private function split_str($str) {
    $table_open_mark = $this->getMark('table-open');
    $table_close_mark = $this->getMark('table-close');

    $str = str_replace("\n{|", $table_open_mark, $str);
    $str = str_replace("|}\n", $table_close_mark, $str);

    $depth = 0;
    $block = 0;
    $split_array = array(0 => '');

    $length = strlen($str);
    for ( $pos = 0; $pos < $length; $pos++ ) {
        $cur_char = $str[$pos];

        if ( $cur_char == '{' ) {
            ++$depth;
            if ( $depth == 1 ) {
                // A new top-level group starts a new part.
                ++$block;
                $split_array[$block] = '';
            }
            $split_array[$block] .= $cur_char;
        } else if ( $cur_char == '}' && $depth > 0 ) {
            --$depth;
            $split_array[$block] .= $cur_char;
            if ( $depth == 0 ) {
                // The group is complete; following text goes to a new part.
                ++$block;
                $split_array[$block] = '';
            }
        } else {
            // Plain text, including a closing brace without an opener: it
            // must not push the depth below zero.
            $split_array[$block] .= $cur_char;
        }
    }

    $new_split = array();
    foreach ( $split_array as $key => $value ) {
        $value = str_replace($table_open_mark, "\n{|", $value);
        $value = str_replace($table_close_mark, "|}\n", $value);
        $new_split[$key] = $value;
    }
    return $new_split;
}

/**
 * Fetches the wiki text of a page.
 *
 * When the "use_cache" setting is on, results are kept in
 * $this->content_cache. Pages outside the main namespace are cached under
 * "<namespace>:<title>" so that equally named pages of different
 * namespaces do not overwrite each other.
 *
 * @param string $title_str Page title.
 * @param int    $namespace Namespace index used to resolve the title.
 * @return string The page text, or $title_str itself when the title is
 *                invalid or the page does not exist (an error is reported).
 */
public function getContentByTitle( $title_str, $namespace = NS_MAIN) {
    $title_str = trim($title_str);
    $use_cache = $this->getVal("use_cache");
    $cache_key = ( $namespace == NS_MAIN ) ? $title_str : $namespace . ':' . $title_str;

    if ( $use_cache && isset($this->content_cache[$cache_key]) ) {
        $this->reportError(wfMsg('w2l_parser_cachehit', $title_str), __METHOD__);
        return $this->content_cache[$cache_key];
    }

    $title = Title::newFromText( $title_str, $namespace);
    if ( !($title instanceof Title) ) {
        $this->reportError("title_str=".$title_str, __METHOD__);
        return $title_str;
    }

    if ( $title->exists() ) {
        $rev = new Article( $title, 0 );
        $text = $rev->getContent();
    } else {
        $text = $title_str;
        $this->reportError( wfMsg('w2l_parser_article_not_existing', $title_str ), __METHOD__);
    }

    if ( $use_cache ) {
        $this->content_cache[$cache_key] = $text;
    }
    return $text;
}

/**
 * Classifies the identifier of a {{...}} construct.
 *
 * @param string $str Text before the first pipe of the construct.
 * @return int W2L_VARIABLE for a known variable, W2L_PARSERFUNCTION for a
 *             leading '#', W2L_TRANSCLUSION for a leading ':',
 *             W2L_COREPARSERFUNCTION when the text before the first colon
 *             is a registered function, W2L_TEMPLATE otherwise.
 */
public function checkIdentifier($str) {
    $str = trim($str);
    if ( $str === '' ) {
        // Nothing to inspect; treated like any other unknown name.
        return W2L_TEMPLATE;
    }

    if ( is_array($this->mw_vars) && array_key_exists($str, $this->mw_vars) ) {
        return W2L_VARIABLE;
    }
    if ( '#' == $str[0] ) {
        return W2L_PARSERFUNCTION;
    }
    if ( ':' == $str[0] ) {
        return W2L_TRANSCLUSION;
    }

    $test = explode(':', $str, 2);
    if ( is_array($this->pFunctions) && array_key_exists($test[0], $this->pFunctions) ) {
        return W2L_COREPARSERFUNCTION;
    }
    return W2L_TEMPLATE;
}

// Placeholders: these three lookups (by page id, by revision id, by URL) have empty bodies and return nothing.
public function getContentByPageId($id) {} public function getContentByRevId($id) {} public function getContentByUrl($url) {}

/**
 * Records an error message and raises the error flag.
 *
 * @param string $msg Message text.
 * @param string $fnc Name of the reporting function, used as a prefix.
 */
public function reportError( $msg, $fnc ) {
    $line = $fnc . ': ' . $msg . "\n";
    $this->error_msg[] = $line;
    $this->is_error = true;
}

/**
 * Returns the collected error messages as an HTML textarea preceded by a
 * heading line.
 *
 * @return string HTML, or '' when no error has been reported.
 */
public function getErrorMessages() {
    if ( $this->is_error != true ) {
        return '';
    }

    $errors = wfMsg('w2l_parser_protocol')."\n";
    $errors .= '<textarea style="height:200px;">';
    foreach ( $this->error_msg as $error_line ) {
        // Messages can contain page titles and wiki text; escape them so
        // that they cannot close the textarea or inject markup.
        $errors .= htmlspecialchars($error_line);
    }
    $errors .= '</textarea>'."\n";
    return $errors;
}

/**
 * Sets the table of MediaWiki variables (name => value).
 *
 * @param array $vars
 * @return bool Always true.
 */
public function setMwVariables($vars) {
    $this->mw_vars = $vars;
    return true;
}

/**
 * Registers a LaTeX package that the generated document needs.
 *
 * Options given earlier for the same package are kept when the package is
 * requested again without options.
 *
 * @param string $package Package name.
 * @param string $options Package options, without brackets.
 * @return bool Always true.
 */
public function addPackageDependency($package, $options = '') {
    if ( $options == '' && !empty($this->required_packages[$package]) ) {
        return true;
    }
    $this->required_packages[$package] = $options;
    return true;
}

/**
 * Adds a line of LaTeX code for the document head.
 *
 * @param string $code
 */
public function addLatexHeadCode($code) {
    $this->latex_headcode[] = $code;
}

/**
 * Returns the registered head code, duplicates removed, one entry per line.
 *
 * @return string '' when no head code has been added.
 */
public function getLatexHeadCode() {
    if ( empty($this->latex_headcode) ) {
        return '';
    }
    $code = array_unique($this->latex_headcode);
    return implode("\n", $code);
}

/**
 * Builds one \usepackage line for every registered package.
 *
 * @return string '' when no package has been registered.
 */
public function getUsePackageBlock() {
    $packages = '';
    if ( empty($this->required_packages) ) {
        return $packages;
    }
    foreach ( $this->required_packages as $package => $options ) {
        $packages .= '\usepackage';
        if ( $options != '' ) {
            $packages .= '['.$options.']';
        }
        $packages .= '{'.$package.'}'."\n";
    }
    return $packages;
}

/**
 * Parses an HTML-like attribute string (name="value" name='value'
 * name=value name) into an array.
 *
 * An attribute without a value is stored with the value ''. Whitespace
 * around the '=' is not supported. At most 10000 attributes are read.
 *
 * @param string $str Attribute string.
 * @return array Attribute name => value.
 */
function parseAttrString($str) {
    $result = array();

    for ( $i = 1; $i <= 10000; $i++ ) {
        $str = trim($str);
        if ( $str === '' ) {
            break;
        }
        // The trailing blank guarantees that a character follows the name.
        $str = $str.' ';

        // Attribute name: everything up to the first blank or '='.
        $howmany = strcspn($str, ' =');
        $attr = substr($str, 0, $howmany);
        $str = substr($str, $howmany);

        if ( $str[0] == '=' ) {
            $str = substr($str, 1);
        }

        // Value: quoted, missing, or running up to the next blank.
        $fChar = $str[0];
        if ( $fChar == '"' || $fChar == "'" ) {
            $str = substr($str, 1);
            $howmany = strcspn($str, $fChar);
            $attr_value = substr($str, 0, $howmany);
            $str = substr($str, $howmany + 1);
        } elseif ( $fChar == ' ' ) {
            $attr_value = '';
        } else {
            $howmany = strcspn($str, ' ');
            $attr_value = substr($str, 0, $howmany);
            $str = substr($str, $howmany + 1);
        }

        $result[$attr] = $attr_value;
    }
    return $result;
}

// Wiki-Parser functions

/**
 * Returns a reference to $this->mTitle.
 */
function &getTitle() {
    return $this->mTitle;
}
}