Extension:Wiki2LaTeX/Development/w2lParser.php
From MediaWiki.org
<?php
/*
* File: w2lParser.php
* Created: 2007-03-02
* Version: 0.7
*
* Purpose:
* Contains the parser, which transforms Mediawiki-articles to LaTeX
*
* License:
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
// This file only works as part of MediaWiki. When it is requested directly,
// print the installation hint and stop.
if ( !defined('MEDIAWIKI') ) {
    $msg = 'To install Wiki2LaTeX, put the following line in LocalSettings.php:<br/>'
        . '<tt>require_once( $IP."/extensions/path_to_Wiki2LaTeX_files/wiki2latex.php" );</tt>';
    echo $msg;
    exit( 1 );
}

// Input kinds. NOTE(review): presumably meant for the $mode argument of
// Wiki2LaTeXParser::parse(); that argument is not evaluated in the visible code.
define('W2L_FILE', 1);
define('W2L_STRING', 0);

// Kinds of curly-brace constructs. NOTE(review): presumably consumed by the
// curly-brace processing, which is not visible here -- confirm.
define('W2L_TEMPLATE', 0);
define('W2L_PARSERFUNCTION', 1);
define('W2L_TRANSCLUSION', 2);
define('W2L_VARIABLE', 3);
define('W2L_COREPARSERFUNCTION', 4);
class Wiki2LaTeXParser {
/**
 * Sets up an empty parser: no tags, events, parser functions or
 * replace rules are registered yet (that happens in initParsing()).
 *
 * Every property that other methods read before writing is initialised
 * here so that a fresh parser never touches an undefined property.
 */
function __construct() {
    // General state
    $this->initiated   = false;
    $this->doProfiling = true;
    $this->ProfileLog  = array();
    $this->parsing     = '';
    $this->parse_time  = null; // set by parse()
    $this->error_msg   = array();
    $this->is_error    = false;

    // Configuration with some default settings
    $this->config = array();
    $this->config["headings_toplevel"] = 'section';
    $this->config["use_hyperref"]      = true;
    // The key must match the one read by preprocessString().
    $this->config["leave_noinclude"]   = false;

    // Extension tags and their replacements
    $this->tags         = array();
    $this->fragments    = array();
    $this->elements     = array();
    $this->tag_source   = array();
    $this->tag_replace  = array();
    $this->tags_replace = array();
    $this->hooks        = array();

    // Markers for raw LaTeX, <nowiki> and preformatted blocks
    $this->rawtex_counter = 0;
    $this->rawtex_replace = array();
    $this->marks_counter  = 0;
    $this->nowikiMarks    = array();
    $this->nowikiCounter  = 0;
    $this->preReplace     = array();
    $this->mask_chars     = array();

    // Replace rules.
    // NEVER set one of these values via another way than by addSimpleReplace
    $this->replace_search   = array();
    $this->replace_replace  = array();
    $this->ireplace_search  = array();
    $this->ireplace_replace = array();
    // NEVER set one of these values via another way than by addRegExp
    $this->regexp_search  = array();
    $this->regexp_replace = array();

    // HTML entity table, filled through addChar()
    $this->htmlEntities = array();

    // Curly brace handling (takes parser functions)
    $this->curlyBraceDebugCounter = 0;
    $this->curlyBraceLength       = 0;

    // Event-system
    $this->event_functions = array();
    $this->event_blacklist = array();
    $this->events          = array();

    $this->mw_vars       = array();
    $this->content_cache = array();

    // Parserfunctions...
    $this->pFunctions  = array(); // takes custom ones (#switch)
    $this->cpFunctions = array(); // takes those without #

    // State of the block-level pass (doBlockLevels() and its helpers)
    $this->mInPre       = false;
    $this->mDTopen      = false;
    $this->mLastSection = '';

    // Files and LaTeX preamble requirements collected while parsing
    $this->files_used        = false;
    $this->files             = array();
    $this->required_packages = array();
    $this->latex_headcode    = array();
}
/* Public Functions */
/**
 * Applies several configuration values in one call.
 *
 * @param array $cArray Map of configuration key => value; every pair is
 *                      handed to setVal().
 * @return bool Always true.
 */
public function setConfig($cArray) {
    foreach ($cArray as $option => $setting) {
        $this->setVal($option, $setting);
    }

    return true;
}
/**
 * Stores a single configuration value, overwriting any previous one.
 *
 * @param mixed $key   Name of the option.
 * @param mixed $value Value to store.
 * @return bool Always true.
 */
public function setVal($key, $value) {
    $this->config[$key] = $value;

    return true;
}
/**
 * Reads a configuration value.
 *
 * Many options are queried without ever having been set, so an unknown
 * key is answered with null instead of triggering an undefined-index
 * notice/warning.
 *
 * @param mixed $key Name of the option.
 * @return mixed The stored value, or null when the option is not set.
 */
public function getVal($key) {
    if ( isset($this->config[$key]) ) {
        return $this->config[$key];
    }

    return null;
}
/**
 * Registers a plain search/replace rule.
 *
 * @param string $search         Text to look for.
 * @param string $replace        Text to put in its place.
 * @param mixed  $case_sensitive Anything loosely equal to 0 files the rule
 *                               in the case-insensitive list; everything
 *                               else in the case-sensitive one.
 */
public function addSimpleReplace($search, $replace, $case_sensitive = 1) {
    if ( $case_sensitive == 0 ) {
        $this->ireplace_search[]  = $search;
        $this->ireplace_replace[] = $replace;
        return;
    }

    $this->replace_search[]  = $search;
    $this->replace_replace[] = $replace;
}
/**
 * Registers the callback for an extension tag and adds the tag name to
 * the list of elements that are extracted from the text.
 *
 * @param string   $tag      Tag name.
 * @param callback $callback Handler for the tag.
 */
public function addTagCallback($tag, $callback) {
    $this->elements[]  = $tag;
    $this->tags[$tag] = $callback;
}
/**
 * Registers (or replaces) the callback of a parser function.
 *
 * @param string   $tag      Name of the parser function.
 * @param callback $callback Handler for the function.
 */
public function addParserFunction($tag, $callback) {
    $this->pFunctions[$tag] = $callback;
}
/**
 * Registers a core parser function.
 *
 * Currently a plain alias: the call is forwarded to addParserFunction(),
 * so core functions share the table of the custom ones.
 *
 * @param string   $tag      Name of the parser function.
 * @param callback $callback Handler for the function.
 */
public function addCoreParserFunction($tag, $callback) {
    $this->addParserFunction($tag, $callback);
}
/**
 * Registers a regular-expression replace rule.
 *
 * @param string $search  Pattern.
 * @param string $replace Replacement.
 */
public function addRegExp($search, $replace) {
    $this->regexp_replace[] = $replace;
    $this->regexp_search[]  = $search;
}
/**
 * Attaches a handler to an event and marks the handler as active.
 *
 * @param string   $event Event name.
 * @param callback $func  Function name or array( class-or-object, method ).
 */
public function registerEventHandler($event, $func) {
    $eventName = "$event";
    $this->events[$eventName][] = $func;

    $handlerName = $this->getEventHandlerRepr($func);
    $this->event_functions[$handlerName] = true;
}
/**
 * Switches a handler off; execEvent() skips handlers flagged this way.
 *
 * @param string $repr Handler name as listed by getEventHandlers().
 */
function deactivateEventHandler($repr) {
    $this->event_functions[$repr] = false;
}
/**
 * Lists the names of all handlers that were ever registered, whether
 * they are still active or have been deactivated.
 *
 * @return array Handler names.
 */
function getEventHandlers() {
    $names = array_keys($this->event_functions);

    return $names;
}
/**
 * Builds the name under which a handler is tracked.
 *
 * @param mixed $fnc Function name, or array( class-or-object, method ).
 * @return mixed "Class::method" for array callbacks; otherwise $fnc is
 *               returned unchanged.
 */
private function getEventHandlerRepr($fnc) {
    if ( !is_array($fnc) ) {
        return $fnc;
    }

    // For object callbacks the class of the object is used.
    $owner = is_object($fnc[0]) ? get_class($fnc[0]) : $fnc[0];

    return $owner.'::'.$fnc[1];
}
/**
 * Runs every active handler registered for an event.
 *
 * Each handler is called with the current text and the parser and must
 * return the (possibly modified) text, which is passed on to the next
 * handler.
 *
 * @param string $event Event name.
 * @param string $str   Text to hand to the handlers.
 * @return string Text after all handlers have run; $str unchanged when no
 *                handler is registered for the event.
 */
private function execEvent($event, $str) {
    $event = "$event";
    if ( !isset($this->events[$event]) || !is_array($this->events[$event]) ) {
        return $str;
    }

    // Call-time pass-by-reference (f(&$x)) is a fatal error since PHP 5.4.
    // Handing the parser over as a reference inside the argument array
    // keeps handlers declared as "function h($str, &$parser)" working.
    $parser   = $this;
    $handlers = $this->events[$event];
    foreach ($handlers as $fnc) {
        $repr = $this->getEventHandlerRepr($fnc);
        if ( isset($this->event_functions[$repr]) && $this->event_functions[$repr] == true ) {
            $str = call_user_func_array($fnc, array($str, &$parser));
        }
    }

    return $str;
}
/**
 * Parses a text fragment with internalParse() only, i.e. without the
 * extraction steps that parse() runs around it.
 *
 * @param string $str Wiki text.
 * @return string Result of internalParse().
 */
public function recursiveTagParse( $str = '' ) {
    $this->profileIn(__METHOD__);
    $parsed = $this->internalParse($str);
    $this->profileOut(__METHOD__);

    return $parsed;
}
/**
 * Parses a given wiki string to LaTeX.
 *
 * The text is preprocessed, curly-brace constructs are handled according
 * to the 'process_curly_braces' setting, extension tags and preformatted
 * blocks are swapped for markers, the remaining text is converted by
 * internalParse(), and finally all markers are replaced again.
 *
 * @param string $text   Wiki text.
 * @param mixed  &$title Stored by reference in $this->mTitle.
 * @param int    $mode   Not evaluated here. The default used to name the
 *                       undefined constant W2L_STR; W2L_STRING is the
 *                       constant that is actually defined.
 * @return string LaTeX code.
 */
public function parse($text, &$title, $mode = W2L_STRING) {
    $this->profileIn(__METHOD__);
    $time_start = microtime(true);

    if ($this->initiated == false ) {
        $this->initParsing();
    }
    $this->mTitle =& $title;

    $text = trim($text);
    $text = "\n".$text."\n";

    // The result of this event is not assigned: handlers are notified,
    // but what they return is dropped.
    $this->execEvent('W2L_AFTER_INIT', $text);

    $text = $this->execEvent('W2L_BEFORE_CUT', $text);
    $text = $this->preprocessString($text);
    $text = $this->execEvent('W2L_AFTER_CUT', $text);

    // Comments are already stripped in preprocessString(); the two events
    // are still fired for handlers that rely on them.
    $text = $this->execEvent('W2L_BEFORE_STRIP', $text);
    //$text = $this->stripComments($text);
    $text = $this->execEvent('W2L_AFTER_STRIP', $text);

    switch ( $this->getVal('process_curly_braces') ) {
        case '0': // remove everything between curly braces
            $text = preg_replace('/\{\{(.*?)\}\}/sm', '', $text);
            break;
        case '1': // do nothing
            break;
        case '2': // process them
            $text = $this->processCurlyBraces($text);
            break;
        default:
            break;
    }
    //$this->reportError($text, __METHOD__);

    // Swap extension tags and preformatted blocks for markers so that the
    // main pass leaves their content alone.
    $text = $this->extractParserExtensions($text);
    $text = $this->extractPre($text);
    $text = $this->execEvent("W2L_AFTER_EXTRACT_TAGS", $text);

    $text = $this->internalParse($text);
    $text = trim($text);

    // Some tidying
    $text = str_replace("\n\n\n", "\n\n", $text);
    $text = trim(str_replace("\n\n\n", "\n\n", $text));

    // Replace extensions.
    // This approach does not allow distinguishing between chapter and
    // section, something that is neglected at the moment anyway.
    $text = $this->replacePre($text);
    $text = $this->replaceParserExtensions($text);
    $text = $this->replaceNoWikiMarkers($text);
    $text = $this->deMask($text);
    //$text = $this->replacePre($text);

    $text = trim($text);
    $text = str_replace("\n\n\n", "\n\n", $text);
    $text = $this->execEvent("W2L_FINISH", $text);

    $time_end = microtime(true);
    $this->parse_time = $time_end - $time_start;
    $this->profileOut(__METHOD__);

    return $text;
}
/**
 * Adds a character to the table used by processHtmlEntities().
 *
 * @param string    $html    Named entity, e.g. '&auml;'.
 * @param string    $latex   LaTeX code the entity is replaced with.
 * @param int|false $utf_dec Decimal code point; when given, the numeric
 *                           entities '&#NNN;' and '&#xHHH;' are recorded
 *                           as well. false records none.
 * @return bool Always true.
 */
function addChar($html, $latex, $utf_dec = false) {
    $ent_dec = '';
    $ent_hex = '';
    if ( $utf_dec !== false ) {
        $ent_dec = '&#'.$utf_dec.';';
        $ent_hex = '&#x'.dechex($utf_dec).';';
    }

    // The two numeric forms used to be stored under each other's key.
    $this->htmlEntities[] = array(
        'html'    => $html,
        'utf_hex' => $ent_hex,
        'utf_dec' => $ent_dec,
        'latex'   => $latex,
        'xetex'   => '' // Future
    );

    return true;
}
/**
 * Replaces the entities recorded through addChar() with their LaTeX code.
 *
 * The '&' of every entity is exchanged for $this->Et before searching.
 * NOTE(review): presumably because ampersands in the text were masked
 * with that placeholder by an earlier step -- confirm.
 *
 * @param string $str Text to process.
 * @return string Text with the known entities replaced.
 */
function processHtmlEntities( $str ) {
    foreach ( $this->htmlEntities as $entity ) {
        $named   = str_replace('&', $this->Et, $entity['html']);
        $hexForm = str_replace('&', $this->Et, $entity['utf_hex']);
        $decForm = str_replace('&', $this->Et, $entity['utf_dec']);
        $latex   = $entity['latex'];

        $str = strtr($str, array($named => $latex));

        // Numeric forms exist only when a code point was given to addChar().
        if ( $hexForm != '' ) {
            $str = strtr($str, array($hexForm => $latex));
            $str = strtr($str, array($decForm => $latex));
        }
    }

    return $str;
}
/**
 * Parses a string as is, i.e. without treating comments, extension tags
 * and the like. The order of the steps follows the way MediaWiki parses
 * a text as closely as possible.
 *
 * @param string $str Wiki text.
 * @return string Converted text.
 */
function internalParse($str) {
    $this->profileIn(__METHOD__);

    $str = $this->execEvent('W2L_BEFORE_MASK', $str);
    $str = $this->maskLatexCommandChars($str);

    // Tables first ...
    $str = $this->execEvent('W2L_BEFORE_TABLES', $str);
    $str = $this->doTableStuff($str);

    // ... then headings, inline markup, masking and block-level elements.
    $steps = array(
        'doHeadings',
        'doQuotes',
        'doInternalLinks',
        'doExternalLinks',
        'doHTML',
        'doQuotationMarks',
        'maskLatexSpecialChars',
        'processHtmlEntities',
        'maskLaTeX',
        'doBlocklevels',
        'maskMwSpecialChars',
        'doSimpleReplace',
    );
    foreach ( $steps as $step ) {
        $str = $this->$step($str);
    }

    $str = $this->execEvent('W2L_INTERNAL_FINISH', $str);
    $this->profileOut(__METHOD__);

    return $str;
}
/**
 * Returns the collected profiling log.
 *
 * @param string $export_as 'array' for the raw log, 'xml' for one
 *                          element per log entry.
 * @return array|string|false The log in the requested form; false when
 *                            profiling is off or the format is unknown.
 */
public function getPerformanceProfile($export_as = 'xml') {
    if ( !$this->doProfiling ) {
        return false;
    }

    if ( $export_as == 'array' ) {
        return $this->ProfileLog;
    }

    if ( $export_as == 'xml' ) {
        $elements = array();
        foreach ( $this->ProfileLog as $entry ) {
            $elements[] = '<'.$entry['type'].' fname="'.$entry['function'].'" time="'.$entry['time'].'" />'."\n";
        }
        return implode('', $elements);
    }

    return false;
}
/**
 * Returns the duration measured by parse() with microtime(true).
 *
 * @return mixed Value of $this->parse_time.
 */
public function getParseTime() {
    $duration = $this->parse_time;

    return $duration;
}
/**
 * Converts straight quotes and apostrophes.
 *
 * Without 'typographic_quotes_detect' every " becomes \dq{} and every '
 * becomes \rq{}. With it, quotes next to whitespace, brackets or
 * punctuation are first turned into numeric entities (8218/8217 for
 * single, 8222/8221 for double quotes, written with $this->Et in place
 * of '&'); whatever is left over is converted as above.
 *
 * The detection will need language packs. Better not rely on it: using
 * these quotes breaks apostrophes.
 *
 * @param string $str Text to process.
 * @return string Text with the quotes converted.
 */
private function doQuotationMarks($str) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    // This table also handles apostrophes
    $quotes = array(
        '"' => '\dq{}',
        "'" => '\rq{}',
    );

    if ( $this->getVal('typographic_quotes_detect') == false ) {
        $str = strtr($str, $quotes);
        $this->profileOut($fName);
        return $str;
    }

    $sqOpen  = $this->Et.'#8218;';
    $sqClose = $this->Et.'#8217;';
    $dqOpen  = $this->Et.'#8222;';
    $dqClose = $this->Et.'#8221;';

    // Characters after which a quote opens ("\n" = quote at line start),
    // and characters before which it closes ("\n" = quote at line end).
    $sqOpenAfter   = array("\n", ' ', '{', '(');
    $sqCloseBefore = array(' ', '.', ',', ';', '?', '!', '}', '\\', '<', '-', '(', ')', ':', "\n");
    $dqOpenAfter   = array("\n", ' ', '(');
    $dqCloseBefore = array(' ', '.', ',', ';', '?', '!', '-', '}', ')', '\\', '<', '(', ':', "\n");

    $sq_detect = array();
    foreach ( $sqOpenAfter as $before ) {
        $sq_detect[$before."'"] = $before.$sqOpen;
    }
    foreach ( $sqCloseBefore as $after ) {
        // The rule for "'," used to emit "8217;" without the '#'.
        $sq_detect["'".$after] = $sqClose.$after;
    }
    // A single quote directly after a closing bracket closes as well.
    $sq_detect["}'"] = '}'.$sqClose;
    $sq_detect[")'"] = ')'.$sqClose;

    $dq_detect = array();
    foreach ( $dqOpenAfter as $before ) {
        $dq_detect[$before.'"'] = $before.$dqOpen;
    }
    foreach ( $dqCloseBefore as $after ) {
        $dq_detect['"'.$after] = $dqClose.$after;
    }

    // Replace quotes
    $str = strtr($str, $sq_detect);
    $str = strtr($str, $dq_detect);
    $str = strtr($str, $quotes);

    $this->profileOut($fName);
    return $str;
}
/* Internal parsing functions */
/**
 * Prepares the parser for its first run.
 *
 * Registers everything announced in the globals $w2lTags, $w2lEvents,
 * $w2lParserFunctions and $w2lConfig, adds the core parser functions and
 * a few typographic replace rules, and includes w2lChars.php. Calling it
 * again on an initiated parser does nothing.
 */
public function initParsing() {
    global $w2lTags;
    global $w2lEvents;
    global $w2lParserFunctions;
    global $w2lConfig;

    // Checked before profiling starts, so that the early return cannot
    // leave a profileIn() without its profileOut().
    if ($this->initiated == true ) {
        return;
    }

    $fName = __METHOD__;
    $this->profileIn($fName);

    $this->unique = $this->uniqueString();

    // A global that was never set is treated like an empty list.
    if ( is_array($w2lTags) ) {
        foreach ($w2lTags as $key => $value) {
            $this->addTagCallback($key, $value);
        }
    }
    if ( is_array($w2lEvents) ) {
        foreach ($w2lEvents as $key => $value) {
            foreach ($value as $value2) {
                $this->registerEventHandler($key, $value2);
            }
        }
    }
    if ( is_array($w2lParserFunctions) ) {
        foreach ($w2lParserFunctions as $key => $value) {
            $this->addParserFunction($key, $value);
        }
    }
    if ( is_array($w2lConfig) ) {
        foreach ($w2lConfig as $key => $value) {
            $this->setVal($key, $value);
        }
    }

    // Core parser functions: name => method of CoreParserFunctions.
    // Not enabled: formatnum, grammar, plural, defaultsort.
    $coreFunctions = array(
        'int'              => 'intFunction',
        'ns'               => 'ns',
        'urlencode'        => 'urlencode',
        'lcfirst'          => 'lcfirst',
        'ucfirst'          => 'ucfirst',
        'lc'               => 'lc',
        'uc'               => 'uc',
        'localurl'         => 'localurl',
        'localurle'        => 'localurle',
        'fullurl'          => 'fullurl',
        'fullurle'         => 'fullurle',
        'numberofpages'    => 'numberofpages',
        'numberofusers'    => 'numberofusers',
        'numberofarticles' => 'numberofarticles',
        'numberoffiles'    => 'numberoffiles',
        'numberofadmins'   => 'numberofadmins',
        'language'         => 'language',
        'padleft'          => 'padleft',
        'padright'         => 'padright',
        'anchorencode'     => 'anchorencode',
        'special'          => 'special',
        'pagesinnamespace' => 'pagesinnamespace',
    );
    foreach ($coreFunctions as $name => $method) {
        $this->addCoreParserFunction( $name, array( 'CoreParserFunctions', $method ) );
    }

    // And here we add some replace-rules.
    // To be honest: these rules are in the wrong place. Most of them
    // should come from outside or from 'language-packs'.
    $this->addSimpleReplace(" - ", " -- ");
    $this->addSimpleReplace(" -\n", " --\n");
    $this->addSimpleReplace("\n- ", "\n-- ");
    $this->addSimpleReplace("...", '{\dots}');

    include('w2lChars.php');

    $this->initiated = true;
    $this->profileOut($fName);
    return;
}
/**
 * Swaps blocks of preformatted lines for markers.
 *
 * A preformatted line starts with a blank. Consecutive preformatted lines
 * form one block; the block is stored as a verbatim environment (leading
 * blank removed) in $this->preReplace and replaced in the text by a
 * marker, which replacePre() later turns back into the verbatim code.
 * A block is finished by the first following line that is not
 * preformatted.
 *
 * @param string $str Text to scan.
 * @return string Text with the preformatted blocks replaced by markers.
 */
function extractPre($str) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $block_counter = 0;
    $in_block = false;
    $rplBlock = array(); // original lines of each block
    $preBlock = array(); // LaTeX code of each block

    foreach ( explode("\n", $str) as $line ) {
        if ( $line !== '' && $line[0] === ' ' ) {
            if ( !$in_block ) {
                ++$block_counter;
                $preBlock[$block_counter] = '\begin{verbatim}'."\n";
                $rplBlock[$block_counter] = '';
                $in_block = true;
            }
            $preBlock[$block_counter] .= substr($line, 1)."\n";
            $rplBlock[$block_counter] .= $line."\n";
            continue;
        }

        if ( $in_block ) {
            // Single quotes on purpose: in a double-quoted string "\e" is
            // the escape sequence for the ESC character.
            $preBlock[$block_counter] .= '\end{verbatim}'."\n";

            $marker = $this->getMark('pre', $block_counter);
            $str = str_replace($rplBlock[$block_counter], $marker, $str);
            $this->preReplace[$marker] = $preBlock[$block_counter];
            $in_block = false;
        }
    }

    $this->profileOut($fName);
    return $str;
}
/**
 * Puts the verbatim blocks collected by extractPre() back in place of
 * their markers.
 *
 * @param string $str Text containing the markers.
 * @return string Text with the markers replaced.
 */
function replacePre($str) {
    $this->profileIn(__METHOD__);

    $markers = array_keys($this->preReplace);
    $blocks  = array_values($this->preReplace);
    $str = str_replace($markers, $blocks, $str);

    $this->profileOut(__METHOD__);
    return $str;
}
/**
 * Replaces every <nowiki>...</nowiki> section by a marker; the section
 * itself is handled by noWikiMarker().
 *
 * @param string $str Text to scan.
 * @return string Text with the sections replaced by markers.
 */
function matchNoWiki($str) {
    return preg_replace_callback(
        '/<nowiki>(.*)<\/nowiki>/smU',
        array($this, 'noWikiMarker'),
        $str
    );
}
/**
 * Callback of matchNoWiki(): stores the masked content of one <nowiki>
 * section and returns the marker that stands in for it.
 *
 * @param array $match Regex match; index 1 holds the section content.
 * @return string The marker.
 */
function noWikiMarker($match) {
    $marker = $this->getMark('nowiki', ++$this->nowikiCounter);

    // The content skips the wiki pass, so it is masked right away.
    $content = $this->maskLatexCommandChars($match[1]);
    $content = $this->maskLatexSpecialChars($content);
    $this->nowikiMarks[$marker] = $this->maskMwSpecialChars($content);

    return $marker;
}
/**
 * Replaces the markers created by noWikiMarker() with the stored content.
 *
 * @param string $str Text containing the markers.
 * @return string Text with the markers replaced.
 */
function replaceNoWikiMarkers($str) {
    return strtr($str, $this->nowikiMarks);
}
/**
 * First pass over a text: protects <nowiki> sections, strips comments and
 * resolves <noinclude> / <includeonly>.
 *
 * Both settings are one-shot switches: after 'leave_noinclude' has been
 * honoured it is reset to false, and after <includeonly> content has been
 * dropped 'insert_includeonly' is set to true for the following calls.
 *
 * @param string $str Wiki text.
 * @return string Preprocessed text, after the W2L_PREPROCESS event.
 */
public function preprocessString($str) {
    $str = $this->matchNoWiki($str);
    $str = $this->stripComments($str);

    if ( !$this->getVal('leave_noinclude') ) {
        // Drop the sections including their content.
        $str = preg_replace('/<noinclude>.*<\/noinclude>/smU', '', $str);
    } else {
        // Keep the content, drop only the tags.
        $str = preg_replace('/<noinclude>(.*)<\/noinclude>/smU', "$1", $str);
        $this->setVal('leave_noinclude', false);
    }

    $insertIncludeonly = $this->getVal('insert_includeonly');
    $replacement = $insertIncludeonly ? "$1" : '';
    $str = preg_replace('/<includeonly>(.*)<\/includeonly>/smU', $replacement, $str);
    if ( !$insertIncludeonly ) {
        $this->setVal('insert_includeonly', true);
    }

    return $this->execEvent('W2L_PREPROCESS', $str);
}
/**
 * Handles block-level elements line by line: lists are built from lines
 * starting with *, #, : or ;. Inside <pre> no prefixes are interpreted.
 *
 * Adapted from MediaWiki's Parser::doBlockLevels(); paragraph handling
 * was not ported, every line is passed through followed by a newline.
 *
 * @param string $str Text to process.
 * @return string Text with the list markup converted.
 */
private function doBlockLevels( $str = '' ) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $textLines = explode( "\n", $str );
    $lastPrefix = $output = '';
    $pref = $pref2 = '';
    $prefixLength = 0;
    $this->mDTopen = false;
    // Both are read below before anything in this method writes them.
    if ( !isset($this->mInPre) ) {
        $this->mInPre = false;
    }
    if ( !isset($this->mLastSection) ) {
        $this->mLastSection = '';
    }

    foreach ( $textLines as $oLine ) {
        $lastPrefixLength = strlen( $lastPrefix );
        $preCloseMatch = preg_match('/<\\/pre/i', $oLine );
        $preOpenMatch = preg_match('/<pre/i', $oLine );

        if ( !$this->mInPre ) {
            # Multiple prefixes may abut each other for nested lists.
            $prefixLength = strspn( $oLine, '*#:;' );
            $pref = substr( $oLine, 0, $prefixLength );
            # ';' and ':' belong to the same kind of list, so they are
            # treated alike when prefixes are compared.
            $pref2 = str_replace( ';', ':', $pref );
            $t = substr( $oLine, $prefixLength );
            $this->mInPre = !empty($preOpenMatch);
        } else {
            # Don't interpret any other prefixes in preformatted text
            $prefixLength = 0;
            $pref = $pref2 = '';
            $t = $oLine;
        }

        # List generation
        if ( $prefixLength && 0 == strcmp( $lastPrefix, $pref2 ) ) {
            # Same as the last item, so no need to deal with nesting or opening stuff
            $output .= $this->nextItem( substr( $pref, -1 ) );
            if ( substr( $pref, -1 ) == ';' ) {
                # The one nasty exception: definition lists work like this:
                # ; title : definition text
                # So we check for : in the remainder text to split up the
                # title and definition, without b0rking links.
                $term = $t2 = '';
                if ( $this->findColonNoLinks($t, $term, $t2) !== false ) {
                    $t = $t2;
                    $output .= $term . $this->nextItem( ':' );
                }
            }
        } elseif ( $prefixLength || $lastPrefixLength ) {
            # Either open or close a level...
            $commonPrefixLength = $this->getCommon( $pref, $lastPrefix );

            while ( $commonPrefixLength < $lastPrefixLength ) {
                $output .= $this->closeList( $lastPrefix[$lastPrefixLength-1] );
                --$lastPrefixLength;
            }
            if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
                $output .= $this->nextItem( $pref[$commonPrefixLength-1] );
            }
            while ( $prefixLength > $commonPrefixLength ) {
                $char = substr( $pref, $commonPrefixLength, 1 );
                $output .= $this->openList( $char );
                if ( ';' == $char ) {
                    # FIXME: This is dupe of code above
                    $term = $t2 = '';
                    if ( $this->findColonNoLinks($t, $term, $t2) !== false ) {
                        $t = $t2;
                        $output .= $term . $this->nextItem( ':' );
                    }
                }
                ++$commonPrefixLength;
            }
            $lastPrefix = $pref2;
        }

        # No paragraph mode here: lines without prefix are passed through.
        # XXX: use a stack for nestable elements like span, table and div

        # somewhere above we forget to get out of pre block (bug 785)
        if ( $preCloseMatch && $this->mInPre ) {
            $this->mInPre = false;
        }

        $output .= $t."\n";
    }

    # Close the lists that are still open at the end of the text.
    while ( $prefixLength ) {
        $output .= $this->closeList( $pref2[$prefixLength-1] );
        --$prefixLength;
    }
    if ( '' != $this->mLastSection ) {
        $output .= '</' . $this->mLastSection . '>';
        $this->mLastSection = '';
    }

    $this->profileOut($fName);
    return $output;
}
/**
 * Returns the code that starts the next item of an open list.
 *
 * @param string $char List character: '*', '#', ':' or ';'.
 * @return string '\item ' for bullet and numbered lists; for definition
 *                lists the closing tag of the previous part followed by
 *                the opening <dt> or <dd>; an error comment otherwise.
 */
/* private */ function nextItem( $char ) {
    switch ( $char ) {
        case '*':
        case '#':
            return '\item ';

        case ':':
        case ';':
            // Close whichever part of the definition list is open.
            $close = $this->mDTopen ? '</dt>' : '</dd>';
            if ( ';' == $char ) {
                $this->mDTopen = true;
                return $close . '<dt>';
            }
            $this->mDTopen = false;
            return $close . '<dd>';
    }

    return '<!-- ERR 2 -->';
}
/**
 * Closes the section recorded in $this->mLastSection, if any, and resets
 * the section and <pre> state.
 *
 * @return string Closing tag plus newline, or '' when nothing was open.
 */
/* private */ function closeParagraph() {
    $openSection = $this->mLastSection;

    $this->mInPre = false;
    $this->mLastSection = '';

    return ( '' != $openSection ) ? '</' . $openSection . ">\n" : '';
}
/**
 * Returns the code that opens a list and starts its first item.
 *
 * With 'use_paralist' set, the compact environments compactitem and
 * compactenum are used instead of itemize and enumerate.
 *
 * @param string $char List character: '*', '#', ':' or ';'.
 * @return string Opening code, preceded by whatever closeParagraph()
 *                returns; only an error comment for other characters.
 */
/* private */ function openList( $char ) {
    $compact  = $this->getVal('use_paralist');
    $bullets  = $compact ? 'compactitem' : 'itemize';
    $numbered = $compact ? 'compactenum' : 'enumerate';

    $result = $this->closeParagraph();

    switch ( $char ) {
        case '*':
            $result .= '\begin{'.$bullets.'}'."\n".'\item ';
            break;
        case '#':
            $result .= '\begin{'.$numbered.'}'."\n".'\item ';
            break;
        case ':':
            $result .= '<dl><dd>';
            break;
        case ';':
            $result .= '<dl><dt>';
            $this->mDTopen = true;
            break;
        default:
            $result = '<!-- ERR 1 -->';
    }

    return $result;
}
/**
 * Returns the code that closes a list.
 *
 * @param string $char List character: '*', '#' or ':' (callers pass ':'
 *                     for ';' as well).
 * @return string Closing code followed by a newline, or an error comment
 *                (without newline) for any other character.
 */
/* private */ function closeList( $char ) {
    $compact = $this->getVal('use_paralist');

    switch ( $char ) {
        case '*':
            return '\end{'.( $compact ? 'compactitem' : 'itemize' ).'}'."\n";

        case '#':
            return '\end{'.( $compact ? 'compactenum' : 'enumerate' ).'}'."\n";

        case ':':
            if ( $this->mDTopen ) {
                // The term is still open: close it instead of a definition.
                $this->mDTopen = false;
                return '</dt></dl>'."\n";
            }
            return '</dd></dl>'."\n";
    }

    return '<!-- ERR 3 -->';
}
/**
 * Returns the length of the common prefix of two strings.
 *
 * Uses square-bracket string offsets; the curly-brace form was removed
 * in PHP 8.
 *
 * @param string $st1 First string.
 * @param string $st2 Second string.
 * @return int Number of leading characters both strings share.
 */
/* private */ function getCommon( $st1, $st2 ) {
    $limit = min( strlen( $st1 ), strlen( $st2 ) );

    $i = 0;
    while ( $i < $limit && $st1[$i] == $st2[$i] ) {
        ++$i;
    }

    return $i;
}
/**
 * Split up a string on the first ':', unless a '<' occurs before it.
 *
 * A '<' in front of the colon may start a tag; splitting there could cut
 * the tag in half, so in that case the string is reported as not
 * splittable. (The tag-aware scan of the MediaWiki original was not
 * ported.)
 *
 * @param string $str the string to split
 * @param string &$before set to everything before the ':' (on success only)
 * @param string &$after set to everything after the ':' (on success only)
 * @return int|false the position of the ':', or false if the string was
 *                   not split
 */
function findColonNoLinks($str, &$before, &$after) {
    $pos = strpos( $str, ':' );
    if ( $pos === false ) {
        // Nothing to find!
        return false;
    }

    $lt = strpos( $str, '<' );
    if ( $lt !== false && $lt < $pos ) {
        // Must be an explicit false: callers test "!== false", and the
        // implicit null returned before made them replace the line by
        // the untouched $after.
        return false;
    }

    // Easy; no tag nesting to worry about
    $before = substr( $str, 0, $pos );
    $after = substr( $str, $pos+1 );
    return $pos;
}
/**
 * Converts wiki headings (= ... = up to ====== ... ======) to LaTeX
 * sectioning commands.
 *
 * As in MediaWiki, the headings are first rewritten to <h1>..<h6>,
 * starting with the deepest level; processHeadings() then turns these
 * into LaTeX. \part is not supported and needs to be implemented
 * separately.
 *
 * @param string $str Text to process.
 * @return string Text with the headings converted.
 */
private function doHeadings( $str = '' ) {
    $this->profileIn(__METHOD__);

    foreach ( array(6, 5, 4, 3, 2, 1) as $level ) {
        $marks       = str_repeat( '=', $level );
        $pattern     = "/^{$marks}(.+){$marks}\\s*$/m";
        $replacement = "<h{$level}>\\1</h{$level}>\\2";
        $str = preg_replace( $pattern, $replacement, $str );
    }

    $str = preg_replace_callback(
        '^\<h([1-6])\>(.+)\</h([1-6])\>^',
        array($this, 'processHeadings'),
        $str
    );

    $this->profileOut(__METHOD__);
    return $str;
}
/**
 * Callback of doHeadings(): turns one <hN>...</hN> heading into a LaTeX
 * sectioning command.
 *
 * For the document classes 'report' and 'book' every heading is moved up
 * one level. Asterisks at the start of the heading text select a variant:
 *   *    starred standard command, e.g. \section*{...}
 *   **   KOMA-Script command, e.g. \addsec{...}
 *   ***  starred KOMA-Script command, e.g. \addsec*{...}
 *
 * @param array $matches Regex match: [1] heading level, [2] heading text.
 * @return string LaTeX code of the heading.
 */
private function processHeadings($matches) {
    $heading = trim($matches[2]);
    $level = trim($matches[1]);

    if ( in_array( $this->getVal("documentclass"), array('report', 'book') ) ) {
        --$level;
    }

    // Beware: using chapter removes support for \subparagraph
    $headings_latex = array('part', 'chapter', 'section', 'subsection', 'subsubsection', 'paragraph', 'subparagraph');
    // KOMA-Script names its unnumbered part \addpart; \addpar does not exist.
    $headings_latex_koma = array('addpart', 'addchap', 'addsec', 'subsection', 'subsubsection', 'paragraph', 'subparagraph');

    // The star is emitted as a marker and swapped for '*' by deMask(),
    // which keeps it away from the following wiki passes.
    $asteriks = $this->getMark('Asteriks');
    $this->mask($asteriks, '*');

    if ( substr($heading, 0, 3) == '***' ) {
        $heading = substr($heading, 3);
        return '\\'.$headings_latex_koma[$level].$asteriks.'{'.$heading.'}';
    }
    if ( substr($heading, 0, 2) == '**' ) {
        $heading = substr($heading, 2);
        return '\\'.$headings_latex_koma[$level].'{'.$heading.'}';
    }
    if ( substr($heading, 0, 1) == '*' ) {
        $heading = substr($heading, 1);
        return '\\'.$headings_latex[$level].$asteriks.'{'.$heading.'}';
    }

    // standard
    return '\\'.$headings_latex[$level].'{'.$heading.'}';
}
/**
 * Remembers that the marker $key stands for $value; deMask() performs
 * the replacement.
 *
 * @param string $key   Marker placed in the text.
 * @param string $value Text the marker is replaced with.
 */
function mask($key, $value) {
    $this->mask_chars[$key] = $value;
}
/**
 * Replaces every marker registered through mask() with its value.
 *
 * @param string $str Text containing markers.
 * @return string Text with the markers replaced.
 */
function deMask($str) {
    $markers = array_keys($this->mask_chars);
    $values  = array_values($this->mask_chars);

    return str_replace($markers, $values, $str);
}
/**
 * Hands everything within [[...]] to internalLinkHelper().
 *
 * @param string $str Text to process.
 * @return string Text with the internal links converted.
 */
private function doInternalLinks( $str = '' ) {
    $this->profileIn(__METHOD__);

    $converted = preg_replace_callback(
        '/\[\[(.*?)\]\]/',
        array($this, 'internalLinkHelper'),
        $str
    );

    $this->profileOut(__METHOD__);
    return $converted;
}
/**
 * Translates a namespace given by number or name into its index.
 *
 * Names are compared case-insensitively, first against the namespace
 * names of the content language, then against the canonical names.
 *
 * @param string $part1 Namespace number or name.
 * @return int|array The namespace index, or array( 'found' => false )
 *                   when the namespace is unknown.
 */
private function translateNamespace($part1 = '') {
    global $wgContLang;

    $namespaces = array_map('strtolower', $wgContLang->getNamespaces());
    $nss = array_flip($namespaces);

    if ( intval( $part1 ) || $part1 == "0" ) {
        return intval( $part1 );
    }

    $param = str_replace( ' ', '_', strtolower( $part1 ) );
    if ( array_key_exists($param, $nss) ) {
        return $nss[$param];
    }

    // "Namespace::..." cannot be written literally any more: "namespace"
    // is a reserved word since PHP 5.3, and MediaWiki renamed the class
    // to MWNamespace. The old name is still tried for old installations.
    $index = null;
    foreach ( array('MWNamespace', 'Namespace') as $nsClass ) {
        if ( class_exists($nsClass) ) {
            $index = call_user_func( array($nsClass, 'getCanonicalIndex'), strtolower( $param ) );
            break;
        }
    }
    if ( !is_null( $index ) ) {
        return $index;
    }

    return array( 'found' => false );
}
/**
 * Callback of doInternalLinks(): converts the content of one [[...]].
 *
 * - Category links vanish.
 * - Media links are returned as they are.
 * - Image links become a centered \includegraphics with the caption in
 *   italics. A width given in cm is used, 10cm otherwise; px sizes and
 *   the thumb/frame/alignment keywords are ignored. If the file cannot
 *   be found, the link text is returned unchanged.
 * - All other links are reduced to their label, or to the target when
 *   there is no label.
 *
 * @param array $matches Regex match; index 1 holds the link content.
 * @return string Replacement for the link.
 */
private function internalLinkHelper($matches) {
    $link = trim($matches[1]);
    $links = explode("|", $link, 2);

    if ( substr_count($links[0], ':') == 0 ) {
        $ns_id = NS_MAIN;
    } else {
        $namespace = explode(':', $links[0], 2);
        $ns_id = $this->translateNamespace($namespace[0]);
    }
    if ( is_array($ns_id) ) {
        // Unknown namespace: reported, then treated like an ordinary link.
        $this->reportError( wfMsg('w2l_parser_no_namespace', $links[0]), __METHOD__);
    }

    switch ($ns_id) {
        case NS_CATEGORY:
            return '';

        case NS_MEDIA:
            // this is just a link to the mediawiki-page
            return $link;

        case NS_IMAGE:
            $parts = explode("|", $link);
            $imagename = array_shift($parts);
            // still need to remove the Namespace:
            $tmp_name = explode(':', $imagename, 2);
            $imagename = $tmp_name[1];

            $imgwidth = "10cm";
            $caption = ''; // stays empty when the link has no caption
            foreach ($parts as $part) {
                if (preg_match("/\d+px/", $part)) continue;
                if (preg_match("/(\d+cm)/", $part, $widthmatch)) {
                    $imgwidth = $widthmatch[1];
                    continue;
                }
                if (preg_match("/thumb|thumbnail|frame/", $part)) continue;
                if (preg_match("/left|right|center|none/", $part)) continue;
                $caption = trim($part);
            }

            $title = Title::makeTitleSafe( NS_IMAGE, $imagename );
            $file = $title ? Image::newFromTitle( $title ) : null;
            if ( !$file ) {
                // does not exist!!!
                return $link;
            }
            $file->loadFromFile();
            if ( !$file->exists() ) {
                // does not exist!!!
                return $link;
            }

            $imagepath = str_replace('\\', '/', $file->getPath());
            // Underscores are emitted as markers and restored by deMask().
            $underscore = $this->getMark('underscore');
            $imagepath = str_replace('_', $underscore, $imagepath);
            $this->mask($underscore, '_');

            $this->addPackageDependency('graphicx');

            // Single quotes on purpose: in a double-quoted string "\e"
            // (as in "\end") is the escape sequence for the ESC character.
            return '\begin{center} \resizebox{'.$imgwidth.'}{!}{\includegraphics{'.$imagepath.'}}'
                . '\\\\ \textit{'.$caption.'}\end{center}'."\n";
    }

    if ( isset($link[0]) && $link[0] == ':' ) {
        // Thats a link to a category
        $link = substr($link, 1);
        $test = explode(':', $link, 2);
        if ( $this->translateNamespace($test[0]) != NS_CATEGORY ) {
            // Whatever that was. Seems like an error!
            $link = ':'.$link;
        }
    }

    // First, check for |
    if ( substr_count($link, '|') >= 1 ) {
        // Label if there is one, target otherwise
        return empty($links[1]) ? $links[0] : $links[1];
    }

    return $link;
}
/**
 * Hands everything within [...] to externalLinkHelper().
 *
 * @param string $str Text to process.
 * @return string Text with the external links converted.
 */
private function doExternalLinks( $str ) {
    $this->profileIn(__METHOD__);

    $converted = preg_replace_callback(
        '/\[(.*?)\]/',
        array($this, 'externalLinkHelper'),
        $str
    );

    $this->profileOut(__METHOD__);
    return $converted;
}
/**
 * Callback of doExternalLinks(): converts the content of one [...].
 *
 * Brackets that do not start with a URL are put back (with the content
 * trimmed). A URL followed by a label is reduced to the label; a bare
 * URL becomes \url{...}. Besides http://, the protocols https:// and
 * ftp:// are recognised, in any letter case.
 *
 * @param array $matches Regex match; index 1 holds the bracket content.
 * @return string Replacement for the bracket expression.
 */
private function externalLinkHelper($matches) {
    $match = trim($matches[1]);

    if ( !preg_match('#^(?:https?|ftp)://#i', $match) ) {
        return "[".$match."]";
    }

    if ( strstr($match, ' ') !== false ) {
        // With label: $link[0] holds the URL, $link[1] the label.
        $link = explode(' ', $match, 2);
        return $link[1];
    }

    // URL only
    return '\url{'.$match.'}';
}
/**
 * Swaps all registered extension tags for markers and computes their
 * replacements.
 *
 * The callback of each tag is called with the tag content, the tag
 * attributes, the parser and the string 'latex'; its result is stored in
 * $this->tag_replace and inserted by replaceParserExtensions().
 *
 * @param string $str Text to scan.
 * @return string Text with the tags replaced by markers.
 */
private function extractParserExtensions( $str = '' ) {
    $fName = __METHOD__;
    $this->profileIn($fName);

    $matches = array();
    $unique = 'W2l-'.$this->uniqueString();
    // $matches is a reference parameter of extractTagsAndParams(); the
    // call-time "&" is a fatal error since PHP 5.4 and not needed.
    $str = $this->extractTagsAndParams($this->elements, $str, $matches, $unique);

    // Now call all the registered callback functions with their contents.
    // The parser travels as a reference inside the argument array, so
    // callbacks declared with "&$parser" keep working.
    $parser = $this;
    foreach ($matches as $key => $match) {
        $tag   = $match[0];
        $input = $match[1];
        $argv  = $match[2];

        // Tags are matched case-insensitively, callbacks are looked up by
        // exact name first and by lower-case name second.
        $callback = null;
        if ( isset($this->tags[$tag]) ) {
            $callback = $this->tags[$tag];
        } elseif ( isset($this->tags[strtolower($tag)]) ) {
            $callback = $this->tags[strtolower($tag)];
        }

        if ( $callback !== null ) {
            $rpl = call_user_func_array($callback, array($input, $argv, &$parser, 'latex'));
        } else {
            // Nothing registered (e.g. a left-over comment): remove it.
            $rpl = '';
        }
        $this->tag_replace["$key"] = $rpl;
    }

    $this->profileOut($fName);
    return $str;
}
/**
 * Replaces the markers set by extractParserExtensions() with the output
 * of the tag callbacks.
 *
 * @param string $str Text containing the markers.
 * @return string Text with the markers replaced.
 */
private function replaceParserExtensions( $str ) {
    $this->profileIn(__METHOD__);

    $markers = array_keys($this->tag_replace);
    $outputs = array_values($this->tag_replace);
    $str = str_replace($markers, $outputs, $str);

    $this->profileOut(__METHOD__);
    return $str;
}
/**
 * Replaces the given tags and HTML comments in a text by markers and
 * collects what was cut out.
 *
 * For every marker, $matches receives
 *   array( element name, content (null for <tag />), decoded attributes,
 *          the original text of the tag as far as it was recognised ).
 *
 * @param array  $elements    Tag names to look for.
 * @param string $text        Text to scan.
 * @param array  &$matches    Receives marker => information (reset first).
 * @param string $uniq_prefix Only referenced by the disabled marker format.
 * @return string The text with tags and comments replaced by markers.
 */
private function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
// Counter for the markers; static, so it keeps counting across calls.
static $n = 1;
$stripped = '';
$matches = array();
$taglist = implode( '|', $elements );
// Groups 1-3: tag name, attributes, "/>" or ">". Group 4: start of a comment.
$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
while ( '' != $text ) {
// Split at the first opening tag / comment start only.
$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
$stripped .= $p[0];
if( count( $p ) < 5 ) {
// Nothing found any more.
break;
}
// A comment match yields one piece more than a tag match, because group 4 took part.
if( count( $p ) > 5 ) {
// comment
$element = $p[4];
$attributes = '';
$close = '';
$inside = $p[5];
} else {
// tag
$element = $p[1];
$attributes = $p[2];
$close = $p[3];
$inside = $p[4];
}
//$marker = "($uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU)';
$marker = $this->getMark($element, $n++);
$stripped .= $marker;
if ( $close === '/>' ) {
// Empty element tag, <tag />
$content = null;
$text = $inside;
$tail = null;
} else {
// Look for the matching end: "-->" for comments, "</element>" for tags.
if( $element == '!--' ) {
$end = '/(-->)/';
} else {
$end = "/(<\\/$element\\s*>)/i";
}
$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
$content = $q[0];
if( count( $q ) < 3 ) {
# No end tag -- let it run out to the end of the text.
$tail = '';
$text = '';
} else {
$tail = $q[1];
$text = $q[2];
}
}
$matches[$marker] = array( $element,
$content,
Sanitizer::decodeTagAttributes( $attributes ),
"<$element$attributes$close$content$tail" );
}
return $stripped;
}
/**
 * Converts wiki apostrophe markup ('' italics, ''' bold, ''''' both) in one
 * piece of text into <i>/<b> HTML tags.
 *
 * The text is split on runs of two or more apostrophes; odd indexes of $arr
 * hold the apostrophe runs, even indexes the text between them.
 *
 * @param string $text Text to convert.
 * @return string The text with <i>/<b> tags; unchanged if it has no apostrophe runs.
 */
private function doQuotes( $text ) {
$fName = __METHOD__;
$this->profileIn($fName);
$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
if ( count( $arr ) == 1 ) {
// No apostrophe run found: nothing to convert.
$this->profileOut($fName);
return $text;
} else {
# First, do some preliminary work. This may shift some apostrophes from
# being mark-up to being text. It also counts the number of occurrences
# of bold and italics mark-ups.
$i = 0;
$numbold = 0;
$numitalics = 0;
foreach ( $arr as $r )
{
if ( ( $i % 2 ) == 1 )
{
# If there are ever four apostrophes, assume the first is supposed to
# be text, and the remaining three constitute mark-up for bold text.
if ( strlen( $arr[$i] ) == 4 )
{
$arr[$i-1] .= "'";
$arr[$i] = "'''";
}
# If there are more than 5 apostrophes in a row, assume they're all
# text except for the last 5.
else if ( strlen( $arr[$i] ) > 5 )
{
$arr[$i-1] .= str_repeat( "'", strlen( $arr[$i] ) - 5 );
$arr[$i] = "'''''";
}
# Count the number of occurrences of bold and italics mark-ups.
# We are not counting sequences of five apostrophes.
if ( strlen( $arr[$i] ) == 2 ) $numitalics++; else
if ( strlen( $arr[$i] ) == 3 ) $numbold++; else
if ( strlen( $arr[$i] ) == 5 ) { $numitalics++; $numbold++; }
}
$i++;
}
# If there is an odd number of both bold and italics, it is likely
# that one of the bold ones was meant to be an apostrophe followed
# by italics. Which one we cannot know for certain, but it is more
# likely to be one that has a single-letter word before it.
if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) )
{
$i = 0;
$firstsingleletterword = -1;
$firstmultiletterword = -1;
$firstspace = -1;
foreach ( $arr as $r )
{
if ( ( $i % 2 == 1 ) and ( strlen( $r ) == 3 ) )
{
# $x1 is the last character before the bold mark-up, $x2 the one before it.
$x1 = substr ($arr[$i-1], -1);
$x2 = substr ($arr[$i-1], -2, 1);
if ($x1 == ' ') {
if ($firstspace == -1) $firstspace = $i;
} else if ($x2 == ' ') {
if ($firstsingleletterword == -1) $firstsingleletterword = $i;
} else {
if ($firstmultiletterword == -1) $firstmultiletterword = $i;
}
}
$i++;
}
# If there is a single-letter word, use it!
if ($firstsingleletterword > -1)
{
$arr [ $firstsingleletterword ] = "''";
$arr [ $firstsingleletterword-1 ] .= "'";
}
# If not, but there's a multi-letter word, use that one.
else if ($firstmultiletterword > -1)
{
$arr [ $firstmultiletterword ] = "''";
$arr [ $firstmultiletterword-1 ] .= "'";
}
# ... otherwise use the first one that has neither.
# (notice that it is possible for all three to be -1 if, for example,
# there is only one pentuple-apostrophe in the line)
else if ($firstspace > -1)
{
$arr [ $firstspace ] = "''";
$arr [ $firstspace-1 ] .= "'";
}
}
# Now let's actually convert our apostrophic mush to HTML!
# $state tracks the open tags in opening order: '', 'i', 'b', 'bi', 'ib',
# or 'both' (a ''''' was seen and the nesting order is not yet decided;
# the text since then is held in $buffer).
$output = '';
$buffer = '';
$state = '';
$i = 0;
foreach ($arr as $r)
{
if (($i % 2) == 0)
{
# Plain text between apostrophe runs.
if ($state == 'both')
$buffer .= $r;
else
$output .= $r;
}
else
{
if (strlen ($r) == 2)
{
if ($state == 'i')
{ $output .= '</i>'; $state = ''; }
else if ($state == 'bi')
{ $output .= '</i>'; $state = 'b'; }
else if ($state == 'ib')
{ $output .= '</b></i><b>'; $state = 'b'; }
else if ($state == 'both')
{ $output .= '<b><i>'.$buffer.'</i>'; $state = 'b'; }
else # $state can be 'b' or ''
{ $output .= '<i>'; $state .= 'i'; }
}
else if (strlen ($r) == 3)
{
if ($state == 'b')
{ $output .= '</b>'; $state = ''; }
else if ($state == 'bi')
{ $output .= '</i></b><i>'; $state = 'i'; }
else if ($state == 'ib')
{ $output .= '</b>'; $state = 'i'; }
else if ($state == 'both')
{ $output .= '<i><b>'.$buffer.'</b>'; $state = 'i'; }
else # $state can be 'i' or ''
{ $output .= '<b>'; $state .= 'b'; }
}
else if (strlen ($r) == 5)
{
if ($state == 'b')
{ $output .= '</b><i>'; $state = 'i'; }
else if ($state == 'i')
{ $output .= '</i><b>'; $state = 'b'; }
else if ($state == 'bi')
{ $output .= '</i></b>'; $state = ''; }
else if ($state == 'ib')
{ $output .= '</b></i>'; $state = ''; }
else if ($state == 'both')
{ $output .= '<i><b>'.$buffer.'</b></i>'; $state = ''; }
else # ($state == '')
{ $buffer = ''; $state = 'both'; }
}
}
$i++;
}
# Now close all remaining tags. Notice that the order is important.
if ($state == 'b' || $state == 'ib')
$output .= '</b>';
if ($state == 'i' || $state == 'bi' || $state == 'ib')
$output .= '</i>';
if ($state == 'bi')
$output .= '</b>';
if ($state == 'both')
$output .= '<b><i>'.$buffer.'</i></b>';
}
$this->profileOut($fName);
return $output;
}
/**
 * Applies the registered plain-string replacements: first the
 * case-sensitive list, then the case-insensitive one.
 *
 * @param string $str Text to process.
 * @return string Text after both replacement passes.
 */
private function doSimpleReplace( $str ) {
    $method = __METHOD__;
    $this->profileIn($method);
    $afterExact = str_replace($this->replace_search, $this->replace_replace, $str);
    $result = str_ireplace($this->ireplace_search, $this->ireplace_replace, $afterExact);
    $this->profileOut($method);
    return $result;
}
/**
 * Runs all registered regular-expression replacements over the text.
 *
 * preg_replace() returns null when PCRE fails (for example when the
 * backtrack limit is hit). In that case the error is recorded through
 * reportError() and the input is returned unchanged, so the text is not
 * silently replaced by null.
 *
 * @param string $str Text to process.
 * @return string Text after the replacements, or the input if PCRE failed.
 */
private function doRegExp( $str ) {
    $fName = __METHOD__;
    $this->profileIn($fName);
    $result = preg_replace($this->regexp_search, $this->regexp_replace, $str);
    if ( $result === null ) {
        $this->reportError('preg_replace failed (PCRE error code '.preg_last_error().'); text left unchanged', $fName);
        $result = $str;
    }
    $this->profileOut($fName);
    return $result;
}
/**
 * Table conversion step: fires the W2L_TABLES event and then runs the
 * built-in table converter over the text.
 *
 * At this point table delimiters may appear escaped as \{| and |\};
 * externalTableHelper() turns them back into {| and |} itself.
 *
 * @param string $str Text that may contain wiki tables.
 * @return string Text returned by externalTableHelper().
 */
private function doTableStuff( $str ) {
    $method = __METHOD__;
    $this->profileIn($method);
    // NOTE(review): presumably lets registered handlers rewrite $str (by
    // reference) before the built-in conversion - confirm against execEvent().
    $this->execEvent("W2L_TABLES", $str);
    // Tables may still be left over after the event, so convert them here.
    $converted = $this->externalTableHelper($str);
    $this->profileOut($method);
    return $converted;
}
/*
 * Replaces markers in the text using the $state['general'] map
 * (marker => original content). Per the original note this restores pre,
 * math, and other extensions removed by strip().
 *
 * always call unstripNoWiki() after this one
 * @private
 */
private function unstrip( $text, &$state ) {
    if ( isset( $state['general'] ) ) {
        wfProfileIn( __METHOD__ );
        # TODO: good candidate for FSS
        $text = strtr( $text, $state['general'] );
        wfProfileOut( __METHOD__ );
    }
    return $text;
}
/**
 * Replaces markers in the text using the $state['nowiki'] map.
 * Always call this after unstrip() to preserve the order.
 *
 * @private
 */
private function unstripNoWiki( $text, &$state ) {
    if ( isset( $state['nowiki'] ) ) {
        wfProfileIn( __METHOD__ );
        # TODO: good candidate for FSS
        $text = strtr( $text, $state['nowiki'] );
        wfProfileOut( __METHOD__ );
    }
    return $text;
}
/**
 * Runs unstrip() and then unstripNoWiki() on the text with
 * $this->mStripState, and registers the LaTeX column type Y in the
 * document head code.
 *
 * @param string $text Text containing strip markers.
 * @return string Text with the markers restored.
 */
private function unstripForHTML( $text ) {
    $restored = $this->unstripNoWiki(
        $this->unstrip( $text, $this->mStripState ),
        $this->mStripState
    );
    $this->addLatexHeadCode('\\newcolumntype{Y}{>{\\raggedright\arraybackslash}X}');
    return $restored;
}
/*
 * externalTableHelper is a transplanted version of Parser::doTableStuff
 * from MediaWiki. It translates wiki tables to LaTeX tabularx tables,
 * working line by line.
 *
 * All table attributes are ignored except:
 *   latexfmt   - the tabularx column specification, one entry per column
 *                (backslashes are removed from it),
 *   latexwidth - the table width, defaults to \linewidth.
 * An extra column type Y (left-aligned text that can wrap) is registered by
 * unstripForHTML().
 * Example: {| latexfmt="|l|X|Y|l|"
 *
 * A cell written as "attributes | content" becomes a \multicolumn when the
 * attributes contain colspan (optionally with latexfmt); otherwise the
 * attributes are dropped and the content is emitted as a normal cell.
 *
 * @param string $t Text that may contain wiki tables.
 * @return string The text with table lines rewritten to LaTeX.
 */
private function externalTableHelper($t) {
    // Turn escaped table delimiters (\{| and |\}) back into {| and |}.
    $correct = array("\n\{|" => "\n{|", "|\}\n"=> "|}\n");
    $t = str_replace(array_keys($correct), array_values($correct), $t);
    $t = trim($t);
    $t = explode ( "\n" , $t ) ;
    $ltd = array () ; # Is current cell TD or TH?
    $tr = array () ; # Is currently a tr tag open?
    $ltr = array () ; # tr attributes
    $has_opened_tr = array(); # Did this table open a <tr> element?
    $firstCellOfRow = true;
    $ltx_caption = '';
    $in_table = 0;
    // Defined up front so a table without latexfmt does not read an undefined
    // variable. As before, such a table reuses the format of the previous one.
    $latexformat = '';
    foreach ( $t AS $k => $x )
    {
        $x = trim ( $x ) ;
        $fc = substr ( $x , 0 , 1 ) ;
        if ( preg_match( '/^(:*)\{\|(.*)$/', $x, $matches ) ) {
            // Start of table: extract LaTeX hints from the attributes, make header.
            $attributes = $this->unstripForHTML( $matches[2] );
            $attributes_test = $this->parseAttrString($attributes);
            if ( array_key_exists('latexfmt', $attributes_test) ) {
                $latexformat = str_replace("\\", "", $attributes_test['latexfmt']);
            }
            if ( array_key_exists('latexwidth', $attributes_test) ) {
                // Undo the backslash masking so that e.g. \linewidth works again.
                $latexwidth = str_replace('\(\backslash{}\)', '\\', $attributes_test['latexwidth']);
            } else {
                $latexwidth = '\linewidth';
            }
            $header = '\begin{tabularx}{' . $latexwidth . '}{' . $latexformat . '}\hline';
            if ( $in_table == false ) {
                $t[$k] = $header;
            } else {
                // A nested table is wrapped in an extra brace group.
                $t[$k] = '{' . $header;
            }
            $in_table++;
            array_push ( $ltd , '' ) ;
            array_push ( $tr , false ) ;
            array_push ( $ltr , '' ) ;
            array_push ( $has_opened_tr, false );
            $this->addPackageDependency('tabularx');
            $firstCellOfRow = true;
        }
        else if ( ('|}' == substr ( $x , 0 , 2 )) || ('|\}' == substr ( $x , 0 , 3 ))) {
            // End of table. Pop stacks and print latex ending.
            array_pop ( $ltd ) ;
            if ( !array_pop ( $has_opened_tr ) ) $t[$k-1] = $t[$k-1] . '\\\\ \hline';
            if ( array_pop ( $tr ) ) $t[$k-1] = $t[$k-1] . '\\\\ \hline';
            array_pop ( $ltr ) ;
            // Single-quoted on purpose: inside double quotes "\e" is the ESC
            // escape sequence (PHP >= 5.4.4) and would corrupt "\end".
            if ( $in_table > 1 ) {
                $t[$k] = '\end{tabularx}}' . trim($ltx_caption);
            } else {
                $t[$k] = '\end{tabularx}' . "\n" . trim($ltx_caption);
            }
            $in_table--;
            $ltx_caption = '';
        }
        else if ( '|-' == substr ( $x , 0 , 2 ) ) { # Allows for |---------------
            // "|----" (four or more dashes) asks for an additional \hline.
            if ( strpos($x, '----') == 1 ) {
                $add_hline = '\hline';
            } else {
                $add_hline = '';
            }
            $x = substr ( $x , 1 ) ;
            while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ;
            array_pop ( $ltd ) ;
            array_pop ( $has_opened_tr );
            array_push ( $has_opened_tr , true ) ;
            // Terminate the previous row if it had cells.
            if ( array_pop ( $tr ) ) $t[$k-1] = $t[$k-1] . '\\\\ \hline' . $add_hline;
            array_pop ( $ltr ) ;
            $t[$k] = '' ;
            array_push ( $tr , false ) ;
            array_push ( $ltd , '' ) ;
            $attributes = $this->unstripForHTML( $x );
            array_push ( $ltr , Sanitizer::fixTagAttributes ( $attributes, 'tr' ) ) ;
            $firstCellOfRow = true;
        }
        else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) { # Caption
            # $x is a table row
            if ( '|+' == substr ( $x , 0 , 2 ) ) {
                $fc = '+' ;
                $x = substr ( $x , 1 ) ;
            }
            $after = substr ( $x , 1 ) ;
            if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ;
            // Split up multiple cells on the same line.
            // FIXME: This can result in improper nesting of tags processed
            // by earlier parser steps, but should avoid splitting up eg
            // attribute values containing literal "||".
            $after = wfExplodeMarkup( '||', $after );
            $t[$k] = '' ;
            # Loop through each table cell
            foreach ( $after AS $theline )
            {
                $z = '' ;
                if ( $fc != '+' )
                {
                    array_pop ( $ltr ) ;
                    // First cell after a row start begins on a new line.
                    if ( !array_pop ( $tr ) ) $z = "\n" ;
                    array_push ( $tr , true ) ;
                    array_push ( $ltr , '' ) ;
                    array_pop ( $has_opened_tr );
                    array_push ( $has_opened_tr , true ) ;
                }
                $l = array_pop ( $ltd ) ;
                // Heading cells and normal cells are equal in LaTeX:
                if ( ($fc == '|' || $fc == '!') && !$firstCellOfRow ) $l = ' & ';
                else if ( $fc == '+' ) {
                    // Captions are collected and appended after \end{tabularx}.
                    $ltx_caption .= $theline;
                    continue;
                }
                else $l = '' ;
                array_push ( $ltd , $l ) ;
                # Cell parameters
                $y = explode ( '|' , $theline , 2 ) ;
                # Note that a '|' inside an invalid link should not
                # be mistaken as delimiting cell parameters
                if ( strpos( $y[0], '[[' ) !== false ) {
                    $y = array ($theline);
                }
                if ( count ( $y ) == 1 ) {
                    if ( $fc == '!' ) { // Heading cell highlighting
                        $y = $z . $l . '\textbf{' . $y[0] . '}' ;
                    } else {
                        $y = $z . $l . $y[0] ;
                    }
                } else {
                    $attributes = $this->unstripForHTML( $y[0] );
                    $multi_col = $this->checkColspan($attributes);
                    if ( $multi_col === false ) {
                        // Attributes without colspan: emit an ordinary cell rather
                        // than an invalid \multicolumn{}{}{...}.
                        $content = ( $fc == '!' ) ? '\textbf{' . $y[1] . '}' : $y[1];
                        $y = $z . $l . $content;
                    } else {
                        $addSep = ( $firstCellOfRow == false ) ? '&' : '';
                        $y = $z . $addSep . '\multicolumn{' . $multi_col['colspan'] . '}{' . $multi_col['latexfmt'] . '}{' . $y[1] . '}';
                    }
                }
                $firstCellOfRow = false;
                $t[$k] .= $y;
            }
        }
    }
    $t = implode ( "\n" , $t ) ;
    return $t;
}
/**
 * Extracts the multicolumn settings from a cell attribute string.
 *
 * @param string $str Attribute string of a table cell.
 * @return array|false array('colspan' => ..., 'latexfmt' => ...) where
 *                     latexfmt defaults to '|l|'; false when the attributes
 *                     contain no colspan.
 */
function checkColspan($str) {
    $attr = $this->parseAttrString($str);
    if ( !array_key_exists('colspan', $attr) ) {
        return false;
    }
    $format = array_key_exists('latexfmt', $attr) ? $attr['latexfmt'] : '|l|';
    return array(
        'colspan' => $attr['colspan'],
        'latexfmt' => $format
    );
}
/**
 * Removes HTML comments (<!-- ... -->) from the text. The approach is taken
 * from MediaWiki. An unterminated comment is left in place.
 *
 * @param string $text Text to clean.
 * @return string Text without terminated comments.
 */
private function stripComments( $text = '' ) {
    $method = __METHOD__;
    $this->profileIn($method);
    $open = strpos($text, '<!--');
    while ( $open !== false ) {
        $close = strpos($text, '-->', $open + 4);
        if ( $close === false ) {
            # Unterminated comment; bail out
            break;
        }
        $close += 3;
        # Widen the span over the spaces surrounding the comment.
        $from = max($open - 1, 0);
        $length = $close - $from;
        while ( $from > 0 && substr($text, $from, 1) === ' ' ) {
            $from--;
            $length++;
        }
        while ( substr($text, $from + $length, 1) === ' ' ) {
            $length++;
        }
        $onOwnLine = ( substr($text, $from, 1) === "\n" )
            && ( substr($text, $from + $length, 1) === "\n" );
        if ( $onOwnLine ) {
            # Comment sits between two newlines: drop it together with the
            # surrounding spaces and keep a single newline.
            $text = substr_replace($text, "\n", $from, $length + 1);
        } else {
            # Remove just the comment.
            $text = substr_replace($text, '', $open, $close - $open);
        }
        $open = strpos($text, '<!--');
    }
    $this->profileOut($method);
    return $text;
}
/**
 * Translates a small, fixed set of HTML tags (center, i, b, tt) into their
 * LaTeX counterparts. Tag names are matched case-insensitively.
 *
 * @param string $str Text containing HTML tags.
 * @return string Text with the supported tags converted.
 */
private function doHTML($str) {
    $method = __METHOD__;
    $this->profileIn($method);
    // Only a first step; HTML support is deliberately minimal here.
    $htmlTags = array('<center>', '</center>', "<i>", "</i>", "<b>", "</b>", "<tt>", "</tt>");
    $latexCode = array('\begin{center}', '\end{center}', '\textit{', '}', '\textbf{', '}', '\texttt{', '}');
    $converted = str_ireplace($htmlTags, $latexCode, $str);
    $this->profileOut($method);
    return $converted;
}
/* Toolkit functions */
/**
 * Builds a random hexadecimal string from two mt_rand() draws
 * (each between 0 and 0x7fffffff).
 *
 * @return string Concatenation of the two numbers in hex.
 */
private function uniqueString() {
    $first = mt_rand(0, 0x7fffffff);
    $second = mt_rand(0, 0x7fffffff);
    return dechex($first) . dechex($second);
}
/* Profiling and debugging functions */
/**
 * Records an "in" entry for the given function in the profile log.
 * Does nothing when profiling is switched off.
 *
 * @param string $fName Name of the function being entered.
 * @return void
 */
private function profileIn($fName) {
    if ( !$this->doProfiling ) {
        return;
    }
    $entry = array("function"=>$fName, "time"=>microtime(), "type" => "in");
    $this->ProfileLog[] = $entry;
    return;
}
/**
 * Records an "out" entry for the given function in the profile log.
 * Does nothing when profiling is switched off.
 *
 * @param string $fName Name of the function being left.
 * @return void
 */
private function profileOut($fName) {
    if ( !$this->doProfiling ) {
        return;
    }
    $entry = array("function"=>$fName, "time"=>microtime(), "type" => "out");
    $this->ProfileLog[] = $entry;
    return;
}
/**
 * Records a free-text message in the profile log.
 * Does nothing when profiling is switched off.
 *
 * The entry carries a "type" key like the entries written by profileIn()
 * and profileOut(), so log consumers can read every entry the same way.
 * The historical "action" key is kept for backward compatibility.
 *
 * @param string $msg Message to store (kept under the "function" key).
 * @return void
 */
private function profileMsg($msg) {
    if ($this->doProfiling) {
        $time = microtime();
        $this->ProfileLog[] = array(
            "function" => $msg,
            "time" => $time,
            "type" => "msg",
            "action" => "msg"
        );
    }
    return;
}
/**
 * Replaces the words "LaTeX", "TeX" and "LaTeX 2e" with the matching LaTeX
 * logo commands.
 *
 * @param string $str Text to process.
 * @return string Text with the logo commands inserted.
 */
public function maskLaTeX($str) {
    $method = __METHOD__;
    $this->profileIn($method);
    $masked = strtr($str, array(
        'LaTeX' => '\LaTeX{}',
        'TeX' => '\TeX{}',
        'LaTeX 2e' => '\LaTeXe{}'
    ));
    $this->profileOut($method);
    return $masked;
}
/**
 * Masks the characters that are significant in LaTeX commands:
 * backslash, { and } are replaced with escaped forms, & with a marker.
 *
 * @param string $str Text to mask.
 * @return string Masked text.
 */
public function maskLatexCommandChars($str) {
    $method = __METHOD__;
    $this->profileIn($method);
    // The ampersand gets a marker registered through mask() together with
    // '\&' - presumably mask() swaps it back later; confirm against mask().
    $ampersandMark = $this->getMark("Et");
    $this->Et = $ampersandMark;
    $this->mask($ampersandMark, '\&');
    $masked = strtr($str, array(
        '\\' => "\(\backslash{}\)",
        "{" => "\{",
        "}" => "\}",
        '&' => $ampersandMark,
    ));
    $this->profileOut($method);
    return $masked;
}
/**
 * Masks the MediaWiki markup characters # and * so they appear literally
 * in the LaTeX output.
 *
 * @param string $str Text to mask.
 * @return string Masked text.
 */
public function maskMwSpecialChars($str) {
    $method = __METHOD__;
    $this->profileIn($method);
    $masked = strtr($str, array(
        '#' => "\#",
        "*" => "\(\ast{}\)",
    ));
    $this->profileOut($method);
    return $masked;
}
/**
 * Escapes the LaTeX special characters _, % and $ with a backslash.
 *
 * @param string $str Text to mask.
 * @return string Masked text.
 */
public function maskLatexSpecialChars($str) {
    $method = __METHOD__;
    $this->profileIn($method);
    $masked = strtr($str, array(
        '_' => '\_',
        '%' => '\%',
        '$' => '\$'
    ));
    $this->profileOut($method);
    return $masked;
}
/**
 * Creates a marker string of the form
 * ((UNIQ-W2L-<unique>-<tag>-<8 hex digits>-QINU)).
 *
 * The internal marks counter is advanced on every call, including calls
 * that pass an explicit number.
 *
 * @param string $tag    Label embedded in the marker.
 * @param int    $number Number to embed; -1 (default) uses the marks counter.
 * @return string The marker.
 */
public function getMark($tag, $number = -1) {
    $method = __METHOD__;
    $this->profileIn($method);
    $this->marks_counter++;
    $serial = ( $number == -1 ) ? $this->marks_counter : $number;
    $marker = '((UNIQ-W2L-' . $this->unique . '-' . $tag . '-' . sprintf('%08X', $serial) . '-QINU))';
    $this->profileOut($method);
    return $marker;
}
/**
 * Expands every {{...}} construct in the text (templates, variables,
 * parser functions and transclusions).
 *
 * The text is cut into brace blocks by split_str(); each block starting
 * with "{{" is handed to doCurlyBraces(). Finally escaped triple braces
 * (\{\{\{ and \}\}\}) are turned back into {{{ and }}}.
 *
 * @param string $str Text to expand.
 * @return string Expanded text.
 */
public function processCurlyBraces($str) {
    $method = __METHOD__;
    $this->profileIn($method);
    if ( !$this->initiated ) {
        $this->initParsing();
    }
    $this->curlyBraceDebugCounter++;
    $this->curlyBraceLength += strlen($str);
    // The marker itself is unused, but the call is kept because getMark()
    // advances the marks counter.
    $this->getMark('pipe');
    $expanded = '';
    foreach ( $this->split_str($str) as $segment ) {
        if ( strncmp($segment, '{{', 2) === 0 ) {
            // Pass the full block and its content without the outer braces.
            $segment = $this->doCurlyBraces(array($segment, substr($segment, 2, -2)));
        }
        $expanded .= $segment;
    }
    $expanded = strtr($expanded, array('\{\{\{' => '{{{', '\}\}\}' => '}}}'));
    $this->profileOut($method);
    return $expanded;
}
/**
 * Expands a single {{...}} construct.
 *
 * The part before the first "|" is the identifier; checkIdentifier()
 * decides whether it names a template, a parser function, a transclusion
 * or a variable.
 *
 * @param array $matches $matches[0] is the whole construct including braces,
 *                       $matches[1] its content without the outer braces.
 * @return string The trimmed expansion; empty for an unknown type.
 */
private function doCurlyBraces($matches) {
    $match = trim($matches[1]);
    if ( substr_count($match, '|') !== 0 ) {
        $tmp = explode('|', $match, 2);
        $identifier = $tmp[0];
        $args = $tmp[1];
    } else {
        $identifier = $match;
        $args = '';
    }
    // checkIdentifier() classifies the trimmed name, so the same trimmed
    // name must be used for the lookups below (e.g. "{{PAGENAME |x}}").
    $identifier = trim($identifier);
    $tmp = '';
    $type = $this->checkIdentifier($identifier);
    switch ($type) {
        case W2L_TEMPLATE:
            // Without arguments there is nothing to parse; never hand an
            // array to processArgString(), which explodes a string.
            $args = ( '' === $args ) ? array() : $this->processArgString($args);
            $tmp = $this->getContentByTitle($identifier, NS_TEMPLATE);
            $tmp = $this->preprocessString($tmp);
            $tmp = $this->processTemplateVariables($tmp, $args);
            $tmp = $this->processCurlyBraces($tmp);
            break;
        case W2L_PARSERFUNCTION:
        case W2L_COREPARSERFUNCTION:
            $fnc = explode(':', $identifier, 2);
            // A function written without ":" has no expression part.
            $expr = isset($fnc[1]) ? $fnc[1] : '';
            // "#name" functions are registered without the leading "#".
            $function = ( $type == W2L_PARSERFUNCTION ) ? substr($fnc[0], 1) : $fnc[0];
            // Hide pipes that belong to {{{var|default}}} and [[link|text]]
            // so they are not mistaken for argument separators.
            $mark = $this->getMark('pipe');
            $args = preg_replace('/\{\{\{(.*)\|(.*)\}\}\}/U', '{{{$1'.$mark.'$2}}}', $args);
            $args = $this->processCurlyBraces($args);
            $args = preg_replace('/\[\[(.*)\|(.*)\]\]/U', '[[$1'.$mark.'$2]]', $args);
            $new_args = array();
            foreach ( explode('|', $args) as $value ) {
                $new_args[] = str_replace($mark, '|', $value);
            }
            $tmp = $this->processParserFunction($function, $expr, $new_args);
            break;
        case W2L_TRANSCLUSION:
            // Drop the leading ":" to get the page title.
            $title = substr($identifier, 1);
            $args = ( '' === $args ) ? array() : $this->processArgString($args);
            $tmp = $this->getContentByTitle($title);
            $tmp = $this->preprocessString($tmp);
            $tmp = $this->processTemplateVariables($tmp, $args);
            $tmp = $this->processCurlyBraces($tmp);
            break;
        case W2L_VARIABLE:
            $tmp = $this->mw_vars[$identifier];
            break;
        default:
            $this->reportError( wfMsg('w2l_parser_no_default_value'), __METHOD__);
            break;
    }
    return trim($tmp);
}
/**
 * Splits a template argument string ("a|b|name=value") into an array.
 *
 * Named arguments (containing "=") are stored under their trimmed name with
 * a trimmed value. Unnamed arguments are numbered 1, 2, 3, ... in order of
 * appearance; named arguments do not consume a number, which is how
 * MediaWiki numbers anonymous template parameters.
 *
 * @param string|array $str Argument string. An array (as passed for "no
 *                          arguments") yields an empty result.
 * @return array Map of parameter name or position to value.
 */
private function processArgString($str) {
    $args = array();
    if ( is_array($str) ) {
        // explode() needs a string; an array here means "no arguments".
        return $args;
    }
    $position = 0;
    foreach ( explode('|', $str) as $keyvaluepair ) {
        if ( strpos($keyvaluepair, '=') !== false ) {
            $pair = explode('=', $keyvaluepair, 2);
            $args[trim($pair[0])] = trim($pair[1]);
        } else {
            $args[++$position] = $keyvaluepair;
        }
    }
    return $args;
}
/**
 * Substitutes {{{name}}} placeholders in a template text with the supplied
 * arguments and escapes any triple braces that remain afterwards.
 *
 * @param string $str  Template text.
 * @param array  $args Argument map (name or position => value).
 * @return string Text with placeholders filled in.
 */
private function processTemplateVariables($str, $args = array()) {
    // doTemplateVariables() reads the arguments from this property.
    $this->templateVars = $args;
    $filled = preg_replace_callback('/\{\{\{(.*?)\}\}\}/sm', array($this, 'doTemplateVariables'), $str);
    $escaped = strtr($filled, array('{{{'=>'\{\{\{', '}}}' => '\}\}\}'));
    unset($this->templateVars);
    return $escaped;
}
/**
 * preg_replace_callback handler for one {{{name}}} or {{{name|default}}}
 * placeholder.
 *
 * Returns the argument stored in $this->templateVars under the name. When
 * the argument is missing or an empty string, the default is returned if
 * one was given, otherwise the placeholder itself. A value of "0" counts as
 * a real value.
 *
 * @param array $match $match[0] is the whole placeholder, $match[1] its content.
 * @return string Replacement text.
 */
private function doTemplateVariables($match) {
    if ( strpos($match[1], '|') !== false ) {
        $with_default = explode('|', $match[1], 2);
        $name = $with_default[0];
        $fallback = $with_default[1];
    } else {
        $name = $match[1];
        // No default given: keep the placeholder as it is.
        $fallback = $match[0];
    }
    // isset() avoids an undefined-index access for arguments not passed.
    $content = isset($this->templateVars[$name]) ? $this->templateVars[$name] : null;
    if ( $content === null || $content === '' ) {
        return $fallback;
    }
    return $content;
}
/**
 * Calls the callback registered for a parser function.
 *
 * The callback receives this parser, the trimmed expression and the trimmed
 * arguments. An unregistered or non-callable function is reported through
 * reportError() and expands to an empty string instead of reaching
 * call_user_func_array() with an invalid callback.
 *
 * @param string $fnc  Function name as registered in $this->pFunctions.
 * @param string $expr Text after the ":" of the function call.
 * @param array  $args Remaining arguments.
 * @return mixed The callback result; '' if it returned an array or the
 *               function is unknown.
 */
private function processParserFunction($fnc, $expr, $args) {
    if ( !isset($this->pFunctions[$fnc]) || !is_callable($this->pFunctions[$fnc]) ) {
        $this->reportError('Unknown parser function: '.$fnc, __METHOD__);
        return '';
    }
    // The parser is put into the list by reference so that callbacks
    // declaring their first parameter as a reference can be invoked.
    $params = array(&$this, trim($expr));
    foreach($args as $value) {
        $params[] = trim($value);
    }
    $content = call_user_func_array($this->pFunctions[$fnc], $params);
    if ( is_array($content) ) {
        return '';
    }
    return $content;
}
/**
 * Cuts the text into consecutive pieces so that every top-level brace
 * group ({...}, {{...}}, ...) forms a piece of its own. Concatenating the
 * pieces yields the original text.
 *
 * Table delimiters ("\n{|" and "|}\n") are hidden behind markers while
 * scanning so their braces are not counted. A closing brace without a
 * matching opening brace is treated as plain text, so it cannot throw off
 * the nesting depth for the constructs that follow it.
 *
 * @param string $str Text to split.
 * @return array Pieces in order of appearance; always at least one element.
 */
private function split_str($str) {
    $table_open_mark = $this->getMark('table-open');
    $table_close_mark = $this->getMark('table-close');
    $str = str_replace("\n{|", $table_open_mark, $str);
    $str = str_replace("|}\n", $table_close_mark, $str);
    $depth = 0;
    $block = 0;
    $split_array = array(0 => '');
    $length = strlen($str);
    for ( $i = 0; $i < $length; ++$i ) {
        $cur_char = $str[$i];
        if ( $cur_char === '{' ) {
            if ( $depth === 0 ) {
                // A top-level group starts: open a new piece.
                ++$block;
                $split_array[$block] = '';
            }
            ++$depth;
            $split_array[$block] .= $cur_char;
        } elseif ( $cur_char === '}' && $depth > 0 ) {
            --$depth;
            $split_array[$block] .= $cur_char;
            if ( $depth === 0 ) {
                // The group is complete: following text goes into a new piece.
                ++$block;
                $split_array[$block] = '';
            }
        } else {
            $split_array[$block] .= $cur_char;
        }
    }
    // Put the table delimiters back.
    $new_split = array();
    foreach ( $split_array as $key => $value ) {
        $value = str_replace($table_open_mark, "\n{|", $value);
        $value = str_replace($table_close_mark, "|}\n", $value);
        $new_split[$key] = $value;
    }
    return $new_split;
}
/**
 * Fetches the wikitext of a page.
 *
 * When the "use_cache" setting is on, results are kept in
 * $this->content_cache keyed by the trimmed title string.
 * NOTE(review): the cache key does not include the namespace, so equal
 * titles in different namespaces share one entry - confirm whether that is
 * intended.
 *
 * @param string $title_str Page title.
 * @param int    $namespace Namespace used to resolve the title.
 * @return string The page content; the title string itself when the title
 *                is invalid or the page does not exist (an error is
 *                recorded in both cases).
 */
public function getContentByTitle( $title_str , $namespace = NS_MAIN) {
    $title_str = trim($title_str);
    if ( $this->getVal("use_cache") AND array_key_exists($title_str, $this->content_cache) ) {
        $this->reportError(wfMsg('w2l_parser_cachehit', $title_str), __METHOD__);
        return $this->content_cache[$title_str];
    }
    $title = Title::newFromText( $title_str , $namespace);
    if ( !( $title instanceof Title ) ) {
        $this->reportError("title_str=".$title_str, __METHOD__);
        return $title_str;
    }
    if ( $title->exists() ) {
        // The object is passed normally: call-time pass-by-reference
        // (&$title) is a fatal error since PHP 5.4.
        $rev = new Article( $title, 0 );
        $text = $rev->getContent();
    } else {
        $text = $title_str;
        $this->reportError( wfMsg('w2l_parser_article_not_existing', $title_str ), __METHOD__);
    }
    if ( $this->getVal("use_cache") ) {
        $this->content_cache[$title_str] = $text;
    }
    return $text;
}
/**
 * Classifies the identifier of a {{...}} construct.
 *
 * Checks, in this order: a known variable (key of $this->mw_vars), a
 * leading "#" (parser function), a leading ":" (transclusion), a registered
 * function name before the first ":" (core parser function). Anything else,
 * including an empty identifier, is a template.
 *
 * @param string $str Identifier; surrounding whitespace is ignored.
 * @return int One of W2L_VARIABLE, W2L_PARSERFUNCTION, W2L_TRANSCLUSION,
 *             W2L_COREPARSERFUNCTION or W2L_TEMPLATE.
 */
public function checkIdentifier($str) {
    $str = trim($str);
    if ( array_key_exists($str, $this->mw_vars) ) {
        return W2L_VARIABLE;
    }
    // substr() is safe on an empty string; the {0} offset syntax used
    // before is no longer valid in PHP 8.
    $first = substr($str, 0, 1);
    if ( '#' === $first ) {
        return W2L_PARSERFUNCTION;
    }
    if ( ':' === $first ) {
        return W2L_TRANSCLUSION;
    }
    $test = explode(':', $str, 2);
    if ( array_key_exists($test[0], $this->pFunctions) ) {
        return W2L_COREPARSERFUNCTION;
    }
    return W2L_TEMPLATE;
}
// Placeholder: fetching content by page id is not implemented; the body is empty.
public function getContentByPageId($id) {}
// Placeholder: fetching content by revision id is not implemented; the body is empty.
public function getContentByRevId($id) {}
// Placeholder: fetching content by URL is not implemented; the body is empty.
public function getContentByUrl($url) {}
/**
 * Appends a line "<function>: <message>" to the error log and flags that
 * at least one message has been recorded.
 *
 * @param string $msg Message text.
 * @param string $fnc Name of the reporting function.
 * @return void
 */
public function reportError( $msg, $fnc ) {
    $line = $fnc . ': ' . $msg . "\n";
    $this->error_msg[] = $line;
    $this->is_error = true;
}
/**
 * Renders the collected messages as an HTML fragment: the
 * 'w2l_parser_protocol' message followed by a textarea with one line per
 * recorded message.
 *
 * Message lines can contain text taken from wiki pages (for example page
 * titles), so they are HTML-escaped before being placed in the textarea.
 *
 * @return string HTML fragment, or '' when nothing was recorded.
 */
public function getErrorMessages() {
    if ( !$this->is_error ) {
        return '';
    }
    $errors = wfMsg('w2l_parser_protocol')."\n";
    $errors .= '<textarea style="height:200px;">';
    foreach ($this->error_msg as $error_line) {
        $errors .= htmlspecialchars($error_line, ENT_QUOTES, 'UTF-8');
    }
    $errors .= '</textarea>'."\n";
    return $errors;
}
/**
 * Stores the variable map that checkIdentifier() and doCurlyBraces() read
 * from $this->mw_vars (variable name => value).
 *
 * @param array $vars Variable map; replaces any previous map.
 * @return bool Always true.
 */
public function setMwVariables($vars) {
$this->mw_vars = $vars;
return true;
}
/**
 * Registers a LaTeX package for the \usepackage block built by
 * getUsePackageBlock(). Registering a package again overwrites its options.
 *
 * @param string $package Package name.
 * @param string $options Package options; '' for none.
 * @return bool Always true.
 */
public function addPackageDependency($package, $options = '') {
$this->required_packages[$package] = $options;
return true;
}
/**
 * Appends a piece of code to the list returned (de-duplicated) by
 * getLatexHeadCode().
 *
 * @param string $code LaTeX code to add.
 */
public function addLatexHeadCode($code) {
$this->latex_headcode[] = $code;
}
/**
 * Returns the collected head code, duplicates removed, one entry per line.
 *
 * @return string Head code entries joined with newlines.
 */
public function getLatexHeadCode() {
    return implode("\n", array_unique($this->latex_headcode));
}
/**
 * Builds one \usepackage line per registered package, with the options in
 * square brackets when any were given.
 *
 * @return string The \usepackage lines, each ending with a newline.
 */
public function getUsePackageBlock() {
    $block = '';
    foreach ( $this->required_packages as $package => $options ) {
        $optionPart = ( $options != '' ) ? '['.$options.']' : '';
        $block .= '\usepackage' . $optionPart . '{'.$package.'}'."\n";
    }
    return $block;
}
/**
 * Parses an HTML-like attribute string (name="value" name='value'
 * name=value name) into an array.
 *
 * Values may be double-quoted, single-quoted or unquoted (ending at the
 * next space); an attribute without a value gets ''. At most 10000
 * attributes are read.
 *
 * @param string $str Attribute string.
 * @return array Map of attribute name => value.
 */
function parseAttrString($str) {
    $result = array();
    $i = 1;
    while ( $i <= 10000 ) {
        $str = trim((string) $str);
        // Compared against '' explicitly: empty() would also stop on a
        // remaining "0".
        if ( $str === '' ) {
            break;
        }
        // Sentinel space: guarantees every search below finds a terminator
        // and that $str[0] exists.
        $str = $str.' ';
        // Attribute name runs up to the first space or "=".
        $howmany = strcspn($str, ' =');
        $attr = substr($str, 0, $howmany);
        $str = substr($str, $howmany);
        if ( $str[0] == '=' ) {
            $str = substr($str, 1);
        }
        $fChar = $str[0];
        if ( $fChar == '"' || $fChar == "'" ) {
            // Quoted value: read up to the matching quote and skip it.
            $str = substr($str, 1);
            $howmany = strcspn($str, $fChar);
            $attr_value = substr($str, 0, $howmany);
            $str = substr($str, $howmany + 1);
        } elseif ( $fChar == ' ' ) {
            // Attribute without a value.
            $attr_value = '';
        } else {
            // Unquoted value: read up to the next space.
            $howmany = strcspn($str, ' ');
            $attr_value = substr($str, 0, $howmany);
            $str = substr($str, $howmany + 1);
        }
        $result[$attr] = $attr_value;
        $i++;
    }
    return $result;
}
// Wiki-Parser functions
/**
 * Returns the parser's title object ($this->mTitle) by reference.
 *
 * @return mixed Reference to $this->mTitle.
 */
function &getTitle() {
    return $this->mTitle;
}
}
