MediaWiki r39406 - Code Review

Jump to: navigation, search
Repository:MediaWiki
Revision:r39405‎ | r39406 (on ViewVC)‎ | r39407 >
Date:11:46, 15 August 2008
Author:guyvdb
Status:old
Tags:
Comment:
New implementation of wikidiff3 algorithm replacing the previous one and basic HTML diffing support, both integrated with DifferenceEngine
Modified paths:

Diff [purge]

Index: trunk/phase3/skins/common/diff.js
===================================================================
--- trunk/phase3/skins/common/diff.js	(revision 39405)
+++ trunk/phase3/skins/common/diff.js	(revision 39406)
@@ -17,4 +17,16 @@
 	lastSheet.insertRule(
 		"table.diff td div { overflow: visible; }",
 		lastSheet.cssRules.length);
-}
\ No newline at end of file
+}
+    
+function tipA(content){
+    return false;
+}
+
+function tipR(content){
+    return false;
+}
+
+function tipC(content){
+    return false;
+}
Index: trunk/phase3/skins/common/diff.css
===================================================================
--- trunk/phase3/skins/common/diff.css	(revision 39405)
+++ trunk/phase3/skins/common/diff.css	(revision 39406)
@@ -74,3 +74,66 @@
 	   table: */
 	/* overflow: visible; */
 }
+
+/*
+ * Styles for the HTML Diff
+ */
+span.diff-html-added {
+  font-size: 100%;
+  background-color: #20ff20
+}
+
+span.diff-html-removed {
+  font-size: 100%;
+  text-decoration: line-through;
+  background-color: #ff2020
+}
+
+span.diff-html-changed {
+  background: url(images/diffunderline.gif) bottom repeat-x;
+  *background-color: #c6c6fd; /* light blue */
+}
+
+span.diff-html-added img{
+ border: 5px solid #ccffcc;
+}
+
+span.diff-html-removed img{
+ border: 5px solid #fdc6c6;
+}
+
+span.diff-html-changed img{
+ border: 5px dotted #000099;
+ 
+}
+
+span.diff-html-changed  {
+  position: relative;   /* this is key */
+  cursor: help;
+}
+ 
+span.diff-html-changed span.tip {
+  display: none;        /* so is this */
+}
+
+/* tooltip will display on :hover event */
+ 
+span.diff-html-changed:hover span.tip {
+  display: block;
+  z-index: 95;
+  position: absolute;
+  top: 2.5em;
+  left: 0;
+  width: auto;
+  line-height: 1.2em;
+  padding: 3px 7px 4px 6px;
+  border: 1px solid #336;
+  background-color: #f7f7ee;
+  font-family: arial, helvetica, sans-serif;
+  font-size: 10px;
+  font-weight: normal;
+  color: #000;
+  text-align: left;
+}
+
+
Index: trunk/phase3/skins/common/images/diffunderline.gif
===================================================================
--- trunk/phase3/skins/common/images/diffunderline.gif	(revision 0)
+++ trunk/phase3/skins/common/images/diffunderline.gif	(revision 39406)
@@ -0,0 +1 @@
+GIF89a‘ÿÿÿÛÿÿ™!ùÿ,„%!g;
\ No newline at end of file
Index: trunk/phase3/includes/HTMLDiff.php
===================================================================
--- trunk/phase3/includes/HTMLDiff.php	(revision 0)
+++ trunk/phase3/includes/HTMLDiff.php	(revision 39406)
@@ -0,0 +1,1997 @@
+<?php
+/* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * or see http://www.gnu.org/
+ */
+
+
+/**
+ * Any element in the DOM tree of an HTML document.
+ */
+abstract class Node{
+
+	protected $parent;
+
+
+	function __construct($parent){
+		$this->parent = $parent;
+		if(!is_null($parent)){
+			$parent->addChild($this);
+		}
+	}
+
+	public function getParent(){
+		return $this->parent;
+	}
+
+	public function getParentTree(){
+		if(!is_null($this->parent)){
+			$parentTree = $this->parent->getParentTree();
+			$parentTree[] = $this->parent;
+			return $parentTree;
+		}else{
+			return array();
+		}
+	}
+
+	public abstract function getMinimalDeletedSet($id);
+
+	public function detectIgnorableWhiteSpace(){
+		// no op
+	}
+
+	public function getLastCommonParent(Node $other){
+		if(is_null($other))
+		throw new Exception('The given node is NULL');
+
+		$result = new LastCommonParentResult();
+
+		$myParents = $this->getParentTree();
+		$otherParents = $other->getParentTree();
+
+		$i = 1;
+		$isSame = true;
+		while ($isSame && $i < sizeof($myParents) && $i < sizeof($otherParents)) {
+			if (!$myParents[$i]->isSameTag($otherParents[$i])) {
+				$isSame = false;
+			} else {
+				// After the while, the index i-1 must be the last common parent
+				$i++;
+			}
+		}
+
+		$result->setLastCommonParentDepth($i - 1);
+		$result->setLastCommonParent($myParents[$i - 1]);
+
+		if (!$isSame) {
+			$result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
+			$result->setSplittingNeeded();
+		} else if (sizeof($myParents) < sizeof($otherParents)) {
+			$result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
+		} else if (sizeof($myParents) > sizeof($otherParents)) {
+			// All tags matched but there are tags left in this tree
+			$result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
+			$result->setSplittingNeeded();
+		} else {
+			// All tags matched untill the very last one in both trees
+			// or there were no tags besides the BODY
+			$result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
+		}
+		return $result;
+	}
+
+	public function setParent($parent) {
+		$this->parent = $parent;
+	}
+
+	public abstract function copyTree();
+
+	public function inPre() {
+		$tree = $this->getParentTree();
+		foreach ($tree as $ancestor) {
+			if ($ancestor->isPre()) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	private $whiteBefore = false;
+
+	private $whiteAfter = false;
+
+	public function isWhiteBefore() {
+		return $this->whiteBefore;
+	}
+
+	public function setWhiteBefore($whiteBefore) {
+		$this->whiteBefore = $whiteBefore;
+	}
+
+	public function isWhiteAfter() {
+		return $this->whiteAfter;
+	}
+
+	public function setWhiteAfter($whiteAfter) {
+		$this->whiteAfter = $whiteAfter;
+	}
+
+	public abstract function getLeftMostChild();
+
+	public abstract function getRightMostChild();
+}
+
+/**
+ * Node that can contain other nodes. Represents an HTML tag.
+ */
+class TagNode extends Node{
+
+	public $children = array();
+
+	protected $qName;
+
+	protected $attributes = array();
+
+	function __construct($parent, $qName, /*array*/ $attributes) {
+		parent::__construct($parent);
+		$this->qName = strtolower($qName);
+		foreach($attributes as $key => $value){
+			$this->attributes[strtolower($key)] = $value;
+		}
+	}
+
+	public function addChild(Node $node, $index=-1) {
+		if ($node->getParent() !== $this)
+		throw new Exception(
+                    'The new child must have this node as a parent.');
+		if($index == -1){
+			$this->children[] = $node;
+		}else{
+			array_splice($this->children,$index,0,array($node));
+		}
+	}
+
+	public function getIndexOf(Node $child) {
+		// don't trust array_search with objects
+		foreach($this->children as $key=>$value){
+			if($value === $child){
+				return $key;
+			}
+		}
+		return NULL;
+	}
+
+	public function getChild($i) {
+		return $this->children[$i];
+	}
+
+	public function getNbChildren() {
+		return count($this->children);
+	}
+
+	public function getQName() {
+		return $this->qName;
+	}
+
+	public function getAttributes() {
+		return $this->attributes;
+	}
+
+	public function isSameTag(TagNode $other) {
+		if (is_null($other))
+		return false;
+		return $this->getOpeningTag() === $other->getOpeningTag();
+	}
+
+	public function getOpeningTag() {
+		$s = '<'.$this->getQName();
+		foreach ($this->attributes as $attribute => $value) {
+			$s .= ' ' . $attribute . '="' . $value . '"';
+		}
+		return $s .= '>';
+	}
+
+	public function getEndTag() {
+		return '</' . $this->getQName() + '>"';
+	}
+
+	public function getMinimalDeletedSet($id) {
+		$nodes = array();
+
+		if ($this->getNbChildren() == 0)
+		return $nodes;
+
+		$hasNotDeletedDescendant = false;
+
+		foreach ($this->children as $child) {
+			$childrenChildren = $child->getMinimalDeletedSet($id);
+			$nodes = array_merge($nodes, $childrenChildren);
+			if (!$hasNotDeletedDescendant
+			&& !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) {
+				// This child is not entirely deleted
+				$hasNotDeletedDescendant = true;
+			}
+		}
+		if (!$hasNotDeletedDescendant) {
+			$nodes = array($this);
+		}
+		return $nodes;
+	}
+
+	public function toString() {
+		return $this->getOpeningTag();
+	}
+
+	public function splitUntill(TagNode $parent, Node $split, $includeLeft) {
+		$splitOccured = false;
+		if ($parent !== $this) {
+			$part1 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+			$part2 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+			$part1->setParent($this->getParent());
+			$part2->setParent($this->getParent());
+
+			$i = 0;
+			$nbChildren = $this->getNbChildren();
+			while ($i < $nbChildren && $this->children[$i] !== $split) {
+				$this->children[$i]->setParent($part1);
+				$part1->addChild($this->children[$i]);
+				++$i;
+			}
+			if ($i < $nbChildren) {
+				if ($includeLeft) {
+					$this->children[$i]->setParent($part1);
+					$part1->addChild($this->children[$i]);
+				} else {
+					$this->children[$i]->setParent($part2);
+					$part2->addChild($this->children[$i]);
+				}
+				++$i;
+			}
+			while ($i < $nbChildren) {
+				$this->children[$i]->setParent($part2);
+				$part2->addChild($this->children[$i]);
+				++$i;
+			}
+			$myindexinparent = $this->parent->getIndexOf($this);
+			if ($part1->getNbChildren() > 0)
+			$this->parent->addChild($part1,$myindexinparent);
+
+			if ($part2->getNbChildren() > 0)
+			$this->parent->addChild($part2,$myindexinparent);
+
+			if ($part1->getNbChildren() > 0 && $part2->getNbChildren() > 0) {
+				$splitOccured = true;
+			}
+
+			$this->parent->removeChild($myindexinparent);
+
+			if ($includeLeft)
+			$this->parent->splitUntill($parent, $part1, $includeLeft);
+			else
+			$this->parent->splitUntill($parent, $part2, $includeLeft);
+		}
+		return $splitOccured;
+
+	}
+
+	private function removeChild($index) {
+		unset($this->children[$index]);
+		$this->children = array_values($this->children);
+	}
+
+	public static $blocks = array('html'=>TRUE,'body'=>TRUE,'p'=>TRUE,'blockquote'=>TRUE,
+    	'h1'=>TRUE,'h2'=>TRUE,'h3'=>TRUE,'h4'=>TRUE,'h5'=>TRUE,'pre'=>TRUE,'div'=>TRUE,'ul'=>TRUE,'ol'=>TRUE,'li'=>TRUE,
+    	'table'=>TRUE,'tbody'=>TRUE,'tr'=>TRUE,'td'=>TRUE,'th'=>TRUE,'br'=>TRUE);
+
+	public static function isBlockLevelName($qName) {
+		return array_key_exists(strtolower($qName),self::$blocks);
+	}
+
+	public static function isBlockLevelNode(Node $node) {
+		if(! $node instanceof TagNode)
+		return false;
+		return self::isBlockLevelName($node->getQName());
+	}
+
+	public function isBlockLevel() {
+		return  self::isBlockLevelNode($this);
+	}
+
+	public static function isInlineName($qName) {
+		return !self::isBlockLevelName($qName);
+	}
+
+	public static function isInlineNode(Node $node) {
+		return !self::isBlockLevelNode($node);
+	}
+
+	public function isInline() {
+		return self::isInlineNode($this);
+	}
+
+	public function copyTree() {
+		$newThis = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+		$newThis->setWhiteBefore($this->isWhiteBefore());
+		$newThis->setWhiteAfter($this->isWhiteAfter());
+		foreach($this->children as $child) {
+			$newChild = $child->copyTree();
+			$newChild->setParent($newThis);
+			$newThis->addChild($newChild);
+		}
+		return $newThis;
+	}
+
+	public function getMatchRatio(TagNode $other) {
+		$txtComp = new TextOnlyComparator($other);
+		return $txtComp->getMatchRatio(new TextOnlyComparator($this));
+	}
+
+	public function expandWhiteSpace() {
+		$shift = 0;
+		$spaceAdded = false;
+
+		$nbOriginalChildren = $this->getNbChildren();
+		for ($i = 0; $i < $nbOriginalChildren; ++$i) {
+			$child = $this->getChild($i + $shift);
+
+			if($child instanceof TagNode){
+				if (!$child->isPre()) {
+					$child->expandWhiteSpace();
+				}
+			}
+			if (!$spaceAdded && $child->isWhiteBefore()) {
+				$ws = new WhiteSpaceNode(NULL, ' ', $child->getLeftMostChild());
+				$ws->setParent($this);
+				$this->addChild($ws,$i + ($shift++));
+			}
+			if ($child->isWhiteAfter()) {
+				$ws = new WhiteSpaceNode(NULL, ' ', $child->getRightMostChild());
+				$ws->setParent($this);
+				$this->addChild($ws,$i + 1 + ($shift++));
+				$spaceAdded = true;
+			} else {
+				$spaceAdded = false;
+			}
+
+		}
+	}
+
+	public function getLeftMostChild() {
+		if ($this->getNbChildren() < 1)
+		return $this;
+		return $this->getChild(0)->getLeftMostChild();
+
+	}
+
+	public function getRightMostChild() {
+		if ($this->getNbChildren() < 1)
+		return $this;
+		return $this->getChild($this->getNbChildren() - 1)->getRightMostChild();
+	}
+
+	public function isPre() {
+		return 0 == strcasecmp($this->getQName(),'pre');
+	}
+
+	public static function toDiffLine(TagNode $node){
+		return $node->getOpeningTag();
+	}
+}
+
+/**
+ * Represents a piece of text in the HTML file.
+ */
+class TextNode extends Node{
+
+	private $s;
+
+	private $modification;
+
+	function __construct($parent, $s) {
+		parent::__construct($parent);
+		$this->modification = new Modification(Modification::NONE);
+		$this->s = $s;
+	}
+
+	public function copyTree() {
+		$clone = clone $this;
+		$clone->setParent(NULL);
+		return $clone;
+	}
+
+	public function getLeftMostChild() {
+		return $this;
+	}
+
+	public function getRightMostChild() {
+		return $this;
+	}
+
+	public function getMinimalDeletedSet($id) {
+		if ($this->getModification()->getType() == Modification::REMOVED
+		&& $this->getModification()->getID() == $id){
+			return array($this);
+		}
+		return array();
+	}
+
+	public function getModification() {
+		return $this->modification;
+	}
+
+	public function getText() {
+		return $this->s;
+	}
+
+	public function isSameText($other) {
+		if (is_null($other) || ! $other instanceof TextNode){
+			return false;
+		}
+		return str_replace('\n', ' ',$this->getText()) === str_replace('\n', ' ',$other->getText());
+	}
+
+	public function setModification(Modification $m) {
+		//wfDebug("Registered modification for node '$this->s' as ".Modification::typeToString($m->getType()));
+		$this->modification = $m;
+	}
+
+	public function toString() {
+		return $this->getText();
+	}
+
+	public static function toDiffLine(TextNode $node){
+		return str_replace('\n', ' ',$node->getText());
+	}
+}
+
+class WhiteSpaceNode extends TextNode {
+
+	function __construct($parent, $s, Node $like = NULL) {
+		parent::__construct($parent, $s);
+		if(!is_null($like) && $like instanceof TextNode){
+			$newModification = clone $like->getModification();
+			$newModification->setFirstOfID(false);
+			$this->setModification($newModification);
+		}
+	}
+
+	public static function isWhiteSpace($c) {
+		switch ($c) {
+			case ' ':
+			case '\t':
+			case '\r':
+			case '\n':
+				return true;
+			default:
+				return false;
+		}
+	}
+}
+
+/**
+ * Represents the root of a HTML document.
+ */
+class BodyNode extends TagNode {
+
+	function __construct() {
+		parent::__construct(NULL, 'body', array());
+	}
+
+	public function copyTree() {
+		$newThis = new BodyNode();
+		foreach ($this->children as $child) {
+			$newChild = $child->copyTree();
+			$newChild->setParent($newThis);
+			$newThis->addChild($newChild);
+		}
+		return $newThis;
+	}
+
+	public function getMinimalDeletedSet($id) {
+		$nodes = array();
+		foreach ($this->children as $child) {
+			$childrenChildren = $child->getMinimalDeletedSet($id);
+			$nodes = array_merge($nodes, $childrenChildren);
+		}
+		return $nodes;
+	}
+
+}
+
+/**
+ * Represents an image in HTML. Even though images do not contain any text they
+ * are independent visible objects on the page. They are logically a TextNode.
+ */
+class ImageNode extends TextNode {
+
+	private $attributes;
+
+	function __construct(TagNode $parent, /*array*/ $attrs) {
+		if(!array_key_exists('src',$attrs)){
+			//wfDebug('Image without a source:');
+			foreach($attrs as $key => $value){
+				//wfDebug("$key = $value");
+			}
+			parent::__construct($parent, '<img></img>');
+		}else{
+			parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
+		}
+		$this->attributes = $attrs;
+	}
+
+	public function isSameText($other) {
+		if (is_null($other) || ! $other instanceof ImageNode)
+		return false;
+		return $this->getText() === $other->getText();
+	}
+
+	public function getAttributes() {
+		return $this->attributes;
+	}
+
+}
+
+/**
+ * When detecting the last common parent of two nodes, all results are stored as
+ * a LastCommonParentResult.
+ */
+class LastCommonParentResult {
+
+	// Parent
+	private $parent;
+
+	public function getLastCommonParent() {
+		return $this->parent;
+	}
+
+	public function setLastCommonParent(TagNode $parent) {
+		$this->parent = $parent;
+	}
+
+	// Splitting
+	private $splittingNeeded = false;
+
+	public function isSplittingNeeded() {
+		return $this->splittingNeeded;
+	}
+
+	public function setSplittingNeeded() {
+		$this->splittingNeeded = true;
+	}
+
+	// Depth
+	private $lastCommonParentDepth = -1;
+
+	public function getLastCommonParentDepth() {
+		return $this->lastCommonParentDepth;
+	}
+
+	public function setLastCommonParentDepth($depth) {
+		$this->lastCommonParentDepth = $depth;
+	}
+
+	// Index
+	private $indexInLastCommonParent = -1;
+
+	public function getIndexInLastCommonParent() {
+		return $this->indexInLastCommonParent;
+	}
+
+	public function setIndexInLastCommonParent($index) {
+		$this->indexInLastCommonParent = $index;
+	}
+}
+
+class Modification{
+
+	const NONE = 1;
+	const REMOVED = 2;
+	const ADDED = 4;
+	const CHANGED = 8;
+
+	private $type;
+
+	private $id = -1;
+
+	private $prevMod;
+
+	private $nextMod;
+
+	private $firstOfID = false;
+
+	function __construct($type) {
+		$this->type = $type;
+	}
+
+	public function copy() {
+		$newM = new Modification($this->getType());
+		$newM->setID($this->getID());
+		$newM->setChanges($this->getChanges());
+		$newM->setFirstOfID($this->isFirstOfID());
+		$newM->setNext($this->getNext());
+		$newM->setPrevious($this->getPrevious());
+		return $newM;
+	}
+
+	public function getType() {
+		return $this->type;
+	}
+
+	public function setID($id) {
+		$this->id = $id;
+	}
+
+	public function getID() {
+		return $this->id;
+	}
+
+	public function setPrevious($m) {
+		$this->prevMod = $m;
+	}
+
+	public function getPrevious() {
+		return $this->prevMod;
+	}
+
+	public function setNext($m) {
+		$this->nextMod = $m;
+	}
+
+	public function getNext() {
+		return $this->nextMod;
+	}
+
+	private $changes;
+
+	public function setChanges($changes) {
+		$this->changes = $changes;
+	}
+
+	public function getChanges() {
+		return $this->changes;
+	}
+
+	public function isFirstOfID() {
+		return $this->firstOfID;
+	}
+
+	public function setFirstOfID($firstOfID) {
+		$this->firstOfID = $firstOfID;
+	}
+
+	public static function typeToString($type){
+		switch($type){
+			case self::NONE: return 'none';
+			case self::REMOVED: return 'removed';
+			case self::ADDED: return 'added';
+			case self::CHANGED: return 'changed';
+		}
+	}
+}
+
+class DomTreeBuilder {
+
+	private $textNodes = array();
+
+	private $bodyNode;
+
+	private $currentParent;
+
+	private $newWord = "";
+
+	protected $bodyStarted = false;
+
+	protected $bodyEnded = false;
+
+	private $whiteSpaceBeforeThis = false;
+
+	private $lastSibling;
+
+	function __construct(){
+		$this->bodyNode = $this->currentParent = new BodyNode();
+	}
+
+	public function getBodyNode() {
+		return $this->bodyNode;
+	}
+
+	public function getTextNodes() {
+		return $this->textNodes;
+	}
+
+	/**
+	 * Must be called manually
+	 */
+	public function endDocument() {
+		$this->endWord();
+		//wfDebug(sizeof($this->textNodes) . ' text nodes in document.');
+	}
+
+	public function startElement($parser, $name, /*array*/ $attributes) {
+		if(!strcasecmp($name, 'body')==0){
+			//wfDebug("Starting $name node.");
+			$this->endWord();
+
+			$newTagNode = new TagNode($this->currentParent, $name, $attributes);
+			$this->currentParent = $newTagNode;
+			$this->lastSibling = NULL;
+			if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) {
+				$newTagNode->setWhiteBefore(true);
+			}
+			$this->whiteSpaceBeforeThis = false;
+		}
+	}
+
+	public function endElement($parser, $name) {
+		if(!strcasecmp($name, 'body')==0){
+			//wfDebug("Ending $name node.");
+			if (0 == strcasecmp($name,'img')) {
+				// Insert a dummy leaf for the image
+				$img = new ImageNode($this->currentParent, $this->currentParent->getAttributes());
+				$img->setWhiteBefore($this->whiteSpaceBeforeThis);
+				$this->lastSibling = $img;
+				$this->textNodes[] = $img;
+			}
+			$this->endWord();
+			if ($this->currentParent->isInline()) {
+				$this->lastSibling = $this->currentParent;
+			} else {
+				$this->lastSibling = NULL;
+			}
+			$this->currentParent = $this->currentParent->getParent();
+			$this->whiteSpaceBeforeThis = false;
+		}else{
+			$this->endDocument();
+		}
+	}
+
+	public function characters($parser, $data){
+		//wfDebug('Parsing '. strlen($data).' characters.');
+		$array = str_split($data);
+		foreach($array as $c) {
+			if (self::isDelimiter($c)) {
+				$this->endWord();
+				if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre()
+				&& !$this->currentParent->inPre()) {
+					if (!is_null($this->lastSibling)){
+						$this->lastSibling->setWhiteAfter(true);
+					}
+					$this->whiteSpaceBeforeThis = true;
+				} else {
+					$textNode = new TextNode($this->currentParent, $c);
+					$textNode->setWhiteBefore($this->whiteSpaceBeforeThis);
+					$this->whiteSpaceBeforeThis = false;
+					$this->lastSibling = $textNode;
+					$this->textNodes[] = $textNode;
+				}
+			} else {
+				$this->newWord .= $c;
+			}
+
+		}
+	}
+
+	private function endWord() {
+		if (strlen($this->newWord) > 0) {
+			$node = new TextNode($this->currentParent, $this->newWord);
+			$node->setWhiteBefore($this->whiteSpaceBeforeThis);
+			$this->whiteSpaceBeforeThis = false;
+			$this->lastSibling = $node;
+			$this->textNodes[] = $node;
+			$this->newWord = "";
+		}
+	}
+
+	public static function isDelimiter($c) {
+		switch ($c) {
+			// Basic Delimiters
+			case '/':
+			case '.':
+			case '!':
+			case ',':
+			case ';':
+			case '?':
+			case '=':
+			case '\'':
+			case '"':
+				// Extra Delimiters
+			case '[':
+			case ']':
+			case '{':
+			case '}':
+			case '(':
+			case ')':
+			case '&':
+			case '|':
+			case '\\':
+			case '-':
+			case '_':
+			case '+':
+			case '*':
+			case ':':
+				return true;
+			default:
+				return WhiteSpaceNode::isWhiteSpace($c);
+		}
+	}
+
+	public function getDiffLines(){
+		return array_map(array('TextNode','toDiffLine'), $this->textNodes);
+	}
+}
+
+class TextNodeDiffer{
+
+	private $textNodes;
+	private $bodyNode;
+
+	private $oldTextNodes;
+	private $oldBodyNode;
+
+	private $lastModified = array();
+
+	function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
+		$this->textNodes = $tree->getTextNodes();
+		$this->bodyNode = $tree->getBodyNode();
+		$this->oldTextNodes = $oldTree->getTextNodes();
+		$this->oldBodyNode = $oldTree->getBodyNode();
+	}
+
+	public function getBodyNode() {
+		return $this->bodyNode;
+	}
+
+	private $newID = 0;
+
+	public function markAsNew($start, $end) {
+		if ($end <= $start)
+		return;
+
+		if ($this->whiteAfterLastChangedPart)
+		$this->textNodes[$start]->setWhiteBefore(false);
+
+		$nextLastModified = array();
+
+		for ($i = $start; $i < $end; ++$i) {
+			$mod = new Modification(Modification::ADDED);
+			$mod->setID($this->newID);
+			if (sizeof($this->lastModified) > 0) {
+				$mod->setPrevious($this->lastModified[0]);
+				if (is_null($this->lastModified[0]->getNext())) {
+					foreach ($this->lastModified as $lastMod) {
+						$lastMod->setNext($mod);
+					}
+				}
+			}
+			$nextLastModified[] = $mod;
+			$this->textNodes[$i]->setModification($mod);
+		}
+		if ($start < $end) {
+			$this->textNodes[$start]->getModification()->setFirstOfID(true);
+		}
+		++$this->newID;
+		$this->lastModified = $nextLastModified;
+	}
+
+	private $changedID = 0;
+
+	private $changedIDUsed = false;
+
+	public function handlePossibleChangedPart($leftstart, $leftend,	$rightstart, $rightend) {
+		$i = $rightstart;
+		$j = $leftstart;
+
+		if ($this->changedIDUsed) {
+			++$this->changedID;
+			$this->changedIDUsed = false;
+		}
+
+		$nextLastModified = array();
+
+		$changes;
+		while ($i < $rightend) {
+			$acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
+			$acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
+			$result = $acthis->getResult($acother);
+			unset($acthis, $acother);
+
+			$nbLastModified = sizeof($this->lastModified);
+			if ($result->isChanged()) {
+				$mod = new Modification(Modification::CHANGED);
+
+				if (!$this->changedIDUsed) {
+					$mod->setFirstOfID(true);
+					if (sizeof($nextLastModified) > 0) {
+						$this->lastModified = $nextLastModified;
+						$nextLastModified = array();
+					}
+				} else if (!is_null($result->getChanges()) && $result->getChanges() != $this->changes) {
+					++$this->changedID;
+					$mod->setFirstOfID(true);
+					if (sizeof($nextLastModified) > 0) {
+						$this->lastModified = $nextLastModified;
+						$nextLastModified = array();
+					}
+				}
+
+				if ($nbLastModified > 0) {
+					$mod->setPrevious($this->lastModified[0]);
+					if (is_null($this->lastModified[0]->getNext())) {
+						foreach ($this->lastModified as $lastMod) {
+							$lastMod->setNext($mod);
+						}
+					}
+				}
+				$nextLastModified[] = $mod;
+
+				$mod->setChanges($result->getChanges());
+				$mod->setID($this->changedID);
+
+				$this->textNodes[$i]->setModification($mod);
+				$this->changes = $result->getChanges();
+				$this->changedIDUsed = true;
+			} else if ($this->changedIDUsed) {
+				++$this->changedID;
+				$this->changedIDUsed = false;
+			}
+			++$i;
+			++$j;
+		}
+		if (sizeof($nextLastModified) > 0){
+			$this->lastModified = $nextLastModified;
+		}
+	}
+
+	// used to remove the whitespace between a red and green block
+	private $whiteAfterLastChangedPart = false;
+
+	private $deletedID = 0;
+
+	public function markAsDeleted($start, $end, $before) {
+
+		if ($end <= $start)
+		return;
+
+		if ($before > 0 && $this->textNodes[$before - 1]->isWhiteAfter()) {
+			$this->whiteAfterLastChangedPart = true;
+		} else {
+			$this->whiteAfterLastChangedPart = false;
+		}
+
+		$nextLastModified = array();
+
+		for ($i = $start; $i < $end; ++$i) {
+			$mod = new Modification(Modification::REMOVED);
+			$mod->setID($this->deletedID);
+			if (sizeof($this->lastModified) > 0) {
+				$mod->setPrevious($this->lastModified[0]);
+				if (is_null($this->lastModified[0]->getNext())) {
+					foreach ($this->lastModified as $lastMod) {
+						$lastMod->setNext($mod);
+					}
+				}
+			}
+			$nextLastModified[] = $mod;
+
+			// oldTextNodes is used here because we're going to move its deleted
+			// elements
+			// to this tree!
+			$this->oldTextNodes[$i]->setModification($mod);
+		}
+		$this->oldTextNodes[$start]->getModification()->setFirstOfID(true);
+
+		$deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID);
+
+		//wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes));
+
+		// Set prevLeaf to the leaf after which the old HTML needs to be
+		// inserted
+		if ($before > 0){
+			$prevLeaf = $this->textNodes[$before - 1];
+		}
+		// Set nextLeaf to the leaf before which the old HTML needs to be
+		// inserted
+		if ($before < sizeof($this->textNodes)){
+			$nextLeaf = $this->textNodes[$before];
+		}
+
+		while (sizeof($deletedNodes) > 0) {
+			if (isset($prevLeaf)) {
+				$prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
+			} else {
+				$prevResult = new LastCommonParentResult();
+				$prevResult->setLastCommonParent($this->getBodyNode());
+				$prevResult->setIndexInLastCommonParent(0);
+			}
+			if (isset($nextleaf)) {
+				$nextResult = $nextLeaf->getLastCommonParent($deletedNodes[sizeof($deletedNodes) - 1]);
+			} else {
+				$nextResult = new LastCommonParentResult();
+				$nextResult->setLastCommonParent($this->getBodyNode());
+				$nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNbChildren());
+			}
+
+			if ($prevResult->getLastCommonParentDepth() == $nextResult->getLastCommonParentDepth()) {
+				// We need some metric to choose which way to add-...
+				if ($deletedNodes[0]->getParent() === $deletedNodes[sizeof($deletedNodes) - 1]->getParent()
+				&& $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent()) {
+					// The difference is not in the parent
+					$prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
+				} else {
+					// The difference is in the parent, so compare them
+					// now THIS is tricky
+					$distancePrev = $deletedNodes[0]->getParent()->getMatchRatio($prevResult->getLastCommonParent());
+					$distanceNext = $deletedNodes[sizeof($deletedNodes) - 1]->getParent()->getMatchRatio($nextResult->getLastCommonParent());
+
+					if ($distancePrev <= $distanceNext) {
+						$prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
+					} else {
+						$nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1);
+					}
+				}
+
+			}
+
+			if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) {
+				// Inserting at the front
+				if ($prevResult->isSplittingNeeded()) {
+					$prevLeaf->getParent()->splitUntill($prevResult->getLastCommonParent(), $prevLeaf, true);
+				}
+				$prevLeaf = $deletedNodes[0]->copyTree();
+				unset($deletedNodes[0]);
+				$deletedNodes = array_values($deletedNodes);
+				$prevLeaf->setParent($prevResult->getLastCommonParent());
+				$prevResult->getLastCommonParent()->addChild($prevLeaf,$prevResult->getIndexInLastCommonParent() + 1);
+			} else if ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) {
+				// Inserting at the back
+				if ($nextResult->isSplittingNeeded()) {
+					$splitOccured = $nextLeaf->getParent()->splitUntill($nextResult->getLastCommonParent(), $nextLeaf, false);
+					if ($splitOccured) {
+						// The place where to insert is shifted one place to the
+						// right
+						$nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1);
+					}
+				}
+				$nextLeaf = $deletedNodes[sizeof(deletedNodes) - 1]->copyTree();
+				unset($deletedNodes[sizeof(deletedNodes) - 1]);
+				$deletedNodes = array_values($deletedNodes);
+				$nextLeaf->setParent($nextResult->getLastCommonParent());
+				$nextResult->getLastCommonParent()->addChild($nextLeaf,$nextResult->getIndexInLastCommonParent());
+			} else
+			throw new Exception("Uh?");
+		}
+		$this->lastModified = $nextLastModified;
+		++$this->deletedID;
+	}
+
+	public function expandWhiteSpace() {
+		$this->getBodyNode()->expandWhiteSpace();
+	}
+
+	public function lengthNew(){
+		return sizeof($this->textNodes);
+	}
+
+	public function lengthOld(){
+		return sizeof($this->oldTextNodes);
+	}
+}
+
+class HTMLDiffer{
+	
+	private $output;
+	
+	function __construct($output){
+		$this->output = $output;
+	}
+
+	function htmlDiff($from, $to){
+		// Create an XML parser
+		$xml_parser = xml_parser_create('');
+
+		$domfrom = new DomTreeBuilder();
+
+		// Set the functions to handle opening and closing tags
+		xml_set_element_handler($xml_parser, array($domfrom,"startElement"), array($domfrom,"endElement"));
+
+		// Set the function to handle blocks of character data
+		xml_set_character_data_handler($xml_parser, array($domfrom,"characters"));
+
+		;
+		//wfDebug('Parsing '.strlen($from)." characters worth of HTML");
+		if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
+		|| !xml_parse($xml_parser, $from, FALSE)
+		|| !xml_parse($xml_parser, '</body>', TRUE)){
+			wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
+		}
+		xml_parser_free($xml_parser);
+		unset($from);
+
+		$xml_parser = xml_parser_create('');
+
+		$domto = new DomTreeBuilder();
+
+		// Set the functions to handle opening and closing tags
+		xml_set_element_handler($xml_parser, array($domto,"startElement"), array($domto,"endElement"));
+
+		// Set the function to handle blocks of character data
+		xml_set_character_data_handler($xml_parser, array($domto,"characters"));
+
+		//wfDebug('Parsing '.strlen($to)." characters worth of HTML");
+		if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
+		|| !xml_parse($xml_parser, $to, FALSE)
+		|| !xml_parse($xml_parser, '</body>', TRUE)){
+			wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
+		}
+		xml_parser_free($xml_parser);
+		unset($to);
+
+		$diffengine = new _DiffEngine();
+		$differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
+		unset($xml_parser,$diffengine);
+
+		$domdiffer = new TextNodeDiffer($domto, $domfrom);
+
+		$currentIndexLeft = 0;
+		$currentIndexRight = 0;
+		foreach ($differences as $d) {
+			if ($d->leftstart > $currentIndexLeft) {
+				$domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
+				$currentIndexRight, $d->rightstart);
+			}
+			if ($d->leftlength > 0) {
+				$domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
+			}
+			$domdiffer->markAsNew($d->rightstart, $d->rightend);
+
+			$currentIndexLeft = $d->leftend;
+			$currentIndexRight = $d->rightend;
+		}
+		if ($currentIndexLeft < $domdiffer->lengthOld()) {
+			$domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew());
+		}
+
+		$domdiffer->expandWhiteSpace();
+		$output = new HTMLOutput('htmldiff', $this->output);
+		$output->parse($domdiffer->getBodyNode());
+	}
+
+	private function preProcess(/*array*/ $differences){
+		$newRanges = array();
+
+		$nbDifferences = sizeof($differences);
+		for ($i = 0; $i < $nbDifferences; ++$i) {
+			$leftStart = $differences[$i]->leftstart;
+			$leftEnd = $differences[$i]->leftend;
+			$rightStart = $differences[$i]->rightstart;
+			$rightEnd = $differences[$i]->rightend;
+
+			$leftLength = $leftEnd - $leftStart;
+			$rightLength = $rightEnd - $rightStart;
+
+			while ($i + 1 < $nbDifferences && self::score($leftLength, $differences[$i + 1]->leftlength,
+			$rightLength, $differences[$i + 1]->rightlength) > ($differences[$i + 1]->leftstart - $leftEnd)) {
+				$leftEnd = $differences[$i + 1]->leftend;
+				$rightEnd = $differences[$i + 1]->rightend;
+				$leftLength = $leftEnd - $leftStart;
+				$rightLength = $rightEnd - $rightStart;
+				++$i;
+			}
+			$newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
+		}
+		return $newRanges;
+	}
+
+	/**
+	 * Heuristic to merge differences for readability.
+	 */
+	public static function score($ll, $nll, $rl, $nrl) {
+		if (($ll == 0 && $nll == 0)
+		|| ($rl == 0 && $nrl == 0)){
+			return 0;
+		}
+		$numbers = array($ll, $nll, $rl, $nrl);
+		$d = 0;
+		foreach ($numbers as $number) {
+			while ($number > 3) {
+				$d += 3;
+				$number -= 3;
+				$number *= 0.5;
+			}
+			$d += $number;
+
+		}
+		return $d / (1.5 * sizeof($numbers));
+	}
+
+}
+
+class TextOnlyComparator{
+
+	public $leafs = array();
+
+	function _construct(TagNode $tree) {
+		$this->addRecursive($tree);
+		$this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
+	}
+
+	private function addRecursive(TagNode $tree) {
+		foreach ($tree->children as $child) {
+			if ($child instanceof TagNode) {
+				$this->addRecursive($child);
+			} else if ($child instanceof TextNode) {
+				$this->leafs[] = $node;
+			}
+		}
+	}
+
+	public function getMatchRatio(TextOnlyComparator $other) {
+		$nbOthers = sizeof($other->leafs);
+		$nbThis = sizeof($this->leafs);
+		if($nbOthers == 0 || $nbThis == 0){
+			return -log(0);
+		}
+		
+		$diffengine = new _DiffEngine();
+		$diffengine->diff_local($this->leafs, $other->leafs);
+
+		$distanceThis = array_sum($diffengine->xchanged);
+		$distanceOther = array_sum($diffengine->ychanged);
+
+		return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis)
+		/ $nbThis) / 2.0;
+	}
+}
+
+class AncestorComparatorResult {
+
+	private $changed = false;
+
+	private $changes = "";
+
+	public function isChanged() {
+		return $this->changed;
+	}
+
+	public function setChanged($changed) {
+		$this->changed = $changed;
+	}
+
+	public function getChanges() {
+		return $this->changes;
+	}
+
+	public function setChanges($changes) {
+		$this->changes = $changes;
+	}
+}
+
+/**
+ * A comparator used when calculating the difference in ancestry of two Nodes.
+ */
+class AncestorComparator{
+
+	public $ancestors;
+	public $ancestorsText;
+
+	function __construct(/*array*/ $ancestors) {
+		$this->ancestors = $ancestors;
+		$this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
+	}
+
+	private $compareTxt = "";
+
+	public function getCompareTxt() {
+		return $this->compareTxt;
+	}
+
+	public function getResult(AncestorComparator $other) {
+		$result = new AncestorComparatorResult();
+
+		$diffengine = new _DiffEngine();
+		$differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText);
+
+		if (sizeof($differences) == 0){
+			return $result;
+		}
+		$changeTxt = new ChangeTextGenerator($this, $other);
+
+		$result->setChanged(true);
+		$result->setChanges($changeTxt->getChanged($differences)->toString());
+
+		return $result;
+	}
+}
+
+class ChangeTextGenerator {
+
+	private $new;
+	private $old;
+
+	private $factory;
+
+	function __construct(AncestorComparator $old, AncestorComparator $new) {
+		$this->new = $new;
+		$this->old = $old;
+		$this->factory = new TagToStringFactory();
+	}
+
+	public function getChanged(/*array*/ $differences) {
+		$txt = new ChangeText;
+
+		$rootlistopened = false;
+
+		if (sizeof($differences) > 1) {
+			$txt->addHtml('<ul class="changelist">');
+			$rootlistopened = true;
+		}
+
+		$nbDifferences = sizeof($differences);
+		for ($j = 0; $j < $nbDifferences; ++$j) {
+			$d = $differences[$j];
+
+			$lvl1listopened = false;
+
+			if ($rootlistopened) {
+				$txt->addHtml('<li>');
+			}
+
+			if ($d->leftlength + $d->rightlength > 1) {
+				$txt->addHtml('<ul class="changelist">');
+				$lvl1listopened = true;
+			}
+
+			// left are the old ones
+			for ($i = $d->leftstart; $i < $d->leftend; ++$i) {
+				if ($lvl1listopened){
+					$txt->addHtml('<li>');
+				}
+				// add a bullet for a old tag
+				$this->addTagOld($txt, $this->old->ancestors[$i]);
+
+				if ($lvl1listopened){
+					$txt->addHtml('</li>');
+				}
+			}
+
+			// right are the new ones
+			for ($i = $d->rightstart; $i < $d->rightend; ++$i) {
+				if ($lvl1listopened){
+					$txt->addHtml('<li>');
+				}
+
+				// add a bullet for a new tag
+				$this->addTagNew($txt, $this->new->ancestors[$i]);
+
+				if ($lvl1listopened){
+					$txt->addHtml('</li>');
+				}
+
+			}
+
+			if ($lvl1listopened) {
+				$txt->addHtml('</ul>');
+			}
+
+			if ($rootlistopened) {
+				$txt->addHtml('</li>');
+			}
+		}
+
+		if ($rootlistopened) {
+			$txt->addHtml('</ul>');
+		}
+
+		return $txt;
+
+	}
+
+	private function addTagOld(ChangeText $txt, TagNode $ancestor) {
+		$this->factory->create($ancestor)->getRemovedDescription($txt);
+	}
+
+	private function addTagNew(ChangeText $txt, TagNode $ancestor) {
+		$this->factory->create($ancestor)->getAddedDescription($txt);
+	}
+}
+
+class ChangeText {
+
+	private $txt = "";
+
+	const newLine = "<br/>";
+
+	public function addText($s) {
+		$s = $this->clean($s);
+		$this->txt .= $s;
+	}
+
+	public function addHtml($s) {
+		$this->txt .= $s;
+	}
+
+	public function addNewLine() {
+		$this->addHtml(self::newLine);
+	}
+
+	public function toString() {
+		return $this->txt;
+	}
+
+	private function clean($s) {
+		return htmlspecialchars($s);
+	}
+}
+
+class TagToStringFactory {
+
+	private static $containerTags = array(
+        'html' => TRUE, 
+        'body' => TRUE, 
+        'p'  => TRUE, 
+        'blockquote' => TRUE, 
+        'h1' => TRUE, 
+        'h2' => TRUE, 
+        'h3' => TRUE, 
+        'h4' => TRUE, 
+        'h5' => TRUE, 
+        'pre' => TRUE, 
+        'div' => TRUE, 
+        'ul' => TRUE, 
+        'ol' => TRUE, 
+        'li' => TRUE, 
+        'table' => TRUE, 
+        'tbody' => TRUE, 
+        'tr' => TRUE, 
+        'td' => TRUE, 
+        'th' => TRUE, 
+        'br' => TRUE, 
+        'hr' => TRUE, 
+        'code' => TRUE, 
+        'dl' => TRUE, 
+        'dt' => TRUE, 
+        'dd' => TRUE, 
+        'input' => TRUE, 
+        'form' => TRUE, 
+        'img' => TRUE,
+	// in-line tags that can be considered containers not styles
+        'span' => TRUE, 
+        'a' => TRUE
+	);
+
+	private static $styleTags = array(
+        'i' => TRUE, 
+        'b' => TRUE, 
+        'strong' => TRUE, 
+        'em' => TRUE, 
+        'font' => TRUE, 
+        'big' => TRUE, 
+        'del' => TRUE, 
+        'tt' => TRUE, 
+        'sub' => TRUE, 
+        'sup' => TRUE, 
+        'strike' => TRUE
+	);
+
+	const MOVED = 1;
+	const STYLE = 2;
+	const UNKNOWN = 4;
+
+	public function create(TagNode $node) {
+		$sem = $this->getChangeSemantic($node->getQName());
+		if (0 == strcasecmp($node->getQName(),'a')){
+			return new AnchorToString($node, $sem);
+		}
+		if (0 == strcasecmp($node->getQName(),'img')){
+			return new NoContentTagToString($node, $sem);
+		}
+		return new TagToString($node, $sem);
+	}
+
+	protected function getChangeSemantic($qname) {
+		if (array_key_exists(strtolower($qname),self::$containerTags)){
+			return self::MOVED;
+		}
+		if (array_key_exists(strtolower($qname),self::$styleTags)){
+			return self::STYLE;
+		}
+		return self::UNKNOWN;
+	}
+}
+
+class TagToString {
+
+	protected $node;
+
+	protected $sem;
+
+	function __construct(TagNode $node, $sem) {
+		$this->node = $node;
+		$this->sem = $sem;
+	}
+
+	public function getDescription() {
+		return $this->getString('diff-' . $this->node->getQName());
+	}
+
+	public function getRemovedDescription(ChangeText $txt) {
+
+		if ($this->sem == TagToStringFactory::MOVED) {
+			$txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' ');
+			$txt->addHtml('<b>');
+			$txt->addText(strtolower($this->getDescription()));
+			$txt->addHtml('</b>');
+		} else if ($this->sem == TagToStringFactory::STYLE) {
+			$txt->addHtml('<b>');
+			$txt->addText($this->getDescription());
+			$txt->addHtml('</b>');
+			$txt->addText(' ' . strtolower($this->getStyleRemoved()));
+		} else {
+			$txt->addHtml('<b>');
+			$txt->addText($this->getDescription());
+			$txt->addHtml('</b>');
+			$txt->addText(' ' . strtolower($this->getRemoved()));
+		}
+		$this->addAttributes($txt, $this->node->getAttributes());
+		$txt->addText('.');
+	}
+
+	public function getAddedDescription(ChangeText $txt) {
+
+		if ($this->sem == TagToStringFactory::MOVED) {
+			$txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' ');
+			$txt->addHtml('<b>');
+			$txt->addText(strtolower($this->getDescription()));
+			$txt->addHtml('</b>');
+		} else if ($this->sem == TagToStringFactory::STYLE) {
+			$txt->addHtml('<b>');
+			$txt->addText($this->getDescription());
+			$txt->addHtml('</b>');
+			$txt->addText(' ' . strtolower($this->getStyleAdded()));
+		} else {
+			$txt->addHtml('<b>');
+			$txt->addText($this->getDescription());
+			$txt->addHtml('</b>');
+			$txt->addText(' ' . strtolower($this->getAdded()));
+		}
+		$this->addAttributes($txt, $this->node->getAttributes());
+		$txt->addText('.');
+	}
+
+	protected function getMovedTo() {
+		return $this->getString('diff-movedto');
+	}
+
+	protected function getStyleAdded() {
+		return $this->getString('diff-styleadded');
+	}
+
+	protected function getAdded() {
+		return $this->getString('diff-added');
+	}
+
+	protected function getMovedOutOf() {
+		return $this->getString('diff-movedoutof');
+	}
+
+	protected function getStyleRemoved() {
+		return $this->getString('diff-styleremoved');
+	}
+
+	protected function getRemoved() {
+		return $this->getString('diff-removed');
+	}
+
+	protected function addAttributes(ChangeText $txt, array $attributes) {
+		if (sizeof($attributes) < 1)
+		return;
+
+		$keys = array_keys($attributes);
+
+		$txt->addText(' ' . strtolower($this->getWith()) . ' '
+		. $this->translateArgument($keys[0]) . ' '
+		. $attributes[$keys[0]]);
+		for ($i = 1; $i < sizeof($attributes) - 1; $i++) {
+			$txt->addText(', ' . $this->translateArgument($keys[$i]) . ' '
+			. $attributes[$keys[$i]]);
+		}
+		if (sizeof($attributes) > 1) {
+			$txt->addText(' '
+			. strtolower($this->getAnd())
+			. ' '
+			. $this->translateArgument($keys[sizeof($attributes) - 1]) . ' '
+			. $attributes[$keys[sizeof($attributes) - 1]]);
+		}
+	}
+
+	private function getAnd() {
+		return $this->getString('diff-and');
+	}
+
+	private function getWith() {
+		return $this->getString('diff-with');
+	}
+
+	protected function translateArgument($name) {
+		if (0 == strcasecmp($name,'src'))
+		return strtolower($this->getSource());
+		if (0 == strcasecmp($name,'width'))
+		return strtolower($this->getWidth());
+		if (0 == strcasecmp($name,'height'))
+		return strtolower($this->getHeight());
+		return $name;
+	}
+
+	private function getHeight() {
+		return $this->getString('diff-height');
+	}
+
+	private function getWidth() {
+		return $this->getString('diff-width');
+	}
+
+	protected function getSource() {
+		return $this->getString('diff-source');
+	}
+
+	protected function getArticle() {
+		return $this->getString('diff-' . $this->node->getQName() . '-article');
+	}
+
+	public static $bundle = array(
+    'diff-movedto' => 'Moved to',
+	'diff-styleadded' => 'Style added',
+	'diff-added' => 'Added',
+	'diff-changedto' => 'Changed to',
+	'diff-movedoutof' => 'Moved out of',
+	'diff-styleremoved' => 'Style removed',
+	'diff-removed' => 'Removed',
+	'diff-changedfrom' => 'Changed from',
+	'diff-source' => 'Source',
+	'diff-withdestination' => 'With destination',
+	'diff-and' => 'And',
+	'diff-with' => 'With',
+	'diff-width' => 'Width',
+	'diff-height' => 'Height',
+	'diff-html-article' => 'A',
+	'diff-html' => 'Html page',
+	'diff-body-article' => 'A',
+	'diff-body' => 'Html document',
+	'diff-p-article' => 'A',
+	'diff-p' => 'Paragraph',
+	'diff-blockquote-article' => 'A',
+	'diff-blockquote' => 'Quote',
+	'diff-h1-article' => 'A',
+	'diff-h1' => 'Heading (level 1)',
+	'diff-h2-article' => 'A',
+	'diff-h2' => 'Heading (level 2)',
+	'diff-h3-article' => 'A',
+	'diff-h3' => 'Heading (level 3)',
+	'diff-h4-article' => 'A',
+	'diff-h4' => 'Heading (level 4)',
+	'diff-h5-article' => 'A',
+	'diff-h5' => 'Heading (level 5)',
+	'diff-pre-article' => 'A',
+	'diff-pre' => 'Preformatted block',
+	'diff-div-article' => 'A',
+	'diff-div' => 'Division',
+	'diff-ul-article' => 'An',
+	'diff-ul' => 'Unordered list',
+	'diff-ol-article' => 'An',
+	'diff-ol' => 'Ordered list',
+	'diff-li-article' => 'A',
+	'diff-li' => 'List item',
+	'diff-table-article' => 'A',
+	'diff-table' => 'Table',
+	'diff-tbody-article' => 'A',
+	'diff-tbody' => "Table's content",
+	'diff-tr-article' => 'A',
+	'diff-tr' => 'Row',
+	'diff-td-article' => 'A',
+	'diff-td' => 'Cell',
+	'diff-th-article' => 'A',
+	'diff-th' => 'Header',
+	'diff-br-article' => 'A',
+	'diff-br' => 'Break',
+	'diff-hr-article' => 'A',
+	'diff-hr' => 'Horizontal rule',
+	'diff-code-article' => 'A',
+	'diff-code' => 'Computer code block',
+	'diff-dl-article' => 'A',
+	'diff-dl' => 'Definition list',
+	'diff-dt-article' => 'A',
+	'diff-dt' => 'Definition term',
+	'diff-dd-article' => 'A',
+	'diff-dd' => 'Definition',
+	'diff-input-article' => 'An',
+	'diff-input' => 'Input',
+	'diff-form-article' => 'A',
+	'diff-form' => 'Form',
+	'diff-img-article' => 'An',
+	'diff-img' => 'Image',
+	'diff-span-article' => 'A',
+	'diff-span' => 'Span',
+	'diff-a-article' => 'A',
+	'diff-a' => 'Link',
+	'diff-i' => 'Italics',
+	'diff-b' => 'Bold',
+	'diff-strong' => 'Strong',
+	'diff-em' => 'Emphasis',
+	'diff-font' => 'Font',
+	'diff-big' => 'Big',
+	'diff-del' => 'Deleted',
+	'diff-tt' => 'Fixed width',
+	'diff-sub' => 'Subscript',
+	'diff-sup' => 'Superscript',
+	'diff-strike' => 'Strikethrough'
+	);
+
+	public function getString($key) {
+		return self::$bundle[$key];
+	}
+}
+
+class NoContentTagToString extends TagToString {
+
+	function __construct(TagNode $node, $sem) {
+		parent::__construct($node, $sem);
+	}
+
+	public function getAddedDescription(ChangeText $txt) {
+		$txt.addText($this->getChangedTo() . ' ' + strtolower($this->getArticle()) . ' ');
+		$txt.addHtml('<b>');
+		$txt.addText(strtolower($this->getDescription()));
+		$txt.addHtml('</b>');
+
+		$this->addAttributes($txt, $this->node->getAttributes());
+		$txt.addText('.');
+	}
+
+	private function getChangedTo() {
+		return $this->getString('diff-changedto');
+	}
+
+	public function getRemovedDescription(ChangeText $txt) {
+		$txt.addText($this->getChangedFrom() . ' ' + strtolower($this->getArticle()) . ' ');
+		$txt.addHtml('<b>');
+		$txt.addText(strtolower($this->getDescription()));
+		$txt.addHtml('</b>');
+
+		$this->addAttributes($txt, $this->node->getAttributes());
+		$txt.addText('.');
+	}
+
+	private function getChangedFrom() {
+		return $this->getString('diff-changedfrom');
+	}
+}
+
+class AnchorToString extends TagToString {
+
+	function __construct(TagNode $node, $sem) {
+		parent::__construct($node, $sem);
+	}
+
+	protected function addAttributes(ChangeText $txt, array $attributes) {
+		if (array_key_exists('href',$attributes)) {
+			$txt->addText(' ' . strtolower($this->getWithDestination()) . ' ' . $attributes['href']);
+			unset($attributes['href']);
+		}
+		parent::addAttributes($txt, $attributes);
+	}
+
+	private function getWithDestination() {
+		return $this->getString('diff-withdestination');
+	}
+
+}
+
+/**
+ * Takes a branch root and creates an HTML file for it.
+ */
+class HTMLOutput{
+
+	private $prefix;
+	private $handler;
+
+	function __construct($prefix, $handler) {
+		$this->prefix = $prefix;
+		$this->handler = $handler;
+	}
+
+	public function parse(TagNode $node) {
+
+		if (0 != strcasecmp($node->getQName(),'img') && 0 != strcasecmp($node->getQName(),'body')) {
+			$this->handler->startElement($node->getQName(), $node->getAttributes());
+		}
+
+		$newStarted = false;
+		$remStarted = false;
+		$changeStarted = false;
+		$changeTXT = '';
+
+		foreach ($node->children as $child) {
+			if ($child instanceof TagNode) {
+				if ($newStarted) {
+					$this->handler->endElement('span');
+					$newStarted = false;
+				} else if ($changeStarted) {
+					$this->handler->endElement('span');
+					$changeStarted = false;
+				} else if ($remStarted) {
+					$this->handler->endElement('span');
+					$remStarted = false;
+				}
+				$this->parse($child);
+			} else if ($child instanceof TextNode) {
+				$mod = $child->getModification();
+
+				if ($newStarted && ($mod->getType() != Modification::ADDED || $mod->isFirstOfID())) {
+					$this->handler->endElement('span');
+					$newStarted = false;
+				} else if ($changeStarted && ($mod->getType() != Modification::CHANGED || $mod->getChanges() != $changeTXT || $mod->isFirstOfID())) {
+					$this->handler->endElement('span');
+					$changeStarted = false;
+				} else if ($remStarted && ($mod->getType() != Modification::REMOVED || $mod ->isFirstOfID())) {
+					$this->handler->endElement('span');
+					$remStarted = false;
+				}
+
+				// no else because a removed part can just be closed and a new
+				// part can start
+				if (!$newStarted && $mod->getType() == Modification::ADDED) {
+					$attrs = array('class'=>'diff-html-added');
+					if ($mod->isFirstOfID()) {
+						$attrs['id'] = 'added-' . $this->prefix . '-' . $mod->getID();
+					}
+					$this->addAttributes($mod, $attrs);
+					$attrs['onclick'] = 'return tipA(constructToolTipA(this));';
+					$this->handler->startElement('span', $attrs);
+					$newStarted = true;
+				} else if (!$changeStarted && $mod->getType() == Modification::CHANGED) {
+					$attrs = array('class'=>'diff-html-changed');
+					if ($mod->isFirstOfID()) {
+						$attrs['id'] = 'changed-' . $this->prefix . '-' . $mod->getID();
+					}
+					$this->addAttributes($mod, $attrs);
+					$attrs['onclick'] = 'return tipC(constructToolTipC(this));';
+					$this->handler->startElement('span', $attrs);
+					
+					//tooltip
+					$this->handler->startElement('span', array('class'=>'tip'));
+					$this->handler->characters($mod->getChanges());
+					$this->handler->endElement('span');
+			
+					$changeStarted = true;
+					$changeTXT = $mod->getChanges();
+				} else if (!$remStarted && $mod->getType() == Modification::REMOVED) {
+					$attrs = array('class'=>'diff-html-removed');
+					if ($mod->isFirstOfID()) {
+						$attrs['id'] = 'removed-' . $this->prefix . '-' . $mod->getID();
+					}
+					$this->addAttributes($mod, $attrs);
+					$attrs['onclick'] = 'return tipR(constructToolTipR(this));';
+					$this->handler->startElement('span', $attrs);
+					$remStarted = true;
+				}
+
+				$chars = $child->getText();
+
+				if ($child instanceof ImageNode) {
+					$this->writeImage($child);
+				} else {
+					$this->handler->characters($chars);
+				}
+
+			}
+		}
+
+		if ($newStarted) {
+			$this->handler->endElement('span');
+			$newStarted = false;
+		} else if ($changeStarted) {
+			$this->handler->endElement('span');
+			$changeStarted = false;
+		} else if ($remStarted) {
+			$this->handler->endElement('span');
+			$remStarted = false;
+		}
+
+		if (0 != strcasecmp($node->getQName(),'img')
+		&& 0 != strcasecmp($node->getQName(),'body'))
+		$this->handler->endElement($node->getQName());
+	}
+
+	private function writeImage(ImageNode  $imgNode){
+		$attrs = $imgNode->getAttributes();
+		if ($imgNode->getModification()->getType() == Modification::REMOVED)
+		$attrs['changeType']='diff-removed-image';
+		else if ($imgNode->getModification()->getType() == Modification::ADDED)
+		$attrs['changeType'] = 'diff-added-image';
+		$attrs['onload'] = 'updateOverlays()';
+		$attrs['onError'] = 'updateOverlays()';
+		$attrs['onAbort'] = 'updateOverlays()';
+		$this->handler->startElement('img', $attrs);
+		$this->handler->endElement('img');
+	}
+
+	private function addAttributes(Modification $mod, /*array*/ &$attrs) {
+		if (is_null($mod->getPrevious())) {
+			$previous = 'first-' . $this->prefix;
+		} else {
+			$previous = Modification::typeToString($mod->getPrevious()->getType()) . '-' . $this->prefix . '-'
+			. $mod->getPrevious()->getID();
+		}
+		$attrs['previous'] = $previous;
+
+		$changeId = Modification::typeToString($mod->getType()) . '-' + $this->prefix . '-' . $mod->getID();
+		$attrs['changeId'] = $changeId;
+
+		if (is_null($mod->getNext())) {
+			$next = 'last-' . $this->prefix;
+		} else {
+			$next = Modification::typeToString($mod->getNext()->getType()) . '-' . $this->prefix . '-'
+			. $mod->getNext()->getID();
+		}
+		$attrs['next'] = $next;
+	}
+}
+
+class EchoingContentHandler{
+
+	function startElement($qname, /*array*/ $arguments){
+		echo '<'.$qname;
+		foreach($arguments as $key => $value){
+			echo ' '.$key.'="'.Sanitizer::encodeAttribute($value).'"';
+		}
+		echo '>';
+	}
+
+	function endElement($qname){
+		echo '</'.$qname.'>';
+	}
+
+	function characters($chars){
+		echo $chars;
+	}
+
+}
+
+class DelegatingContentHandler{
+	
+	private $delegate;
+	
+	function __construct($delegate){
+		$this->delegate=$delegate;
+	}
+
+	function startElement($qname, /*array*/ $arguments){
+		$this->delegate->addHtml('<'.$qname) ;
+		foreach($arguments as $key => $value){
+			$this->delegate->addHtml(' '.$key.'="'. Sanitizer::encodeAttribute($value) .'"');
+		}
+		$this->delegate->addHtml('>');
+	}
+
+	function endElement($qname){
+		$this->delegate->addHtml('</'.$qname.'>');
+	}
+
+	function characters($chars){
+		$this->delegate->addHtml( $chars );
+	}
+
+}
Index: trunk/phase3/includes/Diff.php
===================================================================
--- trunk/phase3/includes/Diff.php	(revision 39405)
+++ trunk/phase3/includes/Diff.php	(revision 39406)
@@ -18,454 +18,488 @@
  */
 
 /**
- * Implementation of the Diff algorithm.
- * 
- * DO NOT USE ON PRODUCTION SYSTEMS
- * 
- * The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm"
- * and extended with my own ideas.
+ * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which
+ * in turn is based on Myers' "An O(ND) difference algorithm and its variations"
+ * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s
+ * "An O(NP) Sequence Comparison Algorithm").
  *
- * @return array($from_removed, $to_added)
- * 		   TRUE if the token was removed or added.
+ * This implementation supports an upper bound on the excution time.
  *
+ * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space
+ *
  * @author Guy Van den Broeck
+ *
  */
-function wikidiff3_diff( /*array*/ $from, /*array*/ $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
-	wfProfileIn( __METHOD__ );
+class WikiDiff3{
 
-	$m = sizeof($from);
-	$n = sizeof($to);
+	//Input variables
+	private $from;
+	private $to;
+	private $m;
+	private $n;
 
-	$result_from =  array_fill(0,$m,TRUE);
-	$result_to =  array_fill(0,$n,TRUE);
+	private $tooLong;
+	private $powLimit;
 
-	//reduce the complexity for the next step (intentionally done twice)
-	//remove common tokens at the start
-	$i=0;
-	while($i<$m && $i<$n && $from[$i]===$to[$i]){
-		$result_from[$i] = $result_to[$i] = FALSE;
-		unset($from[$i],$to[$i]);
-		++$i;
+	//State variables
+	private $maxDifferences;
+	
+	//Output variables
+	public $length;
+	public $added;
+	public $removed;
+	public $heuristicUsed;
+	
+	function __construct($tooLong = 2000000, $powLimit = 1.45){
+		$this->tooLong = $tooLong;
+		$this->powLimit = $powLimit;
 	}
 
-	//remove common tokens at the end
-	$j=1;
-	while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
-		$result_from[$m-$j] = $result_to[$n-$j] = FALSE;
-		unset($from[$m-$j],$to[$n-$j]);
-		++$j;
-	}
+	public function diff(/*array*/ $from, /*array*/ $to){
+		$m = sizeof($from);
+		$n = sizeof($to);
+		
+		$this->heuristicUsed = FALSE;
 
-	$newFrom = $newFromIndex = $newTo = $newToIndex = array();
+		$result_from =  $m>0?array_fill(0,$m,true):array();
+		$result_to =  $n>0?array_fill(0,$n,true):array();
 
-	//remove tokens not in both sequences
-	$shared = array_fill_keys($from,FALSE);
-	foreach($to as $index => $el){
-		if(array_key_exists($el,$shared)){
-			//keep it
-			$shared[$el] = TRUE;
-			$newTo[] = $el;
-			$newToIndex[] = $index;
+		//reduce the complexity for the next step (intentionally done twice)
+		//remove common tokens at the start
+		$i=0;
+		while($i<$m && $i<$n && $from[$i]===$to[$i]){
+			$result_from[$i] = $result_to[$i] = false;
+			unset($from[$i],$to[$i]);
+			++$i;
 		}
-	}
-	foreach($from as $index => $el){
-		if($shared[$el]){
-			//keep it
-			$newFrom[] = $el;
-			$newFromIndex[] = $index;
+
+		//remove common tokens at the end
+		$j=1;
+		while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
+			$result_from[$m-$j] = $result_to[$n-$j] = false;
+			unset($from[$m-$j],$to[$n-$j]);
+			++$j;
 		}
-	}
 
-	unset($from, $to, $shared);
+		$newFrom = $newFromIndex = $newTo = $newToIndex = array();
 
-	$m2 = sizeof($newFrom);
-	$n2 = sizeof($newTo);
-	$offsetx = $offsety = 0;
-	$from_inLcs = new InLcs($m2);
-	$to_inLcs = new InLcs($n2);
+		//remove tokens not in both sequences
+		$shared = array_fill_keys($from,false);
+		foreach($to as $index => $el){
+			if(array_key_exists($el,$shared)){
+				//keep it
+				$this->to[] = $el;
+				$shared[$el] = true;
+				$newToIndex[] = $index;
+			}
+		}
+		foreach($from as $index => $el){
+			if($shared[$el]){
+				//keep it
+				$this->from[] = $el;
+				$newFromIndex[] = $index;
+			}
+		}
 
-	wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound);
+		unset($shared);
 
-	foreach($from_inLcs->inLcs as $key => $in){
-		if($in){
-			$result_from[$newFromIndex[$key]] = FALSE;
+		$this->m = sizeof($this->from);
+		$this->n = sizeof($this->to);
+		$this->removed =  $this->m>0?array_fill(0,$this->m,true):array();
+		$this->added =  $this->n>0?array_fill(0,$this->n,true):array();
+
+		$this->longestCommonSubsequence();
+
+		$this->m = $m;
+		$this->n = $n;
+		$this->length += $i+$j-1;
+
+		foreach($this->removed as $key => $removed){
+			if(!$removed){
+				$result_from[$newFromIndex[$key]] = false;
+			}
 		}
-	}
-	foreach($to_inLcs->inLcs as $key => $in){
-		if($in){
-			$result_to[$newToIndex[$key]] = FALSE;
+		foreach($this->added as $key => $added){
+			if(!$added){
+				$result_to[$newToIndex[$key]] = false;
+			}
 		}
+		unset($this->added, $this->removed);
+		return array($result_from, $result_to);
 	}
-	
-	wfProfileOut( __METHOD__ );
-	return array($result_from, $result_to);
-}
 
-function wikidiff3_diffPart( /*array*/ $a, /*array*/ $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
-	if($bestKnownLcs==0 || $m==0 || $n==0){
-		return;
-	}
-	
-	if($m>$n){
-		return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
-	}
+	public function longestCommonSubsequence() {
+		if ($this->m == 0 || $this->n == 0) {
+			$this->length = 0;
+			return;
+		}
 
-	$a_inLcs_sym = &$a_inLcs->inLcs;
-	$b_inLcs_sym = &$b_inLcs->inLcs;
+		$this->maxDifferences = ceil(($this->m + $this->n) / 2.0);
+		if ($this->m * $this->n > $this->tooLong) {
+			// limit complexity to D^POW_LIMIT for long sequences
+			$this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0));
+			wfDebug("Limiting max number of differences to $this->maxDifferences");
+		}
 
-	//remove common tokens at the start
-	while($m>0 && $n>0 && $a[0]===$b[0]){
-		$a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE;
-		++$offsetx;	++$offsety;
-		--$m; --$n;
-		--$bestKnownLcs;
-		array_shift($a);
-		array_shift($b);
-	}
+		/*
+		 * The common prefixes and suffixes are always part of some LCS, include
+		 * them now to reduce our search space
+		 */
+		$max = min($this->m, $this->n);
+		for ($forwardBound = 0; $forwardBound < $max
+		&& $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) {
+			$this->removed[$forwardBound] = $this->added[$forwardBound] = false;
+		}
 
-	//remove common tokens at the end
-	while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){
-		$a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE;
-		--$m; --$n;
-		--$bestKnownLcs;
-		array_pop($a);
-		array_pop($b);
-	}
+		$backBoundL1 = $this->m - 1;
+		$backBoundL2 = $this->n - 1;
 
-	if($bestKnownLcs==0 || $m==0 || $n==0){
-		return;
+		while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
+		&& $this->from[$backBoundL1] === $this->to[$backBoundL2]) {
+			$this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
+		}
+
+		$temp = array_fill(0,$this->m + $this->n + 1, 0);
+		$V = array($temp,$temp);
+		$snake = array(0,0,0);
+
+		$this->length = $forwardBound + $this->m - $backBoundL1 - 1
+		+ $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2,
+		$V, $snake);
 	}
-	if($m>$n){
-		return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
-	}
 
-	$delta = $n-$m;
-	$delta_plus_1 = $delta+1;
-	$delta_min_1 = $delta-1;
+	private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) {
+		// check that both sequences are non-empty
+		if ($bottoml1 > $topl1 || $bottoml2 > $topl2) {
+			return 0;
+		}
 
-	$fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1);
-	$lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0);
-	$fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n);
-	$overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array();
-	$bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1;
+		$d = $this->find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, $V, $snake);
 
-	if($boundRunningTime){
-		$maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2);
-		if($maxp_before_bound>=$m){
-			$boundRunningTime = false;
-			unset($maxp_before_bound);
+		// need to store these so we don't lose them when they're overwritten by
+		// the recursion
+		$len = $snake[2];
+		$startx = $snake[0];
+		$starty = $snake[1];
+
+		// the middle snake is part of the LCS, store it
+		for ($i = 0; $i < $len; ++$i) {
+			$this->removed[$startx + $i] = $this->added[$starty + $i] = false;
 		}
+
+		if ($d > 1) {
+			return $len
+			+ $this->lcs_rec($bottoml1, $startx - 1, $bottoml2, $starty - 1, $V, $snake)
+			+ $this->lcs_rec($startx + $len, $topl1, $starty + $len, $topl2, $V, $snake);
+		} else if ($d == 1) {
+			/*
+			 * In this case the sequences differ by exactly 1 line. We have
+			 * already saved all the lines after the difference in the for loop
+			 * above, now we need to save all the lines before the difference.
+			 */
+			$max = min($startx - $bottoml1, $starty - $bottoml2);
+			for ($i = 0; $i < $max; ++$i) {
+				$this->removed[$bottoml1 + $i] = $this->added[$bottoml2 + $i] = false;
+			}
+			return $max + $len;
+		}
+		return $len;
 	}
 
-	$p=-1;
-	$m_min_1 = $m-1;
-	$maxp=$m_min_1-$bestLcsLength;
+	private function find_middle_snake($bottoml1, $topl1, $bottoml2,$topl2, &$V, &$snake) {
+		$from = &$this->from;
+		$to = &$this->to;
+		$V0 = &$V[0];
+		$V1 = &$V[1];
+		$snake0 = &$snake[0];
+		$snake1 = &$snake[1];
+		$snake2 = &$snake[2];
+		$bottoml1_min_1 = $bottoml1-1;
+		$bottoml2_min_1 = $bottoml2-1;
+		$N = $topl1 - $bottoml1_min_1;
+		$M = $topl2 - $bottoml2_min_1;
+		$delta = $N - $M;
+		$maxabsx = $N+$bottoml1;
+		$maxabsy = $M+$bottoml2;
+		$limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
 
-	while($p<$maxp){
+		//value_to_add_forward: a 0 or 1 that we add to the start
+		// offset
+		// to make it odd/even
+		if (($M & 1) == 1) {
+			$value_to_add_forward = 1;
+		} else {
+			$value_to_add_forward = 0;
+		}
 
-		if($boundRunningTime && $p>$maxp_before_bound){
-			// bound the running time by stopping early
-			if($bestLcsLength>=0){
-				// accept the best LCS thusfar
-				break;
-			}else{
-				$bestLcsProgressForw = $bestkForw = 0;
-				foreach($lcsSizeForw as $k => $localLcsProgress){
-					if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){
-						$bestLcsProgressForw = $localLcsProgress;
-						$bestkForw = $k;
-					}
-				}
-				$bestLcsProgressBack = $bestkBack = 0;
-				foreach($lcsSizeBack as $k => $localLcsProgress){
-					if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){
-						$bestLcsProgressBack = $localLcsProgress;
-						$bestkBack = $k;
-					}
-				}
-				if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){
-					// This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway.
-				}else{
-					// lets do this
-					$topSnakeStart = $snakeBeginForw[$bestkForw];
-					$topSnakeEnd = $snakeEndForw[$bestkForw];
-					$topSnakek = $snakekForw[$bestkForw];
-					$bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd;
+		if (($N & 1) == 1) {
+			$value_to_add_backward = 1;
+		} else {
+			$value_to_add_backward = 0;
+		}
 
-					$bottomSnakeStart = $snakeEndBack[$bestkBack]+1;
-					$bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1;
-					$bottomSnakek = $snakekBack[$bestkBack];
-					$bestLcsLengthBottom =  $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd;
+		$start_forward = -$M;
+		$end_forward = $N;
+		$start_backward = -$N;
+		$end_backward = $M;
 
-					if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){
-						// cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered)
-						// also diff the middle now
-						if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){
-							// do the middle diff between the snakes
-							wfDebug("  Limiting diff run time from both sides, middle between snakes\n");
-							$m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek);
-							$n_t = $bottomSnakeStart-$topSnakeEnd;
-							$a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t);
-							$b_t = array_slice($b, $topSnakeEnd, $n_t);
-							$offsetx_t = $offsetx+($topSnakeEnd-$topSnakek);
-							$offsety_t = $offsety+$topSnakeEnd;
-						}else{
-							// the snake is too early in the sequence, do the middle diff between endpoints of progress
-							wfDebug("  Limiting diff run time from both sides, middle between endpoints\n");
-							$m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw);
-							$n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw];
-							$a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t);
-							$b_t = array_slice($b, $fpForw[$bestkForw], $n_t);
-							$offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw);
-							$offsety_t = $offsety+$fpForw[$bestkForw];
-						}
-						wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound);
-					}else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){
-						// the bottom snake is more interesting
-						wfDebug("Limiting diff run time from bottom side\n");
-						$topSnakeStart = $bottomSnakeStart;
-						$topSnakeEnd = $bottomSnakeEnd;
-						$topSnakek = $bottomSnakek;
-						$bestLcsLengthTop = $topSnakeEnd-$topSnakek;
-						$bottomSnakeStart = $bottomSnakeEnd;
-					}else{
-						// the top snake is more interesting
-						wfDebug("  Limiting diff run time from top side\n");
-						$bottomSnakeStart = $topSnakeEnd;
-						$bottomSnakeEnd = $topSnakeEnd;
-						$bottomSnakek = $topSnakek;
-						$bestLcsLengthBottom =  $m-($bottomSnakeEnd-$bottomSnakek);
-					}
-					break;
-				}
-			}
-		}
-		++$p;
-		$overlap[-$p] = $overlap[$delta+$p] = FALSE;
+		$limit_min_1 = $limit-1;
+		$limit_plus_1 = $limit+1;
 
-		$min_p_min_1 = -$p-1;
-		$delta_plus_1_plus_p = $delta_plus_1+$p;
+		$V0[$limit_plus_1] = 0;
+		$V1[$limit_min_1] = $N;
+		$limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
+			
+		if (($delta & 1) == 1) {
+			for ($d = 0; $d <= $limit; ++$d) {
+				$start_diag = max($value_to_add_forward + $start_forward, -$d);
+				$end_diag = min($end_forward, $d);
+				$value_to_add_forward = 1 - $value_to_add_forward;
 
-		// forward
-		$fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] =
-		$snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1;
-		$lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0;
+				// compute forward furthest reaching paths
+				for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+					if ($k == -$d
+					|| ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
+						$x = $V0[$limit_plus_1 + $k];
+					} else {
+						$x = $V0[$limit_min_1 + $k] + 1;
+					}
 
-		$k=-$p;
-		do {
-			$k_plus_1 = $k+1;
-			$k_min_1 = $k-1;
+					$absx = $snake0 = $x + $bottoml1;
+					$absy = $snake1 = $x - $k + $bottoml2;
+						
+					while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
+						++$absx;
+						++$absy;
+					}
+					$x = $absx-$bottoml1;
+						
+					$snake2 = $absx -$snake0;
+					$V0[$limit + $k] = $x;
+					if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1
+					&& $x >= $V1[$limit + $k - $delta]) {
+						return 2 * $d - 1;
+					}
 
-			$fpBelow = $fpForw[$k_min_1]+1;
-			$fpAbove = $fpForw[$k_plus_1];
-			$y = &$fpForw[$k];
-			if($fpBelow>$fpAbove){
-				$oldy = $y = $fpBelow;
-				$lcsSizeForw[$k] = $lcsSizeForw[$k_min_1];
-				$snakeBeginForw[$k] = $snakeBeginForw[$k_min_1];
-				$snakeEndForw[$k] = $snakeEndForw[$k_min_1];
-				$snakekForw[$k] = $snakekForw[$k_min_1];
-			}else{
-				$oldy = $y = $fpAbove;
-				$lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1];
-				$snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1];
-				$snakeEndForw[$k] = $snakeEndForw[$k_plus_1];
-				$snakekForw[$k] = $snakekForw[$k_plus_1];
-			}
-			$x = $y-$k;
-			while($x < $m && $y < $n && $a[$x]===$b[$y]){
-				++$x;
-				++$y;
-			}
-			if($y-$oldy>0){
-				$lcsSizeForw[$k] += $y-$oldy;
-				$snakeBeginForw[$k] = $oldy;
-				$snakeEndForw[$k]= $y;
-				$snakekForw[$k] = $k;
-			}
-			if($y>=$fpBack[$k] && !$overlap[$k]){
-				// there is overlap
-				$overlap[$k]=TRUE;
-				$lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
-				if($y>$fpBack[$k]+1){
-					$snakeoverlap = $y-$fpBack[$k]-1;
-					$lcsLength -= $snakeoverlap;
+					// check to see if we can cut down the diagonal range
+					if ($x >= $N && $end_forward > $k - 1) {
+						$end_forward = $k - 1;
+					} else if ($absy-$bottoml2 >= $M) {
+						$start_forward = $k + 1;
+						$value_to_add_forward = 0;
+					}
 				}
-				if($lcsLength>$bestLcsLength){
-					// a better sequence found!
-					$bestLcsLength = $lcsLength;
 
-					$topSnakeStart = $snakeBeginForw[$k];
-					$topSnakeEnd = $snakeEndForw[$k];
-					$topSnakek = $snakekForw[$k];
+				$start_diag = max($value_to_add_backward + $start_backward, -$d);
+				$end_diag = min($end_backward, $d);
+				$value_to_add_backward = 1 - $value_to_add_backward;
 
-					// aligned snakes bite (\   )
-					//                     ( \ \)
-					$bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
-					$bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
-					$bottomSnakek = $snakekBack[$k];
+				// compute backward furthest reaching paths
+				for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+					if ($k == $d
+					|| ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
+						$x = $V1[$limit_min_1 + $k];
+					} else {
+						$x = $V1[$limit_plus_1 + $k] - 1;
+					}
 
-					if($bottomSnakeEnd<$y){
-						$bottomSnakeStart = $y;
-						$bottomSnakeEnd = $y;
-						$bottomSnakek = $k;
+					$y = $x - $k - $delta;
+
+					$snake2 = 0;
+					while ($x > 0 && $y > 0
+					&& $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
+						--$x;
+						--$y;
+						++$snake2;
 					}
+					$V1[$limit + $k] = $x;
 
-					$bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
-					$bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
-					if($bestKnownLcs==$lcsLength){
-						$fpForw[$k]=$y;
-						break 2;
+					// check to see if we can cut down our diagonal range
+					if ($x <= 0) {
+						$start_backward = $k + 1;
+						$value_to_add_backward = 0;
+					} else if ($y <= 0 && $end_backward > $k - 1) {
+						$end_backward = $k - 1;
 					}
-					$maxp=$m_min_1-$bestLcsLength;
 				}
 			}
-			if($k<$delta_min_1){
-				++$k;
-			}else if($k>$delta){
-				--$k;
-			}else if($k==$delta_min_1){
-				$k = $delta+$p;
-			}else{
-				break;
-			}
-		} while(TRUE);
+		} else {
+			for ($d = 0; $d <= $limit; ++$d) {
+				$start_diag = max($value_to_add_forward + $start_forward, -$d);
+				$end_diag = min($end_forward, $d);
+				$value_to_add_forward = 1 - $value_to_add_forward;
 
-		// backward
-		$fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] =
-		$fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n;
-		$lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0;
+				// compute forward furthest reaching paths
+				for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+					if ($k == -$d
+					|| ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
+						$x = $V0[$limit_plus_1 + $k];
+					} else {
+						$x = $V0[$limit_min_1 + $k] + 1;
+					}
 
-		$k=$delta+$p;
-		do {
-			$k_plus_1 = $k+1;
-			$k_min_1 = $k-1;
+					$absx = $snake0 = $x + $bottoml1;
+					$absy = $snake1 = $x - $k + $bottoml2;
+						
+					while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
+						++$absx;
+						++$absy;
+					}
+					$x = $absx-$bottoml1;
+					$snake2 = $absx -$snake0;
+					$V0[$limit + $k] = $x;
 
-			$fpBelow = $fpBack[$k_min_1];
-			$fpAbove = $fpBack[$k_plus_1]-1;
-			$y = &$fpBack[$k];
-			if($fpBelow<$fpAbove){
-				$oldy = $y = $fpBelow;
-				$lcsSizeBack[$k] = $lcsSizeBack[$k_min_1];
-				$snakeBeginBack[$k] = $snakeBeginBack[$k_min_1];
-				$snakeEndBack[$k] = $snakeEndBack[$k_min_1];
-				$snakekBack[$k] = $snakekBack[$k_min_1];
-			}else{
-				$oldy = $y = $fpAbove;
-				$lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1];
-				$snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1];
-				$snakeEndBack[$k] = $snakeEndBack[$k_plus_1];
-				$snakekBack[$k] = $snakekBack[$k_plus_1];
-			}
-			$x = $y-$k;
-			while($x > -1 && $y > -1 && $a[$x]===$b[$y]){
-				--$x;
-				--$y;
-			}
-			if($oldy-$y>0){
-				$lcsSizeBack[$k] += $oldy-$y;
-				$snakeBeginBack[$k] = $oldy;
-				$snakeEndBack[$k] = $y;
-				$snakekBack[$k] = $k;
-			}
-			if($fpForw[$k]>=$y && !$overlap[$k]){
-				// there is overlap
-				$overlap[$k]=TRUE;
-				$lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
-				if($fpForw[$k]>$y+1){
-					$snakeoverlap = $fpForw[$k]-$y-1;
-					$lcsLength -= $snakeoverlap;
+					// check to see if we can cut down the diagonal range
+					if ($x >= $N && $end_forward > $k - 1) {
+						$end_forward = $k - 1;
+					} else if ($absy-$bottoml2 >= $M) {
+						$start_forward = $k + 1;
+						$value_to_add_forward = 0;
+					}
 				}
-				if($lcsLength>$bestLcsLength){
-					// a better sequence found!
-					$bestLcsLength = $lcsLength;
 
-					$topSnakeStart = $snakeBeginForw[$k];
-					$topSnakeEnd = $snakeEndForw[$k];
-					$topSnakek = $snakekForw[$k];
+				$start_diag = max($value_to_add_backward + $start_backward, -$d);
+				$end_diag = min($end_backward, $d);
+				$value_to_add_backward = 1 - $value_to_add_backward;
 
-					// aligned snakes bite (\   )
-					//                     ( \ \)
-					$bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
-					$bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
-					$bottomSnakek = $snakekBack[$k];
+				// compute backward furthest reaching paths
+				for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+					if ($k == $d
+					|| ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
+						$x = $V1[$limit_min_1 + $k];
+					} else {
+						$x = $V1[$limit_plus_1 + $k] - 1;
+					}
 
-					if($bottomSnakeEnd<$fpForw[$k]){
-						$bottomSnakeStart = $fpForw[$k];
-						$bottomSnakeEnd = $fpForw[$k];
-						$bottomSnakek = $k;
+					$y = $x - $k - $delta;
+
+					$snake2 = 0;
+					while ($x > 0 && $y > 0
+					&& $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
+						--$x;
+						--$y;
+						++$snake2;
 					}
+					$V1[$limit + $k] = $x;
 
-					$bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
-					$bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
-					if($bestKnownLcs==$lcsLength){
-						$fpBack[$k] = $y;
-						break 2;
+					if ($k >= -$delta - $d && $k <= $d - $delta
+					&& $x <= $V0[$limit + $k + $delta]) {
+						$snake0 = $bottoml1 + $x;
+						$snake1 = $bottoml2 + $y;
+						return 2 * $d;
 					}
-					$maxp=$m_min_1-$bestLcsLength;
+
+					// check to see if we can cut down our diagonal range
+					if ($x <= 0) {
+						$start_backward = $k + 1;
+						$value_to_add_backward = 0;
+					} else if ($y <= 0 && $end_backward > $k - 1) {
+						$end_backward = $k - 1;
+					}
 				}
 			}
-			if($k>1){
-				--$k;
-			}else if($k<0){
-				++$k;
-			}else if($k==1){
-				$k = -$p;
-			}else{
-				break;
-			}
-		} while(TRUE);
-	}
+		}
+		/*
+		 * computing the true LCS is too expensive, instead find the diagonal
+		 * with the most progress and pretend a midle snake of length 0 occurs
+		 * there.
+		 */
 
-	unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack);
-	unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack);
-	unset($overlap);
+		$most_progress = self::findMostProgress($M, $N, $limit, $V);
 
-	// Mark snakes as in LCS
-	$maxi = $offsetx+$topSnakeEnd-$topSnakek;
-	for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){
-		$a_inLcs_sym[$i] = TRUE;
+		$snake0 = $bottoml1 + $most_progress[0];
+		$snake1 = $bottoml2 + $most_progress[1];
+		$snake2 = 0;
+		wfDebug('Computing the LCS is too expensive. Using a heuristic.');
+		$this->heuristicUsed = true;
+		return 5; /*
+		* HACK: since we didn't really finish the LCS computation
+		* we don't really know the length of the SES. We don't do
+		* anything with the result anyway, unless it's <=1. We know
+		* for a fact SES > 1 so 5 is as good a number as any to
+		* return here
+		*/
 	}
-	$maxi = $offsety+$topSnakeEnd;
-	for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){
-		$b_inLcs_sym[$i] = TRUE;
-	}
-	$maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek;
-	for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){
-		$a_inLcs_sym[$i] = TRUE;
-	}
-	$maxi = $offsety+$bottomSnakeEnd;
-	for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){
-		$b_inLcs_sym[$i] = TRUE;
-	}
 
-	$m_t = $topSnakeStart-$topSnakek;
-	$a_t = array_slice($a, 0, $m_t);
-	$b_t = array_slice($b, 0, $topSnakeStart);
+	private static function findMostProgress($M, $N, $limit, $V) {
+		$delta = $N - $M;
 
-	$m_b = $m-($bottomSnakeEnd-$bottomSnakek);
-	$n_b = $n-$bottomSnakeEnd;
-	$a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b);
-	$b_b = array_slice($b, $bottomSnakeEnd, $n_b);
+		if (($M & 1) == ($limit & 1)) {
+			$forward_start_diag = max(-$M, -$limit);
+		} else {
+			$forward_start_diag = max(1 - $M, -$limit);
+		}
 
-	wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound);
+		$forward_end_diag = min($N, $limit);
 
-	wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound);
-}
+		if (($N & 1) == ($limit & 1)) {
+			$backward_start_diag = max(-$N, -$limit);
+		} else {
+			$backward_start_diag = max(1 - $N, -$limit);
+		}
 
-class InLcs {
+		$backward_end_diag = -min($M, $limit);
 
-	public $inLcs;
+		$temp = array(0,0,0);
 
-	function __construct($length){
-		$this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array();
-	}
 
-	/**
-	 * Get the length of the Longest Common Subsequence (computed)
-	 */
-	public function getLcsLength(){
-		return array_sum($this->inLcs);
+		$max_progress = array_fill(0,ceil(max($forward_end_diag
+		- $forward_start_diag, $backward_end_diag - $backward_start_diag) / 2), $temp);
+		$num_progress = 0; // the 1st entry is current, it is initialized
+		// with 0s
+
+		// first search the forward diagonals
+		for ($k = $forward_start_diag; $k <= $forward_end_diag; $k += 2) {
+			$x = $V[0][$limit + $k];
+			$y = $x - $k;
+			if ($x > $N || $y > $M) {
+				continue;
+			}
+
+			$progress = $x + $y;
+			if ($progress > $max_progress[0][2]) {
+				$num_progress = 0;
+				$max_progress[0][0] = $x;
+				$max_progress[0][1] = $y;
+				$max_progress[0][2] = $progress;
+			} else if ($progress == $max_progress[0][2]) {
+				++$num_progress;
+				$max_progress[$num_progress][0] = $x;
+				$max_progress[$num_progress][1] = $y;
+				$max_progress[$num_progress][2] = $progress;
+			}
+		}
+
+		$max_progress_forward = true; // initially the maximum
+		// progress is in the forward
+		// direction
+
+		// now search the backward diagonals
+		for ($k = $backward_start_diag; $k <= $backward_end_diag; $k += 2) {
+			$x = $V[1][$limit + $k];
+			$y = $x - $k - $delta;
+			if ($x < 0 || $y < 0) {
+				continue;
+			}
+
+			$progress = $N - $x + $M - $y;
+			if ($progress > $max_progress[0][2]) {
+				$num_progress = 0;
+				$max_progress_forward = false;
+				$max_progress[0][0] = $x;
+				$max_progress[0][1] = $y;
+				$max_progress[0][2] = $progress;
+			} else if ($progress == $max_progress[0][2] && !$max_progress_forward) {
+				++$num_progress;
+				$max_progress[$num_progress][0] = $x;
+				$max_progress[$num_progress][1] = $y;
+				$max_progress[$num_progress][2] = $progress;
+			}
+		}
+
+		// return the middle diagonal with maximal progress.
+		return $max_progress[floor($num_progress / 2)];
 	}
 
 }
+
Index: trunk/phase3/includes/DifferenceEngine.php
===================================================================
--- trunk/phase3/includes/DifferenceEngine.php	(revision 39405)
+++ trunk/phase3/includes/DifferenceEngine.php	(revision 39406)
@@ -74,9 +74,10 @@
 	}
 
 	function showDiffPage( $diffOnly = false ) {
-		global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol;
+		global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol, $wgEnableHtmlDiff;
 		wfProfileIn( __METHOD__ );
 
+
 		# If external diffs are enabled both globally and for the user,
 		# we'll use the application/x-external-editor interface to call
 		# an external diff tool like kompare, kdiff3, etc.
@@ -265,11 +266,16 @@
 			'<div id="mw-diff-ntitle3">' . $newminor . $sk->revComment( $this->mNewRev, !$diffOnly, true ) . $rdel . "</div>" .
 			'<div id="mw-diff-ntitle4">' . $nextlink . $patrol . '</div>';
 
-		$this->showDiff( $oldHeader, $newHeader );
+		if($wgEnableHtmlDiff){
+			$this->renderHtmlDiff();
+		}else{
 
-		if ( !$diffOnly )
-		$this->renderNewRevision();
+			$this->showDiff( $oldHeader, $newHeader );
 
+			if ( !$diffOnly ){
+				$this->renderNewRevision();
+			}
+		}
 		wfProfileOut( __METHOD__ );
 	}
 
@@ -319,6 +325,65 @@
 		wfProfileOut( __METHOD__ );
 	}
 
+
+	function renderHtmlDiff() {
+		global $wgOut, $IP;
+		wfProfileIn( __METHOD__ );
+
+		$this->showDiffStyle();
+
+		require_once( "$IP/includes/HTMLDiff.php" );
+
+		#add deleted rev tag if needed
+		if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
+			$wgOut->addWikiMsg( 'rev-deleted-text-permission' );
+		} else if( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) {
+			$wgOut->addWikiMsg( 'rev-deleted-text-view' );
+		}
+
+		if( !$this->mNewRev->isCurrent() ) {
+			$oldEditSectionSetting = $wgOut->parserOptions()->setEditSection( false );
+		}
+
+		$this->loadText();
+
+		if( is_object( $this->mOldRev ) ) {
+			$wgOut->setRevisionId( $this->mOldRev->getId() );
+		}
+
+		global $wgTitle, $wgParser, $wgTitle;
+		$popts = $wgOut->parserOptions();
+		$oldTidy = $popts->setTidy( TRUE );
+
+		$parserOutput = $wgParser->parse($this->mOldtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId );
+		$popts->setTidy( $oldTidy );
+
+		//only for new?
+		//$wgOut->addParserOutputNoText( $parserOutput );
+		$oldHtml =	$parserOutput->getText();
+		wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$oldHtml ) );
+
+		if( is_object( $this->mNewRev ) ) {
+			$wgOut->setRevisionId( $this->mNewRev->getId() );
+		}
+
+		$popts = $wgOut->parserOptions();
+		$oldTidy = $popts->setTidy( TRUE );
+
+		$parserOutput = $wgParser->parse($this->mNewtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId );
+		$popts->setTidy( $oldTidy );
+
+		//only for new?
+		$wgOut->addParserOutputNoText( $parserOutput );
+		$newHtml =	$parserOutput->getText();
+		wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$newHtml ) );
+
+		$differ = new HTMLDiffer(new DelegatingContentHandler($wgOut));
+		$differ->htmlDiff($oldHtml, $newHtml);
+
+		wfProfileOut( __METHOD__ );
+	}
+
 	/**
 	 * Show the first revision of an article. Uses normal diff headers in
 	 * contrast to normal "old revision" display style.
@@ -893,6 +958,30 @@
 }
 
 /**
+ * Alternative representation of a set of changes, by the index
+ * ranges that are changed.
+ */
+class RangeDifference {
+
+	public $leftstart;
+	public $leftend;
+	public $leftlength;
+
+	public $rightstart;
+	public $rightend;
+	public $rightlength;
+
+	function __construct($leftstart, $leftend, $rightstart, $rightend){
+		$this->leftstart = $leftstart;
+		$this->leftend = $leftend;
+		$this->leftlength = $leftend-$leftstart;
+		$this->rightstart = $rightstart;
+		$this->rightend = $rightend;
+		$this->rightlength = $rightend-$rightstart;
+	}
+}
+
+/**
  * Class used internally by Diff to actually compute the diffs.
  *
  * The algorithm used here is mostly lifted from the perl module
@@ -920,22 +1009,108 @@
 
 	const MAX_XREF_LENGTH =  10000;
 
-	function diff ($from_lines, $to_lines) {
+	function diff ($from_lines, $to_lines){
 		wfProfileIn( __METHOD__ );
 
-		global $wgExternalDiffEngine;
+		// Diff and store locally
+		$this->diff_local($from_lines, $to_lines);
 
+		// Merge edits when possible
+		$this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
+		$this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
+
+		// Compute the edit operations.
 		$n_from = sizeof($from_lines);
 		$n_to = sizeof($to_lines);
 
+		$edits = array();
+		$xi = $yi = 0;
+		while ($xi < $n_from || $yi < $n_to) {
+			USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
+			USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
+
+			// Skip matching "snake".
+			$copy = array();
+			while ( $xi < $n_from && $yi < $n_to
+			&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
+				$copy[] = $from_lines[$xi++];
+				++$yi;
+			}
+			if ($copy)
+			$edits[] = new _DiffOp_Copy($copy);
+
+			// Find deletes & adds.
+			$delete = array();
+			while ($xi < $n_from && $this->xchanged[$xi])
+			$delete[] = $from_lines[$xi++];
+
+			$add = array();
+			while ($yi < $n_to && $this->ychanged[$yi])
+			$add[] = $to_lines[$yi++];
+
+			if ($delete && $add)
+			$edits[] = new _DiffOp_Change($delete, $add);
+			elseif ($delete)
+			$edits[] = new _DiffOp_Delete($delete);
+			elseif ($add)
+			$edits[] = new _DiffOp_Add($add);
+		}
+		wfProfileOut( __METHOD__ );
+		return $edits;
+	}
+
+	function diff_range ($from_lines, $to_lines){
+		wfProfileIn( __METHOD__ );
+
+		// Diff and store locally
+		$this->diff_local($from_lines, $to_lines);
+
+		// Compute the ranges operations.
+		$n_from = sizeof($from_lines);
+		$n_to = sizeof($to_lines);
+
+		$ranges = array();
+		$xi = $yi = 0;
+		while ($xi < $n_from || $yi < $n_to) {
+			// Matching "snake".
+			while ( $xi < $n_from && $yi < $n_to
+			&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
+				++$xi;
+				++$yi;
+			}
+			// Find deletes & adds.
+			$xstart = $xi;
+			while ($xi < $n_from && $this->xchanged[$xi]){
+				++$xi;
+			}
+
+			$ystart = $yi;
+			while ($yi < $n_to && $this->ychanged[$yi]){
+				++$yi;
+			}
+
+			if ($xi>$xstart || $yi>$ystart){
+				$ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi);
+			}
+		}
+		wfProfileOut( __METHOD__ );
+		return $ranges;
+	}
+
+	function diff_local ($from_lines, $to_lines) {
+		global $wgExternalDiffEngine;
+		wfProfileIn( __METHOD__);
+
 		if($wgExternalDiffEngine == 'wikidiff3'){
 			// wikidiff3
 			global $IP;
 			require_once( "$IP/includes/Diff.php" );
-			list($this->xchanged, $this->ychanged) = wikidiff3_diff($from_lines, $to_lines, TRUE, 100000);
+			$wikidiff3 = new WikiDiff3();
+			list($this->xchanged, $this->ychanged) = $wikidiff3->diff($from_lines, $to_lines);
 		}else{
 			// old diff
-			wfProfileIn( __METHOD__ ." - basicdiff");
+			$n_from = sizeof($from_lines);
+			$n_to = sizeof($to_lines);
 			$this->xchanged = $this->ychanged = array();
 			$this->xv = $this->yv = array();
 			$this->xind = $this->yind = array();
@@ -980,48 +1155,8 @@
 
 			// Find the LCS.
 			$this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
-			wfProfileOut( __METHOD__ ." - basicdiff");
 		}
-
-		// Merge edits when possible
-		$this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
-		$this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
-
-		// Compute the edit operations.
-		$edits = array();
-		$xi = $yi = 0;
-		while ($xi < $n_from || $yi < $n_to) {
-			USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
-			USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
-
-			// Skip matching "snake".
-			$copy = array();
-			while ( $xi < $n_from && $yi < $n_to
-			&& !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
-				$copy[] = $from_lines[$xi++];
-				++$yi;
-			}
-			if ($copy)
-			$edits[] = new _DiffOp_Copy($copy);
-
-			// Find deletes & adds.
-			$delete = array();
-			while ($xi < $n_from && $this->xchanged[$xi])
-			$delete[] = $from_lines[$xi++];
-
-			$add = array();
-			while ($yi < $n_to && $this->ychanged[$yi])
-			$add[] = $to_lines[$yi++];
-
-			if ($delete && $add)
-			$edits[] = new _DiffOp_Change($delete, $add);
-			elseif ($delete)
-			$edits[] = new _DiffOp_Delete($delete);
-			elseif ($add)
-			$edits[] = new _DiffOp_Add($add);
-		}
 		wfProfileOut( __METHOD__ );
-		return $edits;
 	}
 
 	/**
@@ -1077,7 +1212,6 @@
 		$numer = $xlim - $xoff + $nchunks - 1;
 		$x = $xoff;
 		for ($chunk = 0; $chunk < $nchunks; $chunk++) {
-			wfProfileIn( __METHOD__ . "-chunk" );
 			if ($chunk > 0)
 			for ($i = 0; $i <= $this->lcs; $i++)
 			$ymids[$i][$chunk-1] = $this->seq[$i];
@@ -1130,7 +1264,6 @@
 		if ($end == 0 || $ypos > $this->seq[$end]) {
 			$this->seq[++$this->lcs] = $ypos;
 			$this->in_seq[$ypos] = 1;
-			wfProfileOut( __METHOD__ );
 			return $this->lcs;
 		}
 

Image changes

Index: /trunk/phase3/skins/common/images/diffunderline.gif
added

Follow-up revisions

Rev.Commit summaryAuthorDate
r39415Fixes for r39406:...ialex16:51, 15 August 2008

Status & tagging log

  • 15:30, 12 September 2011 Meno25 (Talk | contribs) changed the status of r39406 [removed: ok added: old]
Personal tools
Namespaces
Variants
Views
Actions
Site
Support
Download
Development
Communication
Toolbox