Index: trunk/phase3/skins/common/diff.js
===================================================================
--- trunk/phase3/skins/common/diff.js (revision 39405)
+++ trunk/phase3/skins/common/diff.js (revision 39406)
@@ -17,4 +17,16 @@
lastSheet.insertRule(
"table.diff td div { overflow: visible; }",
lastSheet.cssRules.length);
-}
\ No newline at end of file
+}
+
+function tipA(content){
+ return false;
+}
+
+function tipR(content){
+ return false;
+}
+
+function tipC(content){
+ return false;
+}
Index: trunk/phase3/skins/common/diff.css
===================================================================
--- trunk/phase3/skins/common/diff.css (revision 39405)
+++ trunk/phase3/skins/common/diff.css (revision 39406)
@@ -74,3 +74,66 @@
table: */
/* overflow: visible; */
}
+
+/*
+ * Styles for the HTML Diff
+ */
+span.diff-html-added {
+ font-size: 100%;
+ background-color: #20ff20
+}
+
+span.diff-html-removed {
+ font-size: 100%;
+ text-decoration: line-through;
+ background-color: #ff2020
+}
+
+span.diff-html-changed {
+ background: url(images/diffunderline.gif) bottom repeat-x;
+ *background-color: #c6c6fd; /* light blue */
+}
+
+span.diff-html-added img{
+ border: 5px solid #ccffcc;
+}
+
+span.diff-html-removed img{
+ border: 5px solid #fdc6c6;
+}
+
+span.diff-html-changed img{
+ border: 5px dotted #000099;
+
+}
+
+span.diff-html-changed {
+ position: relative; /* this is key */
+ cursor: help;
+}
+
+span.diff-html-changed span.tip {
+ display: none; /* so is this */
+}
+
+/* tooltip will display on :hover event */
+
+span.diff-html-changed:hover span.tip {
+ display: block;
+ z-index: 95;
+ position: absolute;
+ top: 2.5em;
+ left: 0;
+ width: auto;
+ line-height: 1.2em;
+ padding: 3px 7px 4px 6px;
+ border: 1px solid #336;
+ background-color: #f7f7ee;
+ font-family: arial, helvetica, sans-serif;
+ font-size: 10px;
+ font-weight: normal;
+ color: #000;
+ text-align: left;
+}
+
+
Index: trunk/phase3/skins/common/images/diffunderline.gif
===================================================================
--- trunk/phase3/skins/common/images/diffunderline.gif (revision 0)
+++ trunk/phase3/skins/common/images/diffunderline.gif (revision 39406)
@@ -0,0 +1 @@
+GIF89a ‘ ÿÿÿÛÿÿ ™ !ù ÿ , „%!g ;
\ No newline at end of file
Index: trunk/phase3/includes/HTMLDiff.php
===================================================================
--- trunk/phase3/includes/HTMLDiff.php (revision 0)
+++ trunk/phase3/includes/HTMLDiff.php (revision 39406)
@@ -0,0 +1,1997 @@
+<?php
+/* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * or see http://www.gnu.org/
+ */
+
+
+/**
+ * Any element in the DOM tree of an HTML document.
+ */
+abstract class Node{
+
+ protected $parent;
+
+
+ function __construct($parent){
+ $this->parent = $parent;
+ if(!is_null($parent)){
+ $parent->addChild($this);
+ }
+ }
+
+ public function getParent(){
+ return $this->parent;
+ }
+
+ public function getParentTree(){
+ if(!is_null($this->parent)){
+ $parentTree = $this->parent->getParentTree();
+ $parentTree[] = $this->parent;
+ return $parentTree;
+ }else{
+ return array();
+ }
+ }
+
+ public abstract function getMinimalDeletedSet($id);
+
+ public function detectIgnorableWhiteSpace(){
+ // no op
+ }
+
+ public function getLastCommonParent(Node $other){
+ if(is_null($other))
+ throw new Exception('The given node is NULL');
+
+ $result = new LastCommonParentResult();
+
+ $myParents = $this->getParentTree();
+ $otherParents = $other->getParentTree();
+
+ $i = 1;
+ $isSame = true;
+ while ($isSame && $i < sizeof($myParents) && $i < sizeof($otherParents)) {
+ if (!$myParents[$i]->isSameTag($otherParents[$i])) {
+ $isSame = false;
+ } else {
+ // After the while, the index i-1 must be the last common parent
+ $i++;
+ }
+ }
+
+ $result->setLastCommonParentDepth($i - 1);
+ $result->setLastCommonParent($myParents[$i - 1]);
+
+ if (!$isSame) {
+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
+ $result->setSplittingNeeded();
+ } else if (sizeof($myParents) < sizeof($otherParents)) {
+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
+ } else if (sizeof($myParents) > sizeof($otherParents)) {
+ // All tags matched but there are tags left in this tree
+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($myParents[$i]));
+ $result->setSplittingNeeded();
+ } else {
+ // All tags matched untill the very last one in both trees
+ // or there were no tags besides the BODY
+ $result->setIndexInLastCommonParent($myParents[$i - 1]->getIndexOf($this));
+ }
+ return $result;
+ }
+
+ public function setParent($parent) {
+ $this->parent = $parent;
+ }
+
+ public abstract function copyTree();
+
+ public function inPre() {
+ $tree = $this->getParentTree();
+ foreach ($tree as $ancestor) {
+ if ($ancestor->isPre()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private $whiteBefore = false;
+
+ private $whiteAfter = false;
+
+ public function isWhiteBefore() {
+ return $this->whiteBefore;
+ }
+
+ public function setWhiteBefore($whiteBefore) {
+ $this->whiteBefore = $whiteBefore;
+ }
+
+ public function isWhiteAfter() {
+ return $this->whiteAfter;
+ }
+
+ public function setWhiteAfter($whiteAfter) {
+ $this->whiteAfter = $whiteAfter;
+ }
+
+ public abstract function getLeftMostChild();
+
+ public abstract function getRightMostChild();
+}
+
+/**
+ * Node that can contain other nodes. Represents an HTML tag.
+ */
+class TagNode extends Node{
+
+ public $children = array();
+
+ protected $qName;
+
+ protected $attributes = array();
+
+ function __construct($parent, $qName, /*array*/ $attributes) {
+ parent::__construct($parent);
+ $this->qName = strtolower($qName);
+ foreach($attributes as $key => $value){
+ $this->attributes[strtolower($key)] = $value;
+ }
+ }
+
+ public function addChild(Node $node, $index=-1) {
+ if ($node->getParent() !== $this)
+ throw new Exception(
+ 'The new child must have this node as a parent.');
+ if($index == -1){
+ $this->children[] = $node;
+ }else{
+ array_splice($this->children,$index,0,array($node));
+ }
+ }
+
+ public function getIndexOf(Node $child) {
+ // don't trust array_search with objects
+ foreach($this->children as $key=>$value){
+ if($value === $child){
+ return $key;
+ }
+ }
+ return NULL;
+ }
+
+ public function getChild($i) {
+ return $this->children[$i];
+ }
+
+ public function getNbChildren() {
+ return count($this->children);
+ }
+
+ public function getQName() {
+ return $this->qName;
+ }
+
+ public function getAttributes() {
+ return $this->attributes;
+ }
+
+ public function isSameTag(TagNode $other) {
+ if (is_null($other))
+ return false;
+ return $this->getOpeningTag() === $other->getOpeningTag();
+ }
+
+ public function getOpeningTag() {
+ $s = '<'.$this->getQName();
+ foreach ($this->attributes as $attribute => $value) {
+ $s .= ' ' . $attribute . '="' . $value . '"';
+ }
+ return $s .= '>';
+ }
+
+ public function getEndTag() {
+ return '</' . $this->getQName() + '>"';
+ }
+
+ public function getMinimalDeletedSet($id) {
+ $nodes = array();
+
+ if ($this->getNbChildren() == 0)
+ return $nodes;
+
+ $hasNotDeletedDescendant = false;
+
+ foreach ($this->children as $child) {
+ $childrenChildren = $child->getMinimalDeletedSet($id);
+ $nodes = array_merge($nodes, $childrenChildren);
+ if (!$hasNotDeletedDescendant
+ && !(count($childrenChildren) == 1 && $childrenChildren[0]===$child)) {
+ // This child is not entirely deleted
+ $hasNotDeletedDescendant = true;
+ }
+ }
+ if (!$hasNotDeletedDescendant) {
+ $nodes = array($this);
+ }
+ return $nodes;
+ }
+
+ public function toString() {
+ return $this->getOpeningTag();
+ }
+
+ public function splitUntill(TagNode $parent, Node $split, $includeLeft) {
+ $splitOccured = false;
+ if ($parent !== $this) {
+ $part1 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+ $part2 = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+ $part1->setParent($this->getParent());
+ $part2->setParent($this->getParent());
+
+ $i = 0;
+ $nbChildren = $this->getNbChildren();
+ while ($i < $nbChildren && $this->children[$i] !== $split) {
+ $this->children[$i]->setParent($part1);
+ $part1->addChild($this->children[$i]);
+ ++$i;
+ }
+ if ($i < $nbChildren) {
+ if ($includeLeft) {
+ $this->children[$i]->setParent($part1);
+ $part1->addChild($this->children[$i]);
+ } else {
+ $this->children[$i]->setParent($part2);
+ $part2->addChild($this->children[$i]);
+ }
+ ++$i;
+ }
+ while ($i < $nbChildren) {
+ $this->children[$i]->setParent($part2);
+ $part2->addChild($this->children[$i]);
+ ++$i;
+ }
+ $myindexinparent = $this->parent->getIndexOf($this);
+ if ($part1->getNbChildren() > 0)
+ $this->parent->addChild($part1,$myindexinparent);
+
+ if ($part2->getNbChildren() > 0)
+ $this->parent->addChild($part2,$myindexinparent);
+
+ if ($part1->getNbChildren() > 0 && $part2->getNbChildren() > 0) {
+ $splitOccured = true;
+ }
+
+ $this->parent->removeChild($myindexinparent);
+
+ if ($includeLeft)
+ $this->parent->splitUntill($parent, $part1, $includeLeft);
+ else
+ $this->parent->splitUntill($parent, $part2, $includeLeft);
+ }
+ return $splitOccured;
+
+ }
+
+ private function removeChild($index) {
+ unset($this->children[$index]);
+ $this->children = array_values($this->children);
+ }
+
+ public static $blocks = array('html'=>TRUE,'body'=>TRUE,'p'=>TRUE,'blockquote'=>TRUE,
+ 'h1'=>TRUE,'h2'=>TRUE,'h3'=>TRUE,'h4'=>TRUE,'h5'=>TRUE,'pre'=>TRUE,'div'=>TRUE,'ul'=>TRUE,'ol'=>TRUE,'li'=>TRUE,
+ 'table'=>TRUE,'tbody'=>TRUE,'tr'=>TRUE,'td'=>TRUE,'th'=>TRUE,'br'=>TRUE);
+
+ public static function isBlockLevelName($qName) {
+ return array_key_exists(strtolower($qName),self::$blocks);
+ }
+
+ public static function isBlockLevelNode(Node $node) {
+ if(! $node instanceof TagNode)
+ return false;
+ return self::isBlockLevelName($node->getQName());
+ }
+
+ public function isBlockLevel() {
+ return  self::isBlockLevelNode($this);
+ }
+
+ public static function isInlineName($qName) {
+ return !self::isBlockLevelName($qName);
+ }
+
+ public static function isInlineNode(Node $node) {
+ return !self::isBlockLevelNode($node);
+ }
+
+ public function isInline() {
+ return self::isInlineNode($this);
+ }
+
+ public function copyTree() {
+ $newThis = new TagNode(NULL, $this->getQName(), $this->getAttributes());
+ $newThis->setWhiteBefore($this->isWhiteBefore());
+ $newThis->setWhiteAfter($this->isWhiteAfter());
+ foreach($this->children as $child) {
+ $newChild = $child->copyTree();
+ $newChild->setParent($newThis);
+ $newThis->addChild($newChild);
+ }
+ return $newThis;
+ }
+
+ public function getMatchRatio(TagNode $other) {
+ $txtComp = new TextOnlyComparator($other);
+ return $txtComp->getMatchRatio(new TextOnlyComparator($this));
+ }
+
+ public function expandWhiteSpace() {
+ $shift = 0;
+ $spaceAdded = false;
+
+ $nbOriginalChildren = $this->getNbChildren();
+ for ($i = 0; $i < $nbOriginalChildren; ++$i) {
+ $child = $this->getChild($i + $shift);
+
+ if($child instanceof TagNode){
+ if (!$child->isPre()) {
+ $child->expandWhiteSpace();
+ }
+ }
+ if (!$spaceAdded && $child->isWhiteBefore()) {
+ $ws = new WhiteSpaceNode(NULL, ' ', $child->getLeftMostChild());
+ $ws->setParent($this);
+ $this->addChild($ws,$i + ($shift++));
+ }
+ if ($child->isWhiteAfter()) {
+ $ws = new WhiteSpaceNode(NULL, ' ', $child->getRightMostChild());
+ $ws->setParent($this);
+ $this->addChild($ws,$i + 1 + ($shift++));
+ $spaceAdded = true;
+ } else {
+ $spaceAdded = false;
+ }
+
+ }
+ }
+
+ public function getLeftMostChild() {
+ if ($this->getNbChildren() < 1)
+ return $this;
+ return $this->getChild(0)->getLeftMostChild();
+
+ }
+
+ public function getRightMostChild() {
+ if ($this->getNbChildren() < 1)
+ return $this;
+ return $this->getChild($this->getNbChildren() - 1)->getRightMostChild();
+ }
+
+ public function isPre() {
+ return 0 == strcasecmp($this->getQName(),'pre');
+ }
+
+ public static function toDiffLine(TagNode $node){
+ return $node->getOpeningTag();
+ }
+}
+
+/**
+ * Represents a piece of text in the HTML file.
+ */
+class TextNode extends Node{
+
+ private $s;
+
+ private $modification;
+
+ function __construct($parent, $s) {
+ parent::__construct($parent);
+ $this->modification = new Modification(Modification::NONE);
+ $this->s = $s;
+ }
+
+ public function copyTree() {
+ $clone = clone $this;
+ $clone->setParent(NULL);
+ return $clone;
+ }
+
+ public function getLeftMostChild() {
+ return $this;
+ }
+
+ public function getRightMostChild() {
+ return $this;
+ }
+
+ public function getMinimalDeletedSet($id) {
+ if ($this->getModification()->getType() == Modification::REMOVED
+ && $this->getModification()->getID() == $id){
+ return array($this);
+ }
+ return array();
+ }
+
+ public function getModification() {
+ return $this->modification;
+ }
+
+ public function getText() {
+ return $this->s;
+ }
+
+ public function isSameText($other) {
+ if (is_null($other) || ! $other instanceof TextNode){
+ return false;
+ }
+ return str_replace('\n', ' ',$this->getText()) === str_replace('\n', ' ',$other->getText());
+ }
+
+ public function setModification(Modification $m) {
+ //wfDebug("Registered modification for node '$this->s' as ".Modification::typeToString($m->getType()));
+ $this->modification = $m;
+ }
+
+ public function toString() {
+ return $this->getText();
+ }
+
+ public static function toDiffLine(TextNode $node){
+ return str_replace('\n', ' ',$node->getText());
+ }
+}
+
+class WhiteSpaceNode extends TextNode {
+
+ function __construct($parent, $s, Node $like = NULL) {
+ parent::__construct($parent, $s);
+ if(!is_null($like) && $like instanceof TextNode){
+ $newModification = clone $like->getModification();
+ $newModification->setFirstOfID(false);
+ $this->setModification($newModification);
+ }
+ }
+
+ public static function isWhiteSpace($c) {
+ switch ($c) {
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ return true;
+ default:
+ return false;
+ }
+ }
+}
+
+/**
+ * Represents the root of a HTML document.
+ */
+class BodyNode extends TagNode {
+
+ function __construct() {
+ parent::__construct(NULL, 'body', array());
+ }
+
+ public function copyTree() {
+ $newThis = new BodyNode();
+ foreach ($this->children as $child) {
+ $newChild = $child->copyTree();
+ $newChild->setParent($newThis);
+ $newThis->addChild($newChild);
+ }
+ return $newThis;
+ }
+
+ public function getMinimalDeletedSet($id) {
+ $nodes = array();
+ foreach ($this->children as $child) {
+ $childrenChildren = $child->getMinimalDeletedSet($id);
+ $nodes = array_merge($nodes, $childrenChildren);
+ }
+ return $nodes;
+ }
+
+}
+
+/**
+ * Represents an image in HTML. Even though images do not contain any text they
+ * are independent visible objects on the page. They are logically a TextNode.
+ */
+class ImageNode extends TextNode {
+
+ private $attributes;
+
+ function __construct(TagNode $parent, /*array*/ $attrs) {
+ if(!array_key_exists('src',$attrs)){
+ //wfDebug('Image without a source:');
+ foreach($attrs as $key => $value){
+ //wfDebug("$key = $value");
+ }
+ parent::__construct($parent, '<img></img>');
+ }else{
+ parent::__construct($parent, '<img>' . strtolower($attrs['src']) . '</img>');
+ }
+ $this->attributes = $attrs;
+ }
+
+ public function isSameText($other) {
+ if (is_null($other) || ! $other instanceof ImageNode)
+ return false;
+ return $this->getText() === $other->getText();
+ }
+
+ public function getAttributes() {
+ return $this->attributes;
+ }
+
+}
+
+/**
+ * When detecting the last common parent of two nodes, all results are stored as
+ * a LastCommonParentResult.
+ */
+class LastCommonParentResult {
+
+ // Parent
+ private $parent;
+
+ public function getLastCommonParent() {
+ return $this->parent;
+ }
+
+ public function setLastCommonParent(TagNode $parent) {
+ $this->parent = $parent;
+ }
+
+ // Splitting
+ private $splittingNeeded = false;
+
+ public function isSplittingNeeded() {
+ return $this->splittingNeeded;
+ }
+
+ public function setSplittingNeeded() {
+ $this->splittingNeeded = true;
+ }
+
+ // Depth
+ private $lastCommonParentDepth = -1;
+
+ public function getLastCommonParentDepth() {
+ return $this->lastCommonParentDepth;
+ }
+
+ public function setLastCommonParentDepth($depth) {
+ $this->lastCommonParentDepth = $depth;
+ }
+
+ // Index
+ private $indexInLastCommonParent = -1;
+
+ public function getIndexInLastCommonParent() {
+ return $this->indexInLastCommonParent;
+ }
+
+ public function setIndexInLastCommonParent($index) {
+ $this->indexInLastCommonParent = $index;
+ }
+}
+
+class Modification{
+
+ const NONE = 1;
+ const REMOVED = 2;
+ const ADDED = 4;
+ const CHANGED = 8;
+
+ private $type;
+
+ private $id = -1;
+
+ private $prevMod;
+
+ private $nextMod;
+
+ private $firstOfID = false;
+
+ function __construct($type) {
+ $this->type = $type;
+ }
+
+ public function copy() {
+ $newM = new Modification($this->getType());
+ $newM->setID($this->getID());
+ $newM->setChanges($this->getChanges());
+ $newM->setFirstOfID($this->isFirstOfID());
+ $newM->setNext($this->getNext());
+ $newM->setPrevious($this->getPrevious());
+ return $newM;
+ }
+
+ public function getType() {
+ return $this->type;
+ }
+
+ public function setID($id) {
+ $this->id = $id;
+ }
+
+ public function getID() {
+ return $this->id;
+ }
+
+ public function setPrevious($m) {
+ $this->prevMod = $m;
+ }
+
+ public function getPrevious() {
+ return $this->prevMod;
+ }
+
+ public function setNext($m) {
+ $this->nextMod = $m;
+ }
+
+ public function getNext() {
+ return $this->nextMod;
+ }
+
+ private $changes;
+
+ public function setChanges($changes) {
+ $this->changes = $changes;
+ }
+
+ public function getChanges() {
+ return $this->changes;
+ }
+
+ public function isFirstOfID() {
+ return $this->firstOfID;
+ }
+
+ public function setFirstOfID($firstOfID) {
+ $this->firstOfID = $firstOfID;
+ }
+
+ public static function typeToString($type){
+ switch($type){
+ case self::NONE: return 'none';
+ case self::REMOVED: return 'removed';
+ case self::ADDED: return 'added';
+ case self::CHANGED: return 'changed';
+ }
+ }
+}
+
+class DomTreeBuilder {
+
+ private $textNodes = array();
+
+ private $bodyNode;
+
+ private $currentParent;
+
+ private $newWord = "";
+
+ protected $bodyStarted = false;
+
+ protected $bodyEnded = false;
+
+ private $whiteSpaceBeforeThis = false;
+
+ private $lastSibling;
+
+ function __construct(){
+ $this->bodyNode = $this->currentParent = new BodyNode();
+ }
+
+ public function getBodyNode() {
+ return $this->bodyNode;
+ }
+
+ public function getTextNodes() {
+ return $this->textNodes;
+ }
+
+ /**
+ * Must be called manually
+ */
+ public function endDocument() {
+ $this->endWord();
+ //wfDebug(sizeof($this->textNodes) . ' text nodes in document.');
+ }
+
+ public function startElement($parser, $name, /*array*/ $attributes) {
+ if(!strcasecmp($name, 'body')==0){
+ //wfDebug("Starting $name node.");
+ $this->endWord();
+
+ $newTagNode = new TagNode($this->currentParent, $name, $attributes);
+ $this->currentParent = $newTagNode;
+ $this->lastSibling = NULL;
+ if ($this->whiteSpaceBeforeThis && $newTagNode->isInline()) {
+ $newTagNode->setWhiteBefore(true);
+ }
+ $this->whiteSpaceBeforeThis = false;
+ }
+ }
+
+ public function endElement($parser, $name) {
+ if(!strcasecmp($name, 'body')==0){
+ //wfDebug("Ending $name node.");
+ if (0 == strcasecmp($name,'img')) {
+ // Insert a dummy leaf for the image
+ $img = new ImageNode($this->currentParent, $this->currentParent->getAttributes());
+ $img->setWhiteBefore($this->whiteSpaceBeforeThis);
+ $this->lastSibling = $img;
+ $this->textNodes[] = $img;
+ }
+ $this->endWord();
+ if ($this->currentParent->isInline()) {
+ $this->lastSibling = $this->currentParent;
+ } else {
+ $this->lastSibling = NULL;
+ }
+ $this->currentParent = $this->currentParent->getParent();
+ $this->whiteSpaceBeforeThis = false;
+ }else{
+ $this->endDocument();
+ }
+ }
+
+ public function characters($parser, $data){
+ //wfDebug('Parsing '. strlen($data).' characters.');
+ $array = str_split($data);
+ foreach($array as $c) {
+ if (self::isDelimiter($c)) {
+ $this->endWord();
+ if (WhiteSpaceNode::isWhiteSpace($c) && !$this->currentParent->isPre()
+ && !$this->currentParent->inPre()) {
+ if (!is_null($this->lastSibling)){
+ $this->lastSibling->setWhiteAfter(true);
+ }
+ $this->whiteSpaceBeforeThis = true;
+ } else {
+ $textNode = new TextNode($this->currentParent, $c);
+ $textNode->setWhiteBefore($this->whiteSpaceBeforeThis);
+ $this->whiteSpaceBeforeThis = false;
+ $this->lastSibling = $textNode;
+ $this->textNodes[] = $textNode;
+ }
+ } else {
+ $this->newWord .= $c;
+ }
+
+ }
+ }
+
+ private function endWord() {
+ if (strlen($this->newWord) > 0) {
+ $node = new TextNode($this->currentParent, $this->newWord);
+ $node->setWhiteBefore($this->whiteSpaceBeforeThis);
+ $this->whiteSpaceBeforeThis = false;
+ $this->lastSibling = $node;
+ $this->textNodes[] = $node;
+ $this->newWord = "";
+ }
+ }
+
+ public static function isDelimiter($c) {
+ switch ($c) {
+ // Basic Delimiters
+ case '/':
+ case '.':
+ case '!':
+ case ',':
+ case ';':
+ case '?':
+ case '=':
+ case '\'':
+ case '"':
+ // Extra Delimiters
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '(':
+ case ')':
+ case '&':
+ case '|':
+ case '\\':
+ case '-':
+ case '_':
+ case '+':
+ case '*':
+ case ':':
+ return true;
+ default:
+ return WhiteSpaceNode::isWhiteSpace($c);
+ }
+ }
+
+ public function getDiffLines(){
+ return array_map(array('TextNode','toDiffLine'), $this->textNodes);
+ }
+}
+
+class TextNodeDiffer{
+
+ private $textNodes;
+ private $bodyNode;
+
+ private $oldTextNodes;
+ private $oldBodyNode;
+
+ private $lastModified = array();
+
+ function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
+ $this->textNodes = $tree->getTextNodes();
+ $this->bodyNode = $tree->getBodyNode();
+ $this->oldTextNodes = $oldTree->getTextNodes();
+ $this->oldBodyNode = $oldTree->getBodyNode();
+ }
+
+ public function getBodyNode() {
+ return $this->bodyNode;
+ }
+
+ private $newID = 0;
+
+ public function markAsNew($start, $end) {
+ if ($end <= $start)
+ return;
+
+ if ($this->whiteAfterLastChangedPart)
+ $this->textNodes[$start]->setWhiteBefore(false);
+
+ $nextLastModified = array();
+
+ for ($i = $start; $i < $end; ++$i) {
+ $mod = new Modification(Modification::ADDED);
+ $mod->setID($this->newID);
+ if (sizeof($this->lastModified) > 0) {
+ $mod->setPrevious($this->lastModified[0]);
+ if (is_null($this->lastModified[0]->getNext())) {
+ foreach ($this->lastModified as $lastMod) {
+ $lastMod->setNext($mod);
+ }
+ }
+ }
+ $nextLastModified[] = $mod;
+ $this->textNodes[$i]->setModification($mod);
+ }
+ if ($start < $end) {
+ $this->textNodes[$start]->getModification()->setFirstOfID(true);
+ }
+ ++$this->newID;
+ $this->lastModified = $nextLastModified;
+ }
+
+ private $changedID = 0;
+
+ private $changedIDUsed = false;
+
+ public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
+ $i = $rightstart;
+ $j = $leftstart;
+
+ if ($this->changedIDUsed) {
+ ++$this->changedID;
+ $this->changedIDUsed = false;
+ }
+
+ $nextLastModified = array();
+
+ $changes;
+ while ($i < $rightend) {
+ $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
+ $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
+ $result = $acthis->getResult($acother);
+ unset($acthis, $acother);
+
+ $nbLastModified = sizeof($this->lastModified);
+ if ($result->isChanged()) {
+ $mod = new Modification(Modification::CHANGED);
+
+ if (!$this->changedIDUsed) {
+ $mod->setFirstOfID(true);
+ if (sizeof($nextLastModified) > 0) {
+ $this->lastModified = $nextLastModified;
+ $nextLastModified = array();
+ }
+ } else if (!is_null($result->getChanges()) && $result->getChanges() != $this->changes) {
+ ++$this->changedID;
+ $mod->setFirstOfID(true);
+ if (sizeof($nextLastModified) > 0) {
+ $this->lastModified = $nextLastModified;
+ $nextLastModified = array();
+ }
+ }
+
+ if ($nbLastModified > 0) {
+ $mod->setPrevious($this->lastModified[0]);
+ if (is_null($this->lastModified[0]->getNext())) {
+ foreach ($this->lastModified as $lastMod) {
+ $lastMod->setNext($mod);
+ }
+ }
+ }
+ $nextLastModified[] = $mod;
+
+ $mod->setChanges($result->getChanges());
+ $mod->setID($this->changedID);
+
+ $this->textNodes[$i]->setModification($mod);
+ $this->changes = $result->getChanges();
+ $this->changedIDUsed = true;
+ } else if ($this->changedIDUsed) {
+ ++$this->changedID;
+ $this->changedIDUsed = false;
+ }
+ ++$i;
+ ++$j;
+ }
+ if (sizeof($nextLastModified) > 0){
+ $this->lastModified = $nextLastModified;
+ }
+ }
+
+ // used to remove the whitespace between a red and green block
+ private $whiteAfterLastChangedPart = false;
+
+ private $deletedID = 0;
+
+ public function markAsDeleted($start, $end, $before) {
+
+ if ($end <= $start)
+ return;
+
+ if ($before > 0 && $this->textNodes[$before - 1]->isWhiteAfter()) {
+ $this->whiteAfterLastChangedPart = true;
+ } else {
+ $this->whiteAfterLastChangedPart = false;
+ }
+
+ $nextLastModified = array();
+
+ for ($i = $start; $i < $end; ++$i) {
+ $mod = new Modification(Modification::REMOVED);
+ $mod->setID($this->deletedID);
+ if (sizeof($this->lastModified) > 0) {
+ $mod->setPrevious($this->lastModified[0]);
+ if (is_null($this->lastModified[0]->getNext())) {
+ foreach ($this->lastModified as $lastMod) {
+ $lastMod->setNext($mod);
+ }
+ }
+ }
+ $nextLastModified[] = $mod;
+
+ // oldTextNodes is used here because we're going to move its deleted
+ // elements
+ // to this tree!
+ $this->oldTextNodes[$i]->setModification($mod);
+ }
+ $this->oldTextNodes[$start]->getModification()->setFirstOfID(true);
+
+ $deletedNodes = $this->oldBodyNode->getMinimalDeletedSet($this->deletedID);
+
+ //wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes));
+
+ // Set prevLeaf to the leaf after which the old HTML needs to be
+ // inserted
+ if ($before > 0){
+ $prevLeaf = $this->textNodes[$before - 1];
+ }
+ // Set nextLeaf to the leaf before which the old HTML needs to be
+ // inserted
+ if ($before < sizeof($this->textNodes)){
+ $nextLeaf = $this->textNodes[$before];
+ }
+
+ while (sizeof($deletedNodes) > 0) {
+ if (isset($prevLeaf)) {
+ $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
+ } else {
+ $prevResult = new LastCommonParentResult();
+ $prevResult->setLastCommonParent($this->getBodyNode());
+ $prevResult->setIndexInLastCommonParent(0);
+ }
+ if (isset($nextleaf)) {
+ $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[sizeof($deletedNodes) - 1]);
+ } else {
+ $nextResult = new LastCommonParentResult();
+ $nextResult->setLastCommonParent($this->getBodyNode());
+ $nextResult->setIndexInLastCommonParent($this->getBodyNode()->getNbChildren());
+ }
+
+ if ($prevResult->getLastCommonParentDepth() == $nextResult->getLastCommonParentDepth()) {
+ // We need some metric to choose which way to add-...
+ if ($deletedNodes[0]->getParent() === $deletedNodes[sizeof($deletedNodes) - 1]->getParent()
+ && $prevResult->getLastCommonParent() === $nextResult->getLastCommonParent()) {
+ // The difference is not in the parent
+ $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
+ } else {
+ // The difference is in the parent, so compare them
+ // now THIS is tricky
+ $distancePrev = $deletedNodes[0]->getParent()->getMatchRatio($prevResult->getLastCommonParent());
+ $distanceNext = $deletedNodes[sizeof($deletedNodes) - 1]->getParent()->getMatchRatio($nextResult->getLastCommonParent());
+
+ if ($distancePrev <= $distanceNext) {
+ $prevResult->setLastCommonParentDepth($prevResult->getLastCommonParentDepth() + 1);
+ } else {
+ $nextResult->setLastCommonParentDepth($nextResult->getLastCommonParentDepth() + 1);
+ }
+ }
+
+ }
+
+ if ($prevResult->getLastCommonParentDepth() > $nextResult->getLastCommonParentDepth()) {
+ // Inserting at the front
+ if ($prevResult->isSplittingNeeded()) {
+ $prevLeaf->getParent()->splitUntill($prevResult->getLastCommonParent(), $prevLeaf, true);
+ }
+ $prevLeaf = $deletedNodes[0]->copyTree();
+ unset($deletedNodes[0]);
+ $deletedNodes = array_values($deletedNodes);
+ $prevLeaf->setParent($prevResult->getLastCommonParent());
+ $prevResult->getLastCommonParent()->addChild($prevLeaf,$prevResult->getIndexInLastCommonParent() + 1);
+ } else if ($prevResult->getLastCommonParentDepth() < $nextResult->getLastCommonParentDepth()) {
+ // Inserting at the back
+ if ($nextResult->isSplittingNeeded()) {
+ $splitOccured = $nextLeaf->getParent()->splitUntill($nextResult->getLastCommonParent(), $nextLeaf, false);
+ if ($splitOccured) {
+ // The place where to insert is shifted one place to the
+ // right
+ $nextResult->setIndexInLastCommonParent($nextResult->getIndexInLastCommonParent() + 1);
+ }
+ }
+ $nextLeaf = $deletedNodes[sizeof(deletedNodes) - 1]->copyTree();
+ unset($deletedNodes[sizeof(deletedNodes) - 1]);
+ $deletedNodes = array_values($deletedNodes);
+ $nextLeaf->setParent($nextResult->getLastCommonParent());
+ $nextResult->getLastCommonParent()->addChild($nextLeaf,$nextResult->getIndexInLastCommonParent());
+ } else
+ throw new Exception("Uh?");
+ }
+ $this->lastModified = $nextLastModified;
+ ++$this->deletedID;
+ }
+
+ public function expandWhiteSpace() {
+ $this->getBodyNode()->expandWhiteSpace();
+ }
+
+ public function lengthNew(){
+ return sizeof($this->textNodes);
+ }
+
+ public function lengthOld(){
+ return sizeof($this->oldTextNodes);
+ }
+}
+
+class HTMLDiffer{
+
+ private $output;
+
+ function __construct($output){
+ $this->output = $output;
+ }
+
+ function htmlDiff($from, $to){
+ // Create an XML parser
+ $xml_parser = xml_parser_create('');
+
+ $domfrom = new DomTreeBuilder();
+
+ // Set the functions to handle opening and closing tags
+ xml_set_element_handler($xml_parser, array($domfrom,"startElement"), array($domfrom,"endElement"));
+
+ // Set the function to handle blocks of character data
+ xml_set_character_data_handler($xml_parser, array($domfrom,"characters"));
+
+ ;
+ //wfDebug('Parsing '.strlen($from)." characters worth of HTML");
+ if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
+ || !xml_parse($xml_parser, $from, FALSE)
+ || !xml_parse($xml_parser, '</body>', TRUE)){
+ wfDebug(sprintf("XML error: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
+ }
+ xml_parser_free($xml_parser);
+ unset($from);
+
+ $xml_parser = xml_parser_create('');
+
+ $domto = new DomTreeBuilder();
+
+ // Set the functions to handle opening and closing tags
+ xml_set_element_handler($xml_parser, array($domto,"startElement"), array($domto,"endElement"));
+
+ // Set the function to handle blocks of character data
+ xml_set_character_data_handler($xml_parser, array($domto,"characters"));
+
+ //wfDebug('Parsing '.strlen($to)." characters worth of HTML");
+ if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', FALSE)
+ || !xml_parse($xml_parser, $to, FALSE)
+ || !xml_parse($xml_parser, '</body>', TRUE)){
+ wfDebug(sprintf("XML error in HTML diff: %s at line %d",xml_error_string(xml_get_error_code($xml_parser)),xml_get_current_line_number($xml_parser)));
+ }
+ xml_parser_free($xml_parser);
+ unset($to);
+
+ $diffengine = new _DiffEngine();
+ $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
+ unset($xml_parser,$diffengine);
+
+ $domdiffer = new TextNodeDiffer($domto, $domfrom);
+
+ $currentIndexLeft = 0;
+ $currentIndexRight = 0;
+ foreach ($differences as $d) {
+ if ($d->leftstart > $currentIndexLeft) {
+ $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
+ $currentIndexRight, $d->rightstart);
+ }
+ if ($d->leftlength > 0) {
+ $domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
+ }
+ $domdiffer->markAsNew($d->rightstart, $d->rightend);
+
+ $currentIndexLeft = $d->leftend;
+ $currentIndexRight = $d->rightend;
+ }
+ if ($currentIndexLeft < $domdiffer->lengthOld()) {
+ $domdiffer->handlePossibleChangedPart($currentIndexLeft,$domdiffer->lengthOld(), $currentIndexRight,$domdiffer->lengthNew());
+ }
+
+ $domdiffer->expandWhiteSpace();
+ $output = new HTMLOutput('htmldiff', $this->output);
+ $output->parse($domdiffer->getBodyNode());
+ }
+
+ private function preProcess(/*array*/ $differences){
+ $newRanges = array();
+
+ $nbDifferences = sizeof($differences);
+ for ($i = 0; $i < $nbDifferences; ++$i) {
+ $leftStart = $differences[$i]->leftstart;
+ $leftEnd = $differences[$i]->leftend;
+ $rightStart = $differences[$i]->rightstart;
+ $rightEnd = $differences[$i]->rightend;
+
+ $leftLength = $leftEnd - $leftStart;
+ $rightLength = $rightEnd - $rightStart;
+
+ while ($i + 1 < $nbDifferences && self::score($leftLength, $differences[$i + 1]->leftlength,
+ $rightLength, $differences[$i + 1]->rightlength) > ($differences[$i + 1]->leftstart - $leftEnd)) {
+ $leftEnd = $differences[$i + 1]->leftend;
+ $rightEnd = $differences[$i + 1]->rightend;
+ $leftLength = $leftEnd - $leftStart;
+ $rightLength = $rightEnd - $rightStart;
+ ++$i;
+ }
+ $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
+ }
+ return $newRanges;
+ }
+
+ /**
+ * Heuristic to merge differences for readability.
+ */
+ public static function score($ll, $nll, $rl, $nrl) {
+ if (($ll == 0 && $nll == 0)
+ || ($rl == 0 && $nrl == 0)){
+ return 0;
+ }
+ $numbers = array($ll, $nll, $rl, $nrl);
+ $d = 0;
+ foreach ($numbers as $number) {
+ while ($number > 3) {
+ $d += 3;
+ $number -= 3;
+ $number *= 0.5;
+ }
+ $d += $number;
+
+ }
+ return $d / (1.5 * sizeof($numbers));
+ }
+
+}
+
+class TextOnlyComparator{
+
+ public $leafs = array();
+
+ function _construct(TagNode $tree) {
+ $this->addRecursive($tree);
+ $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
+ }
+
+ private function addRecursive(TagNode $tree) {
+ foreach ($tree->children as $child) {
+ if ($child instanceof TagNode) {
+ $this->addRecursive($child);
+ } else if ($child instanceof TextNode) {
+ $this->leafs[] = $node;
+ }
+ }
+ }
+
+ public function getMatchRatio(TextOnlyComparator $other) {
+ $nbOthers = sizeof($other->leafs);
+ $nbThis = sizeof($this->leafs);
+ if($nbOthers == 0 || $nbThis == 0){
+ return -log(0);
+ }
+
+ $diffengine = new _DiffEngine();
+ $diffengine->diff_local($this->leafs, $other->leafs);
+
+ $distanceThis = array_sum($diffengine->xchanged);
+ $distanceOther = array_sum($diffengine->ychanged);
+
+ return ((0.0 + $distanceOther) / $nbOthers + (0.0 + $distanceThis)
+ / $nbThis) / 2.0;
+ }
+}
+
+class AncestorComparatorResult {
+
+ private $changed = false;
+
+ private $changes = "";
+
+ public function isChanged() {
+ return $this->changed;
+ }
+
+ public function setChanged($changed) {
+ $this->changed = $changed;
+ }
+
+ public function getChanges() {
+ return $this->changes;
+ }
+
+ public function setChanges($changes) {
+ $this->changes = $changes;
+ }
+}
+
+/**
+ * A comparator used when calculating the difference in ancestry of two Nodes.
+ */
+class AncestorComparator{
+
+ public $ancestors;
+ public $ancestorsText;
+
+ function __construct(/*array*/ $ancestors) {
+ $this->ancestors = $ancestors;
+ $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
+ }
+
+ private $compareTxt = "";
+
+ public function getCompareTxt() {
+ return $this->compareTxt;
+ }
+
+ public function getResult(AncestorComparator $other) {
+ $result = new AncestorComparatorResult();
+
+ $diffengine = new _DiffEngine();
+ $differences = $diffengine->diff_range($this->ancestorsText, $other->ancestorsText);
+
+ if (sizeof($differences) == 0){
+ return $result;
+ }
+ $changeTxt = new ChangeTextGenerator($this, $other);
+
+ $result->setChanged(true);
+ $result->setChanges($changeTxt->getChanged($differences)->toString());
+
+ return $result;
+ }
+}
+
+class ChangeTextGenerator {
+
+ private $new;
+ private $old;
+
+ private $factory;
+
+ function __construct(AncestorComparator $old, AncestorComparator $new) {
+ $this->new = $new;
+ $this->old = $old;
+ $this->factory = new TagToStringFactory();
+ }
+
+ public function getChanged(/*array*/ $differences) {
+ $txt = new ChangeText;
+
+ $rootlistopened = false;
+
+ if (sizeof($differences) > 1) {
+ $txt->addHtml('<ul class="changelist">');
+ $rootlistopened = true;
+ }
+
+ $nbDifferences = sizeof($differences);
+ for ($j = 0; $j < $nbDifferences; ++$j) {
+ $d = $differences[$j];
+
+ $lvl1listopened = false;
+
+ if ($rootlistopened) {
+ $txt->addHtml('<li>');
+ }
+
+ if ($d->leftlength + $d->rightlength > 1) {
+ $txt->addHtml('<ul class="changelist">');
+ $lvl1listopened = true;
+ }
+
+ // left are the old ones
+ for ($i = $d->leftstart; $i < $d->leftend; ++$i) {
+ if ($lvl1listopened){
+ $txt->addHtml('<li>');
+ }
+ // add a bullet for a old tag
+ $this->addTagOld($txt, $this->old->ancestors[$i]);
+
+ if ($lvl1listopened){
+ $txt->addHtml('</li>');
+ }
+ }
+
+ // right are the new ones
+ for ($i = $d->rightstart; $i < $d->rightend; ++$i) {
+ if ($lvl1listopened){
+ $txt->addHtml('<li>');
+ }
+
+ // add a bullet for a new tag
+ $this->addTagNew($txt, $this->new->ancestors[$i]);
+
+ if ($lvl1listopened){
+ $txt->addHtml('</li>');
+ }
+
+ }
+
+ if ($lvl1listopened) {
+ $txt->addHtml('</ul>');
+ }
+
+ if ($rootlistopened) {
+ $txt->addHtml('</li>');
+ }
+ }
+
+ if ($rootlistopened) {
+ $txt->addHtml('</ul>');
+ }
+
+ return $txt;
+
+ }
+
+ private function addTagOld(ChangeText $txt, TagNode $ancestor) {
+ $this->factory->create($ancestor)->getRemovedDescription($txt);
+ }
+
+ private function addTagNew(ChangeText $txt, TagNode $ancestor) {
+ $this->factory->create($ancestor)->getAddedDescription($txt);
+ }
+}
+
+class ChangeText {
+
+ private $txt = "";
+
+ const newLine = "<br/>";
+
+ public function addText($s) {
+ $s = $this->clean($s);
+ $this->txt .= $s;
+ }
+
+ public function addHtml($s) {
+ $this->txt .= $s;
+ }
+
+ public function addNewLine() {
+ $this->addHtml(self::newLine);
+ }
+
+ public function toString() {
+ return $this->txt;
+ }
+
+ private function clean($s) {
+ return htmlspecialchars($s);
+ }
+}
+
+class TagToStringFactory {
+
+ private static $containerTags = array(
+ 'html' => TRUE,
+ 'body' => TRUE,
+ 'p' => TRUE,
+ 'blockquote' => TRUE,
+ 'h1' => TRUE,
+ 'h2' => TRUE,
+ 'h3' => TRUE,
+ 'h4' => TRUE,
+ 'h5' => TRUE,
+ 'pre' => TRUE,
+ 'div' => TRUE,
+ 'ul' => TRUE,
+ 'ol' => TRUE,
+ 'li' => TRUE,
+ 'table' => TRUE,
+ 'tbody' => TRUE,
+ 'tr' => TRUE,
+ 'td' => TRUE,
+ 'th' => TRUE,
+ 'br' => TRUE,
+ 'hr' => TRUE,
+ 'code' => TRUE,
+ 'dl' => TRUE,
+ 'dt' => TRUE,
+ 'dd' => TRUE,
+ 'input' => TRUE,
+ 'form' => TRUE,
+ 'img' => TRUE,
+ // in-line tags that can be considered containers not styles
+ 'span' => TRUE,
+ 'a' => TRUE
+ );
+
+ private static $styleTags = array(
+ 'i' => TRUE,
+ 'b' => TRUE,
+ 'strong' => TRUE,
+ 'em' => TRUE,
+ 'font' => TRUE,
+ 'big' => TRUE,
+ 'del' => TRUE,
+ 'tt' => TRUE,
+ 'sub' => TRUE,
+ 'sup' => TRUE,
+ 'strike' => TRUE
+ );
+
+ const MOVED = 1;
+ const STYLE = 2;
+ const UNKNOWN = 4;
+
+ public function create(TagNode $node) {
+ $sem = $this->getChangeSemantic($node->getQName());
+ if (0 == strcasecmp($node->getQName(),'a')){
+ return new AnchorToString($node, $sem);
+ }
+ if (0 == strcasecmp($node->getQName(),'img')){
+ return new NoContentTagToString($node, $sem);
+ }
+ return new TagToString($node, $sem);
+ }
+
+ protected function getChangeSemantic($qname) {
+ if (array_key_exists(strtolower($qname),self::$containerTags)){
+ return self::MOVED;
+ }
+ if (array_key_exists(strtolower($qname),self::$styleTags)){
+ return self::STYLE;
+ }
+ return self::UNKNOWN;
+ }
+}
+
+class TagToString {
+
+ protected $node;
+
+ protected $sem;
+
+ function __construct(TagNode $node, $sem) {
+ $this->node = $node;
+ $this->sem = $sem;
+ }
+
+ public function getDescription() {
+ return $this->getString('diff-' . $this->node->getQName());
+ }
+
+ public function getRemovedDescription(ChangeText $txt) {
+
+ if ($this->sem == TagToStringFactory::MOVED) {
+ $txt->addText($this->getMovedOutOf() . ' ' . strtolower($this->getArticle()) . ' ');
+ $txt->addHtml('<b>');
+ $txt->addText(strtolower($this->getDescription()));
+ $txt->addHtml('</b>');
+ } else if ($this->sem == TagToStringFactory::STYLE) {
+ $txt->addHtml('<b>');
+ $txt->addText($this->getDescription());
+ $txt->addHtml('</b>');
+ $txt->addText(' ' . strtolower($this->getStyleRemoved()));
+ } else {
+ $txt->addHtml('<b>');
+ $txt->addText($this->getDescription());
+ $txt->addHtml('</b>');
+ $txt->addText(' ' . strtolower($this->getRemoved()));
+ }
+ $this->addAttributes($txt, $this->node->getAttributes());
+ $txt->addText('.');
+ }
+
+ public function getAddedDescription(ChangeText $txt) {
+
+ if ($this->sem == TagToStringFactory::MOVED) {
+ $txt->addText($this->getMovedTo() . ' ' . strtolower($this->getArticle()) . ' ');
+ $txt->addHtml('<b>');
+ $txt->addText(strtolower($this->getDescription()));
+ $txt->addHtml('</b>');
+ } else if ($this->sem == TagToStringFactory::STYLE) {
+ $txt->addHtml('<b>');
+ $txt->addText($this->getDescription());
+ $txt->addHtml('</b>');
+ $txt->addText(' ' . strtolower($this->getStyleAdded()));
+ } else {
+ $txt->addHtml('<b>');
+ $txt->addText($this->getDescription());
+ $txt->addHtml('</b>');
+ $txt->addText(' ' . strtolower($this->getAdded()));
+ }
+ $this->addAttributes($txt, $this->node->getAttributes());
+ $txt->addText('.');
+ }
+
+ protected function getMovedTo() {
+ return $this->getString('diff-movedto');
+ }
+
+ protected function getStyleAdded() {
+ return $this->getString('diff-styleadded');
+ }
+
+ protected function getAdded() {
+ return $this->getString('diff-added');
+ }
+
+ protected function getMovedOutOf() {
+ return $this->getString('diff-movedoutof');
+ }
+
+ protected function getStyleRemoved() {
+ return $this->getString('diff-styleremoved');
+ }
+
+ protected function getRemoved() {
+ return $this->getString('diff-removed');
+ }
+
+ protected function addAttributes(ChangeText $txt, array $attributes) {
+ if (sizeof($attributes) < 1)
+ return;
+
+ $keys = array_keys($attributes);
+
+ $txt->addText(' ' . strtolower($this->getWith()) . ' '
+ . $this->translateArgument($keys[0]) . ' '
+ . $attributes[$keys[0]]);
+ for ($i = 1; $i < sizeof($attributes) - 1; $i++) {
+ $txt->addText(', ' . $this->translateArgument($keys[$i]) . ' '
+ . $attributes[$keys[$i]]);
+ }
+ if (sizeof($attributes) > 1) {
+ $txt->addText(' '
+ . strtolower($this->getAnd())
+ . ' '
+ . $this->translateArgument($keys[sizeof($attributes) - 1]) . ' '
+ . $attributes[$keys[sizeof($attributes) - 1]]);
+ }
+ }
+
+ private function getAnd() {
+ return $this->getString('diff-and');
+ }
+
+ private function getWith() {
+ return $this->getString('diff-with');
+ }
+
+ protected function translateArgument($name) {
+ if (0 == strcasecmp($name,'src'))
+ return strtolower($this->getSource());
+ if (0 == strcasecmp($name,'width'))
+ return strtolower($this->getWidth());
+ if (0 == strcasecmp($name,'height'))
+ return strtolower($this->getHeight());
+ return $name;
+ }
+
+ private function getHeight() {
+ return $this->getString('diff-height');
+ }
+
+ private function getWidth() {
+ return $this->getString('diff-width');
+ }
+
+ protected function getSource() {
+ return $this->getString('diff-source');
+ }
+
+ protected function getArticle() {
+ return $this->getString('diff-' . $this->node->getQName() . '-article');
+ }
+
+ public static $bundle = array(
+ 'diff-movedto' => 'Moved to',
+ 'diff-styleadded' => 'Style added',
+ 'diff-added' => 'Added',
+ 'diff-changedto' => 'Changed to',
+ 'diff-movedoutof' => 'Moved out of',
+ 'diff-styleremoved' => 'Style removed',
+ 'diff-removed' => 'Removed',
+ 'diff-changedfrom' => 'Changed from',
+ 'diff-source' => 'Source',
+ 'diff-withdestination' => 'With destination',
+ 'diff-and' => 'And',
+ 'diff-with' => 'With',
+ 'diff-width' => 'Width',
+ 'diff-height' => 'Height',
+ 'diff-html-article' => 'A',
+ 'diff-html' => 'Html page',
+ 'diff-body-article' => 'A',
+ 'diff-body' => 'Html document',
+ 'diff-p-article' => 'A',
+ 'diff-p' => 'Paragraph',
+ 'diff-blockquote-article' => 'A',
+ 'diff-blockquote' => 'Quote',
+ 'diff-h1-article' => 'A',
+ 'diff-h1' => 'Heading (level 1)',
+ 'diff-h2-article' => 'A',
+ 'diff-h2' => 'Heading (level 2)',
+ 'diff-h3-article' => 'A',
+ 'diff-h3' => 'Heading (level 3)',
+ 'diff-h4-article' => 'A',
+ 'diff-h4' => 'Heading (level 4)',
+ 'diff-h5-article' => 'A',
+ 'diff-h5' => 'Heading (level 5)',
+ 'diff-pre-article' => 'A',
+ 'diff-pre' => 'Preformatted block',
+ 'diff-div-article' => 'A',
+ 'diff-div' => 'Division',
+ 'diff-ul-article' => 'An',
+ 'diff-ul' => 'Unordered list',
+ 'diff-ol-article' => 'An',
+ 'diff-ol' => 'Ordered list',
+ 'diff-li-article' => 'A',
+ 'diff-li' => 'List item',
+ 'diff-table-article' => 'A',
+ 'diff-table' => 'Table',
+ 'diff-tbody-article' => 'A',
+ 'diff-tbody' => "Table's content",
+ 'diff-tr-article' => 'A',
+ 'diff-tr' => 'Row',
+ 'diff-td-article' => 'A',
+ 'diff-td' => 'Cell',
+ 'diff-th-article' => 'A',
+ 'diff-th' => 'Header',
+ 'diff-br-article' => 'A',
+ 'diff-br' => 'Break',
+ 'diff-hr-article' => 'A',
+ 'diff-hr' => 'Horizontal rule',
+ 'diff-code-article' => 'A',
+ 'diff-code' => 'Computer code block',
+ 'diff-dl-article' => 'A',
+ 'diff-dl' => 'Definition list',
+ 'diff-dt-article' => 'A',
+ 'diff-dt' => 'Definition term',
+ 'diff-dd-article' => 'A',
+ 'diff-dd' => 'Definition',
+ 'diff-input-article' => 'An',
+ 'diff-input' => 'Input',
+ 'diff-form-article' => 'A',
+ 'diff-form' => 'Form',
+ 'diff-img-article' => 'An',
+ 'diff-img' => 'Image',
+ 'diff-span-article' => 'A',
+ 'diff-span' => 'Span',
+ 'diff-a-article' => 'A',
+ 'diff-a' => 'Link',
+ 'diff-i' => 'Italics',
+ 'diff-b' => 'Bold',
+ 'diff-strong' => 'Strong',
+ 'diff-em' => 'Emphasis',
+ 'diff-font' => 'Font',
+ 'diff-big' => 'Big',
+ 'diff-del' => 'Deleted',
+ 'diff-tt' => 'Fixed width',
+ 'diff-sub' => 'Subscript',
+ 'diff-sup' => 'Superscript',
+ 'diff-strike' => 'Strikethrough'
+ );
+
+ public function getString($key) {
+ return self::$bundle[$key];
+ }
+}
+
+class NoContentTagToString extends TagToString {
+
+ function __construct(TagNode $node, $sem) {
+ parent::__construct($node, $sem);
+ }
+
+ public function getAddedDescription(ChangeText $txt) {
+ $txt.addText($this->getChangedTo() . ' ' + strtolower($this->getArticle()) . ' ');
+ $txt.addHtml('<b>');
+ $txt.addText(strtolower($this->getDescription()));
+ $txt.addHtml('</b>');
+
+ $this->addAttributes($txt, $this->node->getAttributes());
+ $txt.addText('.');
+ }
+
+ private function getChangedTo() {
+ return $this->getString('diff-changedto');
+ }
+
+ public function getRemovedDescription(ChangeText $txt) {
+ $txt.addText($this->getChangedFrom() . ' ' + strtolower($this->getArticle()) . ' ');
+ $txt.addHtml('<b>');
+ $txt.addText(strtolower($this->getDescription()));
+ $txt.addHtml('</b>');
+
+ $this->addAttributes($txt, $this->node->getAttributes());
+ $txt.addText('.');
+ }
+
+ private function getChangedFrom() {
+ return $this->getString('diff-changedfrom');
+ }
+}
+
+class AnchorToString extends TagToString {
+
+ function __construct(TagNode $node, $sem) {
+ parent::__construct($node, $sem);
+ }
+
+ protected function addAttributes(ChangeText $txt, array $attributes) {
+ if (array_key_exists('href',$attributes)) {
+ $txt->addText(' ' . strtolower($this->getWithDestination()) . ' ' . $attributes['href']);
+ unset($attributes['href']);
+ }
+ parent::addAttributes($txt, $attributes);
+ }
+
+ private function getWithDestination() {
+ return $this->getString('diff-withdestination');
+ }
+
+}
+
+/**
+ * Takes a branch root and creates an HTML file for it.
+ */
+class HTMLOutput{
+
+ private $prefix;
+ private $handler;
+
+ function __construct($prefix, $handler) {
+ $this->prefix = $prefix;
+ $this->handler = $handler;
+ }
+
+ public function parse(TagNode $node) {
+
+ if (0 != strcasecmp($node->getQName(),'img') && 0 != strcasecmp($node->getQName(),'body')) {
+ $this->handler->startElement($node->getQName(), $node->getAttributes());
+ }
+
+ $newStarted = false;
+ $remStarted = false;
+ $changeStarted = false;
+ $changeTXT = '';
+
+ foreach ($node->children as $child) {
+ if ($child instanceof TagNode) {
+ if ($newStarted) {
+ $this->handler->endElement('span');
+ $newStarted = false;
+ } else if ($changeStarted) {
+ $this->handler->endElement('span');
+ $changeStarted = false;
+ } else if ($remStarted) {
+ $this->handler->endElement('span');
+ $remStarted = false;
+ }
+ $this->parse($child);
+ } else if ($child instanceof TextNode) {
+ $mod = $child->getModification();
+
+ if ($newStarted && ($mod->getType() != Modification::ADDED || $mod->isFirstOfID())) {
+ $this->handler->endElement('span');
+ $newStarted = false;
+ } else if ($changeStarted && ($mod->getType() != Modification::CHANGED || $mod->getChanges() != $changeTXT || $mod->isFirstOfID())) {
+ $this->handler->endElement('span');
+ $changeStarted = false;
+ } else if ($remStarted && ($mod->getType() != Modification::REMOVED || $mod ->isFirstOfID())) {
+ $this->handler->endElement('span');
+ $remStarted = false;
+ }
+
+ // no else because a removed part can just be closed and a new
+ // part can start
+ if (!$newStarted && $mod->getType() == Modification::ADDED) {
+ $attrs = array('class'=>'diff-html-added');
+ if ($mod->isFirstOfID()) {
+ $attrs['id'] = 'added-' . $this->prefix . '-' . $mod->getID();
+ }
+ $this->addAttributes($mod, $attrs);
+ $attrs['onclick'] = 'return tipA(constructToolTipA(this));';
+ $this->handler->startElement('span', $attrs);
+ $newStarted = true;
+ } else if (!$changeStarted && $mod->getType() == Modification::CHANGED) {
+ $attrs = array('class'=>'diff-html-changed');
+ if ($mod->isFirstOfID()) {
+ $attrs['id'] = 'changed-' . $this->prefix . '-' . $mod->getID();
+ }
+ $this->addAttributes($mod, $attrs);
+ $attrs['onclick'] = 'return tipC(constructToolTipC(this));';
+ $this->handler->startElement('span', $attrs);
+
+ //tooltip
+ $this->handler->startElement('span', array('class'=>'tip'));
+ $this->handler->characters($mod->getChanges());
+ $this->handler->endElement('span');
+
+ $changeStarted = true;
+ $changeTXT = $mod->getChanges();
+ } else if (!$remStarted && $mod->getType() == Modification::REMOVED) {
+ $attrs = array('class'=>'diff-html-removed');
+ if ($mod->isFirstOfID()) {
+ $attrs['id'] = 'removed-' . $this->prefix . '-' . $mod->getID();
+ }
+ $this->addAttributes($mod, $attrs);
+ $attrs['onclick'] = 'return tipR(constructToolTipR(this));';
+ $this->handler->startElement('span', $attrs);
+ $remStarted = true;
+ }
+
+ $chars = $child->getText();
+
+ if ($child instanceof ImageNode) {
+ $this->writeImage($child);
+ } else {
+ $this->handler->characters($chars);
+ }
+
+ }
+ }
+
+ if ($newStarted) {
+ $this->handler->endElement('span');
+ $newStarted = false;
+ } else if ($changeStarted) {
+ $this->handler->endElement('span');
+ $changeStarted = false;
+ } else if ($remStarted) {
+ $this->handler->endElement('span');
+ $remStarted = false;
+ }
+
+ if (0 != strcasecmp($node->getQName(),'img')
+ && 0 != strcasecmp($node->getQName(),'body'))
+ $this->handler->endElement($node->getQName());
+ }
+
+ private function writeImage(ImageNode $imgNode){
+ $attrs = $imgNode->getAttributes();
+ if ($imgNode->getModification()->getType() == Modification::REMOVED)
+ $attrs['changeType']='diff-removed-image';
+ else if ($imgNode->getModification()->getType() == Modification::ADDED)
+ $attrs['changeType'] = 'diff-added-image';
+ $attrs['onload'] = 'updateOverlays()';
+ $attrs['onError'] = 'updateOverlays()';
+ $attrs['onAbort'] = 'updateOverlays()';
+ $this->handler->startElement('img', $attrs);
+ $this->handler->endElement('img');
+ }
+
+ private function addAttributes(Modification $mod, /*array*/ &$attrs) {
+ if (is_null($mod->getPrevious())) {
+ $previous = 'first-' . $this->prefix;
+ } else {
+ $previous = Modification::typeToString($mod->getPrevious()->getType()) . '-' . $this->prefix . '-'
+ . $mod->getPrevious()->getID();
+ }
+ $attrs['previous'] = $previous;
+
+ $changeId = Modification::typeToString($mod->getType()) . '-' + $this->prefix . '-' . $mod->getID();
+ $attrs['changeId'] = $changeId;
+
+ if (is_null($mod->getNext())) {
+ $next = 'last-' . $this->prefix;
+ } else {
+ $next = Modification::typeToString($mod->getNext()->getType()) . '-' . $this->prefix . '-'
+ . $mod->getNext()->getID();
+ }
+ $attrs['next'] = $next;
+ }
+}
+
+class EchoingContentHandler{
+
+ function startElement($qname, /*array*/ $arguments){
+ echo '<'.$qname;
+ foreach($arguments as $key => $value){
+ echo ' '.$key.'="'.Sanitizer::encodeAttribute($value).'"';
+ }
+ echo '>';
+ }
+
+ function endElement($qname){
+ echo '</'.$qname.'>';
+ }
+
+ function characters($chars){
+ echo $chars;
+ }
+
+}
+
+class DelegatingContentHandler{
+
+ private $delegate;
+
+ function __construct($delegate){
+ $this->delegate=$delegate;
+ }
+
+ function startElement($qname, /*array*/ $arguments){
+ $this->delegate->addHtml('<'.$qname) ;
+ foreach($arguments as $key => $value){
+ $this->delegate->addHtml(' '.$key.'="'. Sanitizer::encodeAttribute($value) .'"');
+ }
+ $this->delegate->addHtml('>');
+ }
+
+ function endElement($qname){
+ $this->delegate->addHtml('</'.$qname.'>');
+ }
+
+ function characters($chars){
+ $this->delegate->addHtml( $chars );
+ }
+
+}
Index: trunk/phase3/includes/Diff.php
===================================================================
--- trunk/phase3/includes/Diff.php (revision 39405)
+++ trunk/phase3/includes/Diff.php (revision 39406)
@@ -18,454 +18,488 @@
*/
/**
- * Implementation of the Diff algorithm.
- *
- * DO NOT USE ON PRODUCTION SYSTEMS
- *
- * The algorithm is based on the O(NP) LCS algorithm as descibed by Wu et al. in "An O(NP) Sequence Comparison Algorithm"
- * and extended with my own ideas.
+ * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which
+ * in turn is based on Myers' "An O(ND) difference algorithm and its variations"
+ * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s
+ * "An O(NP) Sequence Comparison Algorithm").
*
- * @return array($from_removed, $to_added)
- * TRUE if the token was removed or added.
+ * This implementation supports an upper bound on the excution time.
*
+ * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space
+ *
* @author Guy Van den Broeck
+ *
*/
-function wikidiff3_diff( /*array*/ $from, /*array*/ $to, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
- wfProfileIn( __METHOD__ );
+class WikiDiff3{
- $m = sizeof($from);
- $n = sizeof($to);
+ //Input variables
+ private $from;
+ private $to;
+ private $m;
+ private $n;
- $result_from = array_fill(0,$m,TRUE);
- $result_to = array_fill(0,$n,TRUE);
+ private $tooLong;
+ private $powLimit;
- //reduce the complexity for the next step (intentionally done twice)
- //remove common tokens at the start
- $i=0;
- while($i<$m && $i<$n && $from[$i]===$to[$i]){
- $result_from[$i] = $result_to[$i] = FALSE;
- unset($from[$i],$to[$i]);
- ++$i;
+ //State variables
+ private $maxDifferences;
+
+ //Output variables
+ public $length;
+ public $added;
+ public $removed;
+ public $heuristicUsed;
+
+ function __construct($tooLong = 2000000, $powLimit = 1.45){
+ $this->tooLong = $tooLong;
+ $this->powLimit = $powLimit;
}
- //remove common tokens at the end
- $j=1;
- while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
- $result_from[$m-$j] = $result_to[$n-$j] = FALSE;
- unset($from[$m-$j],$to[$n-$j]);
- ++$j;
- }
+ public function diff(/*array*/ $from, /*array*/ $to){
+ $m = sizeof($from);
+ $n = sizeof($to);
+
+ $this->heuristicUsed = FALSE;
- $newFrom = $newFromIndex = $newTo = $newToIndex = array();
+ $result_from = $m>0?array_fill(0,$m,true):array();
+ $result_to = $n>0?array_fill(0,$n,true):array();
- //remove tokens not in both sequences
- $shared = array_fill_keys($from,FALSE);
- foreach($to as $index => $el){
- if(array_key_exists($el,$shared)){
- //keep it
- $shared[$el] = TRUE;
- $newTo[] = $el;
- $newToIndex[] = $index;
+ //reduce the complexity for the next step (intentionally done twice)
+ //remove common tokens at the start
+ $i=0;
+ while($i<$m && $i<$n && $from[$i]===$to[$i]){
+ $result_from[$i] = $result_to[$i] = false;
+ unset($from[$i],$to[$i]);
+ ++$i;
}
- }
- foreach($from as $index => $el){
- if($shared[$el]){
- //keep it
- $newFrom[] = $el;
- $newFromIndex[] = $index;
+
+ //remove common tokens at the end
+ $j=1;
+ while($i+$j<=$m && $i+$j<=$n && $from[$m-$j]===$to[$n-$j]){
+ $result_from[$m-$j] = $result_to[$n-$j] = false;
+ unset($from[$m-$j],$to[$n-$j]);
+ ++$j;
}
- }
- unset($from, $to, $shared);
+ $newFrom = $newFromIndex = $newTo = $newToIndex = array();
- $m2 = sizeof($newFrom);
- $n2 = sizeof($newTo);
- $offsetx = $offsety = 0;
- $from_inLcs = new InLcs($m2);
- $to_inLcs = new InLcs($n2);
+ //remove tokens not in both sequences
+ $shared = array_fill_keys($from,false);
+ foreach($to as $index => $el){
+ if(array_key_exists($el,$shared)){
+ //keep it
+ $this->to[] = $el;
+ $shared[$el] = true;
+ $newToIndex[] = $index;
+ }
+ }
+ foreach($from as $index => $el){
+ if($shared[$el]){
+ //keep it
+ $this->from[] = $el;
+ $newFromIndex[] = $index;
+ }
+ }
- wikidiff3_diffPart($newFrom, $newTo, $from_inLcs, $to_inLcs, $m2, $n2, $offsetx, $offsety, $m2, $boundRunningTime, $max_NP_before_bound);
+ unset($shared);
- foreach($from_inLcs->inLcs as $key => $in){
- if($in){
- $result_from[$newFromIndex[$key]] = FALSE;
+ $this->m = sizeof($this->from);
+ $this->n = sizeof($this->to);
+ $this->removed = $this->m>0?array_fill(0,$this->m,true):array();
+ $this->added = $this->n>0?array_fill(0,$this->n,true):array();
+
+ $this->longestCommonSubsequence();
+
+ $this->m = $m;
+ $this->n = $n;
+ $this->length += $i+$j-1;
+
+ foreach($this->removed as $key => $removed){
+ if(!$removed){
+ $result_from[$newFromIndex[$key]] = false;
+ }
}
- }
- foreach($to_inLcs->inLcs as $key => $in){
- if($in){
- $result_to[$newToIndex[$key]] = FALSE;
+ foreach($this->added as $key => $added){
+ if(!$added){
+ $result_to[$newToIndex[$key]] = false;
+ }
}
+ unset($this->added, $this->removed);
+ return array($result_from, $result_to);
}
-
- wfProfileOut( __METHOD__ );
- return array($result_from, $result_to);
-}
-function wikidiff3_diffPart( /*array*/ $a, /*array*/ $b, InLcs $a_inLcs, InLcs $b_inLcs, $m, $n, $offsetx, $offsety, $bestKnownLcs, $boundRunningTime=FALSE, $max_NP_before_bound = 800000){
- if($bestKnownLcs==0 || $m==0 || $n==0){
- return;
- }
-
- if($m>$n){
- return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
- }
+ public function longestCommonSubsequence() {
+ if ($this->m == 0 || $this->n == 0) {
+ $this->length = 0;
+ return;
+ }
- $a_inLcs_sym = &$a_inLcs->inLcs;
- $b_inLcs_sym = &$b_inLcs->inLcs;
+ $this->maxDifferences = ceil(($this->m + $this->n) / 2.0);
+ if ($this->m * $this->n > $this->tooLong) {
+ // limit complexity to D^POW_LIMIT for long sequences
+ $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0));
+ wfDebug("Limiting max number of differences to $this->maxDifferences");
+ }
- //remove common tokens at the start
- while($m>0 && $n>0 && $a[0]===$b[0]){
- $a_inLcs_sym[$offsetx] = $b_inLcs_sym[$offsety] = TRUE;
- ++$offsetx; ++$offsety;
- --$m; --$n;
- --$bestKnownLcs;
- array_shift($a);
- array_shift($b);
- }
+ /*
+ * The common prefixes and suffixes are always part of some LCS, include
+ * them now to reduce our search space
+ */
+ $max = min($this->m, $this->n);
+ for ($forwardBound = 0; $forwardBound < $max
+ && $this->from[$forwardBound]===$this->to[$forwardBound]; ++$forwardBound) {
+ $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
+ }
- //remove common tokens at the end
- while($m>0 && $n>0 && $a[$m-1]===$b[$n-1]){
- $a_inLcs_sym[$offsetx+$m-1] = $b_inLcs_sym[$offsety+$n-1] = TRUE;
- --$m; --$n;
- --$bestKnownLcs;
- array_pop($a);
- array_pop($b);
- }
+ $backBoundL1 = $this->m - 1;
+ $backBoundL2 = $this->n - 1;
- if($bestKnownLcs==0 || $m==0 || $n==0){
- return;
+ while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
+ && $this->from[$backBoundL1] === $this->to[$backBoundL2]) {
+ $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
+ }
+
+ $temp = array_fill(0,$this->m + $this->n + 1, 0);
+ $V = array($temp,$temp);
+ $snake = array(0,0,0);
+
+ $this->length = $forwardBound + $this->m - $backBoundL1 - 1
+ + $this->lcs_rec($forwardBound, $backBoundL1, $forwardBound, $backBoundL2,
+ $V, $snake);
}
- if($m>$n){
- return wikidiff3_diffPart($b, $a, $b_inLcs, $a_inLcs, $n, $m, $offsety, $offsetx, $bestKnownLcs, $boundRunningTime, $max_NP_before_bound);
- }
- $delta = $n-$m;
- $delta_plus_1 = $delta+1;
- $delta_min_1 = $delta-1;
+ private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) {
+ // check that both sequences are non-empty
+ if ($bottoml1 > $topl1 || $bottoml2 > $topl2) {
+ return 0;
+ }
- $fpForw = $snakeBeginForw = $snakeEndForw = array_fill( 0, $delta_plus_1, -1);
- $lcsSizeForw = $snakekForw = $lcsSizeBack = $snakekBack = array_fill( 0, $delta_plus_1, 0);
- $fpBack = $snakeBeginBack = $snakeEndBack = array_fill( 0, $delta_plus_1, $n);
- $overlap = $delta>1 ? array_fill( 1, $delta_min_1, FALSE) : array();
- $bestLcsLength = $bestLcsLengthTop = $bestLcsLengthBottom = -1;
+ $d = $this->find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, $V, $snake);
- if($boundRunningTime){
- $maxp_before_bound = max((-$delta+sqrt($delta*$delta+($max_NP_before_bound<<2)))>>1,2);
- if($maxp_before_bound>=$m){
- $boundRunningTime = false;
- unset($maxp_before_bound);
+ // need to store these so we don't lose them when they're overwritten by
+ // the recursion
+ $len = $snake[2];
+ $startx = $snake[0];
+ $starty = $snake[1];
+
+ // the middle snake is part of the LCS, store it
+ for ($i = 0; $i < $len; ++$i) {
+ $this->removed[$startx + $i] = $this->added[$starty + $i] = false;
}
+
+ if ($d > 1) {
+ return $len
+ + $this->lcs_rec($bottoml1, $startx - 1, $bottoml2, $starty - 1, $V, $snake)
+ + $this->lcs_rec($startx + $len, $topl1, $starty + $len, $topl2, $V, $snake);
+ } else if ($d == 1) {
+ /*
+ * In this case the sequences differ by exactly 1 line. We have
+ * already saved all the lines after the difference in the for loop
+ * above, now we need to save all the lines before the difference.
+ */
+ $max = min($startx - $bottoml1, $starty - $bottoml2);
+ for ($i = 0; $i < $max; ++$i) {
+ $this->removed[$bottoml1 + $i] = $this->added[$bottoml2 + $i] = false;
+ }
+ return $max + $len;
+ }
+ return $len;
}
- $p=-1;
- $m_min_1 = $m-1;
- $maxp=$m_min_1-$bestLcsLength;
+ private function find_middle_snake($bottoml1, $topl1, $bottoml2,$topl2, &$V, &$snake) {
+ $from = &$this->from;
+ $to = &$this->to;
+ $V0 = &$V[0];
+ $V1 = &$V[1];
+ $snake0 = &$snake[0];
+ $snake1 = &$snake[1];
+ $snake2 = &$snake[2];
+ $bottoml1_min_1 = $bottoml1-1;
+ $bottoml2_min_1 = $bottoml2-1;
+ $N = $topl1 - $bottoml1_min_1;
+ $M = $topl2 - $bottoml2_min_1;
+ $delta = $N - $M;
+ $maxabsx = $N+$bottoml1;
+ $maxabsy = $M+$bottoml2;
+ $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
- while($p<$maxp){
+ //value_to_add_forward: a 0 or 1 that we add to the start
+ // offset
+ // to make it odd/even
+ if (($M & 1) == 1) {
+ $value_to_add_forward = 1;
+ } else {
+ $value_to_add_forward = 0;
+ }
- if($boundRunningTime && $p>$maxp_before_bound){
- // bound the running time by stopping early
- if($bestLcsLength>=0){
- // accept the best LCS thusfar
- break;
- }else{
- $bestLcsProgressForw = $bestkForw = 0;
- foreach($lcsSizeForw as $k => $localLcsProgress){
- if($localLcsProgress>$bestLcsProgressForw || ($localLcsProgress==$bestLcsProgressForw && $bestkForw > $k)){
- $bestLcsProgressForw = $localLcsProgress;
- $bestkForw = $k;
- }
- }
- $bestLcsProgressBack = $bestkBack = 0;
- foreach($lcsSizeBack as $k => $localLcsProgress){
- if($localLcsProgress>$bestLcsProgressBack || ($localLcsProgress==$bestLcsProgressBack && $bestkBack < $k)){
- $bestLcsProgressBack = $localLcsProgress;
- $bestkBack = $k;
- }
- }
- if($fpForw[$bestkForw]-$bestkForw>$fpBack[$bestkBack]-$bestkBack){
- // This is hard, maybe try some more? Can this even happen? A solution will be found soon anyway.
- }else{
- // lets do this
- $topSnakeStart = $snakeBeginForw[$bestkForw];
- $topSnakeEnd = $snakeEndForw[$bestkForw];
- $topSnakek = $snakekForw[$bestkForw];
- $bestLcsLengthTop = $lcsSizeForw[$bestkBack] + $topSnakeStart - $topSnakeEnd;
+ if (($N & 1) == 1) {
+ $value_to_add_backward = 1;
+ } else {
+ $value_to_add_backward = 0;
+ }
- $bottomSnakeStart = $snakeEndBack[$bestkBack]+1;
- $bottomSnakeEnd = $snakeBeginBack[$bestkBack]+1;
- $bottomSnakek = $snakekBack[$bestkBack];
- $bestLcsLengthBottom = $lcsSizeBack[$bestkBack] + $bottomSnakeStart - $bottomSnakeEnd;
+ $start_forward = -$M;
+ $end_forward = $N;
+ $start_backward = -$N;
+ $end_backward = $M;
- if($bottomSnakeStart-$topSnakeEnd>$n*0.6 && ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek)>$m*0.6){
- // cut the sequence from both sides (there isn't enough progress, 60% of the sequence is not covered)
- // also diff the middle now
- if($bottomSnakeEnd>($fpForw[$bestkForw]>>1)){
- // do the middle diff between the snakes
- wfDebug(" Limiting diff run time from both sides, middle between snakes\n");
- $m_t = ($bottomSnakeStart-$bottomSnakek)-($topSnakeEnd-$topSnakek);
- $n_t = $bottomSnakeStart-$topSnakeEnd;
- $a_t = array_slice($a, $topSnakeEnd-$topSnakek, $m_t);
- $b_t = array_slice($b, $topSnakeEnd, $n_t);
- $offsetx_t = $offsetx+($topSnakeEnd-$topSnakek);
- $offsety_t = $offsety+$topSnakeEnd;
- }else{
- // the snake is too early in the sequence, do the middle diff between endpoints of progress
- wfDebug(" Limiting diff run time from both sides, middle between endpoints\n");
- $m_t = ($fpBack[$bestkBack]+1-$bestkBack)-($fpForw[$bestkForw]-$bestkForw);
- $n_t = $fpBack[$bestkBack]+1-$fpForw[$bestkForw];
- $a_t = array_slice($a, $fpForw[$bestkForw]-$bestkForw, $m_t);
- $b_t = array_slice($b, $fpForw[$bestkForw], $n_t);
- $offsetx_t = $offsetx+($fpForw[$bestkForw]-$bestkForw);
- $offsety_t = $offsety+$fpForw[$bestkForw];
- }
- wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $n_t, $offsetx_t, $offsety_t, $m_t, $boundRunningTime, $max_NP_before_bound);
- }else if(min($n-$bottomSnakeStart, $m-($bottomSnakeStart-$bottomSnakek))>min($topSnakeEnd, $topSnakeEnd-$topSnakek)){
- // the bottom snake is more interesting
- wfDebug("Limiting diff run time from bottom side\n");
- $topSnakeStart = $bottomSnakeStart;
- $topSnakeEnd = $bottomSnakeEnd;
- $topSnakek = $bottomSnakek;
- $bestLcsLengthTop = $topSnakeEnd-$topSnakek;
- $bottomSnakeStart = $bottomSnakeEnd;
- }else{
- // the top snake is more interesting
- wfDebug(" Limiting diff run time from top side\n");
- $bottomSnakeStart = $topSnakeEnd;
- $bottomSnakeEnd = $topSnakeEnd;
- $bottomSnakek = $topSnakek;
- $bestLcsLengthBottom = $m-($bottomSnakeEnd-$bottomSnakek);
- }
- break;
- }
- }
- }
- ++$p;
- $overlap[-$p] = $overlap[$delta+$p] = FALSE;
+ $limit_min_1 = $limit-1;
+ $limit_plus_1 = $limit+1;
- $min_p_min_1 = -$p-1;
- $delta_plus_1_plus_p = $delta_plus_1+$p;
+ $V0[$limit_plus_1] = 0;
+ $V1[$limit_min_1] = $N;
+ $limit = min($this->maxDifferences, ceil(($N + $M ) / 2)); // ceil((N+M)/2)
+
+ if (($delta & 1) == 1) {
+ for ($d = 0; $d <= $limit; ++$d) {
+ $start_diag = max($value_to_add_forward + $start_forward, -$d);
+ $end_diag = min($end_forward, $d);
+ $value_to_add_forward = 1 - $value_to_add_forward;
- // forward
- $fpForw[$min_p_min_1] = $snakeEndForw[$min_p_min_1] = $snakekForw[$min_p_min_1] = $snakeBeginForw[$min_p_min_1] = $fpForw[$delta_plus_1_plus_p] =
- $snakeBeginForw[$delta_plus_1_plus_p] = $snakeEndForw[$delta_plus_1_plus_p] = $snakekForw[$delta_plus_1_plus_p] = -1;
- $lcsSizeForw[$min_p_min_1] = $lcsSizeForw[$delta_plus_1_plus_p] = 0;
+ // compute forward furthest reaching paths
+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+ if ($k == -$d
+ || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
+ $x = $V0[$limit_plus_1 + $k];
+ } else {
+ $x = $V0[$limit_min_1 + $k] + 1;
+ }
- $k=-$p;
- do {
- $k_plus_1 = $k+1;
- $k_min_1 = $k-1;
+ $absx = $snake0 = $x + $bottoml1;
+ $absy = $snake1 = $x - $k + $bottoml2;
+
+ while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
+ ++$absx;
+ ++$absy;
+ }
+ $x = $absx-$bottoml1;
+
+ $snake2 = $absx -$snake0;
+ $V0[$limit + $k] = $x;
+ if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1
+ && $x >= $V1[$limit + $k - $delta]) {
+ return 2 * $d - 1;
+ }
- $fpBelow = $fpForw[$k_min_1]+1;
- $fpAbove = $fpForw[$k_plus_1];
- $y = &$fpForw[$k];
- if($fpBelow>$fpAbove){
- $oldy = $y = $fpBelow;
- $lcsSizeForw[$k] = $lcsSizeForw[$k_min_1];
- $snakeBeginForw[$k] = $snakeBeginForw[$k_min_1];
- $snakeEndForw[$k] = $snakeEndForw[$k_min_1];
- $snakekForw[$k] = $snakekForw[$k_min_1];
- }else{
- $oldy = $y = $fpAbove;
- $lcsSizeForw[$k] = $lcsSizeForw[$k_plus_1];
- $snakeBeginForw[$k] = $snakeBeginForw[$k_plus_1];
- $snakeEndForw[$k] = $snakeEndForw[$k_plus_1];
- $snakekForw[$k] = $snakekForw[$k_plus_1];
- }
- $x = $y-$k;
- while($x < $m && $y < $n && $a[$x]===$b[$y]){
- ++$x;
- ++$y;
- }
- if($y-$oldy>0){
- $lcsSizeForw[$k] += $y-$oldy;
- $snakeBeginForw[$k] = $oldy;
- $snakeEndForw[$k]= $y;
- $snakekForw[$k] = $k;
- }
- if($y>=$fpBack[$k] && !$overlap[$k]){
- // there is overlap
- $overlap[$k]=TRUE;
- $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
- if($y>$fpBack[$k]+1){
- $snakeoverlap = $y-$fpBack[$k]-1;
- $lcsLength -= $snakeoverlap;
+ // check to see if we can cut down the diagonal range
+ if ($x >= $N && $end_forward > $k - 1) {
+ $end_forward = $k - 1;
+ } else if ($absy-$bottoml2 >= $M) {
+ $start_forward = $k + 1;
+ $value_to_add_forward = 0;
+ }
}
- if($lcsLength>$bestLcsLength){
- // a better sequence found!
- $bestLcsLength = $lcsLength;
- $topSnakeStart = $snakeBeginForw[$k];
- $topSnakeEnd = $snakeEndForw[$k];
- $topSnakek = $snakekForw[$k];
+ $start_diag = max($value_to_add_backward + $start_backward, -$d);
+ $end_diag = min($end_backward, $d);
+ $value_to_add_backward = 1 - $value_to_add_backward;
- // aligned snakes bite (\ )
- // ( \ \)
- $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
- $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
- $bottomSnakek = $snakekBack[$k];
+ // compute backward furthest reaching paths
+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+ if ($k == $d
+ || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
+ $x = $V1[$limit_min_1 + $k];
+ } else {
+ $x = $V1[$limit_plus_1 + $k] - 1;
+ }
- if($bottomSnakeEnd<$y){
- $bottomSnakeStart = $y;
- $bottomSnakeEnd = $y;
- $bottomSnakek = $k;
+ $y = $x - $k - $delta;
+
+ $snake2 = 0;
+ while ($x > 0 && $y > 0
+ && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
+ --$x;
+ --$y;
+ ++$snake2;
}
+ $V1[$limit + $k] = $x;
- $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
- $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
- if($bestKnownLcs==$lcsLength){
- $fpForw[$k]=$y;
- break 2;
+ // check to see if we can cut down our diagonal range
+ if ($x <= 0) {
+ $start_backward = $k + 1;
+ $value_to_add_backward = 0;
+ } else if ($y <= 0 && $end_backward > $k - 1) {
+ $end_backward = $k - 1;
}
- $maxp=$m_min_1-$bestLcsLength;
}
}
- if($k<$delta_min_1){
- ++$k;
- }else if($k>$delta){
- --$k;
- }else if($k==$delta_min_1){
- $k = $delta+$p;
- }else{
- break;
- }
- } while(TRUE);
+ } else {
+ for ($d = 0; $d <= $limit; ++$d) {
+ $start_diag = max($value_to_add_forward + $start_forward, -$d);
+ $end_diag = min($end_forward, $d);
+ $value_to_add_forward = 1 - $value_to_add_forward;
- // backward
- $fpBack[$min_p_min_1] = $snakeBeginBack[$min_p_min_1] = $snakeEndBack[$min_p_min_1] = $snakekBack[$min_p_min_1] =
- $fpBack[$delta_plus_1_plus_p] = $snakeBeginBack[$delta_plus_1_plus_p] = $snakeEndBack[$delta_plus_1_plus_p] = $snakekBack[$delta_plus_1_plus_p] = $n;
- $lcsSizeBack[$min_p_min_1] = $lcsSizeBack[$delta_plus_1_plus_p] = 0;
+ // compute forward furthest reaching paths
+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+ if ($k == -$d
+ || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
+ $x = $V0[$limit_plus_1 + $k];
+ } else {
+ $x = $V0[$limit_min_1 + $k] + 1;
+ }
- $k=$delta+$p;
- do {
- $k_plus_1 = $k+1;
- $k_min_1 = $k-1;
+ $absx = $snake0 = $x + $bottoml1;
+ $absy = $snake1 = $x - $k + $bottoml2;
+
+ while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
+ ++$absx;
+ ++$absy;
+ }
+ $x = $absx-$bottoml1;
+ $snake2 = $absx -$snake0;
+ $V0[$limit + $k] = $x;
- $fpBelow = $fpBack[$k_min_1];
- $fpAbove = $fpBack[$k_plus_1]-1;
- $y = &$fpBack[$k];
- if($fpBelow<$fpAbove){
- $oldy = $y = $fpBelow;
- $lcsSizeBack[$k] = $lcsSizeBack[$k_min_1];
- $snakeBeginBack[$k] = $snakeBeginBack[$k_min_1];
- $snakeEndBack[$k] = $snakeEndBack[$k_min_1];
- $snakekBack[$k] = $snakekBack[$k_min_1];
- }else{
- $oldy = $y = $fpAbove;
- $lcsSizeBack[$k] = $lcsSizeBack[$k_plus_1];
- $snakeBeginBack[$k] = $snakeBeginBack[$k_plus_1];
- $snakeEndBack[$k] = $snakeEndBack[$k_plus_1];
- $snakekBack[$k] = $snakekBack[$k_plus_1];
- }
- $x = $y-$k;
- while($x > -1 && $y > -1 && $a[$x]===$b[$y]){
- --$x;
- --$y;
- }
- if($oldy-$y>0){
- $lcsSizeBack[$k] += $oldy-$y;
- $snakeBeginBack[$k] = $oldy;
- $snakeEndBack[$k] = $y;
- $snakekBack[$k] = $k;
- }
- if($fpForw[$k]>=$y && !$overlap[$k]){
- // there is overlap
- $overlap[$k]=TRUE;
- $lcsLength = $lcsSizeForw[$k]+$lcsSizeBack[$k];
- if($fpForw[$k]>$y+1){
- $snakeoverlap = $fpForw[$k]-$y-1;
- $lcsLength -= $snakeoverlap;
+ // check to see if we can cut down the diagonal range
+ if ($x >= $N && $end_forward > $k - 1) {
+ $end_forward = $k - 1;
+ } else if ($absy-$bottoml2 >= $M) {
+ $start_forward = $k + 1;
+ $value_to_add_forward = 0;
+ }
}
- if($lcsLength>$bestLcsLength){
- // a better sequence found!
- $bestLcsLength = $lcsLength;
- $topSnakeStart = $snakeBeginForw[$k];
- $topSnakeEnd = $snakeEndForw[$k];
- $topSnakek = $snakekForw[$k];
+ $start_diag = max($value_to_add_backward + $start_backward, -$d);
+ $end_diag = min($end_backward, $d);
+ $value_to_add_backward = 1 - $value_to_add_backward;
- // aligned snakes bite (\ )
- // ( \ \)
- $bottomSnakeStart = max($snakeEndBack[$k]+1, $topSnakeEnd+max(0,$snakekBack[$k]-$snakekForw[$k]));
- $bottomSnakeEnd = max($snakeBeginBack[$k]+1, $bottomSnakeStart);
- $bottomSnakek = $snakekBack[$k];
+ // compute backward furthest reaching paths
+ for ($k = $start_diag; $k <= $end_diag; $k += 2) {
+ if ($k == $d
+ || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
+ $x = $V1[$limit_min_1 + $k];
+ } else {
+ $x = $V1[$limit_plus_1 + $k] - 1;
+ }
- if($bottomSnakeEnd<$fpForw[$k]){
- $bottomSnakeStart = $fpForw[$k];
- $bottomSnakeEnd = $fpForw[$k];
- $bottomSnakek = $k;
+ $y = $x - $k - $delta;
+
+ $snake2 = 0;
+ while ($x > 0 && $y > 0
+ && $from[$x +$bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
+ --$x;
+ --$y;
+ ++$snake2;
}
+ $V1[$limit + $k] = $x;
- $bestLcsLengthTop = $lcsSizeForw[$k]-($snakeEndForw[$k]-$snakeBeginForw[$k]);
- $bestLcsLengthBottom = $lcsSizeBack[$k]-($snakeBeginBack[$k]-$snakeEndBack[$k]);
- if($bestKnownLcs==$lcsLength){
- $fpBack[$k] = $y;
- break 2;
+ if ($k >= -$delta - $d && $k <= $d - $delta
+ && $x <= $V0[$limit + $k + $delta]) {
+ $snake0 = $bottoml1 + $x;
+ $snake1 = $bottoml2 + $y;
+ return 2 * $d;
}
- $maxp=$m_min_1-$bestLcsLength;
+
+ // check to see if we can cut down our diagonal range
+ if ($x <= 0) {
+ $start_backward = $k + 1;
+ $value_to_add_backward = 0;
+ } else if ($y <= 0 && $end_backward > $k - 1) {
+ $end_backward = $k - 1;
+ }
}
}
- if($k>1){
- --$k;
- }else if($k<0){
- ++$k;
- }else if($k==1){
- $k = -$p;
- }else{
- break;
- }
- } while(TRUE);
- }
+ }
+ /*
+ * computing the true LCS is too expensive, instead find the diagonal
+ * with the most progress and pretend a midle snake of length 0 occurs
+ * there.
+ */
- unset($fpForw, $fpBack, $lcsSizeForw, $lcsSizeBack);
- unset($snakeBeginForw, $snakeBeginBack, $snakeEndForw, $snakeEndBack, $snakekForw, $snakekBack);
- unset($overlap);
+ $most_progress = self::findMostProgress($M, $N, $limit, $V);
- // Mark snakes as in LCS
- $maxi = $offsetx+$topSnakeEnd-$topSnakek;
- for($i=$offsetx+$topSnakeStart-$topSnakek;$i<$maxi;++$i){
- $a_inLcs_sym[$i] = TRUE;
+ $snake0 = $bottoml1 + $most_progress[0];
+ $snake1 = $bottoml2 + $most_progress[1];
+ $snake2 = 0;
+ wfDebug('Computing the LCS is too expensive. Using a heuristic.');
+ $this->heuristicUsed = true;
+ return 5; /*
+ * HACK: since we didn't really finish the LCS computation
+ * we don't really know the length of the SES. We don't do
+ * anything with the result anyway, unless it's <=1. We know
+ * for a fact SES > 1 so 5 is as good a number as any to
+ * return here
+ */
}
- $maxi = $offsety+$topSnakeEnd;
- for($i=$offsety+$topSnakeStart;$i<$maxi;++$i){
- $b_inLcs_sym[$i] = TRUE;
- }
- $maxi = $offsetx+$bottomSnakeEnd-$bottomSnakek;
- for($i=$offsetx+$bottomSnakeStart-$bottomSnakek;$i<$maxi;++$i){
- $a_inLcs_sym[$i] = TRUE;
- }
- $maxi = $offsety+$bottomSnakeEnd;
- for($i=$offsety+$bottomSnakeStart;$i<$maxi;++$i){
- $b_inLcs_sym[$i] = TRUE;
- }
- $m_t = $topSnakeStart-$topSnakek;
- $a_t = array_slice($a, 0, $m_t);
- $b_t = array_slice($b, 0, $topSnakeStart);
+ private static function findMostProgress($M, $N, $limit, $V) {
+ $delta = $N - $M;
- $m_b = $m-($bottomSnakeEnd-$bottomSnakek);
- $n_b = $n-$bottomSnakeEnd;
- $a_b = array_slice($a, $bottomSnakeEnd-$bottomSnakek, $m_b);
- $b_b = array_slice($b, $bottomSnakeEnd, $n_b);
+ if (($M & 1) == ($limit & 1)) {
+ $forward_start_diag = max(-$M, -$limit);
+ } else {
+ $forward_start_diag = max(1 - $M, -$limit);
+ }
- wikidiff3_diffPart($a_t, $b_t, $a_inLcs, $b_inLcs, $m_t, $topSnakeStart, $offsetx, $offsety, $bestLcsLengthTop, $boundRunningTime, $max_NP_before_bound);
+ $forward_end_diag = min($N, $limit);
- wikidiff3_diffPart($a_b, $b_b, $a_inLcs, $b_inLcs, $m_b, $n_b, $offsetx+($bottomSnakeEnd-$bottomSnakek), $offsety+$bottomSnakeEnd, $bestLcsLengthBottom, $boundRunningTime, $max_NP_before_bound);
-}
+ if (($N & 1) == ($limit & 1)) {
+ $backward_start_diag = max(-$N, -$limit);
+ } else {
+ $backward_start_diag = max(1 - $N, -$limit);
+ }
-class InLcs {
+ $backward_end_diag = -min($M, $limit);
- public $inLcs;
+ $temp = array(0,0,0);
- function __construct($length){
- $this->inLcs = $length>0 ? array_fill(0,$length,FALSE): array();
- }
- /**
- * Get the length of the Longest Common Subsequence (computed)
- */
- public function getLcsLength(){
- return array_sum($this->inLcs);
+ $max_progress = array_fill(0,ceil(max($forward_end_diag
+ - $forward_start_diag, $backward_end_diag - $backward_start_diag) / 2), $temp);
+ $num_progress = 0; // the 1st entry is current, it is initialized
+ // with 0s
+
+ // first search the forward diagonals
+ for ($k = $forward_start_diag; $k <= $forward_end_diag; $k += 2) {
+ $x = $V[0][$limit + $k];
+ $y = $x - $k;
+ if ($x > $N || $y > $M) {
+ continue;
+ }
+
+ $progress = $x + $y;
+ if ($progress > $max_progress[0][2]) {
+ $num_progress = 0;
+ $max_progress[0][0] = $x;
+ $max_progress[0][1] = $y;
+ $max_progress[0][2] = $progress;
+ } else if ($progress == $max_progress[0][2]) {
+ ++$num_progress;
+ $max_progress[$num_progress][0] = $x;
+ $max_progress[$num_progress][1] = $y;
+ $max_progress[$num_progress][2] = $progress;
+ }
+ }
+
+ $max_progress_forward = true; // initially the maximum
+ // progress is in the forward
+ // direction
+
+ // now search the backward diagonals
+ for ($k = $backward_start_diag; $k <= $backward_end_diag; $k += 2) {
+ $x = $V[1][$limit + $k];
+ $y = $x - $k - $delta;
+ if ($x < 0 || $y < 0) {
+ continue;
+ }
+
+ $progress = $N - $x + $M - $y;
+ if ($progress > $max_progress[0][2]) {
+ $num_progress = 0;
+ $max_progress_forward = false;
+ $max_progress[0][0] = $x;
+ $max_progress[0][1] = $y;
+ $max_progress[0][2] = $progress;
+ } else if ($progress == $max_progress[0][2] && !$max_progress_forward) {
+ ++$num_progress;
+ $max_progress[$num_progress][0] = $x;
+ $max_progress[$num_progress][1] = $y;
+ $max_progress[$num_progress][2] = $progress;
+ }
+ }
+
+ // return the middle diagonal with maximal progress.
+ return $max_progress[floor($num_progress / 2)];
}
}
+
Index: trunk/phase3/includes/DifferenceEngine.php
===================================================================
--- trunk/phase3/includes/DifferenceEngine.php (revision 39405)
+++ trunk/phase3/includes/DifferenceEngine.php (revision 39406)
@@ -74,9 +74,10 @@
}
function showDiffPage( $diffOnly = false ) {
- global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol;
+ global $wgUser, $wgOut, $wgUseExternalEditor, $wgUseRCPatrol, $wgEnableHtmlDiff;
wfProfileIn( __METHOD__ );
+
# If external diffs are enabled both globally and for the user,
# we'll use the application/x-external-editor interface to call
# an external diff tool like kompare, kdiff3, etc.
@@ -265,11 +266,16 @@
'<div id="mw-diff-ntitle3">' . $newminor . $sk->revComment( $this->mNewRev, !$diffOnly, true ) . $rdel . "</div>" .
'<div id="mw-diff-ntitle4">' . $nextlink . $patrol . '</div>';
- $this->showDiff( $oldHeader, $newHeader );
+ if($wgEnableHtmlDiff){
+ $this->renderHtmlDiff();
+ }else{
- if ( !$diffOnly )
- $this->renderNewRevision();
+ $this->showDiff( $oldHeader, $newHeader );
+ if ( !$diffOnly ){
+ $this->renderNewRevision();
+ }
+ }
wfProfileOut( __METHOD__ );
}
@@ -319,6 +325,65 @@
wfProfileOut( __METHOD__ );
}
+
+ function renderHtmlDiff() {
+ global $wgOut, $IP;
+ wfProfileIn( __METHOD__ );
+
+ $this->showDiffStyle();
+
+ require_once( "$IP/includes/HTMLDiff.php" );
+
+ #add deleted rev tag if needed
+ if( !$this->mNewRev->userCan(Revision::DELETED_TEXT) ) {
+ $wgOut->addWikiMsg( 'rev-deleted-text-permission' );
+ } else if( $this->mNewRev->isDeleted(Revision::DELETED_TEXT) ) {
+ $wgOut->addWikiMsg( 'rev-deleted-text-view' );
+ }
+
+ if( !$this->mNewRev->isCurrent() ) {
+ $oldEditSectionSetting = $wgOut->parserOptions()->setEditSection( false );
+ }
+
+ $this->loadText();
+
+ if( is_object( $this->mOldRev ) ) {
+ $wgOut->setRevisionId( $this->mOldRev->getId() );
+ }
+
+ global $wgTitle, $wgParser, $wgTitle;
+ $popts = $wgOut->parserOptions();
+ $oldTidy = $popts->setTidy( TRUE );
+
+ $parserOutput = $wgParser->parse($this->mOldtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId );
+ $popts->setTidy( $oldTidy );
+
+ //only for new?
+ //$wgOut->addParserOutputNoText( $parserOutput );
+ $oldHtml = $parserOutput->getText();
+ wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$oldHtml ) );
+
+ if( is_object( $this->mNewRev ) ) {
+ $wgOut->setRevisionId( $this->mNewRev->getId() );
+ }
+
+ $popts = $wgOut->parserOptions();
+ $oldTidy = $popts->setTidy( TRUE );
+
+ $parserOutput = $wgParser->parse($this->mNewtext, $wgTitle, $popts, TRUE, TRUE, $wgOut->mRevisionId );
+ $popts->setTidy( $oldTidy );
+
+ //only for new?
+ $wgOut->addParserOutputNoText( $parserOutput );
+ $newHtml = $parserOutput->getText();
+ wfRunHooks( 'OutputPageBeforeHTML',array( &$wgOut, &$newHtml ) );
+
+ $differ = new HTMLDiffer(new DelegatingContentHandler($wgOut));
+ $differ->htmlDiff($oldHtml, $newHtml);
+
+ wfProfileOut( __METHOD__ );
+ }
+
/**
* Show the first revision of an article. Uses normal diff headers in
* contrast to normal "old revision" display style.
@@ -893,6 +958,30 @@
}
/**
+ * Alternative representation of a set of changes, by the index
+ * ranges that are changed.
+ */
+class RangeDifference {
+
+ public $leftstart;
+ public $leftend;
+ public $leftlength;
+
+ public $rightstart;
+ public $rightend;
+ public $rightlength;
+
+ function __construct($leftstart, $leftend, $rightstart, $rightend){
+ $this->leftstart = $leftstart;
+ $this->leftend = $leftend;
+ $this->leftlength = $leftend-$leftstart;
+ $this->rightstart = $rightstart;
+ $this->rightend = $rightend;
+ $this->rightlength = $rightend-$rightstart;
+ }
+}
+
+/**
* Class used internally by Diff to actually compute the diffs.
*
* The algorithm used here is mostly lifted from the perl module
@@ -920,22 +1009,108 @@
const MAX_XREF_LENGTH = 10000;
- function diff ($from_lines, $to_lines) {
+ function diff ($from_lines, $to_lines){
wfProfileIn( __METHOD__ );
- global $wgExternalDiffEngine;
+ // Diff and store locally
+ $this->diff_local($from_lines, $to_lines);
+ // Merge edits when possible
+ $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
+ $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
+
+ // Compute the edit operations.
$n_from = sizeof($from_lines);
$n_to = sizeof($to_lines);
+ $edits = array();
+ $xi = $yi = 0;
+ while ($xi < $n_from || $yi < $n_to) {
+ USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
+ USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
+
+ // Skip matching "snake".
+ $copy = array();
+ while ( $xi < $n_from && $yi < $n_to
+ && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
+ $copy[] = $from_lines[$xi++];
+ ++$yi;
+ }
+ if ($copy)
+ $edits[] = new _DiffOp_Copy($copy);
+
+ // Find deletes & adds.
+ $delete = array();
+ while ($xi < $n_from && $this->xchanged[$xi])
+ $delete[] = $from_lines[$xi++];
+
+ $add = array();
+ while ($yi < $n_to && $this->ychanged[$yi])
+ $add[] = $to_lines[$yi++];
+
+ if ($delete && $add)
+ $edits[] = new _DiffOp_Change($delete, $add);
+ elseif ($delete)
+ $edits[] = new _DiffOp_Delete($delete);
+ elseif ($add)
+ $edits[] = new _DiffOp_Add($add);
+ }
+ wfProfileOut( __METHOD__ );
+ return $edits;
+ }
+
+ function diff_range ($from_lines, $to_lines){
+ wfProfileIn( __METHOD__ );
+
+ // Diff and store locally
+ $this->diff_local($from_lines, $to_lines);
+
+ // Compute the ranges operations.
+ $n_from = sizeof($from_lines);
+ $n_to = sizeof($to_lines);
+
+ $ranges = array();
+ $xi = $yi = 0;
+ while ($xi < $n_from || $yi < $n_to) {
+ // Matching "snake".
+ while ( $xi < $n_from && $yi < $n_to
+ && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
+ ++$xi;
+ ++$yi;
+ }
+ // Find deletes & adds.
+ $xstart = $xi;
+ while ($xi < $n_from && $this->xchanged[$xi]){
+ ++$xi;
+ }
+
+ $ystart = $yi;
+ while ($yi < $n_to && $this->ychanged[$yi]){
+ ++$yi;
+ }
+
+ if ($xi>$xstart || $yi>$ystart){
+ $ranges[] = new RangeDifference($xstart,$xi,$ystart,$yi);
+ }
+ }
+ wfProfileOut( __METHOD__ );
+ return $ranges;
+ }
+
+ function diff_local ($from_lines, $to_lines) {
+ global $wgExternalDiffEngine;
+ wfProfileIn( __METHOD__);
+
if($wgExternalDiffEngine == 'wikidiff3'){
// wikidiff3
global $IP;
require_once( "$IP/includes/Diff.php" );
- list($this->xchanged, $this->ychanged) = wikidiff3_diff($from_lines, $to_lines, TRUE, 100000);
+ $wikidiff3 = new WikiDiff3();
+ list($this->xchanged, $this->ychanged) = $wikidiff3->diff($from_lines, $to_lines);
}else{
// old diff
- wfProfileIn( __METHOD__ ." - basicdiff");
+ $n_from = sizeof($from_lines);
+ $n_to = sizeof($to_lines);
$this->xchanged = $this->ychanged = array();
$this->xv = $this->yv = array();
$this->xind = $this->yind = array();
@@ -980,48 +1155,8 @@
// Find the LCS.
$this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
- wfProfileOut( __METHOD__ ." - basicdiff");
}
-
- // Merge edits when possible
- $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
- $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
-
- // Compute the edit operations.
- $edits = array();
- $xi = $yi = 0;
- while ($xi < $n_from || $yi < $n_to) {
- USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
- USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
-
- // Skip matching "snake".
- $copy = array();
- while ( $xi < $n_from && $yi < $n_to
- && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
- $copy[] = $from_lines[$xi++];
- ++$yi;
- }
- if ($copy)
- $edits[] = new _DiffOp_Copy($copy);
-
- // Find deletes & adds.
- $delete = array();
- while ($xi < $n_from && $this->xchanged[$xi])
- $delete[] = $from_lines[$xi++];
-
- $add = array();
- while ($yi < $n_to && $this->ychanged[$yi])
- $add[] = $to_lines[$yi++];
-
- if ($delete && $add)
- $edits[] = new _DiffOp_Change($delete, $add);
- elseif ($delete)
- $edits[] = new _DiffOp_Delete($delete);
- elseif ($add)
- $edits[] = new _DiffOp_Add($add);
- }
wfProfileOut( __METHOD__ );
- return $edits;
}
/**
@@ -1077,7 +1212,6 @@
$numer = $xlim - $xoff + $nchunks - 1;
$x = $xoff;
for ($chunk = 0; $chunk < $nchunks; $chunk++) {
- wfProfileIn( __METHOD__ . "-chunk" );
if ($chunk > 0)
for ($i = 0; $i <= $this->lcs; $i++)
$ymids[$i][$chunk-1] = $this->seq[$i];
@@ -1130,7 +1264,6 @@
if ($end == 0 || $ypos > $this->seq[$end]) {
$this->seq[++$this->lcs] = $ypos;
$this->in_seq[$ypos] = 1;
- wfProfileOut( __METHOD__ );
return $this->lcs;
}