3 namespace Caxy\HtmlDiff\Table;
5 use Caxy\HtmlDiff\AbstractDiff;
6 use Caxy\HtmlDiff\HtmlDiff;
7 use Caxy\HtmlDiff\HtmlDiffConfig;
8 use Caxy\HtmlDiff\Operation;
// Table-aware diff built on AbstractDiff: build() parses the old and new table HTML,
// pairs up their rows and serialises the merged table into $this->content.
13 class TableDiff extends AbstractDiff
// Parsed old table; assigned by buildTableDoms() from $this->oldText.
18 protected $oldTable = null;
// Parsed new table; assigned by buildTableDoms() from $this->newText.
23 protected $newTable = null;
26 * @var null|\DOMElement
// Root node of the output table; set in diffTableContent() from $this->newTable->cloneNode().
28 protected $diffTable = null;
31 * @var null|\DOMDocument
// Document into which every generated row and cell is imported.
33 protected $diffDom = null;
// NOTE(review): not read or written by any method visible here — confirm it is still needed.
38 protected $newRowOffsets = 0;
// NOTE(review): not read or written by any method visible here — confirm it is still needed.
43 protected $oldRowOffsets = 0;
// Trimmed cell text => list of TablePosition objects; populated by indexCellValues().
48 protected $cellValues = array();
/**
 * Named constructor: builds a TableDiff for the two HTML fragments and, when a
 * configuration object is supplied, applies it to the new instance.
 *
 * @param string              $oldText
 * @param string              $newText
 * @param HtmlDiffConfig|null $config
 *
 * @return self
 */
public static function create($oldText, $newText, HtmlDiffConfig $config = null)
{
    $instance = new self($oldText, $newText);

    // Without an explicit configuration the instance keeps whatever the constructor set up.
    if ($config === null) {
        return $instance;
    }

    $instance->setConfig($config);

    return $instance;
}
// Constructor: forwards all five arguments to the parent constructor unchanged; no other work
// is visible here.
// NOTE(review): only part of the parameter list is visible — check the defaults of the
// remaining parameters against the full file before relying on them.
69 * TableDiff constructor.
71 * @param string $oldText
72 * @param string $newText
73 * @param string $encoding
74 * @param array|null $specialCaseTags
75 * @param bool|null $groupDiffs
77 public function __construct(
81 $specialCaseTags = null,
84 parent::__construct($oldText, $newText, $encoding, $specialCaseTags, $groupDiffs);
// Builds the table diff and returns it as an HTML string.
// Visible flow: serve the result from the diff cache when it already holds this old/new pair;
// otherwise parse both tables, index the new table's cell values, run diffTableContent()
// (which fills $this->content), save to the cache when one is configured, and return.
90 public function build()
// Cache hit: reuse the stored output and skip all DOM work.
94 if ($this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText)) {
95 $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText);
97 return $this->content;
100 $this->buildTableDoms();
// NOTE(review): diffTableContent() assigns a new DOMDocument to $this->diffDom as its first
// statement, and indexCellValues() does not touch diffDom, so this instance looks unused —
// confirm and consider removing one of the two assignments.
102 $this->diffDom = new \DOMDocument();
104 $this->indexCellValues($this->newTable);
106 $this->diffTableContent();
108 if ($this->hasDiffCache()) {
109 $this->getDiffCache()->save($this->oldText, $this->newText, $this->content);
112 return $this->content;
/**
 * Produces the diffed table and stores its HTML in $this->content.
 *
 * A fresh output document is created and seeded with a clone of the new
 * table's root node. Every old row is then scored against every new row,
 * the scores are turned into row matches, the rows are emitted according to
 * those matches, and the resulting table node is serialised.
 */
protected function diffTableContent()
{
    $this->diffDom = new \DOMDocument();
    $this->diffTable = $this->newTable->cloneNode($this->diffDom);
    $this->diffDom->appendChild($this->diffTable);

    $rowsBefore = $this->oldTable->getRows();
    $rowsAfter = $this->newTable->getRows();

    // The same scores are kept in two orientations: [old][new] and [new][old].
    $scoresByOld = array();
    $scoresByNew = array();

    /* @var $oldRow TableRow */
    foreach ($rowsBefore as $oldIndex => $oldRow) {
        $scoresByOld[$oldIndex] = array();

        /* @var $newRow TableRow */
        foreach ($rowsAfter as $newIndex => $newRow) {
            if (!isset($scoresByNew[$newIndex])) {
                $scoresByNew[$newIndex] = array();
            }

            $score = $this->getMatchPercentage($oldRow, $newRow, $oldIndex, $newIndex);

            $scoresByOld[$oldIndex][$newIndex] = $score;
            $scoresByNew[$newIndex][$oldIndex] = $score;
        }
    }

    $rowMatches = $this->getRowMatches($scoresByOld, $scoresByNew);
    $this->diffTableRowsWithMatches($rowsBefore, $rowsAfter, $rowMatches);

    $this->content = $this->htmlFromNode($this->diffTable);
}
// Converts the row matches into an ordered list of Operation objects and then processes each
// operation, appending the resulting rows to the output table.
153 * @param TableRow[] $oldRows
154 * @param TableRow[] $newRows
155 * @param RowMatch[] $matches
157 protected function diffTableRowsWithMatches($oldRows, $newRows, $matches)
159 $operations = array();
// NOTE(review): the initialisation of $indexInOld / $indexInNew (the running cursors compared
// below) is not visible here.
164 $oldRowCount = count($oldRows);
165 $newRowCount = count($newRows);
// Sentinel match whose start and end are both the row counts, so rows after the last real
// match are still covered by the loop below.
167 $matches[] = new RowMatch($newRowCount, $oldRowCount, $newRowCount, $oldRowCount);
170 foreach ($matches as $match) {
// Does the cursor on each side already sit at the start of this match?
171 $matchAtIndexInOld = $indexInOld === $match->getStartInOld();
172 $matchAtIndexInNew = $indexInNew === $match->getStartInNew();
// The three branches pick the action for the gap before the match; the assignments to $action
// are not visible here. NOTE(review): confirm which action each branch selects.
176 if (!$matchAtIndexInOld && !$matchAtIndexInNew) {
178 } elseif ($matchAtIndexInOld && !$matchAtIndexInNew) {
180 } elseif (!$matchAtIndexInOld && $matchAtIndexInNew) {
// A non-equal action yields an extra operation that ends at the start of the match.
184 if ($action !== 'equal') {
185 $operations[] = new Operation(
188 $match->getStartInOld(),
190 $match->getStartInNew()
// The matched range itself always becomes an operation.
194 $operations[] = new Operation(
196 $match->getStartInOld(),
197 $match->getEndInOld(),
198 $match->getStartInNew(),
199 $match->getEndInNew()
// Move both cursors past the match.
202 $indexInOld = $match->getEndInOld();
203 $indexInNew = $match->getEndInNew();
// Shared across all operations; the process* methods receive it so that rowspan bookkeeping
// carries over from one row to the next.
206 $appliedRowSpans = array();
208 // process operations
209 foreach ($operations as $operation) {
// Dispatch on the operation's action (the case labels are not visible here).
210 switch ($operation->action) {
212 $this->processEqualOperation($operation, $oldRows, $newRows, $appliedRowSpans);
216 $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans);
220 $this->processInsertOperation($operation, $newRows, $appliedRowSpans);
224 $this->processReplaceOperation($operation, $oldRows, $newRows, $appliedRowSpans);
// Appends the new rows covered by the operation ([startInNew, endInNew)) as rows that have no
// old counterpart: each is passed to diffAndAppendRows() with null as the old row.
// NOTE(review): two parameter lines are not visible; the docblock and call sites indicate
// $newRows and $appliedRowSpans — confirm whether $appliedRowSpans is taken by reference.
231 * @param Operation $operation
232 * @param array $newRows
233 * @param array $appliedRowSpans
234 * @param bool $forceExpansion
236 protected function processInsertOperation(
237 Operation $operation,
240 $forceExpansion = false
242 $targetRows = array_slice($newRows, $operation->startInNew, $operation->endInNew - $operation->startInNew);
243 foreach ($targetRows as $row) {
244 $this->diffAndAppendRows(null, $row, $appliedRowSpans, $forceExpansion);
// Appends the old rows covered by the operation ([startInOld, endInOld)) as rows that have no
// new counterpart: each is passed to diffAndAppendRows() with null as the new row.
// NOTE(review): two parameter lines are not visible; the docblock and call sites indicate
// $oldRows and $appliedRowSpans — confirm whether $appliedRowSpans is taken by reference.
249 * @param Operation $operation
250 * @param array $oldRows
251 * @param array $appliedRowSpans
252 * @param bool $forceExpansion
254 protected function processDeleteOperation(
255 Operation $operation,
258 $forceExpansion = false
260 $targetRows = array_slice($oldRows, $operation->startInOld, $operation->endInOld - $operation->startInOld);
261 foreach ($targetRows as $row) {
262 $this->diffAndAppendRows($row, null, $appliedRowSpans, $forceExpansion);
/**
 * Diffs matched rows pairwise: the n-th old row of the operation's range
 * against the n-th new row. New rows without an old row at the same offset
 * are left out.
 *
 * @param Operation $operation
 * @param array     $oldRows
 * @param array     $newRows
 * @param array     $appliedRowSpans
 */
protected function processEqualOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
{
    $oldLength = $operation->endInOld - $operation->startInOld;
    $newLength = $operation->endInNew - $operation->startInNew;

    // Both slices are reindexed from zero so the same offset addresses both sides.
    $oldSlice = array_values(array_slice($oldRows, $operation->startInOld, $oldLength));
    $newSlice = array_values(array_slice($newRows, $operation->startInNew, $newLength));

    foreach ($newSlice as $offset => $newRow) {
        if (isset($oldSlice[$offset])) {
            $this->diffAndAppendRows($oldSlice[$offset], $newRow, $appliedRowSpans);
        }
    }
}
/**
 * Handles a replace operation by emitting the affected old rows as deletions
 * followed by the affected new rows as insertions, both with forced expansion.
 *
 * @param Operation $operation
 * @param array     $oldRows
 * @param array     $newRows
 * @param array     $appliedRowSpans
 */
protected function processReplaceOperation(Operation $operation, $oldRows, $newRows, &$appliedRowSpans)
{
    $forceExpansion = true;

    // Order matters: removed rows are written before their replacements.
    $this->processDeleteOperation($operation, $oldRows, $appliedRowSpans, $forceExpansion);
    $this->processInsertOperation($operation, $newRows, $appliedRowSpans, $forceExpansion);
}
// Entry point of the row matching: searches the full range of both tables (the end bounds are
// the number of entries in each match table) by delegating to findRowMatches(), which collects
// results in $matches.
// NOTE(review): the initialisation of $matches / $startInOld / $startInNew and the return
// statement are not visible here.
303 * @param array $oldMatchData
304 * @param array $newMatchData
308 protected function getRowMatches($oldMatchData, $newMatchData)
314 $endInOld = count($oldMatchData);
315 $endInNew = count($newMatchData);
317 $this->findRowMatches($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew, $matches);
// Recursive search: finds the best match inside the given old/new window, then recurses into the
// window before it and the window after it. Results are delivered through the by-reference
// $matches parameter.
// NOTE(review): some arguments of the recursive calls, and the statement that stores $match,
// are not visible here.
323 * @param array $newMatchData
324 * @param int $startInOld
325 * @param int $endInOld
326 * @param int $startInNew
327 * @param int $endInNew
328 * @param array $matches
330 protected function findRowMatches($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew, &$matches)
332 $match = $this->findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew);
333 if ($match !== null) {
// Recurse to the left only when rows remain before the match on BOTH sides.
334 if ($startInOld < $match->getStartInOld() &&
335 $startInNew < $match->getStartInNew()
337 $this->findRowMatches(
340 $match->getStartInOld(),
342 $match->getStartInNew(),
// Recurse to the right only when rows remain after the match on BOTH sides.
349 if ($match->getEndInOld() < $endInOld &&
350 $match->getEndInNew() < $endInNew
352 $this->findRowMatches(
354 $match->getEndInOld(),
356 $match->getEndInNew(),
// Scans the [new][old] score table for the highest percentage whose indices fall inside the given
// window and describes it as a one-row match; per the docblock the result is RowMatch or null.
// NOTE(review): the initial values of $bestPercentage / $bestMatch and the bodies of the
// range checks are not visible here.
365 * @param array $newMatchData
366 * @param int $startInOld
367 * @param int $endInOld
368 * @param int $startInNew
369 * @param int $endInNew
371 * @return RowMatch|null
373 protected function findRowMatch($newMatchData, $startInOld, $endInOld, $startInNew, $endInNew)
378 foreach ($newMatchData as $newIndex => $oldMatches) {
// New-row index below the window.
379 if ($newIndex < $startInNew) {
// New-row index at or past the end of the window.
383 if ($newIndex >= $endInNew) {
386 foreach ($oldMatches as $oldIndex => $percentage) {
// Old-row index below the window.
387 if ($oldIndex < $startInOld) {
// Old-row index at or past the end of the window.
391 if ($oldIndex >= $endInOld) {
// Strictly greater: on a tie the candidate encountered first is kept.
395 if ($percentage > $bestPercentage) {
396 $bestPercentage = $percentage;
398 'oldIndex' => $oldIndex,
399 'newIndex' => $newIndex,
400 'percentage' => $percentage,
406 if ($bestMatch !== null) {
// Argument order is new-start, old-start, new-end, old-end, percentage; each end is start + 1.
408 $bestMatch['newIndex'],
409 $bestMatch['oldIndex'],
410 $bestMatch['newIndex'] + 1,
411 $bestMatch['oldIndex'] + 1,
412 $bestMatch['percentage']
// Builds the output <tr> for a pair of rows (either side may be null) and, when cell colspans
// differ, an additional "extra" row. Returns array($diffRow, $extraRow).
// Also maintains $appliedRowSpans (by reference), the cells whose rowspan has to grow when
// later rows are expanded.
420 * @param TableRow|null $oldRow
421 * @param TableRow|null $newRow
422 * @param array $appliedRowSpans
423 * @param bool $forceExpansion
427 protected function diffRows($oldRow, $newRow, array &$appliedRowSpans, $forceExpansion = false)
429 // create tr dom element
// The new row is preferred as the template; the clone is shallow (attributes only, no cells).
430 $rowToClone = $newRow ?: $oldRow;
431 /* @var $diffRow \DOMElement */
432 $diffRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);
434 $oldCells = $oldRow ? $oldRow->getCells() : array();
435 $newCells = $newRow ? $newRow->getCells() : array();
// Tracks the cell index and the column position on both sides while walking the row.
437 $position = new DiffRowPosition();
441 /* @var $expandCells \DOMElement[] */
// Cells produced by a plain old/new cell diff (the branches below that use no extra row).
442 $expandCells = array();
443 /* @var $cellsWithMultipleRows \DOMElement[] */
444 $cellsWithMultipleRows = array();
446 $newCellCount = count($newCells);
// Main pass: walk the new cells.
447 while ($position->getIndexInNew() < $newCellCount) {
// The column positions of the two sides have drifted apart: let the lagging side catch up.
448 if (!$position->areColumnsEqual()) {
449 $type = $position->getLesserColumnType();
450 if ($type === 'new') {
452 $targetRow = $extraRow;
455 $targetRow = $diffRow;
// NOTE(review): operator precedence bug. `!$type === 'old'` is evaluated as
// `(!$type) === 'old'`, a boolean strictly compared with a string, which is always false.
// The condition is therefore effectively `$row && $targetRow && isset($oldCells[...])`.
// `$type !== 'old'` was presumably intended, but switching to it changes the output for
// the 'new' case — confirm against the expected fixtures before changing.
457 if ($row && $targetRow && (!$type === 'old' || isset($oldCells[$position->getIndexInOld()]))) {
458 $this->syncVirtualColumns($row, $position, $cellsWithMultipleRows, $targetRow, $type, true);
464 /* @var $newCell TableCell */
465 $newCell = $newCells[$position->getIndexInNew()];
466 /* @var $oldCell TableCell */
467 $oldCell = isset($oldCells[$position->getIndexInOld()]) ? $oldCells[$position->getIndexInOld()] : null;
// Differing colspans cannot share one cell; the extra row is created lazily on first need.
469 if ($oldCell && $newCell->getColspan() != $oldCell->getColspan()) {
470 if (null === $extraRow) {
471 /* @var $extraRow \DOMElement */
472 $extraRow = $this->diffDom->importNode($rowToClone->getDomNode()->cloneNode(false), false);
// Old cell is wider: after diffing, the new side catches up into the extra row.
475 if ($oldCell->getColspan() > $newCell->getColspan()) {
476 $this->diffCellsAndIncrementCounters(
479 $cellsWithMultipleRows,
484 $this->syncVirtualColumns($newRow, $position, $cellsWithMultipleRows, $extraRow, 'new', true);
// New cell is wider: after diffing, the old side catches up into the main row.
486 $this->diffCellsAndIncrementCounters(
489 $cellsWithMultipleRows,
494 $this->syncVirtualColumns($oldRow, $position, $cellsWithMultipleRows, $diffRow, 'old', true);
// Same colspan (or no old cell): a regular cell diff, remembered for possible expansion.
497 $diffCell = $this->diffCellsAndIncrementCounters(
500 $cellsWithMultipleRows,
504 $expandCells[] = $diffCell;
508 $oldCellCount = count($oldCells);
// Second pass: old cells not consumed by the main pass.
509 while ($position->getIndexInOld() < $oldCellCount) {
510 $diffCell = $this->diffCellsAndIncrementCounters(
511 $oldCells[$position->getIndexInOld()],
513 $cellsWithMultipleRows,
517 $expandCells[] = $diffCell;
// Each remembered cell gets rowspan + 1; a missing attribute counts as 1.
// NOTE(review): the condition guarding this loop is not visible here.
521 foreach ($expandCells as $expandCell) {
522 $rowspan = $expandCell->getAttribute('rowspan') ?: 1;
523 $expandCell->setAttribute('rowspan', 1 + $rowspan);
// With an extra row, or when expansion is forced, every cell tracked from earlier rows also
// gets rowspan + 1.
527 if ($extraRow || $forceExpansion) {
528 foreach ($appliedRowSpans as $rowSpanCells) {
529 /* @var $rowSpanCells \DOMElement[] */
530 foreach ($rowSpanCells as $extendCell) {
531 $rowspan = $extendCell->getAttribute('rowspan') ?: 1;
532 $extendCell->setAttribute('rowspan', 1 + $rowspan);
// Unless expansion is forced, the first tracked group is dropped and the list is reindexed.
537 if (!$forceExpansion) {
538 array_shift($appliedRowSpans);
539 $appliedRowSpans = array_values($appliedRowSpans);
// Append this row's multi-row cells (grouped by rowspan value) to the tracked list.
541 $appliedRowSpans = array_merge($appliedRowSpans, array_values($cellsWithMultipleRows));
543 return array($diffRow, $extraRow);
// Creates the (childless) output cell element for a pair of cells and imports it into the
// output document.
// NOTE(review): if both arguments are null, the single-cell branch dereferences $oldCell —
// confirm that no caller passes two nulls.
547 * @param TableCell|null $oldCell
548 * @param TableCell|null $newCell
550 * @return \DOMElement
552 protected function getNewCellNode(TableCell $oldCell = null, TableCell $newCell = null)
554 // If only one cell exists, use it
555 if (!$oldCell || !$newCell) {
557 ? $newCell->getDomNode()->cloneNode(false)
558 : $oldCell->getDomNode()->cloneNode(false);
560 $oldNode = $oldCell->getDomNode();
561 $newNode = $newCell->getDomNode();
// Both cells exist: start from a shallow clone of the new cell.
563 /* @var $clone \DOMElement */
564 $clone = $newNode->cloneNode(false);
// A missing rowspan/colspan attribute counts as 1.
566 $oldRowspan = $oldNode->getAttribute('rowspan') ?: 1;
567 $oldColspan = $oldNode->getAttribute('colspan') ?: 1;
568 $newRowspan = $newNode->getAttribute('rowspan') ?: 1;
569 $newColspan = $newNode->getAttribute('colspan') ?: 1;
// The output cell takes the larger span of the two sides.
571 $clone->setAttribute('rowspan', max($oldRowspan, $newRowspan));
572 $clone->setAttribute('colspan', max($oldColspan, $newColspan));
575 return $this->diffDom->importNode($clone);
// Produces the output cell for a pair of cells: runs an HtmlDiff over their inner HTML, puts the
// result into a new cell node and marks the cell with CSS classes.
579 * @param TableCell|null $oldCell
580 * @param TableCell|null $newCell
581 * @param bool $usingExtraRow
583 * @return \DOMElement
585 protected function diffCells($oldCell, $newCell, $usingExtraRow = false)
587 $diffCell = $this->getNewCellNode($oldCell, $newCell);
// A missing side contributes an empty string.
589 $oldContent = $oldCell ? $this->getInnerHtml($oldCell->getDomNode()) : '';
590 $newContent = $newCell ? $this->getInnerHtml($newCell->getDomNode()) : '';
// NOTE(review): the 'HTML-ENTITIES' pseudo-encoding of mbstring is deprecated as of PHP 8.2;
// plan a replacement (e.g. html_entity_decode) if newer PHP versions must be supported.
// NOTE(review): a further HtmlDiff::create() argument is not visible here.
592 $htmlDiff = HtmlDiff::create(
593 mb_convert_encoding($oldContent, 'UTF-8', 'HTML-ENTITIES'),
594 mb_convert_encoding($newContent, 'UTF-8', 'HTML-ENTITIES'),
597 $diff = $htmlDiff->build();
599 $this->setInnerHtml($diffCell, $diff);
// No new cell: the cell was removed, add class "del".
601 if (null === $newCell) {
602 $diffCell->setAttribute('class', trim($diffCell->getAttribute('class').' del'));
// No old cell: the cell was added, add class "ins".
605 if (null === $oldCell) {
606 $diffCell->setAttribute('class', trim($diffCell->getAttribute('class').' ins'));
// Cells generated for an extra row additionally get class "extra-row".
609 if ($usingExtraRow) {
610 $diffCell->setAttribute('class', trim($diffCell->getAttribute('class').' extra-row'));
/**
 * Parses the old and the new HTML into table structures and stores them on
 * the instance, old first.
 */
protected function buildTableDoms()
{
    $parsedOld = $this->parseTableStructure($this->oldText);
    $this->oldTable = $parsedOld;

    $parsedNew = $this->parseTableStructure($this->newText);
    $this->newTable = $parsedNew;
}
// Loads an HTML string into a new DOMDocument. The text is first converted to the configured
// encoding, passed through the purifier, and converted again before loadHTML().
// NOTE(review): the target encoding of the outer mb_convert_encoding() call is not visible here.
// NOTE(review): mb_detect_encoding() returns false when detection fails; that value is passed
// straight on as the source encoding — consider an explicit fallback.
623 * @param string $text
625 * @return \DOMDocument
627 protected function createDocumentWithHtml($text)
629 $dom = new \DOMDocument();
630 $dom->loadHTML(mb_convert_encoding(
631 $this->purifier->purify(mb_convert_encoding($text, $this->config->getEncoding(), mb_detect_encoding($text))),
633 $this->config->getEncoding()
/**
 * Loads the HTML into a DOM document, wraps the first <table> element found
 * in a Table object and fills it with its rows and cells.
 *
 * @param string $text
 *
 * @return Table
 */
protected function parseTableStructure($text)
{
    // Only the first table of the document is considered.
    $firstTable = $this->createDocumentWithHtml($text)
        ->getElementsByTagName('table')
        ->item(0);

    $table = new Table($firstTable);
    $this->parseTable($table);

    return $table;
}
/**
 * Walks the DOM below $node (the table's own node when omitted) and registers
 * every <tr> found as a row of $table, parsing its cells as well. Nodes that
 * are not rows are searched recursively; rows themselves are not descended
 * into by this method.
 *
 * @param Table         $table
 * @param \DOMNode|null $node
 */
protected function parseTable(Table $table, \DOMNode $node = null)
{
    $current = $node === null ? $table->getDomNode() : $node;

    $children = $current->childNodes;
    if (!$children) {
        return;
    }

    foreach ($children as $child) {
        if ($child->nodeName !== 'tr') {
            // Not a row itself: look for rows further down.
            $this->parseTable($table, $child);

            continue;
        }

        $row = new TableRow($child);
        $table->addRow($row);

        $this->parseTableRow($row);
    }
}
/**
 * Registers every direct <td> / <th> child of the row's DOM node as a cell
 * of the row, in document order.
 *
 * @param TableRow $row
 */
protected function parseTableRow(TableRow $row)
{
    foreach ($row->getDomNode()->childNodes as $child) {
        $tag = $child->nodeName;

        if ($tag === 'td' || $tag === 'th') {
            $row->addCell(new TableCell($child));
        }
    }
}
/**
 * Returns the HTML of all children of $node, concatenated in document order.
 *
 * @param \DOMNode $node
 *
 * @return string
 */
protected function getInnerHtml($node)
{
    $parts = array();

    foreach ($node->childNodes as $child) {
        $parts[] = $this->htmlFromNode($child);
    }

    return implode('', $parts);
}
/**
 * Serialises a node (including its subtree) to HTML by deep-importing it
 * into a scratch document and saving that document.
 *
 * @param \DOMNode $node
 *
 * @return string
 */
protected function htmlFromNode($node)
{
    $scratch = new \DOMDocument();
    $scratch->appendChild($scratch->importNode($node, true));

    return $scratch->saveHTML();
}
/**
 * Parses an HTML snippet and appends the resulting nodes to $node.
 *
 * An empty (or whitespace-only) snippet is replaced by an empty placeholder
 * span. When the parsed snippet yields no <body> or an empty one, nothing is
 * appended and $node is left untouched.
 *
 * @param \DOMNode $node
 * @param string   $html
 */
protected function setInnerHtml($node, $html)
{
    // DOMDocument::loadHTML does not allow empty strings.
    if (strlen(trim($html)) === 0) {
        $html = '<span class="empty"></span>';
    }

    $doc = $this->createDocumentWithHtml($html);
    $body = $doc->getElementsByTagName('body')->item(0);

    // The snippet is purified before parsing, so a non-empty input can still
    // end up without any body content. Bail out instead of reading
    // childNodes on null or appending an empty fragment.
    if ($body === null || !$body->hasChildNodes()) {
        return;
    }

    $owner = $node->ownerDocument;
    $fragment = $owner->createDocumentFragment();
    foreach ($body->childNodes as $child) {
        // Nodes are copied, not moved, so the list being iterated stays stable.
        $fragment->appendChild($owner->importNode($child, true));
    }

    $node->appendChild($fragment);
}
/**
 * Records, for every cell of the table, its position under the cell's
 * trimmed text content in $this->cellValues. Cells with identical text share
 * one entry holding all their positions.
 *
 * @param Table $table
 */
protected function indexCellValues(Table $table)
{
    foreach ($table->getRows() as $rowIndex => $row) {
        foreach ($row->getCells() as $cellIndex => $cell) {
            $text = trim($cell->getDomNode()->textContent);
            $position = new TablePosition($rowIndex, $cellIndex);

            if (isset($this->cellValues[$text])) {
                $this->cellValues[$text][] = $position;
            } else {
                $this->cellValues[$text] = array($position);
            }
        }
    }
}
// Lets one side ('old' or 'new', given by $diffType) catch up: while that side's column position
// is behind the other's and the row still has a cell at the current index, the cell is diffed
// against nothing, appended to $diffRow, and the position is advanced by the cell's colspan.
// NOTE(review): several parameter lines and the arguments of the second diffCells() call are
// not visible here.
769 * @param TableRow $tableRow
770 * @param DiffRowPosition $position
771 * @param array $cellsWithMultipleRows
772 * @param \DOMNode $diffRow
773 * @param string $diffType
774 * @param bool $usingExtraRow
776 protected function syncVirtualColumns(
778 DiffRowPosition $position,
779 &$cellsWithMultipleRows,
782 $usingExtraRow = false
784 $currentCell = $tableRow->getCell($position->getIndex($diffType));
785 while ($position->isColumnLessThanOther($diffType) && $currentCell) {
// For 'new' the cell is treated as inserted (no old cell).
786 $diffCell = $diffType === 'new' ? $this->diffCells(null, $currentCell, $usingExtraRow) : $this->diffCells(
791 // Store cell in appliedRowSpans if spans multiple rows
// Cells are grouped under their rowspan value.
792 if ($diffCell->getAttribute('rowspan') > 1) {
793 $cellsWithMultipleRows[$diffCell->getAttribute('rowspan')][] = $diffCell;
795 $diffRow->appendChild($diffCell);
796 $position->incrementColumn($diffType, $currentCell->getColspan());
// Advance the cell index and fetch the next cell (the loop ends when there is none).
797 $currentCell = $tableRow->getCell($position->incrementIndex($diffType));
// Diffs one pair of cells, appends the resulting cell to $diffRow and advances $position for each
// side that supplied a cell (index by one, column by that cell's colspan). Per the docblock the
// generated cell element is returned.
// NOTE(review): some parameter lines and the return statement are not visible here.
802 * @param null|TableCell $oldCell
803 * @param null|TableCell $newCell
804 * @param array $cellsWithMultipleRows
805 * @param \DOMElement $diffRow
806 * @param DiffRowPosition $position
807 * @param bool $usingExtraRow
809 * @return \DOMElement
811 protected function diffCellsAndIncrementCounters(
814 &$cellsWithMultipleRows,
816 DiffRowPosition $position,
817 $usingExtraRow = false
819 $diffCell = $this->diffCells($oldCell, $newCell, $usingExtraRow);
820 // Store cell in appliedRowSpans if spans multiple rows
// Cells are grouped under their rowspan value.
821 if ($diffCell->getAttribute('rowspan') > 1) {
822 $cellsWithMultipleRows[$diffCell->getAttribute('rowspan')][] = $diffCell;
824 $diffRow->appendChild($diffCell);
826 if ($newCell !== null) {
827 $position->incrementIndexInNew();
828 $position->incrementColumnInNew($newCell->getColspan());
831 if ($oldCell !== null) {
832 $position->incrementIndexInOld();
833 $position->incrementColumnInOld($oldCell->getColspan());
// Diffs one pair of rows via diffRows() and appends the resulting row to the output table,
// followed by the extra row.
// NOTE(review): the diffRows() arguments and any condition guarding the extra-row append are
// not visible here; diffRows() can return null for $extraRow, so a guard is expected.
840 * @param TableRow|null $oldRow
841 * @param TableRow|null $newRow
842 * @param array $appliedRowSpans
843 * @param bool $forceExpansion
845 protected function diffAndAppendRows($oldRow, $newRow, &$appliedRowSpans, $forceExpansion = false)
847 list($rowDom, $extraRow) = $this->diffRows(
854 $this->diffTable->appendChild($rowDom);
857 $this->diffTable->appendChild($extraRow);
// Scores how well an old row matches a new row: per-cell text similarities are summed and divided
// by a total that grows with the number of cells and with the distance between the row indices.
// NOTE(review): the initialisation of $thresholdCount / $percentage and a guard around the
// similar_text() call are not visible here.
862 * @param TableRow $oldRow
863 * @param TableRow $newRow
864 * @param int $oldIndex
865 * @param int $newIndex
869 protected function getMatchPercentage(TableRow $oldRow, TableRow $newRow, $oldIndex, $newIndex)
// The first cell counts 1.5 times when it matches almost exactly (see below).
871 $firstCellWeight = 1.5;
// Rows far apart are penalised: every index of distance adds 0.25 to the divisor.
872 $indexDeltaWeight = 0.25 * (abs($oldIndex - $newIndex));
874 $minCells = min(count($newRow->getCells()), count($oldRow->getCells()));
// Always positive because $firstCellWeight is 1.5, so the zero check at the end never fails.
875 $totalCount = ($minCells + $firstCellWeight + $indexDeltaWeight) * 100;
// NOTE(review): the loop key overwrites the $newIndex parameter (row index) with the cell
// index. Harmless here because $indexDeltaWeight is already computed, but a distinct name
// would avoid confusion.
876 foreach ($newRow->getCells() as $newIndex => $newCell) {
// The old cell at the same cell index is the comparison partner.
877 $oldCell = $oldRow->getCell($newIndex);
// similar_text() writes the similarity in percent into $percentage.
881 similar_text($oldCell->getInnerHtml(), $newCell->getInnerHtml(), $percentage);
// Only cells above half of the configured match threshold contribute.
883 if ($percentage > ($this->config->getMatchThreshold() * 0.50)) {
884 $increment = $percentage;
// A first cell that is more than 95% similar is weighted by $firstCellWeight.
885 if ($newIndex === 0 && $percentage > 95) {
886 $increment = $increment * $firstCellWeight;
888 $thresholdCount += $increment;
893 return ($totalCount > 0) ? ($thresholdCount / $totalCount) : 0;