3 namespace Caxy\HtmlDiff;
5 use Caxy\HtmlDiff\Strategy\ListItemMatchStrategy;
6 use Sunra\PhpSimple\HtmlDomParser;
// Diffs two HTML lists item by item; build() is the entry point.
8 class ListDiffLines extends AbstractDiff
// CSS class values attached to list items in the rendered diff:
//  - ADDED:   applied by addListItem() to items that are not replacements.
//  - DELETED: applied by deleteListItem().
//  - CHANGED: applied by addListItem() to replacements and by
//             processOperations() to an item that follows a deletion.
//  - NONE:    applied by processOperations() to items that are carried over.
10 const CLASS_LIST_ITEM_ADDED = 'normal new';
11 const CLASS_LIST_ITEM_DELETED = 'removed';
12 const CLASS_LIST_ITEM_CHANGED = 'replacement';
13 const CLASS_LIST_ITEM_NONE = 'normal';
// Tag names that findListNode() joins into the selector it searches for.
15 protected static $listTypes = array('ul', 'ol', 'dl');
18 * List of tags that should be included when retrieving
19 * text from a single list item that will be used in
20 * matching logic (and only in matching logic).
22 * @see getRelevantNodeText()
// getRelevantNodeText() checks this list only for children that themselves
// have child nodes; children with other tag names are left out of the
// matching text.
26 protected static $listContentTags = array(
27 'h1','h2','h3','h4','h5','pre','div','br','hr','code',
28 'input','form','img','span','a','i','b','strong','em',
29 'font','big','del','tt','sub','sup','strike',
// Set by build() to an LcsService wrapping a ListItemMatchStrategy and used
// by getListItemOperations() to pair old list items with new ones.
35 protected $lcsService;
/**
 * Named constructor: creates a list differ for the two texts and applies
 * the configuration object when one is supplied.
 *
 * @param string              $oldText
 * @param string              $newText
 * @param HtmlDiffConfig|null $config
 *
 * @return ListDiffLines
 */
public static function create($oldText, $newText, HtmlDiffConfig $config = null)
{
    $instance = new self($oldText, $newText);

    // Without an explicit configuration the instance keeps its defaults.
    if (null === $config) {
        return $instance;
    }

    $instance->setConfig($config);

    return $instance;
}
// Builds the list diff for the old/new texts held by this instance and
// returns it.
// NOTE(review): the embedded line numbering is not contiguous around the
// start of this method, so at least one statement may be missing from this
// excerpt — confirm against the full file before changing the method.
58 public function build()
// When a diff cache is available and already holds an entry for this exact
// old/new pair, store that entry in $this->content and return it.
62 if ($this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText)) {
63 $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText);
65 return $this->content;
// Otherwise create the LCS service around a list-item match strategy that
// is given the configured match threshold, and diff the lists.
// NOTE(review): nothing visible here writes the fresh result to the cache
// or to $this->content — confirm whether that is intentional.
68 $matchStrategy = new ListItemMatchStrategy($this->config->getMatchThreshold());
69 $this->lcsService = new LcsService($matchStrategy);
71 return $this->listByLines($this->oldText, $this->newText);
/**
 * Parses both HTML fragments, locates the first list element in each and
 * renders the item-by-item diff of the two lists.
 *
 * @param string $old HTML containing the original list
 * @param string $new HTML containing the revised list
 *
 * @return string outer HTML of the diffed list
 *
 * @throws \InvalidArgumentException when a fragment cannot be parsed or
 *                                   does not contain a list element
 */
protected function listByLines($old, $new)
{
    /* @var $newDom \simple_html_dom|false */
    $newDom = HtmlDomParser::str_get_html($new);
    /* @var $oldDom \simple_html_dom|false */
    $oldDom = HtmlDomParser::str_get_html($old);

    // str_get_html() yields false for empty or oversized input; fail with a
    // clear message instead of a fatal "member function on bool" error.
    if (!$newDom || !$oldDom) {
        throw new \InvalidArgumentException(
            'Unable to parse the list HTML: the input is empty or too large.'
        );
    }

    $newListNode = $this->findListNode($newDom);
    $oldListNode = $this->findListNode($oldDom);

    // The lookup yields null when a fragment holds no list element; the
    // steps below dereference both nodes, so stop here.
    if (!$newListNode || !$oldListNode) {
        throw new \InvalidArgumentException(
            'Both the old and the new HTML must contain a list element.'
        );
    }

    $operations = $this->getListItemOperations($oldListNode, $newListNode);

    return $this->processOperations($operations, $oldListNode, $newListNode);
}
/**
 * Looks up the first list element inside the given DOM or node.
 *
 * @param \simple_html_dom|\simple_html_dom_node $dom
 *
 * @return \simple_html_dom_node[]|\simple_html_dom_node|null
 */
protected function findListNode($dom)
{
    // One selector covering every configured list tag, comma separated.
    $selector = implode(', ', static::$listTypes);

    // Index 0 requests a single result: the first match.
    return $dom->find($selector, 0);
}
106 * @param \simple_html_dom_node $oldListNode
107 * @param \simple_html_dom_node $newListNode
109 * @return array|Operation[]
// Turns the LCS matches between the old and new list items into Operation
// objects, one per gap between consecutive matches.
// NOTE(review): this excerpt omits some lines of the method, including the
// arguments passed to each Operation constructor — check the full file
// before relying on the exact ranges each operation carries.
111 protected function getListItemOperations($oldListNode, $newListNode)
113 // Prepare arrays of list item content to use in LCS algorithm
114 $oldListText = $this->getListTextArray($oldListNode);
115 $newListText = $this->getListTextArray($newListNode);
// The result is iterated below as old position => new position.
117 $lcsMatches = $this->lcsService->longestCommonSubsequence($oldListText, $newListText);
119 $oldLength = count($oldListText);
120 $newLength = count($newListText);
122 $operations = array();
// Cursors holding the positions of the most recent match on each side.
// Positions look 1-based: the cursors start at 0 and processOperations()
// reads children($i - 1).
123 $currentLineInOld = 0;
124 $currentLineInNew = 0;
// Sentinel match one past the end of both lists, so that unmatched items
// after the last real match still fall into a gap handled by the loop.
125 $lcsMatches[$oldLength + 1] = $newLength + 1;
// NOTE(review): the gap logic presumably relies on the matches being
// iterated in ascending position order — verify against LcsService.
126 foreach ($lcsMatches as $matchInOld => $matchInNew) {
127 // No matching line in new list
// NOTE(review): the body of this branch is not visible in this excerpt.
128 if ($matchInNew === 0) {
// First positions after the previous match on each side.
132 $nextLineInOld = $currentLineInOld + 1;
133 $nextLineInNew = $currentLineInNew + 1;
// Both lists have items between the previous match and this one.
135 if ($matchInNew > $nextLineInNew && $matchInOld > $nextLineInOld) {
137 $operations[] = new Operation(
// Only the new list has items in the gap.
144 } elseif ($matchInNew > $nextLineInNew && $matchInOld === $nextLineInOld) {
145 // Add items before this
146 $operations[] = new Operation(
// Only the old list has items in the gap.
153 } elseif ($matchInNew === $nextLineInNew && $matchInOld > $nextLineInOld) {
154 // Delete items before this
155 $operations[] = new Operation(
// Move both cursors onto the match that was just processed.
164 $currentLineInNew = $matchInNew;
165 $currentLineInOld = $matchInOld;
/**
 * Collects, in order, the matching text of every direct child of the list
 * node.
 *
 * @param \simple_html_dom_node $listNode
 *
 * @return array
 */
protected function getListTextArray($listNode)
{
    $itemTexts = array_map(array($this, 'getRelevantNodeText'), $listNode->children());

    // Re-index so the result is a plain zero-based list.
    return array_values($itemTexts);
}
/**
 * Produces the text of a node that is used when matching list items.
 *
 * A node without child elements contributes its inner text. Otherwise each
 * of its nodes is visited: childless ones contribute their outer text, ones
 * whose tag is listed in $listContentTags contribute a bare tag wrapped
 * around their own relevant text, and all others are left out.
 *
 * @param \simple_html_dom_node $node
 *
 * @return string
 */
protected function getRelevantNodeText($node)
{
    if (!$node->hasChildNodes()) {
        return $node->innertext();
    }

    $pieces = array();

    foreach ($node->nodes as $childNode) {
        /* @var $childNode \simple_html_dom_node */
        if (!$childNode->hasChildNodes()) {
            $pieces[] = $childNode->outertext();
            continue;
        }

        $tagName = $childNode->nodeName();
        if (!in_array($tagName, static::$listContentTags, true)) {
            // Not a content tag: excluded from the matching text.
            continue;
        }

        $pieces[] = '<'.$tagName.'>'.$this->getRelevantNodeText($childNode).'</'.$tagName.'>';
    }

    return implode('', $pieces);
}
/**
 * Marks a list item as removed: adds the "deleted" class, wraps its content
 * in a <del> element and returns the item's markup.
 *
 * @param \simple_html_dom_node $li
 *
 * @return string
 */
protected function deleteListItem($li)
{
    $this->addClassToNode($li, self::CLASS_LIST_ITEM_DELETED);

    $currentContent = $li->innertext;
    $li->innertext = '<del>'.$currentContent.'</del>';

    return $li->outertext;
}
/**
 * Marks a list item as inserted: adds the "changed" class when the item
 * replaces another one and the "added" class otherwise, wraps its content
 * in an <ins> element and returns the item's markup.
 *
 * @param \simple_html_dom_node $li
 * @param bool                  $replacement
 *
 * @return string
 */
protected function addListItem($li, $replacement = false)
{
    if ($replacement) {
        $cssClass = self::CLASS_LIST_ITEM_CHANGED;
    } else {
        $cssClass = self::CLASS_LIST_ITEM_ADDED;
    }

    $this->addClassToNode($li, $cssClass);

    $currentContent = $li->innertext;
    $li->innertext = '<ins>'.$currentContent.'</ins>';

    return $li->outertext;
}
238 * @param Operation[]|array $operations
239 * @param \simple_html_dom_node $oldListNode
240 * @param \simple_html_dom_node $newListNode
// Renders the diffed list: walks the operations in order, emits the old
// items that lie between them, emits the added/deleted/changed items of
// each operation, emits any old items left after the last operation, and
// finally writes the collected markup into the new list node.
// NOTE(review): this excerpt omits several lines of the method, among them
// the initialisation of $output, $indexInOld and $indexInNew, the statements
// that advance those indexes inside the loops, and the handling of
// $replaced and $changeDelta — consult the full file before editing.
244 protected function processOperations($operations, $oldListNode, $newListNode)
// Action of the previously processed operation; null before the first one.
250 $lastOperation = null;
252 foreach ($operations as $operation) {
// Emit the old items located before this operation's start. For an ADDED
// operation the start is compared with $indexInOld itself, otherwise with
// $indexInOld + 1.
254 while ($operation->startInOld > ($operation->action === Operation::ADDED ? $indexInOld : $indexInOld + 1)) {
255 $li = $oldListNode->children($indexInOld);
// Pick the counterpart item from the new list (the rest of this condition
// is on a line that is not visible in this excerpt).
257 if ($operation->startInNew > ($operation->action === Operation::DELETED ? $indexInNew
260 $matchingLi = $newListNode->children($indexInNew);
// With a counterpart, replace the item's content by the HtmlDiff of the old
// and new content.
262 if (null !== $matchingLi) {
263 $htmlDiff = HtmlDiff::create($li->innertext, $matchingLi->innertext, $this->config);
264 $li->innertext = $htmlDiff->build();
// Carried-over items get the "normal" class, unless the previous operation
// was a deletion and $replaced is still false, in which case the item gets
// the "replacement" class.
267 $class = self::CLASS_LIST_ITEM_NONE;
269 if ($lastOperation === Operation::DELETED && !$replaced) {
270 $class = self::CLASS_LIST_ITEM_CHANGED;
// Append the class to whatever class attribute the item already has.
273 $li->setAttribute('class', trim($li->getAttribute('class').' '.$class));
275 $output .= $li->outertext;
// Emit the items covered by the operation itself. The operation ranges are
// inclusive and one higher than the child index, hence children($i - 1).
279 switch ($operation->action) {
280 case Operation::ADDED:
281 for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) {
282 $output .= $this->addListItem($newListNode->children($i - 1));
// Record how far the new list has been consumed.
284 $indexInNew = $operation->endInNew;
287 case Operation::DELETED:
288 for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) {
289 $output .= $this->deleteListItem($oldListNode->children($i - 1));
// Record how far the old list has been consumed.
291 $indexInOld = $operation->endInOld;
294 case Operation::CHANGED:
// Old items of the range are emitted as deletions first...
296 for ($i = $operation->startInOld; $i <= $operation->endInOld; $i++) {
297 $output .= $this->deleteListItem($oldListNode->children($i - 1));
// ...then the new items as insertions, flagged as replacements while
// $changeDelta is negative.
300 for ($i = $operation->startInNew; $i <= $operation->endInNew; $i++) {
301 $output .= $this->addListItem($newListNode->children($i - 1), $changeDelta < 0);
304 $indexInOld = $operation->endInOld;
305 $indexInNew = $operation->endInNew;
309 $lastOperation = $operation->action;
// Emit the old items that remain after the last operation, pairing each
// with the next new item for as long as the new list has items left.
312 $oldCount = count($oldListNode->children());
313 $newCount = count($newListNode->children());
314 while ($indexInOld < $oldCount) {
315 $li = $oldListNode->children($indexInOld);
317 if ($indexInNew < $newCount) {
318 $matchingLi = $newListNode->children($indexInNew);
320 if (null !== $matchingLi) {
321 $htmlDiff = HtmlDiff::create($li->innertext(), $matchingLi->innertext(), $this->config);
322 $li->innertext = $htmlDiff->build();
325 $class = self::CLASS_LIST_ITEM_NONE;
// Here the "replacement" class depends only on the last operation having
// been a deletion; $replaced is not consulted.
327 if ($lastOperation === Operation::DELETED) {
328 $class = self::CLASS_LIST_ITEM_CHANGED;
330 $li->setAttribute('class', trim($li->getAttribute('class').' '.$class));
332 $output .= $li->outertext;
// The new list node becomes the container of the result: its content is
// replaced by the collected markup and "diff-list" is appended to its
// class attribute before its outer HTML is returned.
336 $newListNode->innertext = $output;
337 $newListNode->setAttribute('class', trim($newListNode->getAttribute('class').' diff-list'));
339 return $newListNode->outertext;
/**
 * Appends a CSS class to the node's class attribute, keeping whatever the
 * attribute already contains.
 *
 * @param \simple_html_dom_node $node
 * @param string                $class
 */
protected function addClassToNode($node, $class)
{
    $existing = $node->getAttribute('class');

    // trim() removes the leading space left when there was no class yet.
    $node->setAttribute('class', trim($existing.' '.$class));
}