3 namespace Caxy\HtmlDiff;
8 abstract class AbstractDiff
/**
 * Legacy list of tag names treated as special-case tags.
 *
 * @var string[]
 *
 * @deprecated since 0.1.0
 */
public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');

/**
 * Legacy list of special-case characters.
 *
 * @var string[]
 *
 * @deprecated since 0.1.0
 */
public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');

/**
 * Legacy default value of the "group diffs" option.
 *
 * @var bool
 *
 * @deprecated since 0.1.0
 */
public static $defaultGroupDiffs = true;
/**
 * Word list of the old text; replaced through setOldWords().
 *
 * @var array
 */
protected $oldWords = array();

/**
 * Word list of the new text; replaced through setNewWords().
 *
 * @var array
 */
protected $newWords = array();

/**
 * DiffCache instances, keyed by the spl_object_hash() of their cache provider.
 *
 * @var DiffCache[]
 */
protected $diffCaches = array();

/**
 * Optional HTMLPurifier configuration; while null, initPurifier() creates a default one.
 *
 * @var \HTMLPurifier_Config|null
 */
protected $purifierConfig = null;

/**
 * Raised to true every time the old or the new word list is replaced.
 *
 * @see array_slice_cached();
 *
 * @var bool
 */
protected $resetCache = false;
/**
 * AbstractDiff constructor.
 *
 * Creates a fresh HtmlDiffConfig for the given encoding, applies the optional
 * legacy overrides to it and stores both input texts.
 *
 * @param string     $oldText
 * @param string     $newText
 * @param string     $encoding
 * @param null|array $specialCaseTags only applied to the config when not null
 * @param null|bool  $groupDiffs      only applied to the config when not null
 */
public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
{
    // mbstring will substitute a space (0x20) for characters it cannot convert.
    mb_substitute_character(0x20);

    $initialConfig = HtmlDiffConfig::create()->setEncoding($encoding);
    $this->setConfig($initialConfig);

    if (null !== $specialCaseTags) {
        $this->config->setSpecialCaseTags($specialCaseTags);
    }

    if (null !== $groupDiffs) {
        $this->config->setGroupDiffs($groupDiffs);
    }

    $this->oldText = $oldText;
    $this->newText = $newText;
}
/**
 * Builds the diff; every concrete diff class supplies its own implementation.
 *
 * @return bool|string
 */
abstract public function build();
/**
 * Initializes HTMLPurifier with cache location.
 *
 * Uses the configuration injected through setHTMLPurifierConfig() when there
 * is one, otherwise HTMLPurifier's default configuration.
 *
 * @param null|string $defaultPurifierSerializerCache
 */
public function initPurifier($defaultPurifierSerializerCache = null)
{
    $purifierSettings = (null === $this->purifierConfig)
        ? \HTMLPurifier_Config::createDefault()
        : $this->purifierConfig;

    // Cache.SerializerPath defaults to Null and sets
    // the location to inside the vendor HTMLPurifier library
    // under the DefinitionCache/Serializer folder.
    if (null !== $defaultPurifierSerializerCache) {
        $purifierSettings->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
    }

    $this->purifier = new \HTMLPurifier($purifierSettings);
}
/**
 * Prepare (purify) the HTML
 *
 * Initializes the purifier with the configured cache location, then replaces
 * the old and the new text by their purified versions.
 */
protected function prepare()
{
    $cacheLocation = $this->config->getPurifierCacheLocation();
    $this->initPurifier($cacheLocation);

    // Old text first, then new text.
    foreach (array('oldText', 'newText') as $property) {
        $this->{$property} = $this->purifyHtml($this->{$property});
    }
}
/**
 * Returns the DiffCache that wraps the configured cache provider.
 *
 * One DiffCache is created per provider object and reused on later calls.
 *
 * @return DiffCache|null null when no cache provider is configured
 */
protected function getDiffCache()
{
    if (!$this->hasDiffCache()) {
        return null;
    }

    $provider = $this->getConfig()->getCacheProvider();
    $key = spl_object_hash($provider);

    if (!array_key_exists($key, $this->diffCaches)) {
        $this->diffCaches[$key] = new DiffCache($provider);
    }

    return $this->diffCaches[$key];
}
/**
 * Tells whether the current config carries a cache provider.
 *
 * @return bool
 */
protected function hasDiffCache()
{
    $provider = $this->getConfig()->getCacheProvider();

    return null !== $provider;
}
/**
 * Returns the configuration object this diff works with.
 *
 * @return HtmlDiffConfig
 */
public function getConfig()
{
    return $this->config;
}
/**
 * Replaces the configuration object this diff works with.
 *
 * @param HtmlDiffConfig $config
 *
 * @return AbstractDiff
 */
public function setConfig(HtmlDiffConfig $config)
{
    $this->config = $config;

    return $this;
}
/**
 * Returns the match threshold stored in the config.
 *
 * @deprecated since 0.1.0
 */
public function getMatchThreshold()
{
    $config = $this->config;

    return $config->getMatchThreshold();
}
/**
 * Stores the match threshold in the config.
 *
 * @param int $matchThreshold
 *
 * @return AbstractDiff
 *
 * @deprecated since 0.1.0
 */
public function setMatchThreshold($matchThreshold)
{
    $config = $this->config;
    $config->setMatchThreshold($matchThreshold);

    return $this;
}
/**
 * Replaces the special-case characters stored in the config.
 *
 * @param array $chars
 *
 * @deprecated since 0.1.0
 */
public function setSpecialCaseChars(array $chars)
{
    $config = $this->config;
    $config->setSpecialCaseChars($chars);
}
/**
 * Returns the special-case characters stored in the config.
 *
 * @deprecated since 0.1.0
 */
public function getSpecialCaseChars()
{
    $config = $this->config;

    return $config->getSpecialCaseChars();
}
/**
 * Forwards a special-case character to the config's addSpecialCaseChar().
 *
 * @param string $char
 *
 * @deprecated since 0.1.0
 */
public function addSpecialCaseChar($char)
{
    $config = $this->config;
    $config->addSpecialCaseChar($char);
}
/**
 * Forwards a special-case character to the config's removeSpecialCaseChar().
 *
 * @param string $char
 *
 * @deprecated since 0.1.0
 */
public function removeSpecialCaseChar($char)
{
    $config = $this->config;
    $config->removeSpecialCaseChar($char);
}
/**
 * Replaces the special-case tags stored in the config.
 *
 * @param array $tags tag names; defaults to an empty list
 *
 * @deprecated since 0.1.0
 */
public function setSpecialCaseTags(array $tags = array())
{
    // This used to call setSpecialCaseChars(), which overwrote the special-case
    // characters with tag names and left the tag list untouched.
    $this->config->setSpecialCaseTags($tags);
}
/**
 * Forwards a tag name to the config's addSpecialCaseTag().
 *
 * @deprecated since 0.1.0
 */
public function addSpecialCaseTag($tag)
{
    $config = $this->config;
    $config->addSpecialCaseTag($tag);
}
/**
 * Forwards a tag name to the config's removeSpecialCaseTag().
 *
 * @deprecated since 0.1.0
 */
public function removeSpecialCaseTag($tag)
{
    $config = $this->config;
    $config->removeSpecialCaseTag($tag);
}
/**
 * Returns the special-case tags stored in the config.
 *
 * @deprecated since 0.1.0
 */
public function getSpecialCaseTags()
{
    $config = $this->config;

    return $config->getSpecialCaseTags();
}
/**
 * Returns the old text as currently stored (prepare() replaces it by its purified form).
 */
public function getOldHtml()
{
    return $this->oldText;
}
/**
 * Returns the new text as currently stored (prepare() replaces it by its purified form).
 */
public function getNewHtml()
{
    return $this->newText;
}
/**
 * Returns the diff content held by this object; null after clearContent().
 */
public function getDifference()
{
    return $this->content;
}
/**
 * Clears the diff content.
 *
 * Afterwards getDifference() returns null.
 */
public function clearContent()
{
    $this->content = null;
}
/**
 * Stores the "group diffs" flag in the config.
 *
 * @param bool $boolean
 *
 * @return AbstractDiff
 *
 * @deprecated since 0.1.0
 */
public function setGroupDiffs($boolean)
{
    $config = $this->config;
    $config->setGroupDiffs($boolean);

    return $this;
}
/**
 * Returns the "group diffs" flag stored in the config.
 *
 * @deprecated since 0.1.0
 */
public function isGroupDiffs()
{
    $config = $this->config;

    return $config->isGroupDiffs();
}
/**
 * Injects the HTMLPurifier configuration that initPurifier() uses in place of the default one.
 *
 * @param \HTMLPurifier_Config $config
 */
public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
{
    $this->purifierConfig = $config;
}
/**
 * Builds a case-insensitive regular expression for the opening of a tag.
 *
 * The expression matches "<", the tag name and everything up to, but not
 * including, the next ">". The tag name is inserted verbatim (not quoted).
 */
protected function getOpeningTag($tag)
{
    return "/<{$tag}[^>]*/i";
}
/**
 * Builds the literal closing tag for a tag name, e.g. "</p>" for "p".
 */
protected function getClosingTag($tag)
{
    return "</{$tag}>";
}
389 * @param string $start
394 protected function getStringBetween($str, $start, $end)
396 $expStr = explode($start, $str, 2);
397 if (count($expStr) > 1) {
398 $expStr = explode($end, $expStr[ 1 ]);
399 if (count($expStr) > 1) {
402 return implode($end, $expStr);
/**
 * Runs a piece of HTML through the HTMLPurifier instance created by initPurifier().
 *
 * The former Tidy code path has been removed: it was guarded by a condition
 * ending in "&& false", so it could never run.
 *
 * @param string $html
 *
 * @return string the purified HTML
 */
protected function purifyHtml($html)
{
    return $this->purifier->purify($html);
}
/**
 * Splits the old and the new text into characters, groups those into words
 * and stores the resulting word lists.
 */
protected function splitInputsToWords()
{
    $oldCharacters = $this->explode($this->oldText);
    $this->setOldWords($this->convertHtmlToListOfWords($oldCharacters));

    $newCharacters = $this->explode($this->newText);
    $this->setNewWords($this->convertHtmlToListOfWords($newCharacters));
}
/**
 * Replaces the old word list and raises the reset-cache flag.
 *
 * @param array $oldWords
 */
protected function setOldWords(array $oldWords)
{
    $this->oldWords = $oldWords;
    $this->resetCache = true;
}
/**
 * Replaces the new word list and raises the reset-cache flag.
 *
 * @param array $newWords
 */
protected function setNewWords(array $newWords)
{
    $this->newWords = $newWords;
    $this->resetCache = true;
}
/**
 * Tells whether a text is alphanumeric once all configured special-case
 * characters have been removed from it.
 *
 * The check is done by ctype_alnum(), so a text with nothing left after the
 * removal yields false.
 *
 * @param string $text
 *
 * @return bool
 */
protected function isPartOfWord($text)
{
    $specialChars = $this->config->getSpecialCaseChars();
    $remainder = str_replace($specialChars, '', $text);

    return ctype_alnum($remainder);
}
463 * @param array $characterString
467 protected function convertHtmlToListOfWords($characterString)
472 foreach ($characterString as $i => $character) {
475 if ($this->isStartOfTag($character)) {
476 if ($current_word != '') {
477 $words[] = $current_word;
482 } elseif (preg_match("/\s/", $character)) {
483 if ($current_word !== '') {
484 $words[] = $current_word;
486 $current_word = preg_replace('/\s+/S', ' ', $character);
487 $mode = 'whitespace';
490 (ctype_alnum($character) && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
491 (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
493 $current_word .= $character;
495 $words[] = $current_word;
496 $current_word = $character;
501 if ($this->isEndOfTag($character)) {
502 $current_word .= '>';
503 $words[] = $current_word;
506 if (!preg_match('[^\s]', $character)) {
507 $mode = 'whitespace';
512 $current_word .= $character;
516 if ($this->isStartOfTag($character)) {
517 if ($current_word !== '') {
518 $words[] = $current_word;
522 } elseif (preg_match("/\s/", $character)) {
523 $current_word .= $character;
524 $current_word = preg_replace('/\s+/S', ' ', $current_word);
526 if ($current_word != '') {
527 $words[] = $current_word;
529 $current_word = $character;
537 if ($current_word != '') {
538 $words[] = $current_word;
549 protected function isStartOfTag($val)
559 protected function isEndOfTag($val)
/**
 * Tells whether a string consists solely of whitespace.
 *
 * @param string $value
 *
 * @return bool true when $value holds no non-whitespace character; an empty string therefore yields true
 */
protected function isWhiteSpace($value)
{
    // The former pattern '[^\s]' had no explicit delimiters, so PCRE took "["
    // and "]" as the delimiter pair and matched /^\s/ instead. That made the
    // method return true for any value NOT starting with whitespace.
    return !preg_match('/\S/', $value);
}
/**
 * Splits a UTF-8 string into its individual characters.
 *
 * @param string $value
 *
 * @return array|false list of single characters (empty for an empty string);
 *                     preg_split() reports failure by returning false
 */
protected function explode($value)
{
    // as suggested by @onassar
    // An empty pattern also matches before the first and after the last
    // character; PREG_SPLIT_NO_EMPTY drops the two empty strings this would
    // otherwise put at both ends of the list.
    return preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY);
}