3 namespace Caxy\HtmlDiff;
/**
 * Base class for the HTML diff builders.
 *
 * Holds the two HTML inputs, the HtmlDiffConfig, the HTMLPurifier instance
 * used to normalise the inputs, and the helpers that tokenise HTML into the
 * "word" lists the concrete diff implementations compare.
 */
abstract class AbstractDiff
{
    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');

    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');

    /**
     * @var bool
     *
     * @deprecated since 0.1.0
     */
    public static $defaultGroupDiffs = true;

    /**
     * Configuration every deprecated accessor of this class delegates to.
     *
     * @var HtmlDiffConfig
     */
    protected $config;

    /**
     * Diff output exposed through getDifference(); null after clearContent().
     *
     * @var string|null
     */
    protected $content;

    /**
     * @var string
     */
    protected $oldText;

    /**
     * @var string
     */
    protected $newText;

    /**
     * @var array
     */
    protected $oldWords = array();

    /**
     * @var array
     */
    protected $newWords = array();

    /**
     * DiffCache instances keyed by the spl_object_hash() of their cache provider.
     *
     * @var DiffCache[]
     */
    protected $diffCaches = array();

    /**
     * Created by initPurifier().
     *
     * @var \HTMLPurifier|null
     */
    protected $purifier;

    /**
     * Optional user supplied purifier configuration, see setHTMLPurifierConfig().
     *
     * @var \HTMLPurifier_Config|null
     */
    protected $purifierConfig = null;

    /**
     * Raised to true every time one of the word lists is replaced.
     *
     * @see array_slice_cached();
     *
     * @var bool
     */
    protected $resetCache = false;

    /**
     * AbstractDiff constructor.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags
     * @param null|bool  $groupDiffs
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        // Process-wide mbstring setting: characters that cannot be converted
        // are substituted with a space (0x20).
        mb_substitute_character(0x20);

        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));

        // The two optional arguments only override the configuration defaults
        // when the caller actually supplied them.
        if ($specialCaseTags !== null) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }

        if ($groupDiffs !== null) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->oldText = $oldText;
        $this->newText = $newText;
        $this->content = '';
    }

    /**
     * Builds the diff; implemented by the concrete diff classes.
     *
     * @return bool|string
     */
    abstract public function build();

    /**
     * Initializes HTMLPurifier with cache location.
     *
     * Uses the configuration passed to setHTMLPurifierConfig() when there is
     * one, otherwise HTMLPurifier's default configuration. The configuration
     * object is modified in place (serializer path and permissions).
     *
     * @param null|string $defaultPurifierSerializerCache
     */
    public function initPurifier($defaultPurifierSerializerCache = null)
    {
        if (null !== $this->purifierConfig) {
            $HTMLPurifierConfig = $this->purifierConfig;
        } else {
            $HTMLPurifierConfig = \HTMLPurifier_Config::createDefault();
        }

        // Cache.SerializerPath defaults to Null and sets
        // the location to inside the vendor HTMLPurifier library
        // under the DefinitionCache/Serializer folder.
        if (!is_null($defaultPurifierSerializerCache)) {
            $HTMLPurifierConfig->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
        }

        // Cache.SerializerPermissions defaults to 0744.
        // This setting allows the cache files to be deleted by any user, as they are typically
        // created by the web/php user (www-user, php-fpm, etc.)
        // NOTE(review): 0777 makes the cache world-writable; confirm this is acceptable for the deployment.
        $HTMLPurifierConfig->set('Cache.SerializerPermissions', 0777);

        $this->purifier = new \HTMLPurifier($HTMLPurifierConfig);
    }

    /**
     * Prepare (purify) the HTML
     *
     * Initialises the purifier with the configured cache location and
     * replaces both inputs with their purified form.
     */
    protected function prepare()
    {
        $this->initPurifier($this->config->getPurifierCacheLocation());

        $this->oldText = $this->purifyHtml($this->oldText);
        $this->newText = $this->purifyHtml($this->newText);
    }

    /**
     * Returns the DiffCache bound to the configured cache provider, creating
     * it on first use, or null when no cache provider is configured.
     *
     * @return DiffCache|null
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        $cacheProvider = $this->getConfig()->getCacheProvider();
        $hash = spl_object_hash($cacheProvider);

        // One DiffCache per provider object, so swapping the provider on the
        // configuration does not reuse entries of the previous one.
        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($cacheProvider);
        }

        return $this->diffCaches[$hash];
    }

    /**
     * @return bool true when the configuration has a cache provider
     */
    protected function hasDiffCache()
    {
        return null !== $this->getConfig()->getCacheProvider();
    }

    /**
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        return $this;
    }

    /**
     * Delegates to the configuration object.
     *
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        return $this->config->getMatchThreshold();
    }

    /**
     * Delegates to the configuration object.
     *
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->config->setMatchThreshold($matchThreshold);

        return $this;
    }

    /**
     * Delegates to the configuration object.
     *
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->config->setSpecialCaseChars($chars);
    }

    /**
     * Delegates to the configuration object.
     *
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        return $this->config->getSpecialCaseChars();
    }

    /**
     * Delegates to the configuration object.
     *
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $this->config->addSpecialCaseChar($char);
    }

    /**
     * Delegates to the configuration object.
     *
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $this->config->removeSpecialCaseChar($char);
    }

    /**
     * Replaces the special case tags on the configuration object.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // Must target the *tags* setter; writing to the chars setter would
        // clobber the special case characters and leave the tags untouched.
        $this->config->setSpecialCaseTags($tags);
    }

    /**
     * Delegates to the configuration object.
     *
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $this->config->addSpecialCaseTag($tag);
    }

    /**
     * Delegates to the configuration object.
     *
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $this->config->removeSpecialCaseTag($tag);
    }

    /**
     * Delegates to the configuration object.
     *
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        return $this->config->getSpecialCaseTags();
    }

    /**
     * @return string the old HTML (purified once prepare() has run)
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }

    /**
     * @return string the new HTML (purified once prepare() has run)
     */
    public function getNewHtml()
    {
        return $this->newText;
    }

    /**
     * @return string|null the current diff content
     */
    public function getDifference()
    {
        return $this->content;
    }

    /**
     * Clears the diff content.
     *
     * @return void
     */
    public function clearContent()
    {
        $this->content = null;
    }

    /**
     * Delegates to the configuration object.
     *
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $this->config->setGroupDiffs($boolean);

        return $this;
    }

    /**
     * Delegates to the configuration object.
     *
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        return $this->config->isGroupDiffs();
    }

    /**
     * Supplies the HTMLPurifier configuration initPurifier() starts from.
     *
     * @param \HTMLPurifier_Config $config
     */
    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
    {
        $this->purifierConfig = $config;
    }

    /**
     * Builds a case-insensitive regular expression matching the opening tag
     * up to (not including) its closing ">".
     *
     * The tag is inserted into the pattern verbatim, without preg_quote().
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getOpeningTag($tag)
    {
        return '/<'.$tag.'[^>]*/i';
    }

    /**
     * Returns the literal closing tag, e.g. "</p>" (a plain string, not a regex).
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getClosingTag($tag)
    {
        return '</'.$tag.'>';
    }

    /**
     * Returns the text after the first $start and before the last $end that
     * follows it.
     *
     * @param string $str
     * @param string $start
     * @param string $end
     *
     * @return string|null null when a marker is empty or not found
     */
    protected function getStringBetween($str, $start, $end)
    {
        // explode() rejects an empty delimiter (ValueError on PHP 8), and an
        // empty marker cannot delimit anything anyway.
        if ($start === '' || $end === '') {
            return null;
        }

        $expStr = explode($start, $str, 2);
        if (count($expStr) > 1) {
            $expStr = explode($end, $expStr[1]);
            if (count($expStr) > 1) {
                // Drop whatever follows the last $end, then restore the inner
                // $end occurrences that explode() removed.
                array_pop($expStr);

                return implode($end, $expStr);
            }
        }

        return null;
    }

    /**
     * Runs the HTML through HTMLPurifier.
     *
     * The purifier is created on demand, so calling this before prepare()
     * no longer dereferences a null purifier.
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        if (null === $this->purifier) {
            $this->initPurifier($this->config->getPurifierCacheLocation());
        }

        return $this->purifier->purify($html);
    }

    /**
     * Tokenises both inputs and stores the resulting word lists.
     */
    protected function splitInputsToWords()
    {
        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
    }

    /**
     * @param array $oldWords
     */
    protected function setOldWords(array $oldWords)
    {
        $this->resetCache = true;
        $this->oldWords = $oldWords;
    }

    /**
     * @param array $newWords
     */
    protected function setNewWords(array $newWords)
    {
        $this->resetCache = true;
        $this->newWords = $newWords;
    }

    /**
     * Tells whether the text is alphanumeric once the configured special
     * case characters have been removed from it.
     *
     * NOTE(review): ctype_alnum() inspects single bytes, so multi-byte UTF-8
     * letters are presumably not recognised as word characters - confirm
     * before relying on this for non-ASCII text.
     *
     * @param string $text
     *
     * @return bool
     */
    protected function isPartOfWord($text)
    {
        return ctype_alnum(str_replace($this->config->getSpecialCaseChars(), '', $text));
    }

    /**
     * Groups a list of single characters into words, whitespace runs and tags.
     *
     * A small state machine with three modes:
     *  - "character": collecting a word,
     *  - "tag": collecting everything from "<" up to the closing ">",
     *  - "whitespace": collecting a whitespace run, collapsed to one space.
     *
     * @param array $characterString
     *
     * @return array
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();
        // Loop invariant, so it is looked up once instead of per character.
        $specialCaseChars = $this->config->getSpecialCaseChars();

        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }

                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/', $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = preg_replace('/\s+/S', ' ', $character);
                        $mode = 'whitespace';
                    } else {
                        // The character extends the current word when it is
                        // alphanumeric and the word so far is word-like, or
                        // when it is a special case character directly
                        // followed by a word character (e.g. the "." in "1.5").
                        if (
                            (ctype_alnum($character) && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
                            (in_array($character, $specialCaseChars) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
                        ) {
                            $current_word .= $character;
                        } else {
                            // Deliberately unguarded: this can emit an empty
                            // word, which existing output depends on.
                            $words[] = $current_word;
                            $current_word = $character;
                        }
                    }
                    break;
                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // The historical pattern '[^\s]' uses the brackets as
                        // delimiters, i.e. it is really /^\s/. It is kept
                        // equivalent on purpose: after a tag the machine
                        // continues in "whitespace" mode, and changing that
                        // would alter the produced word list.
                        if (!preg_match('/^\s/', $character)) {
                            $mode = 'whitespace';
                        } else {
                            $mode = 'character';
                        }
                    } else {
                        $current_word .= $character;
                    }
                    break;
                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/', $character)) {
                        $current_word .= $character;
                        $current_word = preg_replace('/\s+/S', ' ', $current_word);
                    } else {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;
                default:
                    break;
            }
        }

        if ($current_word !== '') {
            $words[] = $current_word;
        }

        return $words;
    }

    /**
     * @param string $val
     *
     * @return bool true when the character opens a tag
     */
    protected function isStartOfTag($val)
    {
        return $val === '<';
    }

    /**
     * @param string $val
     *
     * @return bool true when the character closes a tag
     */
    protected function isEndOfTag($val)
    {
        return $val === '>';
    }

    /**
     * Tells whether the value consists of whitespace only (true for '').
     *
     * @param string $value
     *
     * @return bool
     */
    protected function isWhiteSpace($value)
    {
        // Explicit "/" delimiters: with '[^\s]' PCRE treats the brackets as
        // delimiters and the check degrades to "does not start with whitespace".
        return !preg_match('/[^\s]/', $value);
    }

    /**
     * Splits a UTF-8 string into its characters.
     *
     * The empty pattern also matches before the first and after the last
     * character, so the result starts and ends with an empty string.
     * preg_split() returns false when the value is not valid UTF-8.
     *
     * @param string $value
     *
     * @return array|false
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        return preg_split('//u', $value);
    }
}