Version 1
[yaffs-website] / vendor / caxy / php-htmldiff / lib / Caxy / HtmlDiff / AbstractDiff.php
diff --git a/vendor/caxy/php-htmldiff/lib/Caxy/HtmlDiff/AbstractDiff.php b/vendor/caxy/php-htmldiff/lib/Caxy/HtmlDiff/AbstractDiff.php
new file mode 100644 (file)
index 0000000..85545b3
--- /dev/null
@@ -0,0 +1,584 @@
+<?php
+
+namespace Caxy\HtmlDiff;
+
+/**
+ * Class AbstractDiff.
+ */
+abstract class AbstractDiff
+{
+    /**
+     * @var array
+     *
+     * @deprecated since 0.1.0
+     */
+    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');
+
+    /**
+     * @var array
+     *
+     * @deprecated since 0.1.0
+     */
+    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');
+
+    /**
+     * @var bool
+     *
+     * @deprecated since 0.1.0
+     */
+    public static $defaultGroupDiffs = true;
+
+    /**
+     * @var HtmlDiffConfig
+     */
+    protected $config;
+
+    /**
+     * @var string
+     */
+    protected $content;
+
+    /**
+     * @var string
+     */
+    protected $oldText;
+
+    /**
+     * @var string
+     */
+    protected $newText;
+
+    /**
+     * @var array
+     */
+    protected $oldWords = array();
+
+    /**
+     * @var array
+     */
+    protected $newWords = array();
+
+    /**
+     * @var DiffCache[]
+     */
+    protected $diffCaches = array();
+
+    /**
+     * @var \HTMLPurifier
+     */
+    protected $purifier;
+
+    /**
+     * @var \HTMLPurifier_Config|null
+     */
+    protected $purifierConfig = null;
+
+    /**
+     * @see array_slice_cached();
+     * @var bool
+     */
+    protected $resetCache = false;
+
+    /**
+     * AbstractDiff constructor.
+     *
+     * @param string     $oldText
+     * @param string     $newText
+     * @param string     $encoding
+     * @param null|array $specialCaseTags
+     * @param null|bool  $groupDiffs
+     */
+    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
+    {
+        mb_substitute_character(0x20);
+
+        $this->setConfig(HtmlDiffConfig::create()->setEncoding($encoding));
+
+        if ($specialCaseTags !== null) {
+            $this->config->setSpecialCaseTags($specialCaseTags);
+        }
+
+        if ($groupDiffs !== null) {
+            $this->config->setGroupDiffs($groupDiffs);
+        }
+
+        $this->oldText = $oldText;
+        $this->newText = $newText;
+        $this->content = '';
+    }
+
+    /**
+     * @return bool|string
+     */
+    abstract public function build();
+
+    /**
+     * Initializes HTMLPurifier with cache location.
+     *
+     * @param null|string $defaultPurifierSerializerCache
+     */
+    public function initPurifier($defaultPurifierSerializerCache = null)
+    {
+        if (null !== $this->purifierConfig) {
+            $HTMLPurifierConfig  = $this->purifierConfig;
+        } else {
+            $HTMLPurifierConfig = \HTMLPurifier_Config::createDefault();
+        }
+
+        // Cache.SerializerPath defaults to Null and sets
+        // the location to inside the vendor HTMLPurifier library
+        // under the DefinitionCache/Serializer folder.
+        if (!is_null($defaultPurifierSerializerCache)) {
+            $HTMLPurifierConfig->set('Cache.SerializerPath', $defaultPurifierSerializerCache);
+        }
+
+        $this->purifier = new \HTMLPurifier($HTMLPurifierConfig);
+    }
+
+    /**
+     * Prepare (purify) the HTML
+     *
+     * @return void
+     */
+    protected function prepare()
+    {
+        $this->initPurifier($this->config->getPurifierCacheLocation());
+
+        $this->oldText = $this->purifyHtml($this->oldText);
+        $this->newText = $this->purifyHtml($this->newText);
+    }
+
+    /**
+     * @return DiffCache|null
+     */
+    protected function getDiffCache()
+    {
+        if (!$this->hasDiffCache()) {
+            return null;
+        }
+
+        $hash = spl_object_hash($this->getConfig()->getCacheProvider());
+
+        if (!array_key_exists($hash, $this->diffCaches)) {
+            $this->diffCaches[$hash] = new DiffCache($this->getConfig()->getCacheProvider());
+        }
+
+        return $this->diffCaches[$hash];
+    }
+
+    /**
+     * @return bool
+     */
+    protected function hasDiffCache()
+    {
+        return null !== $this->getConfig()->getCacheProvider();
+    }
+
+    /**
+     * @return HtmlDiffConfig
+     */
+    public function getConfig()
+    {
+        return $this->config;
+    }
+
+    /**
+     * @param HtmlDiffConfig $config
+     *
+     * @return AbstractDiff
+     */
+    public function setConfig(HtmlDiffConfig $config)
+    {
+        $this->config = $config;
+
+        return $this;
+    }
+
+    /**
+     * @return int
+     *
+     * @deprecated since 0.1.0
+     */
+    public function getMatchThreshold()
+    {
+        return $this->config->getMatchThreshold();
+    }
+
+    /**
+     * @param int $matchThreshold
+     *
+     * @return AbstractDiff
+     *
+     * @deprecated since 0.1.0
+     */
+    public function setMatchThreshold($matchThreshold)
+    {
+        $this->config->setMatchThreshold($matchThreshold);
+
+        return $this;
+    }
+
+    /**
+     * @param array $chars
+     *
+     * @deprecated since 0.1.0
+     */
+    public function setSpecialCaseChars(array $chars)
+    {
+        $this->config->setSpecialCaseChars($chars);
+    }
+
+    /**
+     * @return array|null
+     *
+     * @deprecated since 0.1.0
+     */
+    public function getSpecialCaseChars()
+    {
+        return $this->config->getSpecialCaseChars();
+    }
+
+    /**
+     * @param string $char
+     *
+     * @deprecated since 0.1.0
+     */
+    public function addSpecialCaseChar($char)
+    {
+        $this->config->addSpecialCaseChar($char);
+    }
+
+    /**
+     * @param string $char
+     *
+     * @deprecated since 0.1.0
+     */
+    public function removeSpecialCaseChar($char)
+    {
+        $this->config->removeSpecialCaseChar($char);
+    }
+
+    /**
+     * @param array $tags
+     *
+     * @deprecated since 0.1.0
+     */
+    public function setSpecialCaseTags(array $tags = array())
+    {
+        $this->config->setSpecialCaseChars($tags);
+    }
+
+    /**
+     * @param string $tag
+     *
+     * @deprecated since 0.1.0
+     */
+    public function addSpecialCaseTag($tag)
+    {
+        $this->config->addSpecialCaseTag($tag);
+    }
+
+    /**
+     * @param string $tag
+     *
+     * @deprecated since 0.1.0
+     */
+    public function removeSpecialCaseTag($tag)
+    {
+        $this->config->removeSpecialCaseTag($tag);
+    }
+
+    /**
+     * @return array|null
+     *
+     * @deprecated since 0.1.0
+     */
+    public function getSpecialCaseTags()
+    {
+        return $this->config->getSpecialCaseTags();
+    }
+
+    /**
+     * @return string
+     */
+    public function getOldHtml()
+    {
+        return $this->oldText;
+    }
+
+    /**
+     * @return string
+     */
+    public function getNewHtml()
+    {
+        return $this->newText;
+    }
+
+    /**
+     * @return string
+     */
+    public function getDifference()
+    {
+        return $this->content;
+    }
+
+    /**
+     * Clears the diff content.
+     *
+     * @return void
+     */
+    public function clearContent()
+    {
+        $this->content = null;
+    }
+
+    /**
+     * @param bool $boolean
+     *
+     * @return $this
+     *
+     * @deprecated since 0.1.0
+     */
+    public function setGroupDiffs($boolean)
+    {
+        $this->config->setGroupDiffs($boolean);
+
+        return $this;
+    }
+
+    /**
+     * @return bool
+     *
+     * @deprecated since 0.1.0
+     */
+    public function isGroupDiffs()
+    {
+        return $this->config->isGroupDiffs();
+    }
+
+    /**
+     * @param \HTMLPurifier_Config $config
+     */
+    public function setHTMLPurifierConfig(\HTMLPurifier_Config $config)
+    {
+        $this->purifierConfig = $config;
+    }
+
+    /**
+     * @param string $tag
+     *
+     * @return string
+     */
+    protected function getOpeningTag($tag)
+    {
+        return '/<'.$tag.'[^>]*/i';
+    }
+
+    /**
+     * @param string $tag
+     *
+     * @return string
+     */
+    protected function getClosingTag($tag)
+    {
+        return '</'.$tag.'>';
+    }
+
+    /**
+     * @param string $str
+     * @param string $start
+     * @param string $end
+     *
+     * @return string
+     */
+    protected function getStringBetween($str, $start, $end)
+    {
+        $expStr = explode($start, $str, 2);
+        if (count($expStr) > 1) {
+            $expStr = explode($end, $expStr[ 1 ]);
+            if (count($expStr) > 1) {
+                array_pop($expStr);
+
+                return implode($end, $expStr);
+            }
+        }
+
+        return '';
+    }
+
+    /**
+     * @param string $html
+     *
+     * @return string
+     */
+    protected function purifyHtml($html)
+    {
+        if (class_exists('Tidy') && false) {
+            $config = array('output-xhtml' => true, 'indent' => false);
+            $tidy = new tidy();
+            $tidy->parseString($html, $config, 'utf8');
+            $html = (string) $tidy;
+
+            return $this->getStringBetween($html, '<body>');
+        }
+
+        return $this->purifier->purify($html);
+    }
+
+    protected function splitInputsToWords()
+    {
+        $this->setOldWords($this->convertHtmlToListOfWords($this->explode($this->oldText)));
+        $this->setNewWords($this->convertHtmlToListOfWords($this->explode($this->newText)));
+    }
+
+    /**
+     * @param array $oldWords
+     */
+    protected function setOldWords(array $oldWords)
+    {
+        $this->resetCache = true;
+        $this->oldWords   = $oldWords;
+    }
+
+    /**
+     * @param array $newWords
+     */
+    protected function setNewWords(array $newWords)
+    {
+        $this->resetCache = true;
+        $this->newWords   = $newWords;
+    }
+
+    /**
+     * @param string $text
+     *
+     * @return bool
+     */
+    protected function isPartOfWord($text)
+    {
+        return ctype_alnum(str_replace($this->config->getSpecialCaseChars(), '', $text));
+    }
+
+    /**
+     * @param array $characterString
+     *
+     * @return array
+     */
+    protected function convertHtmlToListOfWords($characterString)
+    {
+        $mode = 'character';
+        $current_word = '';
+        $words = array();
+        foreach ($characterString as $i => $character) {
+            switch ($mode) {
+                case 'character':
+                if ($this->isStartOfTag($character)) {
+                    if ($current_word != '') {
+                        $words[] = $current_word;
+                    }
+
+                    $current_word = '<';
+                    $mode = 'tag';
+                } elseif (preg_match("/\s/", $character)) {
+                    if ($current_word !== '') {
+                        $words[] = $current_word;
+                    }
+                    $current_word = preg_replace('/\s+/S', ' ', $character);
+                    $mode = 'whitespace';
+                } else {
+                    if (
+                        (ctype_alnum($character) && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
+                        (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
+                    ) {
+                        $current_word .= $character;
+                    } else {
+                        $words[] = $current_word;
+                        $current_word = $character;
+                    }
+                }
+                break;
+                case 'tag' :
+                if ($this->isEndOfTag($character)) {
+                    $current_word .= '>';
+                    $words[] = $current_word;
+                    $current_word = '';
+
+                    if (!preg_match('[^\s]', $character)) {
+                        $mode = 'whitespace';
+                    } else {
+                        $mode = 'character';
+                    }
+                } else {
+                    $current_word .= $character;
+                }
+                break;
+                case 'whitespace':
+                if ($this->isStartOfTag($character)) {
+                    if ($current_word !== '') {
+                        $words[] = $current_word;
+                    }
+                    $current_word = '<';
+                    $mode = 'tag';
+                } elseif (preg_match("/\s/", $character)) {
+                    $current_word .= $character;
+                    $current_word = preg_replace('/\s+/S', ' ', $current_word);
+                } else {
+                    if ($current_word != '') {
+                        $words[] = $current_word;
+                    }
+                    $current_word = $character;
+                    $mode = 'character';
+                }
+                break;
+                default:
+                break;
+            }
+        }
+        if ($current_word != '') {
+            $words[] = $current_word;
+        }
+
+        return $words;
+    }
+
+    /**
+     * @param string $val
+     *
+     * @return bool
+     */
+    protected function isStartOfTag($val)
+    {
+        return $val == '<';
+    }
+
+    /**
+     * @param string $val
+     *
+     * @return bool
+     */
+    protected function isEndOfTag($val)
+    {
+        return $val == '>';
+    }
+
+    /**
+     * @param string $value
+     *
+     * @return bool
+     */
+    protected function isWhiteSpace($value)
+    {
+        return !preg_match('[^\s]', $value);
+    }
+
+    /**
+     * @param string $value
+     *
+     * @return array
+     */
+    protected function explode($value)
+    {
+        // as suggested by @onassar
+        return preg_split('//u', $value);
+    }
+}