Updated to Drupal 8.5. Core Media not yet in use.
[yaffs-website] / vendor / caxy / php-htmldiff / lib / Caxy / HtmlDiff / AbstractDiff.php
index c23464497819146f6a5467cb33c5a3722531011d..46f74023866cf7851343c932481f31335c765c01 100644 (file)
@@ -398,9 +398,9 @@ abstract class AbstractDiff
      */
     protected function getStringBetween($str, $start, $end)
     {
-        $expStr = explode($start, $str, 2);
+        $expStr = mb_split($start, $str, 2);
         if (count($expStr) > 1) {
-            $expStr = explode($end, $expStr[ 1 ]);
+            $expStr = mb_split($end, $expStr[ 1 ]);
             if (count($expStr) > 1) {
                 array_pop($expStr);
 
@@ -461,7 +461,7 @@ abstract class AbstractDiff
      */
     protected function isPartOfWord($text)
     {
-        return ctype_alnum(str_replace($this->config->getSpecialCaseChars(), '', $text));
+        return $this->ctypeAlphanumUnicode(str_replace($this->config->getSpecialCaseChars(), '', $text)); // drop the configured special-case chars, then test what is left with ctypeAlphanumUnicode()
     }
 
     /**
@@ -474,6 +474,7 @@ abstract class AbstractDiff
         $mode = 'character';
         $current_word = '';
         $words = array();
+        $keepNewLines = $this->getConfig()->isKeepNewLines();
         foreach ($characterString as $i => $character) {
             switch ($mode) {
                 case 'character':
@@ -484,15 +485,15 @@ abstract class AbstractDiff
 
                     $current_word = '<';
                     $mode = 'tag';
-                } elseif (preg_match("/\s/", $character)) {
+                } elseif (preg_match("/\s/u", $character)) {
                     if ($current_word !== '') {
                         $words[] = $current_word;
                     }
-                    $current_word = preg_replace('/\s+/S', ' ', $character);
+                    $current_word = $keepNewLines ? $character : preg_replace('/\s+/Su', ' ', $character);
                     $mode = 'whitespace';
                 } else {
                     if (
-                        (ctype_alnum($character) && (strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
+                        (($this->ctypeAlphanumUnicode($character)) && (mb_strlen($current_word) == 0 || $this->isPartOfWord($current_word))) ||
                         (in_array($character, $this->config->getSpecialCaseChars()) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
                     ) {
                         $current_word .= $character;
@@ -508,7 +509,7 @@ abstract class AbstractDiff
                     $words[] = $current_word;
                     $current_word = '';
 
-                    if (!preg_match('[^\s]', $character)) {
+                    if (!preg_match('[^\s]u', $character)) {
                         $mode = 'whitespace';
                     } else {
                         $mode = 'character';
@@ -524,9 +525,9 @@ abstract class AbstractDiff
                     }
                     $current_word = '<';
                     $mode = 'tag';
-                } elseif (preg_match("/\s/", $character)) {
+                } elseif (preg_match("/\s/u", $character)) {
                     $current_word .= $character;
-                    $current_word = preg_replace('/\s+/S', ' ', $current_word);
+                    if (!$keepNewLines) $current_word = preg_replace('/\s+/Su', ' ', $current_word);
                 } else {
                     if ($current_word != '') {
                         $words[] = $current_word;
@@ -573,7 +574,7 @@ abstract class AbstractDiff
      */
     protected function isWhiteSpace($value)
     {
-        return !preg_match('[^\s]', $value);
+        return !preg_match('[^\s]u', $value);
     }
 
     /**
@@ -584,6 +585,16 @@ abstract class AbstractDiff
     protected function explode($value)
     {
         // as suggested by @onassar
-        return preg_split('//u', $value);
+        return preg_split('//u', $value, -1, PREG_SPLIT_NO_EMPTY); // empty pattern + /u splits between UTF-8 characters; NO_EMPTY drops the empty first/last pieces
+    }
+
+    /**
+     * Unicode-aware ctype_alnum(): whether $str is non-empty and only letters (\pL) or ASCII digits.
+     * @param string $str
+     * @return bool
+     */
+    protected function ctypeAlphanumUnicode($str)
+    {
+        return preg_match('/^[a-zA-Z0-9\pL]+$/uD', $str) === 1; // === 1: preg_match() yields int|false; D: "$" must not match before a final "\n"
     }
 }