Security update for Core, with self-updated composer
[yaffs-website] / vendor / masterminds / html5 / src / HTML5 / Parser / Tokenizer.php
index 02b2affdd5c58c93c97747d0aab1b905dfa17edf..c42bc3d86ed968f6301adeaca72e2a93f82afe86 100644 (file)
@@ -83,11 +83,8 @@ class Tokenizer
      */
     public function parse()
     {
-        $p = 0;
         do {
-            $p = $this->scanner->position();
             $this->consumeData();
-
             // FIXME: Add infinite loop protection.
         } while ($this->carryOn);
     }
@@ -145,7 +142,8 @@ class Tokenizer
      */
     protected function characterData()
     {
-        if ($this->scanner->current() === false) {
+        $tok = $this->scanner->current();
+        if ($tok === false) {
             return false;
         }
         switch ($this->textMode) {
@@ -154,7 +152,6 @@ class Tokenizer
             case Elements::TEXT_RCDATA:
                 return $this->rcdata();
             default:
-                $tok = $this->scanner->current();
                 if (strspn($tok, "<&")) {
                     return false;
                 }
@@ -408,24 +405,26 @@ class Tokenizer
         if ($tok == '/') {
             $this->scanner->next();
             $this->scanner->whitespace();
-            if ($this->scanner->current() == '>') {
+            $tok = $this->scanner->current();
+
+            if ($tok == '>') {
                 $selfClose = true;
                 return true;
             }
-            if ($this->scanner->current() === false) {
+            if ($tok === false) {
                 $this->parseError("Unexpected EOF inside of tag.");
                 return true;
             }
             // Basically, we skip the / token and go on.
             // See 8.2.4.43.
-            $this->parseError("Unexpected '%s' inside of a tag.", $this->scanner->current());
+            $this->parseError("Unexpected '%s' inside of a tag.", $tok);
             return false;
         }
 
-        if ($this->scanner->current() == '>') {
+        if ($tok == '>') {
             return true;
         }
-        if ($this->scanner->current() === false) {
+        if ($tok === false) {
             $this->parseError("Unexpected EOF inside of tag.");
             return true;
         }
@@ -541,15 +540,21 @@ class Tokenizer
     {
         $stoplist = "\f" . $quote;
         $val = '';
-        $tok = $this->scanner->current();
-        while (strspn($tok, $stoplist) == 0 && $tok !== false) {
-            if ($tok == '&') {
-                $val .= $this->decodeCharacterReference(true);
-                $tok = $this->scanner->current();
+
+        while (true) {
+            $tokens = $this->scanner->charsUntil($stoplist.'&');
+            if ($tokens !== false) {
+                $val .= $tokens;
             } else {
-                $val .= $tok;
-                $tok = $this->scanner->next();
+                break;
             }
+
+            $tok = $this->scanner->current();
+            if ($tok == '&') {
+                $val .= $this->decodeCharacterReference(true, $tok);
+                continue;
+            }
+            break;
         }
         $this->scanner->next();
         return $val;
@@ -591,18 +596,18 @@ class Tokenizer
      */
     protected function bogusComment($leading = '')
     {
-
-        // TODO: This can be done more efficiently when the
-        // scanner exposes a readUntil() method.
         $comment = $leading;
+        $tokens = $this->scanner->charsUntil('>');
+        if ($tokens !== false) {
+            $comment .= $tokens;
+        }
         $tok = $this->scanner->current();
-        do {
+        if ($tok !== false) {
             $comment .= $tok;
-            $tok = $this->scanner->next();
-        } while ($tok !== false && $tok != '>');
+        }
 
         $this->flushBuffer();
-        $this->events->comment($comment . $tok);
+        $this->events->comment($comment);
         $this->scanner->next();
 
         return true;
@@ -646,15 +651,17 @@ class Tokenizer
      */
     protected function isCommentEnd()
     {
+        $tok = $this->scanner->current();
+
         // EOF
-        if ($this->scanner->current() === false) {
+        if ($tok === false) {
             // Hit the end.
             $this->parseError("Unexpected EOF in a comment.");
             return true;
         }
 
         // If it doesn't start with -, not the end.
-        if ($this->scanner->current() != '-') {
+        if ($tok != '-') {
             return false;
         }
 
@@ -737,7 +744,6 @@ class Tokenizer
 
         $pub = strtoupper($this->scanner->getAsciiAlpha());
         $white = strlen($this->scanner->whitespace());
-        $tok = $this->scanner->current();
 
         // Get ID, and flag it as pub or system.
         if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
@@ -938,10 +944,11 @@ class Tokenizer
         $len = strlen($sequence);
         $buffer = '';
         for ($i = 0; $i < $len; ++ $i) {
-            $buffer .= $this->scanner->current();
+            $tok = $this->scanner->current();
+            $buffer .= $tok;
 
             // EOF. Rewind and let the caller handle it.
-            if ($this->scanner->current() === false) {
+            if ($tok === false) {
                 $this->scanner->unconsume($i);
                 return false;
             }
@@ -1067,18 +1074,22 @@ class Tokenizer
                 }
                 $entity = CharacterReference::lookupDecimal($numeric);
             }
-        }         // String entity.
-        else {
+        } elseif ($tok === '=' && $inAttribute) {
+            return '&';
+        } else { // String entity.
+
             // Attempt to consume a string up to a ';'.
             // [a-zA-Z0-9]+;
-            $cname = $this->scanner->getAsciiAlpha();
+            $cname = $this->scanner->getAsciiAlphaNum();
             $entity = CharacterReference::lookupName($cname);
 
             // When no entity is found provide the name of the unmatched string
             // and continue on as the & is not part of an entity. The & will
             // be converted to &amp; elsewhere.
             if ($entity == null) {
-                $this->parseError("No match in entity table for '%s'", $cname);
+                if (!$inAttribute || strlen($cname) === 0) {
+                    $this->parseError("No match in entity table for '%s'", $cname);
+                }
                 $this->scanner->unconsume($this->scanner->position() - $start);
                 return '&';
             }