*/
public function parse()
{
- $p = 0;
do {
- $p = $this->scanner->position();
$this->consumeData();
-
// FIXME: Add infinite loop protection.
} while ($this->carryOn);
}
*/
protected function characterData()
{
- if ($this->scanner->current() === false) {
+ $tok = $this->scanner->current();
+ if ($tok === false) {
return false;
}
switch ($this->textMode) {
case Elements::TEXT_RCDATA:
return $this->rcdata();
default:
- $tok = $this->scanner->current();
if (strspn($tok, "<&")) {
return false;
}
if ($tok == '/') {
$this->scanner->next();
$this->scanner->whitespace();
- if ($this->scanner->current() == '>') {
+ $tok = $this->scanner->current();
+
+ if ($tok == '>') {
$selfClose = true;
return true;
}
- if ($this->scanner->current() === false) {
+ if ($tok === false) {
$this->parseError("Unexpected EOF inside of tag.");
return true;
}
// Basically, we skip the / token and go on.
// See 8.2.4.43.
- $this->parseError("Unexpected '%s' inside of a tag.", $this->scanner->current());
+ $this->parseError("Unexpected '%s' inside of a tag.", $tok);
return false;
}
- if ($this->scanner->current() == '>') {
+ if ($tok == '>') {
return true;
}
- if ($this->scanner->current() === false) {
+ if ($tok === false) {
$this->parseError("Unexpected EOF inside of tag.");
return true;
}
{
$stoplist = "\f" . $quote;
$val = '';
- $tok = $this->scanner->current();
- while (strspn($tok, $stoplist) == 0 && $tok !== false) {
- if ($tok == '&') {
- $val .= $this->decodeCharacterReference(true);
- $tok = $this->scanner->current();
+
+ while (true) {
+ $tokens = $this->scanner->charsUntil($stoplist.'&');
+ if ($tokens !== false) {
+ $val .= $tokens;
} else {
- $val .= $tok;
- $tok = $this->scanner->next();
+ break;
}
+
+ $tok = $this->scanner->current();
+ if ($tok == '&') {
+ $val .= $this->decodeCharacterReference(true, $tok);
+ continue;
+ }
+ break;
}
$this->scanner->next();
return $val;
*/
protected function bogusComment($leading = '')
{
-
- // TODO: This can be done more efficiently when the
- // scanner exposes a readUntil() method.
$comment = $leading;
+ $tokens = $this->scanner->charsUntil('>');
+ if ($tokens !== false) {
+ $comment .= $tokens;
+ }
$tok = $this->scanner->current();
- do {
+ if ($tok !== false) {
$comment .= $tok;
- $tok = $this->scanner->next();
- } while ($tok !== false && $tok != '>');
+ }
$this->flushBuffer();
- $this->events->comment($comment . $tok);
+ $this->events->comment($comment);
$this->scanner->next();
return true;
*/
protected function isCommentEnd()
{
+ $tok = $this->scanner->current();
+
// EOF
- if ($this->scanner->current() === false) {
+ if ($tok === false) {
// Hit the end.
$this->parseError("Unexpected EOF in a comment.");
return true;
}
// If it doesn't start with -, not the end.
- if ($this->scanner->current() != '-') {
+ if ($tok != '-') {
return false;
}
$pub = strtoupper($this->scanner->getAsciiAlpha());
$white = strlen($this->scanner->whitespace());
- $tok = $this->scanner->current();
// Get ID, and flag it as pub or system.
if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
$len = strlen($sequence);
$buffer = '';
for ($i = 0; $i < $len; ++ $i) {
- $buffer .= $this->scanner->current();
+ $tok = $this->scanner->current();
+ $buffer .= $tok;
// EOF. Rewind and let the caller handle it.
- if ($this->scanner->current() === false) {
+ if ($tok === false) {
$this->scanner->unconsume($i);
return false;
}
}
$entity = CharacterReference::lookupDecimal($numeric);
}
- } // String entity.
- else {
+ } elseif ($tok === '=' && $inAttribute) {
+ return '&';
+ } else { // String entity.
+
// Attempt to consume a string up to a ';'.
// [a-zA-Z0-9]+;
- $cname = $this->scanner->getAsciiAlpha();
+ $cname = $this->scanner->getAsciiAlphaNum();
$entity = CharacterReference::lookupName($cname);
// When no entity is found provide the name of the unmatched string
// and continue on as the & is not part of an entity. The & will
// be converted to &amp; elsewhere.
if ($entity == null) {
- $this->parseError("No match in entity table for '%s'", $cname);
+ if (!$inAttribute || strlen($cname) === 0) {
+ $this->parseError("No match in entity table for '%s'", $cname);
+ }
$this->scanner->unconsume($this->scanner->position() - $start);
return '&';
}