6 * This parser is based on a skeleton written by Moriyoshi Koizumi, which in
7 * turn is based on work by Masato Bito.
9 use PhpParser\Node\Name;
10 use PhpParser\Node\Param;
11 use PhpParser\Node\Scalar\LNumber;
12 use PhpParser\Node\Scalar\String_;
13 use PhpParser\Node\Stmt\Class_;
14 use PhpParser\Node\Stmt\ClassConst;
15 use PhpParser\Node\Stmt\ClassMethod;
16 use PhpParser\Node\Stmt\Interface_;
17 use PhpParser\Node\Stmt\Namespace_;
18 use PhpParser\Node\Stmt\Property;
19 use PhpParser\Node\Stmt\TryCatch;
20 use PhpParser\Node\Stmt\UseUse;
22 abstract class ParserAbstract implements Parser
24 const SYMBOL_NONE = -1;
27 * The following members will be filled with generated parsing data:
30 /** @var int Size of $tokenToSymbol map */
31 protected $tokenToSymbolMapSize;
32 /** @var int Size of $action table */
33 protected $actionTableSize;
34 /** @var int Size of $goto table */
35 protected $gotoTableSize;
37 /** @var int Symbol number signifying an invalid token */
38 protected $invalidSymbol;
39 /** @var int Symbol number of error recovery token */
40 protected $errorSymbol;
41 /** @var int Action number signifying default action */
42 protected $defaultAction;
43 /** @var int Rule number signifying that an unexpected token was encountered */
44 protected $unexpectedTokenRule;
46 protected $YY2TBLSTATE;
47 protected $YYNLSTATES;
49 /** @var array Map of lexer tokens to internal symbols */
50 protected $tokenToSymbol;
51 /** @var array Map of symbols to their names */
52 protected $symbolToName;
53 /** @var array Names of the production rules (only necessary for debugging) */
54 protected $productions;
56 /** @var array Map of states to a displacement into the $action table. The corresponding action for this
57 * state/symbol pair is $action[$actionBase[$state] + $symbol]. If $actionBase[$state] is 0, the
58 action is defaulted, i.e. $actionDefault[$state] should be used instead. */
59 protected $actionBase;
60 /** @var array Table of actions. Indexed according to $actionBase comment. */
62 /** @var array Table indexed analogously to $action. If $actionCheck[$actionBase[$state] + $symbol] != $symbol
63 * then the action is defaulted, i.e. $actionDefault[$state] should be used instead. */
64 protected $actionCheck;
65 /** @var array Map of states to their default action */
66 protected $actionDefault;
68 /** @var array Map of non-terminals to a displacement into the $goto table. The corresponding goto state for this
69 * non-terminal/state pair is $goto[$gotoBase[$nonTerminal] + $state] (unless defaulted) */
71 /** @var array Table of states to goto after reduction. Indexed according to $gotoBase comment. */
73 /** @var array Table indexed analogously to $goto. If $gotoCheck[$gotoBase[$nonTerminal] + $state] != $nonTerminal
74 * then the goto state is defaulted, i.e. $gotoDefault[$nonTerminal] should be used. */
76 /** @var array Map of non-terminals to the default state to goto after their reduction */
77 protected $gotoDefault;
79 /** @var array Map of rules to the non-terminal on their left-hand side, i.e. the non-terminal to use for
80 * determining the state to goto after reduction. */
81 protected $ruleToNonTerminal;
82 /** @var array Map of rules to the length of their right-hand side, which is the number of elements that have to
83 * be popped from the stack(s) on reduction. */
84 protected $ruleToLength;
87 * The following members are part of the parser state:
90 /** @var Lexer Lexer that is used when parsing */
92 /** @var mixed Temporary value containing the result of last semantic action (reduction) */
94 /** @var int Position in stacks (state stack, semantic value stack, attribute stack) */
96 /** @var array Semantic value stack (contains values of tokens and semantic action results) */
98 /** @var array[] Start attribute stack */
99 protected $startAttributeStack;
100 /** @var array[] End attribute stack */
101 protected $endAttributeStack;
102 /** @var array End attributes of last *shifted* token */
103 protected $endAttributes;
104 /** @var array Start attributes of last *read* token */
105 protected $lookaheadStartAttributes;
107 /** @var ErrorHandler Error handler */
108 protected $errorHandler;
109 /** @var Error[] Errors collected during last parse */
111 /** @var int Error state, used to avoid error floods */
112 protected $errorState;
115 * Creates a parser instance.
117 * @param Lexer $lexer A lexer
118 * @param array $options Options array. Currently no options are supported.
public function __construct(Lexer $lexer, array $options = array()) {
    $this->lexer  = $lexer;
    $this->errors = array();

    // Nothing else to do unless the caller still passes the removed option.
    if (!isset($options['throwOnError'])) {
        return;
    }

    // Fail loudly instead of silently ignoring an option that no longer has any effect.
    throw new \LogicException(
        '"throwOnError" is no longer supported, use "errorHandler" instead');
}
131 * Parses PHP code into a node tree.
133 * If a non-throwing error handler is used, the parser will continue parsing after an error
134 * occurred and attempt to build a partial AST.
136 * @param string $code The source code to parse
137 * @param ErrorHandler|null $errorHandler Error handler to use for lexer/parser errors, defaults
138 * to ErrorHandler\Throwing.
140 * @return Node[]|null Array of statements (or null if a non-throwing error handler is used and the parser was
141 * unable to recover from an error).
// Drives the table-based shift/reduce loop over the tokens of $code and returns $this->semValue
// once the accept path is reached.
// NOTE(review): a number of short lines of this method (loop headers, else branches, case labels,
// closing braces and the statements that follow the "could not recover" comments) are not visible
// in this listing; the notes added below describe only what is shown.
143 public function parse($code, ErrorHandler $errorHandler = null) {
// No handler supplied (or a falsy one): fall back to ErrorHandler\Throwing.
144 $this->errorHandler = $errorHandler ?: new ErrorHandler\Throwing;
146 // Initialize the lexer
147 $this->lexer->startLexing($code, $this->errorHandler);
149 // We start off with no lookahead-token
150 $symbol = self::SYMBOL_NONE;
152 // The attributes for a node are taken from the first and last token of the node.
153 // From the first token only the startAttributes are taken and from the last only
154 // the endAttributes. Both are merged using the array union operator (+).
// Placeholder values: both variables are handed to getNextToken() further down, which (per the
// comment at that call) fills them by reference.
155 $startAttributes = '*POISON';
156 $endAttributes = '*POISON';
157 $this->endAttributes = $endAttributes;
159 // Keep stack of start and end attributes
160 $this->startAttributeStack = array();
161 $this->endAttributeStack = array($endAttributes);
163 // Start off in the initial state and keep a stack of previous states
165 $stateStack = array($state);
167 // Semantic value stack (contains values of tokens and semantic action results)
168 $this->semStack = array();
170 // Current position in the stack(s)
173 $this->errorState = 0;
176 //$this->traceNewState($state, $symbol);
// Per the $actionBase property documentation, a base of 0 means the action for this state is
// defaulted, so the default rule is taken here.
178 if ($this->actionBase[$state] == 0) {
179 $rule = $this->actionDefault[$state];
181 if ($symbol === self::SYMBOL_NONE) {
182 // Fetch the next token id from the lexer and fetch additional info by-ref.
183 // The end attributes are fetched into a temporary variable and only set once the token is really
184 // shifted (not during read). Otherwise you would sometimes get off-by-one errors, when a rule is
185 // reduced after a token was read but not yet shifted.
186 $tokenId = $this->lexer->getNextToken($tokenValue, $startAttributes, $endAttributes);
188 // map the lexer token id to the internally used symbols
// Token ids outside [0, tokenToSymbolMapSize) map to the invalid symbol, which is rejected below.
189 $symbol = $tokenId >= 0 && $tokenId < $this->tokenToSymbolMapSize
190 ? $this->tokenToSymbol[$tokenId]
191 : $this->invalidSymbol;
193 if ($symbol === $this->invalidSymbol) {
194 throw new \RangeException(sprintf(
195 'The lexer returned an invalid token (id=%d, value=%s)',
196 $tokenId, $tokenValue
200 // This is necessary to assign some meaningful attributes to /* empty */ productions. They'll get
201 // the attributes of the next token, even though they don't contain it themselves.
202 $this->startAttributeStack[$this->stackPos+1] = $startAttributes;
203 $this->endAttributeStack[$this->stackPos+1] = $endAttributes;
204 $this->lookaheadStartAttributes = $startAttributes;
206 //$this->traceRead($symbol);
// Look up the action for ($state, $symbol). The table entry is only used if the check table
// confirms it belongs to this symbol (directly, or via the second base at $state + YYNLSTATES)
// and the entry is not the default-action marker.
209 $idx = $this->actionBase[$state] + $symbol;
210 if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $symbol)
211 || ($state < $this->YY2TBLSTATE
212 && ($idx = $this->actionBase[$state + $this->YYNLSTATES] + $symbol) >= 0
213 && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $symbol))
214 && ($action = $this->action[$idx]) != $this->defaultAction) {
216 * >= YYNLSTATES: shift and reduce
220 * = -YYUNEXPECTED: error
224 //$this->traceShift($symbol);
// Shift: record the new state, the token value and the token's attributes at the current stack
// position, then clear the lookahead so the next iteration reads a fresh token.
227 $stateStack[$this->stackPos] = $state = $action;
228 $this->semStack[$this->stackPos] = $tokenValue;
229 $this->startAttributeStack[$this->stackPos] = $startAttributes;
230 $this->endAttributeStack[$this->stackPos] = $endAttributes;
231 $this->endAttributes = $endAttributes;
232 $symbol = self::SYMBOL_NONE;
234 if ($this->errorState) {
238 if ($action < $this->YYNLSTATES) {
242 /* $yyn >= YYNLSTATES means shift-and-reduce */
243 $rule = $action - $this->YYNLSTATES;
248 $rule = $this->actionDefault[$state];
255 //$this->traceAccept();
256 return $this->semValue;
257 } elseif ($rule !== $this->unexpectedTokenRule) {
259 //$this->traceReduce($rule);
// Rule actions are dispatched by method name: reduceRule<N>().
262 $this->{'reduceRule' . $rule}();
// NOTE(review): $e is presumably the Error caught from the rule action (the catch line is not
// shown). If it carries no start line (-1), the start line of the most recently read token is used.
264 if (-1 === $e->getStartLine() && isset($startAttributes['startLine'])) {
265 $e->setStartLine($startAttributes['startLine']);
268 $this->emitError($e);
269 // Can't recover from this type of error
273 /* Goto - shift nonterminal */
// Remember the end attributes of the topmost element, pop the rule's right-hand side, then pick
// the goto state for the rule's left-hand non-terminal from the uncovered state (falling back to
// the non-terminal's default goto state when the check table does not match).
274 $lastEndAttributes = $this->endAttributeStack[$this->stackPos];
275 $this->stackPos -= $this->ruleToLength[$rule];
276 $nonTerminal = $this->ruleToNonTerminal[$rule];
277 $idx = $this->gotoBase[$nonTerminal] + $stateStack[$this->stackPos];
278 if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] == $nonTerminal) {
279 $state = $this->goto[$idx];
281 $state = $this->gotoDefault[$nonTerminal];
// The reduced element keeps the end attributes remembered before the pop.
285 $stateStack[$this->stackPos] = $state;
286 $this->semStack[$this->stackPos] = $this->semValue;
287 $this->endAttributeStack[$this->stackPos] = $lastEndAttributes;
290 switch ($this->errorState) {
// Report the syntax error, located by the start and end attributes of the lookahead token.
292 $msg = $this->getErrorMessage($symbol, $state);
293 $this->emitError(new Error($msg, $startAttributes + $endAttributes));
294 // Break missing intentionally
297 $this->errorState = 3;
299 // Pop until error-expecting state uncovered
301 (($idx = $this->actionBase[$state] + $this->errorSymbol) >= 0
302 && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $this->errorSymbol)
303 || ($state < $this->YY2TBLSTATE
304 && ($idx = $this->actionBase[$state + $this->YYNLSTATES] + $this->errorSymbol) >= 0
305 && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $this->errorSymbol)
306 ) || ($action = $this->action[$idx]) == $this->defaultAction) { // Not totally sure about this
307 if ($this->stackPos <= 0) {
308 // Could not recover from error
311 $state = $stateStack[--$this->stackPos];
312 //$this->tracePop($state);
315 //$this->traceShift($this->errorSymbol);
317 $stateStack[$this->stackPos] = $state = $action;
319 // We treat the error symbol as being empty, so we reset the end attributes
320 // to the end attributes of the last non-error symbol
321 $this->endAttributeStack[$this->stackPos] = $this->endAttributeStack[$this->stackPos - 1];
322 $this->endAttributes = $this->endAttributeStack[$this->stackPos - 1];
327 // Reached EOF without recovering from error
331 //$this->traceDiscard($symbol);
332 $symbol = self::SYMBOL_NONE;
337 if ($state < $this->YYNLSTATES) {
341 /* >= YYNLSTATES means shift-and-reduce */
342 $rule = $state - $this->YYNLSTATES;
346 throw new \RuntimeException('Reached end of parser loop');
/**
 * Passes an error on to the error handler selected for the current parse.
 *
 * @param Error $error The error to report
 */
protected function emitError(Error $error) {
    $handler = $this->errorHandler;
    $handler->handleError($error);
}
/**
 * Builds the message for an unexpected token.
 *
 * @param int $symbol Symbol that was encountered
 * @param int $state  State in which it was encountered
 *
 * @return string "Syntax error, unexpected X", followed by ", expecting A or B" when
 *                getExpectedTokens() yields a non-empty list for the state
 */
protected function getErrorMessage($symbol, $state) {
    $expected = $this->getExpectedTokens($state);
    $suffix = $expected
        ? ', expecting ' . implode(' or ', $expected)
        : '';

    return 'Syntax error, unexpected ' . $this->symbolToName[$symbol] . $suffix;
}
/**
 * Collects the names of the symbols that have a real action in the given state.
 *
 * @param int $state State to inspect
 *
 * @return string[] Symbol names; an empty array if more than four symbols qualify
 */
protected function getExpectedTokens($state) {
    $names = array();
    $base = $this->actionBase[$state];

    foreach ($this->symbolToName as $symbol => $name) {
        // Same table lookup as in parse(): try the primary base first, then the secondary
        // base. $idx is deliberately reassigned by the second alternative.
        $idx = $base + $symbol;
        $hasEntry = ($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
            || ($state < $this->YY2TBLSTATE
                && ($idx = $this->actionBase[$state + $this->YYNLSTATES] + $symbol) >= 0
                && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol);
        if (!$hasEntry) {
            continue;
        }

        // Entries that only signal "unexpected token" or "use the default action", and the
        // error-recovery symbol itself, are not tokens a user could have written.
        if ($this->action[$idx] == $this->unexpectedTokenRule
            || $this->action[$idx] == $this->defaultAction
            || $symbol == $this->errorSymbol
        ) {
            continue;
        }

        if (count($names) == 4) {
            /* Too many expected tokens */
            return array();
        }

        $names[] = $name;
    }

    return $names;
}
391 * Tracing functions used for debugging the parser.
/**
 * Debug output: prints the state that was entered and the current lookahead symbol.
 *
 * @param int $state  State number
 * @param int $symbol Lookahead symbol, or self::SYMBOL_NONE if none has been read
 */
protected function traceNewState($state, $symbol) {
    $lookahead = $symbol == self::SYMBOL_NONE
        ? '--none--'
        : $this->symbolToName[$symbol];

    echo '% State ', $state, ', Lookahead ', $lookahead, "\n";
}
/**
 * Debug output: prints the name of a symbol that was just read.
 *
 * @param int $symbol Symbol number
 */
protected function traceRead($symbol) {
    $name = $this->symbolToName[$symbol];
    echo '% Reading ', $name, "\n";
}
/**
 * Debug output: prints the name of a symbol that is being shifted.
 *
 * @param int $symbol Symbol number
 */
protected function traceShift($symbol) {
    $name = $this->symbolToName[$symbol];
    echo '% Shift ', $name, "\n";
}
/**
 * Debug output: prints that the input was accepted.
 */
protected function traceAccept() {
    print "% Accepted.\n";
}
/**
 * Debug output: prints the number and the production text of a rule being reduced.
 *
 * @param int $n Rule number
 */
protected function traceReduce($n) {
    $production = $this->productions[$n];
    echo '% Reduce by (', $n, ') ', $production, "\n";
}
/**
 * Debug output: prints the state uncovered while popping the stack during error recovery.
 *
 * @param int $state State number
 */
protected function tracePop($state) {
    echo '% Recovering, uncovered state ', $state, "\n";
}
/**
 * Debug output: prints the name of a symbol that is being discarded.
 *
 * @param int $symbol Symbol number
 */
protected function traceDiscard($symbol) {
    $name = $this->symbolToName[$symbol];
    echo '% Discard ', $name, "\n";
}
426 * Helper functions invoked by semantic actions
430 * Moves statements of semicolon-style namespaces into $ns->stmts and checks various error conditions.
432 * @param Node[] $stmts
protected function handleNamespaces(array $stmts) {
    $style = $this->getNamespacingStyle($stmts);

    // Not namespaced: the statement list is returned untouched.
    if (null === $style) {
        return $stmts;
    }

    if ('brace' === $style) {
        // Braced namespaces already own their statements. All that is left to verify is that
        // no stray code follows the first namespace block.
        $seenNamespace = false;
        $alreadyReported = false;
        foreach ($stmts as $stmt) {
            if ($stmt instanceof Node\Stmt\Namespace_) {
                $seenNamespace = true;
                continue;
            }

            // __halt_compiler() and nops are tolerated between and after namespace blocks.
            if ($stmt instanceof Node\Stmt\HaltCompiler || $stmt instanceof Node\Stmt\Nop) {
                continue;
            }

            if ($seenNamespace && !$alreadyReported) {
                $this->emitError(new Error(
                    'No code may exist outside of namespace {}', $stmt->getAttributes()));
                $alreadyReported = true; // One error is enough, not one per statement
            }
        }

        return $stmts;
    }

    // Semicolon style: statements that follow a namespace declaration are moved into its
    // ->stmts. $sink always references the array that currently receives statements.
    $result = array();
    $sink =& $result;
    foreach ($stmts as $stmt) {
        if ($stmt instanceof Node\Stmt\Namespace_) {
            $result[] = $stmt;
            if (null === $stmt->stmts) {
                // Unbracketed declaration: start collecting into this namespace
                $stmt->stmts = array();
                $sink =& $stmt->stmts;
            } else {
                // Bracketed declaration in a semicolon-style file (invalid mixed style):
                // subsequent statements go back to the top level
                $sink =& $result;
            }
            continue;
        }

        if ($stmt instanceof Node\Stmt\HaltCompiler) {
            // __halt_compiler() always stays at the top level
            $result[] = $stmt;
            continue;
        }

        $sink[] = $stmt;
    }

    return $result;
}
/**
 * Determines which namespace declaration style a list of statements uses.
 *
 * Emits an error if the first namespace declaration is preceded by a statement that is not
 * allowed there, or if bracketed and unbracketed declarations are mixed.
 *
 * @param Node[] $stmts Statements to inspect
 *
 * @return null|string 'semicolon', 'brace', or null if there is no namespace declaration
 */
private function getNamespacingStyle(array $stmts) {
    $style = null;
    $sawForbiddenStmt = false;

    foreach ($stmts as $i => $stmt) {
        if (!$stmt instanceof Node\Stmt\Namespace_) {
            // declare(), __halt_compiler() and nops can be used before a namespace
            // declaration, and there may be a hashbang line at the very start of the file.
            // Everything else is forbidden before namespace declarations.
            $isAllowed = ($stmt instanceof Node\Stmt\Declare_
                || $stmt instanceof Node\Stmt\HaltCompiler
                || $stmt instanceof Node\Stmt\Nop
                || ($i == 0 && $stmt instanceof Node\Stmt\InlineHTML
                    && preg_match('/\A#!.*\r?\n\z/', $stmt->value)));
            if (!$isAllowed) {
                $sawForbiddenStmt = true;
            }
            continue;
        }

        $currentStyle = null === $stmt->stmts ? 'semicolon' : 'brace';

        if (null === $style) {
            // First namespace declaration decides the style
            $style = $currentStyle;
            if ($sawForbiddenStmt) {
                $this->emitError(new Error(
                    'Namespace declaration statement has to be the very first statement in the script',
                    $stmt->getLine() // Avoid marking the entire namespace as an error
                ));
            }
        } elseif ($style !== $currentStyle) {
            $this->emitError(new Error(
                'Cannot mix bracketed namespace declarations with unbracketed namespace declarations',
                $stmt->getLine() // Avoid marking the entire namespace as an error
            ));
            // Treat like semicolon style for namespace normalization
            return 'semicolon';
        }
    }

    return $style;
}
// Replaces a Name by a plain lower-cased string when that lower-cased name is a key of
// $scalarTypes; otherwise the Name object is handed back as is.
// NOTE(review): the $scalarTypes table and the body of the isUnqualified() guard are on lines
// not visible here - presumably qualified names are returned unchanged; confirm.
525 protected function handleBuiltinTypes(Name $name) {
536 if (!$name->isUnqualified()) {
// Lookup is done on the lower-cased name, and the lower-cased form is what gets returned.
540 $lowerName = strtolower($name->toString());
541 return isset($scalarTypes[$lowerName]) ? $lowerName : $name;
544 protected static $specialNames = array(
/**
 * Builds the attribute array for the stack element at the given position.
 *
 * @param int $pos Stack position
 *
 * @return array Start attributes united with end attributes (for keys present in both,
 *               the array union operator keeps the start attribute)
 */
protected function getAttributesAt($pos) {
    $start = $this->startAttributeStack[$pos];
    $end = $this->endAttributeStack[$pos];

    return $start + $end;
}
/**
 * Creates an LNumber node from its source representation.
 *
 * If LNumber::fromString() raises an Error, the error is emitted and a node with the value 0
 * is produced in its place.
 *
 * @param string $str               Source text of the number
 * @param array  $attributes        Attributes for the node
 * @param bool   $allowInvalidOctal Passed through to LNumber::fromString()
 *
 * @return LNumber
 */
protected function parseLNumber($str, $attributes, $allowInvalidOctal = false) {
    try {
        $number = LNumber::fromString($str, $attributes, $allowInvalidOctal);
    } catch (Error $error) {
        $this->emitError($error);
        // Use dummy value
        $number = new LNumber(0, $attributes);
    }

    return $number;
}
// Turns a numeric-looking string into either a String_ or an LNumber node.
564 protected function parseNumString($str, $attributes) {
// Only "0" or an optionally negative decimal without leading zeros is treated as a number
// candidate; anything else stays a string node.
565 if (!preg_match('/^(?:0|-?[1-9][0-9]*)$/', $str)) {
566 return new String_($str, $attributes);
// NOTE(review): the lines that compute $num and guard this second String_ return are not
// visible here - presumably a check that the value still fits an integer; confirm.
571 return new String_($str, $attributes);
574 return new LNumber($num, $attributes);
/**
 * Verifies that a modifier may be combined with the modifiers collected so far.
 *
 * @param int $a           First argument for Class_::verifyModifier()
 * @param int $b           Second argument for Class_::verifyModifier()
 * @param int $modifierPos Stack position of the modifier, used to locate the error
 */
protected function checkModifier($a, $b, $modifierPos) {
    // verifyModifier() is also used elsewhere and knows nothing about source positions, so
    // the error it throws is given its attributes here before being emitted.
    try {
        Class_::verifyModifier($a, $b);
    } catch (Error $modifierError) {
        $attributes = $this->getAttributesAt($modifierPos);
        $modifierError->setAttributes($attributes);
        $this->emitError($modifierError);
    }
}
/**
 * Reports a variadic parameter that declares a default value.
 *
 * @param Param $node Parameter node to check
 */
protected function checkParam(Param $node) {
    if (!$node->variadic || null === $node->default) {
        return;
    }

    // The error is located at the default value expression, not at the whole parameter.
    $this->emitError(new Error(
        'Variadic parameter cannot have a default value',
        $node->default->getAttributes()
    ));
}
/**
 * Reports a try statement that has neither a catch nor a finally clause.
 *
 * @param TryCatch $node Try statement to check
 */
protected function checkTryCatch(TryCatch $node) {
    if (!empty($node->catches) || null !== $node->finally) {
        return;
    }

    $this->emitError(new Error(
        'Cannot use try without catch or finally', $node->getAttributes()
    ));
}
/**
 * Reports a namespace whose name is listed in self::$specialNames, and namespace declarations
 * nested directly inside another namespace.
 *
 * @param Namespace_ $node Namespace node to check
 */
protected function checkNamespace(Namespace_ $node) {
    // A namespace block without a name has no name to validate. The guard keeps null from
    // being passed to strtolower(), which is deprecated since PHP 8.1.
    if (null !== $node->name && isset(self::$specialNames[strtolower($node->name)])) {
        $this->emitError(new Error(
            sprintf('Cannot use \'%s\' as namespace name', $node->name),
            $node->name->getAttributes()
        ));
    }

    if (null !== $node->stmts) {
        foreach ($node->stmts as $stmt) {
            if ($stmt instanceof Namespace_) {
                $this->emitError(new Error(
                    'Namespace declarations cannot be nested', $stmt->getAttributes()
                ));
            }
        }
    }
}
/**
 * Reports names listed in self::$specialNames that are used as the class name, as the parent
 * class or as an implemented interface.
 *
 * @param Class_ $node    Class node to check
 * @param int    $namePos Stack position of the class name, used to locate a class-name error
 */
protected function checkClass(Class_ $node, $namePos) {
    if (null !== $node->name && isset(self::$specialNames[strtolower($node->name)])) {
        $this->emitError(new Error(
            sprintf('Cannot use \'%s\' as class name as it is reserved', $node->name),
            $this->getAttributesAt($namePos)
        ));
    }

    // A class without a parent has no name to validate. The guard mirrors the name check
    // above and keeps null from being passed to strtolower() (deprecated since PHP 8.1).
    if (null !== $node->extends && isset(self::$specialNames[strtolower($node->extends)])) {
        $this->emitError(new Error(
            sprintf('Cannot use \'%s\' as class name as it is reserved', $node->extends),
            $node->extends->getAttributes()
        ));
    }

    foreach ($node->implements as $interface) {
        if (isset(self::$specialNames[strtolower($interface)])) {
            $this->emitError(new Error(
                sprintf('Cannot use \'%s\' as interface name as it is reserved', $interface),
                $interface->getAttributes()
            ));
        }
    }
}
/**
 * Reports names listed in self::$specialNames that are used as the interface name or as an
 * extended interface.
 *
 * @param Interface_ $node    Interface node to check
 * @param int        $namePos Stack position of the interface name, used to locate a name error
 */
protected function checkInterface(Interface_ $node, $namePos) {
    $ownName = $node->name;
    if (null !== $ownName && isset(self::$specialNames[strtolower($ownName)])) {
        $message = sprintf('Cannot use \'%s\' as class name as it is reserved', $ownName);
        $this->emitError(new Error($message, $this->getAttributesAt($namePos)));
    }

    foreach ($node->extends as $parentInterface) {
        if (!isset(self::$specialNames[strtolower($parentInterface)])) {
            continue;
        }

        $message = sprintf('Cannot use \'%s\' as interface name as it is reserved', $parentInterface);
        $this->emitError(new Error($message, $parentInterface->getAttributes()));
    }
}
/**
 * Reports constructors, destructors and clone methods that are declared static.
 *
 * @param ClassMethod $node        Method node to check
 * @param int         $modifierPos Stack position of the modifiers, used to locate the error
 */
protected function checkClassMethod(ClassMethod $node, $modifierPos) {
    if (!($node->flags & Class_::MODIFIER_STATIC)) {
        return;
    }

    // Message format per lower-cased method name; all other methods may be static.
    $messageFormats = array(
        '__construct' => 'Constructor %s() cannot be static',
        '__destruct'  => 'Destructor %s() cannot be static',
        '__clone'     => 'Clone method %s() cannot be static',
    );

    $lowerName = strtolower($node->name);
    if (isset($messageFormats[$lowerName])) {
        $this->emitError(new Error(
            sprintf($messageFormats[$lowerName], $node->name),
            $this->getAttributesAt($modifierPos)));
    }
}
/**
 * Reports modifiers that are not allowed on class constants.
 *
 * @param ClassConst $node        Class constant node to check
 * @param int        $modifierPos Stack position of the modifiers, used to locate the error
 */
protected function checkClassConst(ClassConst $node, $modifierPos) {
    // Checked in this order; each offending modifier yields its own error.
    $forbidden = array(
        Class_::MODIFIER_STATIC   => "Cannot use 'static' as constant modifier",
        Class_::MODIFIER_ABSTRACT => "Cannot use 'abstract' as constant modifier",
        Class_::MODIFIER_FINAL    => "Cannot use 'final' as constant modifier",
    );

    foreach ($forbidden as $flag => $message) {
        if ($node->flags & $flag) {
            $this->emitError(new Error($message, $this->getAttributesAt($modifierPos)));
        }
    }
}
/**
 * Reports properties that are declared abstract or final.
 *
 * @param Property $node        Property node to check
 * @param int      $modifierPos Stack position of the modifiers, used to locate the error
 */
protected function checkProperty(Property $node, $modifierPos) {
    $flags = $node->flags;

    $isAbstract = $flags & Class_::MODIFIER_ABSTRACT;
    if ($isAbstract) {
        $this->emitError(new Error(
            'Properties cannot be declared abstract',
            $this->getAttributesAt($modifierPos)
        ));
    }

    $isFinal = $flags & Class_::MODIFIER_FINAL;
    if ($isFinal) {
        $this->emitError(new Error(
            'Properties cannot be declared final',
            $this->getAttributesAt($modifierPos)
        ));
    }
}
718 protected function checkUseUse(UseUse $node, $namePos) {
719 if ('self' == strtolower($node->alias) || 'parent' == strtolower($node->alias)) {
720 $this->emitError(new Error(
722 'Cannot use %s as %s because \'%2$s\' is a special class name',
723 $node->name, $node->alias
725 $this->getAttributesAt($namePos)