Security update to Drupal 8.4.6
[yaffs-website] / vendor / twig / twig / lib / Twig / Lexer.php
1 <?php
2
3 /*
4  * This file is part of Twig.
5  *
6  * (c) Fabien Potencier
7  * (c) Armin Ronacher
8  *
9  * For the full copyright and license information, please view the LICENSE
10  * file that was distributed with this source code.
11  */
12
13 /**
14  * Lexes a template string.
15  *
16  * @author Fabien Potencier <fabien@symfony.com>
17  */
class Twig_Lexer implements Twig_LexerInterface
{
    // Tokens produced so far for the template being lexed.
    protected $tokens;
    // Template code with line endings normalized to "\n".
    protected $code;
    // Byte offset of the next character to lex in $code.
    protected $cursor;
    // Line number attached to the tokens being produced.
    protected $lineno;
    // Length of $code; lexing stops once $cursor reaches it.
    protected $end;
    // Current lexer state (one of the STATE_* constants).
    protected $state;
    // Stack of the states that were active before the current one.
    protected $states;
    // Stack of array(opening delimiter, line number) pairs still waiting to be closed.
    protected $brackets;
    protected $env;
    // to be renamed to $name in 2.0 (where it is private)
    protected $filename;
    protected $options;
    protected $regexes;
    // Index of the last entry of $positions consumed by lexData().
    protected $position;
    // Result of preg_match_all() for every tag start found in $code.
    protected $positions;
    // Line on which the block/variable tag currently being lexed was opened.
    protected $currentVarBlockLine;

    private $source;

    const STATE_DATA = 0;
    const STATE_BLOCK = 1;
    const STATE_VAR = 2;
    const STATE_STRING = 3;
    const STATE_INTERPOLATION = 4;

    const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
    const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
    const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
    const REGEX_DQ_STRING_DELIM = '/"/A';
    const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
    const PUNCTUATION = '()[]{}?:.,|';

    /**
     * Stores the environment, merges the given options over the default
     * delimiters and pre-builds every regular expression used while lexing.
     *
     * @param Twig_Environment $env     Environment providing the unary/binary operators
     * @param array            $options Overrides for 'tag_comment', 'tag_block', 'tag_variable',
     *                                  'whitespace_trim' and 'interpolation'
     */
    public function __construct(Twig_Environment $env, array $options = array())
    {
        $this->env = $env;

        $this->options = array_merge(array(
            'tag_comment' => array('{#', '#}'),
            'tag_block' => array('{%', '%}'),
            'tag_variable' => array('{{', '}}'),
            'whitespace_trim' => '-',
            'interpolation' => array('#{', '}'),
        ), $options);

        $this->regexes = array(
            'lex_var' => '/\s*'.preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_variable'][1], '/').'/A',
            'lex_block' => '/\s*(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_block'][1], '/').')\n?/A',
            'lex_raw_data' => '/('.preg_quote($this->options['tag_block'][0].$this->options['whitespace_trim'], '/').'|'.preg_quote($this->options['tag_block'][0], '/').')\s*(?:end%s)\s*(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_block'][1], '/').')/s',
            'operator' => $this->getOperatorRegex(),
            'lex_comment' => '/(?:'.preg_quote($this->options['whitespace_trim'], '/').preg_quote($this->options['tag_comment'][1], '/').'\s*|'.preg_quote($this->options['tag_comment'][1], '/').')\n?/s',
            'lex_block_raw' => '/\s*(raw|verbatim)\s*(?:'.preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/').'\s*|\s*'.preg_quote($this->options['tag_block'][1], '/').')/As',
            'lex_block_line' => '/\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '/').'/As',
            'lex_tokens_start' => '/('.preg_quote($this->options['tag_variable'][0], '/').'|'.preg_quote($this->options['tag_block'][0], '/').'|'.preg_quote($this->options['tag_comment'][0], '/').')('.preg_quote($this->options['whitespace_trim'], '/').')?/s',
            'interpolation_start' => '/'.preg_quote($this->options['interpolation'][0], '/').'\s*/A',
            'interpolation_end' => '/\s*'.preg_quote($this->options['interpolation'][1], '/').'/A',
        );
    }

    /**
     * Tokenizes a template.
     *
     * @param string|Twig_Source $code The template source (passing a plain string is deprecated)
     * @param string|null        $name The template name, only used when $code is a string
     *
     * @return Twig_TokenStream
     *
     * @throws Twig_Error_Syntax When the template cannot be lexed
     */
    public function tokenize($code, $name = null)
    {
        if (!$code instanceof Twig_Source) {
            @trigger_error(sprintf('Passing a string as the $code argument of %s() is deprecated since version 1.27 and will be removed in 2.0. Pass a Twig_Source instance instead.', __METHOD__), E_USER_DEPRECATED);
            $this->source = new Twig_Source($code, $name);
        } else {
            $this->source = $code;
        }

        $stringFunctionsOverloaded = ((int) ini_get('mbstring.func_overload')) & 2;

        if ($stringFunctionsOverloaded) {
            @trigger_error('Support for having "mbstring.func_overload" different from 0 is deprecated version 1.29 and will be removed in 2.0.', E_USER_DEPRECATED);
        }

        // The lexer works on byte offsets: when the string functions are
        // overloaded by mbstring, force a single-byte encoding while lexing.
        $mbEncoding = null;
        if ($stringFunctionsOverloaded && function_exists('mb_internal_encoding')) {
            $mbEncoding = mb_internal_encoding();
            mb_internal_encoding('ASCII');
        }

        // The encoding is process-wide state, so it must be restored on every
        // exit path, including syntax errors. catch + rethrow is used instead
        // of "finally" to stay compatible with the PHP versions this file
        // otherwise supports; the Throwable branch simply never matches on PHP 5.
        try {
            $stream = $this->runLexer();
        } catch (Exception $e) {
            $this->restoreMbEncoding($mbEncoding);

            throw $e;
        } catch (Throwable $e) {
            $this->restoreMbEncoding($mbEncoding);

            throw $e;
        }

        $this->restoreMbEncoding($mbEncoding);

        return $stream;
    }

    /**
     * Resets the lexer state for the current source and runs the state
     * machine until the whole template has been consumed.
     *
     * @return Twig_TokenStream
     *
     * @throws Twig_Error_Syntax When a bracket or string is left unclosed
     */
    private function runLexer()
    {
        $this->code = str_replace(array("\r\n", "\r"), "\n", $this->source->getCode());
        $this->filename = $this->source->getName();
        $this->cursor = 0;
        $this->lineno = 1;
        $this->end = strlen($this->code);
        $this->tokens = array();
        $this->state = self::STATE_DATA;
        $this->states = array();
        $this->brackets = array();
        $this->position = -1;

        // find all token starts in one go
        preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
        $this->positions = $matches;

        while ($this->cursor < $this->end) {
            // dispatch to the lexing functions depending
            // on the current state
            switch ($this->state) {
                case self::STATE_DATA:
                    $this->lexData();
                    break;

                case self::STATE_BLOCK:
                    $this->lexBlock();
                    break;

                case self::STATE_VAR:
                    $this->lexVar();
                    break;

                case self::STATE_STRING:
                    $this->lexString();
                    break;

                case self::STATE_INTERPOLATION:
                    $this->lexInterpolation();
                    break;
            }
        }

        $this->pushToken(Twig_Token::EOF_TYPE);

        if (!empty($this->brackets)) {
            list($expect, $lineno) = array_pop($this->brackets);
            throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
        }

        return new Twig_TokenStream($this->tokens, $this->source);
    }

    /**
     * Puts back the mbstring internal encoding saved by tokenize(), if any.
     *
     * @param string|null $mbEncoding The saved encoding, or null when nothing was changed
     */
    private function restoreMbEncoding($mbEncoding)
    {
        if ($mbEncoding) {
            mb_internal_encoding($mbEncoding);
        }
    }

    /**
     * Lexes raw template text up to the next tag start, then dispatches on
     * the kind of tag that was found (comment, block or variable).
     */
    protected function lexData()
    {
        $lastPosition = count($this->positions[0]) - 1;

        // if no matches are left we return the rest of the template as simple text token
        if ($this->position === $lastPosition) {
            $this->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor));
            $this->cursor = $this->end;

            return;
        }

        // Find the first token after the current cursor
        $position = $this->positions[0][++$this->position];
        while ($position[1] < $this->cursor) {
            if ($this->position === $lastPosition) {
                // every remaining start lies behind the cursor; the next
                // call emits the rest of the template as text
                return;
            }
            $position = $this->positions[0][++$this->position];
        }

        // push the template text first
        $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);

        // Compare against the configured marker instead of only testing for
        // presence, so an empty placeholder for the unmatched optional group
        // can never be mistaken for a trim request.
        $trimMarker = isset($this->positions[2][$this->position][0]) ? $this->positions[2][$this->position][0] : null;
        if ($this->options['whitespace_trim'] === $trimMarker) {
            $text = rtrim($text);
        }
        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
        // advance over the untrimmed text so line counting stays accurate
        $this->moveCursor($textContent.$position[0]);

        switch ($this->positions[1][$this->position][0]) {
            case $this->options['tag_comment'][0]:
                $this->lexComment();
                break;

            case $this->options['tag_block'][0]:
                // raw data?
                if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lexRawData($match[1]);
                // {% line \d+ %}
                } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lineno = (int) $match[1];
                } else {
                    $this->pushToken(Twig_Token::BLOCK_START_TYPE);
                    $this->pushState(self::STATE_BLOCK);
                    $this->currentVarBlockLine = $this->lineno;
                }
                break;

            case $this->options['tag_variable'][0]:
                $this->pushToken(Twig_Token::VAR_START_TYPE);
                $this->pushState(self::STATE_VAR);
                $this->currentVarBlockLine = $this->lineno;
                break;
        }
    }

    /**
     * Lexes the inside of a block tag: either its end delimiter (only when
     * no bracket is open) or the next expression token.
     */
    protected function lexBlock()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::BLOCK_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes the inside of a variable tag: either its end delimiter (only
     * when no bracket is open) or the next expression token.
     */
    protected function lexVar()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::VAR_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes a single expression token at the cursor: operator, name, number,
     * punctuation, string, or the opening of an interpolated string.
     *
     * @throws Twig_Error_Syntax On end of input, unbalanced brackets or an unlexable character
     */
    protected function lexExpression()
    {
        // whitespace
        if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
            $this->moveCursor($match[0]);

            if ($this->cursor >= $this->end) {
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
            }
        }

        // operators
        if (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::OPERATOR_TYPE, preg_replace('/\s+/', ' ', $match[0]));
            $this->moveCursor($match[0]);
        }
        // names
        elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::NAME_TYPE, $match[0]);
            $this->moveCursor($match[0]);
        }
        // numbers
        elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
            $number = (float) $match[0];  // floats
            if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
                $number = (int) $match[0]; // integers lower than the maximum
            }
            $this->pushToken(Twig_Token::NUMBER_TYPE, $number);
            $this->moveCursor($match[0]);
        }
        // punctuation
        elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
            $char = $this->code[$this->cursor];

            // opening bracket
            if (false !== strpos('([{', $char)) {
                $this->brackets[] = array($char, $this->lineno);
            }
            // closing bracket
            elseif (false !== strpos(')]}', $char)) {
                if (empty($this->brackets)) {
                    throw new Twig_Error_Syntax(sprintf('Unexpected "%s".', $char), $this->lineno, $this->source);
                }

                // the closing character must pair with the most recent opener
                list($expect, $lineno) = array_pop($this->brackets);
                if ($char != strtr($expect, '([{', ')]}')) {
                    throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
                }
            }

            $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $char);
            ++$this->cursor;
        }
        // strings
        elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
            $this->moveCursor($match[0]);
        }
        // opening double quoted string
        elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            $this->brackets[] = array('"', $this->lineno);
            $this->pushState(self::STATE_STRING);
            $this->moveCursor($match[0]);
        }
        // unlexable
        else {
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
        }
    }

    /**
     * Consumes everything up to the matching end tag of a raw/verbatim block
     * and emits it as a single text token.
     *
     * @param string $tag The tag name captured by the opening tag ("raw" or "verbatim")
     *
     * @throws Twig_Error_Syntax When the matching end tag is missing
     */
    protected function lexRawData($tag)
    {
        if ('raw' === $tag) {
            @trigger_error(sprintf('Twig Tag "raw" is deprecated since version 1.21. Use "verbatim" instead in %s at line %d.', $this->filename, $this->lineno), E_USER_DEPRECATED);
        }

        if (!preg_match(str_replace('%s', $tag, $this->regexes['lex_raw_data']), $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax(sprintf('Unexpected end of file: Unclosed "%s" block.', $tag), $this->lineno, $this->source);
        }

        $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
        $this->moveCursor($text.$match[0][0]);

        // the end tag opened with the whitespace trim marker
        if (false !== strpos($match[1][0], $this->options['whitespace_trim'])) {
            $text = rtrim($text);
        }

        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
    }

    /**
     * Skips a comment: moves the cursor past its closing delimiter without
     * emitting any token.
     *
     * @throws Twig_Error_Syntax When the comment is never closed
     */
    protected function lexComment()
    {
        if (!preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax('Unclosed comment.', $this->lineno, $this->source);
        }

        $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
    }

    /**
     * Lexes the inside of a double quoted string: the start of an
     * interpolation, a literal part, or the closing quote.
     *
     * @throws Twig_Error_Syntax On an unlexable character
     */
    protected function lexString()
    {
        if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
            $this->brackets[] = array($this->options['interpolation'][0], $this->lineno);
            $this->pushToken(Twig_Token::INTERPOLATION_START_TYPE);
            $this->moveCursor($match[0]);
            $this->pushState(self::STATE_INTERPOLATION);
        } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && strlen($match[0]) > 0) {
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes($match[0]));
            $this->moveCursor($match[0]);
        } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            list($expect, $lineno) = array_pop($this->brackets);
            if ('"' != $this->code[$this->cursor]) {
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
            }

            $this->popState();
            ++$this->cursor;
        } else {
            // unlexable
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
        }
    }

    /**
     * Lexes the inside of a string interpolation: its end delimiter when the
     * innermost open bracket is the interpolation opener, otherwise the next
     * expression token.
     */
    protected function lexInterpolation()
    {
        $bracket = end($this->brackets);
        if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
            array_pop($this->brackets);
            $this->pushToken(Twig_Token::INTERPOLATION_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Appends a token stamped with the current line number.
     *
     * @param int   $type  One of the Twig_Token::*_TYPE constants
     * @param mixed $value The token value
     */
    protected function pushToken($type, $value = '')
    {
        // do not push empty text tokens
        if (Twig_Token::TEXT_TYPE === $type && '' === $value) {
            return;
        }

        $this->tokens[] = new Twig_Token($type, $value, $this->lineno);
    }

    /**
     * Advances the cursor by the length of the consumed text and the line
     * counter by the number of newlines it contains.
     *
     * @param string $text The text that has just been consumed
     */
    protected function moveCursor($text)
    {
        $this->cursor += strlen($text);
        $this->lineno += substr_count($text, "\n");
    }

    /**
     * Builds the anchored regular expression matching "=" and every unary
     * and binary operator registered on the environment.
     *
     * @return string
     */
    protected function getOperatorRegex()
    {
        $operators = array_merge(
            array('='),
            array_keys($this->env->getUnaryOperators()),
            array_keys($this->env->getBinaryOperators())
        );

        // longest operators first, so the alternation tries them before
        // shorter operators
        $operators = array_combine($operators, array_map('strlen', $operators));
        arsort($operators);

        $regex = array();
        foreach ($operators as $operator => $length) {
            // an operator that ends with a character must be followed by
            // a whitespace or a parenthesis
            if (ctype_alpha($operator[$length - 1])) {
                $r = preg_quote($operator, '/').'(?=[\s()])';
            } else {
                $r = preg_quote($operator, '/');
            }

            // an operator with a space can be any amount of whitespaces
            $r = preg_replace('/\s+/', '\s+', $r);

            $regex[] = $r;
        }

        return '/'.implode('|', $regex).'/A';
    }

    /**
     * Enters a new state, remembering the current one so popState() can
     * return to it.
     *
     * @param int $state One of the STATE_* constants
     */
    protected function pushState($state)
    {
        $this->states[] = $this->state;
        $this->state = $state;
    }

    /**
     * Returns to the state that was active before the last pushState().
     *
     * @throws LogicException When there is no previous state to return to
     */
    protected function popState()
    {
        if (0 === count($this->states)) {
            // LogicException extends Exception, so existing handlers still match
            throw new LogicException('Cannot pop state without a previous state.');
        }

        $this->state = array_pop($this->states);
    }
}
426
// Make the lexer reachable under its namespaced name as well; the third
// argument (autoload) is false, so no autoloading is attempted here.
class_alias('Twig_Lexer', 'Twig\Lexer', false);