4 * This file is part of the Behat Gherkin.
5 * (c) Konstantin Kudryashov <ever.zet@gmail.com>
7 * For the full copyright and license information, please view the LICENSE
8 * file that was distributed with this source code.
11 namespace Behat\Gherkin;
13 use Behat\Gherkin\Exception\LexerException;
14 use Behat\Gherkin\Keywords\KeywordsInterface;
19 * @author Konstantin Kudryashov <ever.zet@gmail.com>
// Keyword strings per keyword type, filled lazily by getKeywords()
private $keywordsCache = array();
// Step keywords grouped by step type (Given, When, Then, And, But), filled lazily
private $stepKeywordTypesCache = array();
// Tokens queued through deferToken(), handed out first-in first-out
private $deferredObjects = array();
// Number of tokens currently waiting in $deferredObjects
private $deferredObjectsCount = 0;
// Token fetched ahead by predictToken() and not yet consumed
private $stashedToken;
// Flipped every time a """ delimiter line is scanned; true while inside a PyString
private $inPyString = false;
// Position of the last """ delimiter; up to that many leading whitespace characters are stripped from PyString content
private $pyStringSwallow = 0;
// Set once a Feature line has been scanned; language scanning stops from then on
private $featureStarted = false;
// Whether PyString delimiters and table rows are currently recognised
private $allowMultilineArguments = false;
// Whether step lines are currently recognised
private $allowSteps = false;
/**
 * Creates a lexer bound to the given keywords holder.
 *
 * @param KeywordsInterface $keywords Keywords holder
 */
public function __construct(KeywordsInterface $keywords)
{
    $this->keywords = $keywords;
}
/**
 * Sets the input to tokenise and resets every piece of lexer state.
 *
 * @param string $input    Input string
 * @param string $language Language name
 *
 * @throws Exception\LexerException When the input is not detected as UTF-8
 */
public function analyse($input, $language = 'en')
{
    // strict detection: anything that is not valid UTF-8 is rejected up front
    $encoding = mb_detect_encoding($input, 'UTF-8', true);
    if ('UTF-8' !== $encoding) {
        throw new LexerException('Feature file is not in UTF8 encoding');
    }

    // normalise "\r\n" and lone "\r" so that splitting on "\n" is sufficient
    $normalised = strtr($input, array("\r\n" => "\n", "\r" => "\n"));

    // line buffer and cursor
    $this->lines = explode("\n", $normalised);
    $this->linesCount = count($this->lines);
    $this->lineNumber = 1;
    $this->line = $this->lines[0];
    $this->trimmedLine = null;
    $this->eos = false;

    // pending tokens and PyString tracking
    $this->stashedToken = null;
    $this->deferredObjects = array();
    $this->deferredObjectsCount = 0;
    $this->inPyString = false;
    $this->pyStringSwallow = 0;

    // scanner switches
    $this->featureStarted = false;
    $this->allowMultilineArguments = false;
    $this->allowSteps = false;

    // language-dependent state; the keyword caches belong to the previous language
    $this->language = $language;
    $this->keywords->setLanguage($language);
    $this->keywordsCache = array();
    $this->stepKeywordTypesCache = array();
}
/**
 * Returns the language the lexer is currently set to.
 *
 * @return string
 */
public function getLanguage()
{
    return $this->language;
}
/**
 * Returns the stashed token when there is one, otherwise the next token from input.
 *
 * @return array
 */
public function getAdvancedToken()
{
    $stashed = $this->getStashedToken();
    if ($stashed) {
        return $stashed;
    }

    return $this->getNextToken();
}
/**
 * Marks the token as deferred and appends it to the deferred queue.
 *
 * @param array $token Token to defer
 */
public function deferToken(array $token)
{
    $token['deferred'] = true;

    array_push($this->deferredObjects, $token);
    $this->deferredObjectsCount += 1;
}
/**
 * Peeks at the upcoming token without consuming it.
 *
 * The token is fetched once and stashed; repeated calls return the same
 * stashed token until it is taken.
 *
 * @return array
 */
public function predictToken()
{
    if (null !== $this->stashedToken) {
        return $this->stashedToken;
    }

    $this->stashedToken = $this->getNextToken();

    return $this->stashedToken;
}
/**
 * Builds a token array of the given type for the current line number.
 *
 * @param string $type  Token type
 * @param string $value Token value; any falsy value is stored as null
 *
 * @return array
 */
public function takeToken($type, $value = null)
{
    $token = array();
    $token['type'] = $type;
    $token['line'] = $this->lineNumber;
    $token['value'] = $value ? $value : null;
    $token['deferred'] = false;

    return $token;
}
/**
 * Moves to the next input line and bumps the line counter.
 *
 * When the last line has just been consumed the end-of-stream flag is raised
 * and the current line is left untouched.
 */
protected function consumeLine()
{
    // the 1-based number of the current line is the 0-based index of the next one
    $nextIndex = $this->lineNumber;
    ++$this->lineNumber;

    if ($nextIndex === $this->linesCount) {
        $this->eos = true;

        return;
    }

    $this->line = $this->lines[$nextIndex];
    // force getTrimmedLine() to recompute for the new line
    $this->trimmedLine = null;
}
/**
 * Returns the current line with surrounding whitespace removed.
 *
 * The result is cached until the trimmed-line cache is reset to null.
 *
 * @return string
 */
protected function getTrimmedLine()
{
    if (null === $this->trimmedLine) {
        $this->trimmedLine = trim($this->line);
    }

    return $this->trimmedLine;
}
/**
 * Takes the stashed token, clearing the stash.
 *
 * @return array|null The stashed token, or null when nothing was stashed
 */
protected function getStashedToken()
{
    $taken = $this->stashedToken;
    $this->stashedToken = null;

    return $taken;
}
/**
 * Takes the oldest deferred token off the queue.
 *
 * @return array|null The deferred token, or null when the queue is empty
 */
protected function getDeferredToken()
{
    if ($this->deferredObjectsCount) {
        --$this->deferredObjectsCount;

        return array_shift($this->deferredObjects);
    }

    return null;
}
/**
 * Returns the next token from input.
 *
 * Deferred tokens come first; after that each scanner is tried in a fixed
 * order and the first one that yields a token wins. scanText() is the
 * catch-all at the end.
 *
 * @return array
 */
protected function getNextToken()
{
    // order matters: earlier entries take precedence over later ones
    $sources = array(
        'getDeferredToken',
        'scanEOS',
        'scanLanguage',
        'scanComment',
        'scanPyStringOp',
        'scanPyStringContent',
        'scanStep',
        'scanScenario',
        'scanBackground',
        'scanOutline',
        'scanExamples',
        'scanFeature',
        'scanTags',
        'scanTableRow',
        'scanNewline',
    );

    foreach ($sources as $source) {
        $token = $this->$source();
        if ($token) {
            return $token;
        }
    }

    return $this->scanText();
}
/**
 * Matches the current line against a regex and, on success, turns it into a token.
 *
 * The first capture group becomes the token value and the line is consumed.
 *
 * @param string $regex Regular expression
 * @param string $type  Expected token type
 *
 * @return array|null
 */
protected function scanInput($regex, $type)
{
    if (preg_match($regex, $this->line, $matches)) {
        $token = $this->takeToken($type, $matches[1]);
        $this->consumeLine();

        return $token;
    }

    return null;
}
/**
 * Scans the current line for a "<keyword>: <title>" header of the given type.
 *
 * On a match the line is consumed and the scanner switches are updated:
 * Feature stops language scanning; Feature, Scenario and Outline turn
 * PyString/table scanning off while Examples turns it on; Scenario,
 * Background and Outline turn step scanning on.
 *
 * @param string $keywords Keywords (separated with |)
 * @param string $type     Expected token type
 *
 * @return array|null
 */
protected function scanInputForKeywords($keywords, $type)
{
    $pattern = '/^(\s*)(' . $keywords . '):\s*(.*)/u';
    if (!preg_match($pattern, $this->line, $matches)) {
        return null;
    }

    list(, $indentation, $keyword, $title) = $matches;

    $token = $this->takeToken($type, $title);
    $token['keyword'] = $keyword;
    $token['indent'] = mb_strlen($indentation, 'utf8');

    $this->consumeLine();

    // turn off language searching
    if ('Feature' === $type) {
        $this->featureStarted = true;
    }

    // PyString and table searching: off after block headers, on after Examples
    if (in_array($type, array('Feature', 'Scenario', 'Outline'), true)) {
        $this->allowMultilineArguments = false;
    } elseif ('Examples' === $type) {
        $this->allowMultilineArguments = true;
    }

    // turn on steps searching
    if (in_array($type, array('Scenario', 'Background', 'Outline'), true)) {
        $this->allowSteps = true;
    }

    return $token;
}
/**
 * Produces an EOS token once the end-of-stream flag has been raised.
 *
 * @return array|null
 */
protected function scanEOS()
{
    return $this->eos ? $this->takeToken('EOS') : null;
}
/**
 * Returns the keywords string for the given type, caching it per type.
 *
 * The string comes from the keywords holder's get<Type>Keywords() method.
 * Step keywords are additionally turned into regex alternatives: a keyword
 * containing "<" loses its last character and may be followed by optional
 * whitespace, any other keyword must be followed by whitespace.
 *
 * @param string $type Keyword type
 *
 * @return string
 */
protected function getKeywords($type)
{
    if (isset($this->keywordsCache[$type])) {
        return $this->keywordsCache[$type];
    }

    $getter = 'get' . $type . 'Keywords';
    $keywords = $this->keywords->$getter();

    if ('Step' === $type) {
        $alternatives = array_map(function ($keyword) {
            if (false === mb_strpos($keyword, '<', 0, 'utf8')) {
                return preg_quote($keyword, '/') . '\s+';
            }

            return preg_quote(mb_substr($keyword, 0, -1, 'utf8'), '/') . '\s*';
        }, explode('|', $keywords));

        $keywords = implode('|', $alternatives);
    }

    $this->keywordsCache[$type] = $keywords;

    return $this->keywordsCache[$type];
}
/**
 * Scans the current line for a Feature header.
 *
 * @return array|null
 */
protected function scanFeature()
{
    $featureKeywords = $this->getKeywords('Feature');

    return $this->scanInputForKeywords($featureKeywords, 'Feature');
}
/**
 * Scans the current line for a Background header.
 *
 * @return array|null
 */
protected function scanBackground()
{
    $backgroundKeywords = $this->getKeywords('Background');

    return $this->scanInputForKeywords($backgroundKeywords, 'Background');
}
/**
 * Scans the current line for a Scenario header.
 *
 * @return array|null
 */
protected function scanScenario()
{
    $scenarioKeywords = $this->getKeywords('Scenario');

    return $this->scanInputForKeywords($scenarioKeywords, 'Scenario');
}
/**
 * Scans the current line for a Scenario Outline header.
 *
 * @return array|null
 */
protected function scanOutline()
{
    $outlineKeywords = $this->getKeywords('Outline');

    return $this->scanInputForKeywords($outlineKeywords, 'Outline');
}
/**
 * Scans the current line for a Scenario Outline Examples header.
 *
 * @return array|null
 */
protected function scanExamples()
{
    $examplesKeywords = $this->getKeywords('Examples');

    return $this->scanInputForKeywords($examplesKeywords, 'Examples');
}
/**
 * Scans the current line for a step and returns its token if found.
 *
 * Steps are only recognised while step scanning is switched on. The token
 * value is the trimmed keyword; the step text and the keyword type are
 * stored under 'text' and 'keyword_type'. After a step, PyString and table
 * scanning is switched on.
 *
 * @return array|null
 */
protected function scanStep()
{
    if (!$this->allowSteps) {
        return null;
    }

    $keywords = $this->getKeywords('Step');
    // the text must start with a non-blank character but may be a single
    // character long ("Given x"); ".+" here used to demand at least two
    if (!preg_match('/^\s*(' . $keywords . ')([^\s].*)/u', $this->line, $matches)) {
        return null;
    }

    $keyword = trim($matches[1]);
    $token = $this->takeToken('Step', $keyword);
    $token['keyword_type'] = $this->getStepKeywordType($keyword);
    $token['text'] = $matches[2];

    $this->consumeLine();
    $this->allowMultilineArguments = true;

    return $token;
}
/**
 * Scans the current line for a PyString delimiter (""") and returns its token if found.
 *
 * Only active while multiline arguments are allowed. A delimiter toggles the
 * in-PyString state and records its indentation, which is later stripped
 * from the content lines.
 *
 * @return array|null
 */
protected function scanPyStringOp()
{
    if (!$this->allowMultilineArguments) {
        return null;
    }

    // the delimiter must be the first non-blank thing on the line; searching
    // the whole line would also swallow step or content lines that merely
    // contain """ somewhere in their text
    if (!preg_match('/^(\s*)"""/u', $this->line, $matches)) {
        return null;
    }

    $this->inPyString = !$this->inPyString;
    $token = $this->takeToken('PyStringOp');
    // number of whitespace characters in front of the delimiter
    $this->pyStringSwallow = mb_strlen($matches[1], 'utf8');

    $this->consumeLine();

    return $token;
}
/**
 * Scans one line of PyString content.
 *
 * Only active inside a PyString. The line is taken as a Text token and up to
 * $pyStringSwallow leading whitespace characters are removed from its value.
 *
 * @return array|null
 */
protected function scanPyStringContent()
{
    if (!$this->inPyString) {
        return null;
    }

    // remember the raw line: takeToken() stores falsy values ('' and '0') as
    // null, which would lose a content line of "0" and hand null to preg_replace()
    $rawLine = $this->line;
    $token = $this->scanText();
    $value = isset($token['value']) ? $token['value'] : $rawLine;

    // swallow leading indentation
    $token['value'] = preg_replace('/^\s{0,' . $this->pyStringSwallow . '}/u', '', $value);

    return $token;
}
/**
 * Scans the current line for a table row and returns its token if found.
 *
 * Only active while multiline arguments are allowed. A row is a trimmed line
 * that starts and ends with "|". Cells are split on unescaped pipes, "\|" is
 * turned back into "|" and every cell is trimmed.
 *
 * @return array|null
 */
protected function scanTableRow()
{
    if (!$this->allowMultilineArguments) {
        return null;
    }

    $row = $this->getTrimmedLine();
    $isRow = isset($row[0]) && '|' === $row[0] && '|' === substr($row, -1);
    if (!$isRow) {
        return null;
    }

    $token = $this->takeToken('TableRow');

    // drop the outer pipes, then split on pipes not preceded by a backslash
    $inner = mb_substr($row, 1, mb_strlen($row, 'utf8') - 2, 'utf8');
    $cells = preg_split('/(?<!\\\)\|/u', $inner);
    $unescape = function ($cell) {
        return trim(str_replace('\\|', '|', $cell));
    };
    $token['columns'] = array_map($unescape, $cells);

    $this->consumeLine();

    return $token;
}
/**
 * Scans the current line for tags and returns their token if found.
 *
 * A tag line is a trimmed line starting with "@". Everything after the first
 * "@" is split on "@" and each piece is trimmed.
 *
 * @return array|null
 */
protected function scanTags()
{
    $tagLine = $this->getTrimmedLine();
    $isTagLine = isset($tagLine[0]) && '@' === $tagLine[0];
    if (!$isTagLine) {
        return null;
    }

    $token = $this->takeToken('Tag');

    $withoutFirstAt = mb_substr($tagLine, 1, mb_strlen($tagLine, 'utf8') - 1, 'utf8');
    $token['tags'] = array_map('trim', explode('@', $withoutFirstAt));

    $this->consumeLine();

    return $token;
}
/**
 * Scans the current line for a "# language: xx" specifier and returns its token if found.
 *
 * Skipped once a feature has started and while inside a PyString.
 *
 * @return array|null
 */
protected function scanLanguage()
{
    if ($this->featureStarted || $this->inPyString) {
        return null;
    }

    // cheap pre-check: only lines whose first non-blank character is "#" can qualify
    $firstHash = mb_strpos(ltrim($this->line), '#', 0, 'utf8');
    if (0 !== $firstHash) {
        return null;
    }

    return $this->scanInput('/^\s*\#\s*language:\s*([\w_\-]+)\s*$/', 'Language');
}
/**
 * Scans the current line for a comment and returns its token if found.
 *
 * A comment is a line whose trimmed form starts with "#". Nothing is treated
 * as a comment inside a PyString.
 *
 * @return array|null
 */
protected function scanComment()
{
    if ($this->inPyString) {
        return null;
    }

    $comment = $this->getTrimmedLine();
    $hashPosition = mb_strpos($comment, '#', 0, 'utf8');
    if (0 !== $hashPosition) {
        return null;
    }

    $token = $this->takeToken('Comment', $comment);
    $this->consumeLine();

    return $token;
}
/**
 * Scans the current line for a blank line and returns its token if found.
 *
 * The token value is the character length of the untrimmed line.
 *
 * @return array|null
 */
protected function scanNewline()
{
    if ('' === $this->getTrimmedLine()) {
        $length = mb_strlen($this->line, 'utf8');
        $token = $this->takeToken('Newline', $length);
        $this->consumeLine();

        return $token;
    }

    return null;
}
/**
 * Takes the current line verbatim as a Text token and consumes it.
 *
 * @return array
 */
protected function scanText()
{
    $textToken = $this->takeToken('Text', $this->line);
    $this->consumeLine();

    return $textToken;
}
584 * Returns step type keyword (Given, When, Then, etc.).
586 * @param string $native Step keyword in provided language
589 private function getStepKeywordType($native)
591 // Consider "*" as a AND keyword so that it is normalized to the previous step type
592 if ('*' === $native) {
596 if (empty($this->stepKeywordTypesCache)) {
597 $this->stepKeywordTypesCache = array(
598 'Given' => explode('|', $this->keywords->getGivenKeywords()),
599 'When' => explode('|', $this->keywords->getWhenKeywords()),
600 'Then' => explode('|', $this->keywords->getThenKeywords()),
601 'And' => explode('|', $this->keywords->getAndKeywords()),
602 'But' => explode('|', $this->keywords->getButKeywords())
606 foreach ($this->stepKeywordTypesCache as $type => $keywords) {
607 if (in_array($native, $keywords) || in_array($native . '<', $keywords)) {