Updated Drupal to 8.6. This goes with the following updates because it's possible...
[yaffs-website] / vendor / masterminds / html5 / test / HTML5 / Parser / TokenizerTest.php
1 <?php
2 namespace Masterminds\HTML5\Tests\Parser;
3
4 use Masterminds\HTML5\Parser\UTF8Utils;
5 use Masterminds\HTML5\Parser\StringInputStream;
6 use Masterminds\HTML5\Parser\Scanner;
7 use Masterminds\HTML5\Parser\Tokenizer;
8
/**
 * Tests for the HTML5 tokenizer.
 *
 * Each test feeds a markup string through a Tokenizer whose events are
 * recorded by an EventStack, then inspects the recorded events by index.
 * For non-empty input the expected event counts below include the trailing
 * 'eof' event (e.g. a single tag yields a depth of 2).
 */
class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
{
    // ================================================================
    // Additional assertions.
    // ================================================================
    /**
     * Tests that an event matches both the event type and the expected value.
     *
     * @param string $type
     *            Expected event type.
     * @param mixed $expects
     *            When an array, it is compared against the whole of
     *            $event['data']. Otherwise it is compared against
     *            $event['data'][0]; pass null for events that carry no
     *            payload (the tests in this class do so for 'eof').
     * @param array $event
     *            The recorded event, with 'name' and 'data' keys.
     */
    public function assertEventEquals($type, $expects, $event)
    {
        $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, true));

        if (is_array($expects)) {
            $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, true) . ": " . print_r($event, true));

            return;
        }

        // Payload-less events may hold null (or an empty array) as their data.
        // Indexing such a value raises a notice/warning on PHP >= 7.4, which
        // PHPUnit can report as an error, so only dereference element 0 when
        // it actually exists. A missing element compares as null, exactly as
        // the unguarded read evaluated before.
        $actual = isset($event['data'][0]) ? $event['data'][0] : null;
        $this->assertEquals($expects, $actual, "Event $type should equal $expects: " . print_r($event, true));
    }

    /**
     * Assert that a given event is 'error'.
     *
     * @param array $event
     *            The recorded event, with a 'name' key.
     */
    public function assertEventError($event)
    {
        $this->assertEquals('error', $event['name'], "Expected error for event: " . print_r($event, true));
    }

    /**
     * Asserts that all of the tests are good.
     *
     * This loops through a map of tests/expectations and runs a few assertions on each test.
     *
     * Checks:
     * - depth (if depth is > 0)
     * - event name
     * - matches on event 0.
     *
     * @param string $name
     *            Expected name of the first event of every test.
     * @param int $depth
     *            Expected number of recorded events; 0 (or less) skips the check.
     * @param array $tests
     *            Map of input markup => expected value (see assertEventEquals()).
     * @param bool $debug
     *            When true, prints every test and its expectation to STDOUT.
     */
    protected function isAllGood($name, $depth, $tests, $debug = false)
    {
        foreach ($tests as $try => $expects) {
            if ($debug) {
                fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, true));
            }
            $e = $this->parse($try);
            if ($depth > 0) {
                $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, true));
            }
            $this->assertEventEquals($name, $expects, $e->get(0));
        }
    }

    // ================================================================
    // Test cases.
    // ================================================================
    /**
     * Empty input must produce exactly one event: 'eof'.
     */
    public function testParse()
    {
        list ($tok, $events) = $this->createTokenizer('');

        $tok->parse();
        $e1 = $events->get(0);

        $this->assertEquals(1, $events->depth());
        $this->assertEquals('eof', $e1['name']);
    }

    /**
     * Whitespace-only input is reported verbatim as a single text event.
     */
    public function testWhitespace()
    {
        $spaces = '    ';
        list ($tok, $events) = $this->createTokenizer($spaces);

        $tok->parse();

        $this->assertEquals(2, $events->depth());

        $e1 = $events->get(0);

        $this->assertEquals('text', $e1['name']);
        $this->assertEquals($spaces, $e1['data'][0]);
    }

    /**
     * Valid character references decode to text; malformed ones raise an error first.
     */
    public function testCharacterReference()
    {
        $good = array(
            '&amp;' => '&',
            '&#x0003c;' => '<',
            '&#38;' => '&',
            '&' => '&'
        );
        $this->isAllGood('text', 2, $good);

        // Test with broken charref: unknown name, bad hex digits, bad decimal digits.
        $broken = array(
            '&foo',
            '&#xfoo',
            '&#foo'
        );
        foreach ($broken as $str) {
            $events = $this->parse($str);
            $this->assertEventError($events->get(0));
        }

        // FIXME: Once the text processor is done, need to verify that the
        // tokens are transformed correctly into text.
    }

    /**
     * Malformed markup declarations are reported as an error followed by a
     * comment event carrying the original markup.
     */
    public function testBogusComment()
    {
        $bogus = array(
            '</+this is a bogus comment. +>',
            '<!+this is a bogus comment. !>',
            '<!D OCTYPE foo bar>',
            '<!DOCTYEP foo bar>',
            '<![CADATA[ TEST ]]>',
            '<![CDATA Hello ]]>',
            '<![CDATA[ Hello [[>',
            '<!CDATA[[ test ]]>',
            '<![CDATA[',
            '<![CDATA[hellooooo hello',
            '<? Hello World ?>',
            '<? Hello World'
        );
        foreach ($bogus as $str) {
            $events = $this->parse($str);
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('comment', $str, $events->get(1));
        }
    }

    /**
     * End tags: well-formed, recoverable failures, and bogus comments.
     */
    public function testEndTag()
    {
        $succeed = array(
            '</a>' => 'a',
            '</test>' => 'test',
            '</test
      >' => 'test',
            '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
            // See 8.2.4.10, which requires this and does not say error.
            '</a<b>' => 'a<b'
        );
        $this->isAllGood('endTag', 2, $succeed);

        // Recoverable failures
        $fail = array(
            '</a class="monkey">' => 'a',
            '</a <b>' => 'a',
            '</a <b <c>' => 'a',
            '</a is the loneliest letter>' => 'a',
            '</a' => 'a'
        );
        foreach ($fail as $test => $result) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth());
            // Should have triggered an error.
            $this->assertEventError($events->get(0));
            // Should have tried to parse anyway.
            $this->assertEventEquals('endTag', $result, $events->get(1));
        }

        // BogoComments
        $comments = array(
            '</>' => '</>',
            '</ >' => '</ >',
            '</ a>' => '</ a>'
        );
        foreach ($comments as $test => $result) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth());

            // Should have triggered an error.
            $this->assertEventError($events->get(0));

            // Should have tried to parse anyway.
            $this->assertEventEquals('comment', $result, $events->get(1));
        }
    }

    /**
     * Comments: well-formed ones yield their content; unterminated or
     * malformed ones yield an error followed by the recovered content.
     */
    public function testComment()
    {
        $good = array(
            '<!--easy-->' => 'easy',
            '<!-- 1 > 0 -->' => ' 1 > 0 ',
            '<!-- --$i -->' => ' --$i ',
            '<!----$i-->' => '--$i',
            "<!--\nHello World.\na-->" => "\nHello World.\na",
            '<!-- <!-- -->' => ' <!-- '
        );
        foreach ($good as $test => $expected) {
            $events = $this->parse($test);
            $this->assertEventEquals('comment', $expected, $events->get(0));
        }

        $fail = array(
            '<!-->' => '',
            '<!--Hello' => 'Hello',
            "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
            '<!--' => ''
        );
        foreach ($fail as $test => $expected) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth());
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('comment', $expected, $events->get(1));
        }
    }

    /**
     * CDATA sections yield their raw content.
     */
    public function testCDATASection()
    {
        $good = array(
            '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
            '<![CDATA[CDATA]]>' => 'CDATA',
            '<![CDATA[ ]] > ]]>' => ' ]] > ',
            '<![CDATA[ ]]>' => ' '
        );
        $this->isAllGood('cdata', 2, $good);
    }

    /**
     * Doctype declarations.
     *
     * Each expectation is the full event data in this order: name, ID type,
     * ID value, and a trailing boolean that is false for every well-formed
     * case and true for every malformed case below.
     */
    public function testDoctype()
    {
        $good = array(
            '<!DOCTYPE html>' => array('html', 0, null, false),
            '<!doctype html>' => array('html', 0, null, false),
            '<!DocType html>' => array('html', 0, null, false),
            "<!DOCTYPE\nhtml>" => array('html', 0, null, false),
            "<!DOCTYPE\fhtml>" => array('html', 0, null, false),
            '<!DOCTYPE html PUBLIC "foo bar">' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            "<!DOCTYPE html PUBLIC 'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            '<!DOCTYPE      html      PUBLIC     "foo bar"    >' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            '<!DOCTYPE html SYSTEM "foo bar">' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
            "<!DOCTYPE html SYSTEM 'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
            '<!DOCTYPE      html      SYSTEM "foo/bar"    >' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo/bar', false),
            "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false)
        );
        $this->isAllGood('doctype', 2, $good);

        $bad = array(
            '<!DOCTYPE>' => array(null, EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE    >' => array(null, EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE  foo' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUB' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUB>' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE  foo PUB "Looks good">' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE  foo SYSTME "Looks good"' => array('foo', EventStack::DOCTYPE_NONE, null, true),

            // Can't tell whether these are ids or ID types, since the context is chopped.
            '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE  foo PUBLIC>' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE  foo SYSTEM>' => array('foo', EventStack::DOCTYPE_NONE, null, true),

            '<!DOCTYPE html SYSTEM "foo bar"' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true),
            '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true)
        );
        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('doctype', $expects, $events->get(1));
        }
    }

    /**
     * Processing instructions yield their target and, when present, their data.
     */
    public function testProcessorInstruction()
    {
        $good = array(
            '<?hph ?>' => 'hph',
            '<?hph echo "Hello World"; ?>' => array(
                'hph',
                'echo "Hello World"; '
            ),
            "<?hph \necho 'Hello World';\n?>" => array(
                'hph',
                "echo 'Hello World';\n"
            )
        );
        $this->isAllGood('pi', 2, $good);
    }

    /**
     * This tests just simple tags.
     */
    public function testSimpleTags()
    {
        $open = array(
            '<foo>' => 'foo',
            '<FOO>' => 'foo',
            '<fOO>' => 'foo',
            '<foo >' => 'foo',
            "<foo\n\n\n\n>" => 'foo',
            '<foo:bar>' => 'foo:bar'
        );
        $this->isAllGood('startTag', 2, $open);

        $selfClose = array(
            '<foo/>' => 'foo',
            '<FOO/>' => 'foo',
            '<foo />' => 'foo',
            "<foo\n\n\n\n/>" => 'foo',
            '<foo:bar/>' => 'foo:bar'
        );
        foreach ($selfClose as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(2, $events->depth(), "Counting events for '$test'" . print_r($events, true));
            $event = $events->get(0);
            $this->assertEventEquals('startTag', $expects, $event);
            // Third data element must be true for a self-closing tag.
            $this->assertTrue($event['data'][2]);
        }

        $bad = array(
            '<foo' => 'foo',
            '<foo ' => 'foo',
            '<foo/' => 'foo',
            '<foo /' => 'foo'
        );

        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expects, $events->get(1));
        }
    }

    /**
     * A tag consisting only of an attribute: three errors, then a start tag
     * named after the attribute, then eof.
     */
    public function testTagsWithAttributeAndMissingName()
    {
        $cases = array(
            '<id="top_featured">' => 'id',
            '<color="white">' => 'color',
            "<class='neaktivni_stranka'>" => 'class',
            '<bgcolor="white">' => 'bgcolor',
            '<class="nom">' => 'class'
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            $this->assertEventError($events->get(0));
            $this->assertEventError($events->get(1));
            $this->assertEventError($events->get(2));
            $this->assertEventEquals('startTag', $expected, $events->get(3));
            $this->assertEventEquals('eof', null, $events->get(4));
        }
    }

    /**
     * A '<' directly after a tag name ends the current tag (with an error)
     * and starts the next construct.
     */
    public function testTagNotClosedAfterTagName()
    {
        $cases = array(
            "<noscript<img>" => array(
                'noscript',
                'img'
            ),
            '<center<a>' => array(
                'center',
                'a'
            ),
            '<br<br>' => array(
                'br',
                'br'
            )
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expected[0], $events->get(1));
            $this->assertEventEquals('startTag', $expected[1], $events->get(2));
            $this->assertEventEquals('eof', null, $events->get(3));
        }

        $events = $this->parse('<span<>02</span>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', 'span', $events->get(1));
        $this->assertEventError($events->get(2));
        $this->assertEventEquals('text', '>02', $events->get(3));
        $this->assertEventEquals('endTag', 'span', $events->get(4));
        $this->assertEventEquals('eof', null, $events->get(5));

        $events = $this->parse('<p</p>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', 'p', $events->get(1));
        $this->assertEventEquals('endTag', 'p', $events->get(2));
        $this->assertEventEquals('eof', null, $events->get(3));

        $events = $this->parse('<strong><WordPress</strong>');
        $this->assertEventEquals('startTag', 'strong', $events->get(0));
        $this->assertEventError($events->get(1));
        $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
        $this->assertEventEquals('endTag', 'strong', $events->get(3));
        $this->assertEventEquals('eof', null, $events->get(4));

        $events = $this->parse('<src=<a>');
        $this->assertEventError($events->get(0));
        $this->assertEventError($events->get(1));
        $this->assertEventError($events->get(2));
        $this->assertEventEquals('startTag', 'src', $events->get(3));
        $this->assertEventEquals('startTag', 'a', $events->get(4));
        $this->assertEventEquals('eof', null, $events->get(5));

        $events = $this->parse('<br...<a>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', 'br', $events->get(1));
        $this->assertEventEquals('eof', null, $events->get(2));
    }

    /**
     * An illegal character inside a tag name raises an error and truncates
     * the name at that character.
     */
    public function testIllegalTagNames()
    {
        $cases = array(
            '<li">' => 'li',
            '<p">' => 'p',
            '<b&nbsp; >' => 'b',
            '<static*all>' => 'static',
            '<h*0720/>' => 'h',
            '<st*ATTRIBUTE />' => 'st',
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expected, $events->get(1));
        }
    }

    /**
     * Attribute parsing on start tags.
     *
     * Each expectation is the full start-tag event data: tag name, attribute
     * map, and the self-closing flag.
     *
     * @depends testCharacterReference
     */
    public function testTagAttributes()
    {
        // Opening tags.
        $good = array(
            '<foo bar="baz">' => array('foo', array('bar' => 'baz'), false),
            '<foo bar=" baz ">' => array('foo', array('bar' => ' baz '), false),
            "<foo bar=\"\nbaz\n\">" => array('foo', array('bar' => "\nbaz\n"), false),
            "<foo bar='baz'>" => array('foo', array('bar' => 'baz'), false),
            '<foo bar="A full sentence.">' => array('foo', array('bar' => 'A full sentence.'), false),
            "<foo a='1' b=\"2\">" => array('foo', array('a' => '1', 'b' => '2'), false),
            "<foo ns:bar='baz'>" => array('foo', array('ns:bar' => 'baz'), false),
            "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), false),
            "<foo a='blue&amp;red'>" => array('foo', array('a' => 'blue&red'), false),
            "<foo a='blue&&amp;&red'>" => array('foo', array('a' => 'blue&&&red'), false),
            "<foo a='blue&&amp;red'>" => array('foo', array('a' => 'blue&&red'), false),
            "<foo\nbar='baz'\n>" => array('foo', array('bar' => 'baz'), false),
            '<doe a deer>' => array('doe', array('a' => null, 'deer' => null), false),
            '<foo bar=baz>' => array('foo', array('bar' => 'baz'), false),

            // Updated for 8.1.2.3
            '<foo    bar   =   "baz"      >' => array('foo', array('bar' => 'baz'), false),

            // The spec allows an unquoted value '/'. This will not be a closing
            // tag.
            '<foo bar=/>' => array('foo', array('bar' => '/'), false),
            '<foo bar=baz/>' => array('foo', array('bar' => 'baz/'), false)
        );
        $this->isAllGood('startTag', 2, $good);

        // Self-closing tags.
        $withEnd = array(
            '<foo bar="baz"/>' => array('foo', array('bar' => 'baz'), true),
            '<foo BAR="baz"/>' => array('foo', array('bar' => 'baz'), true),
            '<foo BAR="BAZ"/>' => array('foo', array('bar' => 'BAZ'), true),
            "<foo a='1' b=\"2\" c=3 d/>" => array('foo', array('a' => '1', 'b' => '2', 'c' => '3', 'd' => null), true)
        );
        $this->isAllGood('startTag', 2, $withEnd);

        // Cause a parse error.
        $bad = array(
            // This will emit an entity lookup failure for &+dark.
            "<foo a='blue&+dark'>" => array('foo', array('a' => 'blue&+dark'), false),
            '<foo bar=>' => array('foo', array('bar' => null), false),
            '<foo bar="oh' => array('foo', array('bar' => 'oh'), false),
            '<foo bar=oh">' => array('foo', array('bar' => 'oh"'), false),

            // these attributes are ignored because of current implementation
            // of method "DOMElement::setAttribute"
            // see issue #23: https://github.com/Masterminds/html5-php/issues/23
            '<foo b"="baz">' => array('foo', array(), false),
            '<foo 2abc="baz">' => array('foo', array(), false),
            '<foo ?="baz">' => array('foo', array(), false),
            '<foo foo?bar="baz">' => array('foo', array(), false)
        );
        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expects, $events->get(1));
        }

        // Cause multiple parse errors.
        $reallyBad = array(
            '<foo ="bar">' => array('foo', array('=' => null, '"bar"' => null), false),
            '<foo////>' => array('foo', array(), true),
            // character "&" in unquoted attribute shouldn't cause an infinite loop
            '<foo bar=index.php?str=1&amp;id=29>' => array('foo', array('bar' => 'index.php?str=1&id=29'), false)
        );
        // Only the two leading errors are verified here. The start-tag
        // expectations above are not asserted: the check was disabled in the
        // original test (it targeted event 1, which is asserted to be an
        // error). They are kept as a record of the intended result.
        // TODO: assert the start tag once its event index is established.
        foreach (array_keys($reallyBad) as $test) {
            $events = $this->parse($test);
            $this->assertEventError($events->get(0));
            $this->assertEventError($events->get(1));
        }

        // Regression: Malformed elements should be detected.
        // '<foo baz="1" <bar></foo>' must yield an error, then foo (with its
        // attribute), then bar, then the closing foo.
        $events = $this->parse('<foo baz="1" <bar></foo>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', array('foo', array('baz' => '1'), false), $events->get(1));
        $this->assertEventEquals('startTag', array('bar', array(), false), $events->get(2));
        $this->assertEventEquals('endTag', array('foo'), $events->get(3));
    }

    /**
     * Raw-text elements: markup-like content inside them is reported as text.
     */
    public function testRawText()
    {
        $good = array(
            '<script>abcd efg hijk lmnop</script>     ' => 'abcd efg hijk lmnop',
            '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
            '<script><<<<<<<<</script>' => '<<<<<<<<',
            '<script>hello</script</script>' => 'hello</script',
            "<script>\nhello</script\n</script>" => "\nhello</script\n",
            '<script>&amp;</script>' => '&amp;',
            '<script><!--not a comment--></script>' => '<!--not a comment-->',
            '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>'
        );
        foreach ($good as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEventEquals('startTag', 'script', $events->get(0));
            $this->assertEventEquals('text', $expects, $events->get(1));
            $this->assertEventEquals('endTag', 'script', $events->get(2));
        }

        // Unterminated raw text: start tag, error, the remaining text, eof.
        $bad = array(
            '<script>&amp;</script' => '&amp;</script',
            '<script>Hello world' => 'Hello world'
        );
        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventEquals('startTag', 'script', $events->get(0));
            $this->assertEventError($events->get(1));
            $this->assertEventEquals('text', $expects, $events->get(2));
        }

        // Testing case sensitivity
        $events = $this->parse('<TITLE>a test</TITLE>');
        $this->assertEventEquals('startTag', 'title', $events->get(0));
        $this->assertEventEquals('text', 'a test', $events->get(1));
        $this->assertEventEquals('endTag', 'title', $events->get(2));

        // Testing end tags with whitespaces
        $events = $this->parse('<title>Whitespaces are tasty</title >');
        $this->assertEventEquals('startTag', 'title', $events->get(0));
        $this->assertEventEquals('text', 'Whitespaces are tasty', $events->get(1));
        $this->assertEventEquals('endTag', 'title', $events->get(2));
    }

    /**
     * In RCDATA mode character references are decoded while comment-like
     * markup stays literal text.
     */
    public function testRcdata()
    {
        list ($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
        $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
        $tok->parse();
        $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
    }

    /**
     * Text runs interleaved with tags, CDATA, comments and character references.
     */
    public function testText()
    {
        $events = $this->parse('a<br>b');
        $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('startTag', 'br', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        $events = $this->parse('<a>Test</a>');
        $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, true));
        $this->assertEventEquals('startTag', 'a', $events->get(0));
        $this->assertEventEquals('text', 'Test', $events->get(1));
        $this->assertEventEquals('endTag', 'a', $events->get(2));

        $events = $this->parse('<p>0</p><p>1</p>');
        $this->assertEquals(7, $events->depth(), "Events: " . print_r($events, true));

        $this->assertEventEquals('startTag', 'p', $events->get(0));
        $this->assertEventEquals('text', '0', $events->get(1));
        $this->assertEventEquals('endTag', 'p', $events->get(2));

        $this->assertEventEquals('startTag', 'p', $events->get(3));
        $this->assertEventEquals('text', '1', $events->get(4));
        $this->assertEventEquals('endTag', 'p', $events->get(5));

        $events = $this->parse('a<![CDATA[test]]>b');
        $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('cdata', 'test', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        $events = $this->parse('a<!--test-->b');
        $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('comment', 'test', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        $events = $this->parse('a&amp;b');
        $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, true));
        $this->assertEventEquals('text', 'a&b', $events->get(0));

        $events = $this->parse('a&sup2;b');
        $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, true));
        $this->assertEventEquals('text', 'a²b', $events->get(0));
    }

    // ================================================================
    // Utility functions.
    // ================================================================
    /**
     * Builds a tokenizer over the given markup, wired to a fresh EventStack.
     *
     * @param string $string
     *            The markup to tokenize.
     * @param bool $debug
     *            Value assigned to the scanner's debug property.
     *
     * @return array
     *            Two elements: the Tokenizer (not yet run) and the EventStack
     *            that will record its events.
     */
    protected function createTokenizer($string, $debug = false)
    {
        $eventHandler = new EventStack();
        $stream = new StringInputStream($string);
        $scanner = new Scanner($stream);

        $scanner->debug = $debug;

        return array(
            new Tokenizer($scanner, $eventHandler),
            $eventHandler
        );
    }

    /**
     * Tokenizes the given markup and returns the recorded events.
     *
     * @param string $string
     *            The markup to tokenize.
     * @param bool $debug
     *            Passed through to createTokenizer().
     *
     * @return EventStack
     *            The event stack populated by the tokenizer run.
     */
    public function parse($string, $debug = false)
    {
        list ($tok, $events) = $this->createTokenizer($string, $debug);
        $tok->parse();

        return $events;
    }
}