5e527227e54889d9b56271b2ec4e6cad7c1b9d64
[yaffs-website] / vendor / masterminds / html5 / test / HTML5 / Parser / TokenizerTest.php
1 <?php
2 namespace Masterminds\HTML5\Tests\Parser;
3
4 use Masterminds\HTML5\Parser\UTF8Utils;
5 use Masterminds\HTML5\Parser\StringInputStream;
6 use Masterminds\HTML5\Parser\Scanner;
7 use Masterminds\HTML5\Parser\Tokenizer;
8
9 class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
10 {
11     // ================================================================
12     // Additional assertions.
13     // ================================================================
14     /**
15      * Tests that an event matches both the event type and the expected value.
16      *
17      * @param string $type
18      *            Expected event type.
19      * @param string $expects
20      *            The value expected in $event['data'][0].
21      */
22     public function assertEventEquals($type, $expects, $event)
23     {
24         $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, true));
25         if (is_array($expects)) {
26             $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, true) . ": " . print_r($event, true));
27         } else {
28             $this->assertEquals($expects, $event['data'][0], "Event $type should equal $expects: " . print_r($event, true));
29         }
30     }
31
32     /**
33      * Assert that a given event is 'error'.
34      */
35     public function assertEventError($event)
36     {
37         $this->assertEquals('error', $event['name'], "Expected error for event: " . print_r($event, true));
38     }
39
40     /**
41      * Asserts that all of the tests are good.
42      *
43      * This loops through a map of tests/expectations and runs a few assertions on each test.
44      *
45      * Checks:
46      * - depth (if depth is > 0)
47      * - event name
48      * - matches on event 0.
49      */
50     protected function isAllGood($name, $depth, $tests, $debug = false)
51     {
52         foreach ($tests as $try => $expects) {
53             if ($debug) {
54                 fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, true));
55             }
56             $e = $this->parse($try);
57             if ($depth > 0) {
58                 $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, true));
59             }
60             $this->assertEventEquals($name, $expects, $e->get(0));
61         }
62     }
63
64     // ================================================================
65     // Test cases.
66     // ================================================================
67     public function testParse()
68     {
69         list ($tok, $events) = $this->createTokenizer('');
70
71         $tok->parse();
72         $e1 = $events->get(0);
73
74         $this->assertEquals(1, $events->Depth());
75         $this->assertEquals('eof', $e1['name']);
76     }
77
78     public function testWhitespace()
79     {
80         $spaces = '    ';
81         list ($tok, $events) = $this->createTokenizer($spaces);
82
83         $tok->parse();
84
85         $this->assertEquals(2, $events->depth());
86
87         $e1 = $events->get(0);
88
89         $this->assertEquals('text', $e1['name']);
90         $this->assertEquals($spaces, $e1['data'][0]);
91     }
92
93     public function testCharacterReference()
94     {
95         $good = array(
96             '&amp;' => '&',
97             '&#x0003c;' => '<',
98             '&#38;' => '&',
99             '&' => '&'
100         );
101         $this->isAllGood('text', 2, $good);
102
103         // Test with broken charref
104         $str = '&foo';
105         $events = $this->parse($str);
106         $e1 = $events->get(0);
107         $this->assertEquals('error', $e1['name']);
108
109         $str = '&#xfoo';
110         $events = $this->parse($str);
111         $e1 = $events->get(0);
112         $this->assertEquals('error', $e1['name']);
113
114         $str = '&#foo';
115         $events = $this->parse($str);
116         $e1 = $events->get(0);
117         $this->assertEquals('error', $e1['name']);
118
119         // FIXME: Once the text processor is done, need to verify that the
120         // tokens are transformed correctly into text.
121     }
122
123     public function testBogusComment()
124     {
125         $bogus = array(
126             '</+this is a bogus comment. +>',
127             '<!+this is a bogus comment. !>',
128             '<!D OCTYPE foo bar>',
129             '<!DOCTYEP foo bar>',
130             '<![CADATA[ TEST ]]>',
131             '<![CDATA Hello ]]>',
132             '<![CDATA[ Hello [[>',
133             '<!CDATA[[ test ]]>',
134             '<![CDATA[',
135             '<![CDATA[hellooooo hello',
136             '<? Hello World ?>',
137             '<? Hello World'
138         );
139         foreach ($bogus as $str) {
140             $events = $this->parse($str);
141             $this->assertEventError($events->get(0));
142             $this->assertEventEquals('comment', $str, $events->get(1));
143         }
144     }
145
146     public function testEndTag()
147     {
148         $succeed = array(
149             '</a>' => 'a',
150             '</test>' => 'test',
151             '</test
152       >' => 'test',
153             '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
154             // See 8.2.4.10, which requires this and does not say error.
155             '</a<b>' => 'a<b'
156         );
157         $this->isAllGood('endTag', 2, $succeed);
158
159         // Recoverable failures
160         $fail = array(
161             '</a class="monkey">' => 'a',
162             '</a <b>' => 'a',
163             '</a <b <c>' => 'a',
164             '</a is the loneliest letter>' => 'a',
165             '</a' => 'a'
166         );
167         foreach ($fail as $test => $result) {
168             $events = $this->parse($test);
169             $this->assertEquals(3, $events->depth());
170             // Should have triggered an error.
171             $this->assertEventError($events->get(0));
172             // Should have tried to parse anyway.
173             $this->assertEventEquals('endTag', $result, $events->get(1));
174         }
175
176         // BogoComments
177         $comments = array(
178             '</>' => '</>',
179             '</ >' => '</ >',
180             '</ a>' => '</ a>'
181         );
182         foreach ($comments as $test => $result) {
183             $events = $this->parse($test);
184             $this->assertEquals(3, $events->depth());
185
186             // Should have triggered an error.
187             $this->assertEventError($events->get(0));
188
189             // Should have tried to parse anyway.
190             $this->assertEventEquals('comment', $result, $events->get(1));
191         }
192     }
193
194     public function testComment()
195     {
196         $good = array(
197             '<!--easy-->' => 'easy',
198             '<!-- 1 > 0 -->' => ' 1 > 0 ',
199             '<!-- --$i -->' => ' --$i ',
200             '<!----$i-->' => '--$i',
201             "<!--\nHello World.\na-->" => "\nHello World.\na",
202             '<!-- <!-- -->' => ' <!-- '
203         );
204         foreach ($good as $test => $expected) {
205             $events = $this->parse($test);
206             $this->assertEventEquals('comment', $expected, $events->get(0));
207         }
208
209         $fail = array(
210             '<!-->' => '',
211             '<!--Hello' => 'Hello',
212             "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
213             '<!--' => ''
214         );
215         foreach ($fail as $test => $expected) {
216             $events = $this->parse($test);
217             $this->assertEquals(3, $events->depth());
218             $this->assertEventError($events->get(0));
219             $this->assertEventEquals('comment', $expected, $events->get(1));
220         }
221     }
222
223     public function testCDATASection()
224     {
225         $good = array(
226             '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
227             '<![CDATA[CDATA]]>' => 'CDATA',
228             '<![CDATA[ ]] > ]]>' => ' ]] > ',
229             '<![CDATA[ ]]>' => ' '
230         );
231         $this->isAllGood('cdata', 2, $good);
232     }
233
234     public function testDoctype()
235     {
236         $good = array(
237             '<!DOCTYPE html>' => array(
238                 'html',
239                 0,
240                 null,
241                 false
242             ),
243             '<!doctype html>' => array(
244                 'html',
245                 0,
246                 null,
247                 false
248             ),
249             '<!DocType html>' => array(
250                 'html',
251                 0,
252                 null,
253                 false
254             ),
255             "<!DOCTYPE\nhtml>" => array(
256                 'html',
257                 0,
258                 null,
259                 false
260             ),
261             "<!DOCTYPE\fhtml>" => array(
262                 'html',
263                 0,
264                 null,
265                 false
266             ),
267             '<!DOCTYPE html PUBLIC "foo bar">' => array(
268                 'html',
269                 EventStack::DOCTYPE_PUBLIC,
270                 'foo bar',
271                 false
272             ),
273             "<!DOCTYPE html PUBLIC 'foo bar'>" => array(
274                 'html',
275                 EventStack::DOCTYPE_PUBLIC,
276                 'foo bar',
277                 false
278             ),
279             '<!DOCTYPE      html      PUBLIC     "foo bar"    >' => array(
280                 'html',
281                 EventStack::DOCTYPE_PUBLIC,
282                 'foo bar',
283                 false
284             ),
285             "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array(
286                 'html',
287                 EventStack::DOCTYPE_PUBLIC,
288                 'foo bar',
289                 false
290             ),
291             '<!DOCTYPE html SYSTEM "foo bar">' => array(
292                 'html',
293                 EventStack::DOCTYPE_SYSTEM,
294                 'foo bar',
295                 false
296             ),
297             "<!DOCTYPE html SYSTEM 'foo bar'>" => array(
298                 'html',
299                 EventStack::DOCTYPE_SYSTEM,
300                 'foo bar',
301                 false
302             ),
303             '<!DOCTYPE      html      SYSTEM "foo/bar"    >' => array(
304                 'html',
305                 EventStack::DOCTYPE_SYSTEM,
306                 'foo/bar',
307                 false
308             ),
309             "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array(
310                 'html',
311                 EventStack::DOCTYPE_SYSTEM,
312                 'foo bar',
313                 false
314             )
315         );
316         $this->isAllGood('doctype', 2, $good);
317
318         $bad = array(
319             '<!DOCTYPE>' => array(
320                 null,
321                 EventStack::DOCTYPE_NONE,
322                 null,
323                 true
324             ),
325             '<!DOCTYPE    >' => array(
326                 null,
327                 EventStack::DOCTYPE_NONE,
328                 null,
329                 true
330             ),
331             '<!DOCTYPE  foo' => array(
332                 'foo',
333                 EventStack::DOCTYPE_NONE,
334                 null,
335                 true
336             ),
337             '<!DOCTYPE foo PUB' => array(
338                 'foo',
339                 EventStack::DOCTYPE_NONE,
340                 null,
341                 true
342             ),
343             '<!DOCTYPE foo PUB>' => array(
344                 'foo',
345                 EventStack::DOCTYPE_NONE,
346                 null,
347                 true
348             ),
349             '<!DOCTYPE  foo PUB "Looks good">' => array(
350                 'foo',
351                 EventStack::DOCTYPE_NONE,
352                 null,
353                 true
354             ),
355             '<!DOCTYPE  foo SYSTME "Looks good"' => array(
356                 'foo',
357                 EventStack::DOCTYPE_NONE,
358                 null,
359                 true
360             ),
361
362             // Can't tell whether these are ids or ID types, since the context is chopped.
363             '<!DOCTYPE foo PUBLIC' => array(
364                 'foo',
365                 EventStack::DOCTYPE_NONE,
366                 null,
367                 true
368             ),
369             '<!DOCTYPE  foo PUBLIC>' => array(
370                 'foo',
371                 EventStack::DOCTYPE_NONE,
372                 null,
373                 true
374             ),
375             '<!DOCTYPE foo SYSTEM' => array(
376                 'foo',
377                 EventStack::DOCTYPE_NONE,
378                 null,
379                 true
380             ),
381             '<!DOCTYPE  foo SYSTEM>' => array(
382                 'foo',
383                 EventStack::DOCTYPE_NONE,
384                 null,
385                 true
386             ),
387
388             '<!DOCTYPE html SYSTEM "foo bar"' => array(
389                 'html',
390                 EventStack::DOCTYPE_SYSTEM,
391                 'foo bar',
392                 true
393             ),
394             '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array(
395                 'html',
396                 EventStack::DOCTYPE_SYSTEM,
397                 'foo bar',
398                 true
399             )
400         );
401         foreach ($bad as $test => $expects) {
402             $events = $this->parse($test);
403             // fprintf(STDOUT, $test . PHP_EOL);
404             $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
405             $this->assertEventError($events->get(0));
406             $this->assertEventEquals('doctype', $expects, $events->get(1));
407         }
408     }
409
410     public function testProcessorInstruction()
411     {
412         $good = array(
413             '<?hph ?>' => 'hph',
414             '<?hph echo "Hello World"; ?>' => array(
415                 'hph',
416                 'echo "Hello World"; '
417             ),
418             "<?hph \necho 'Hello World';\n?>" => array(
419                 'hph',
420                 "echo 'Hello World';\n"
421             )
422         );
423         $this->isAllGood('pi', 2, $good);
424     }
425
426     /**
427      * This tests just simple tags.
428      */
429     public function testSimpleTags()
430     {
431         $open = array(
432             '<foo>' => 'foo',
433             '<FOO>' => 'foo',
434             '<fOO>' => 'foo',
435             '<foo >' => 'foo',
436             "<foo\n\n\n\n>" => 'foo',
437             '<foo:bar>' => 'foo:bar'
438         );
439         $this->isAllGood('startTag', 2, $open);
440
441         $selfClose = array(
442             '<foo/>' => 'foo',
443             '<FOO/>' => 'foo',
444             '<foo />' => 'foo',
445             "<foo\n\n\n\n/>" => 'foo',
446             '<foo:bar/>' => 'foo:bar'
447         );
448         foreach ($selfClose as $test => $expects) {
449             $events = $this->parse($test);
450             $this->assertEquals(3, $events->depth(), "Counting events for '$test'" . print_r($events, true));
451             $this->assertEventEquals('startTag', $expects, $events->get(0));
452             $this->assertEventEquals('endTag', $expects, $events->get(1));
453         }
454
455         $bad = array(
456             '<foo' => 'foo',
457             '<foo ' => 'foo',
458             '<foo/' => 'foo',
459             '<foo /' => 'foo'
460         );
461
462         foreach ($bad as $test => $expects) {
463             $events = $this->parse($test);
464             $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
465             $this->assertEventError($events->get(0));
466             $this->assertEventEquals('startTag', $expects, $events->get(1));
467         }
468     }
469
470     public function testTagsWithAttributeAndMissingName()
471     {
472         $cases = array(
473             '<id="top_featured">' => 'id',
474             '<color="white">' => 'color',
475             "<class='neaktivni_stranka'>" => 'class',
476             '<bgcolor="white">' => 'bgcolor',
477             '<class="nom">' => 'class'
478         );
479
480         foreach ($cases as $html => $expected) {
481             $events = $this->parse($html);
482             $this->assertEventError($events->get(0));
483             $this->assertEventError($events->get(1));
484             $this->assertEventError($events->get(2));
485             $this->assertEventEquals('startTag', $expected, $events->get(3));
486             $this->assertEventEquals('eof', null, $events->get(4));
487         }
488     }
489
490     public function testTagNotClosedAfterTagName()
491     {
492         $cases = array(
493             "<noscript<img>" => array(
494                 'noscript',
495                 'img'
496             ),
497             '<center<a>' => array(
498                 'center',
499                 'a'
500             ),
501             '<br<br>' => array(
502                 'br',
503                 'br'
504             )
505         );
506
507         foreach ($cases as $html => $expected) {
508             $events = $this->parse($html);
509             $this->assertEventError($events->get(0));
510             $this->assertEventEquals('startTag', $expected[0], $events->get(1));
511             $this->assertEventEquals('startTag', $expected[1], $events->get(2));
512             $this->assertEventEquals('eof', null, $events->get(3));
513         }
514
515         $events = $this->parse('<span<>02</span>');
516         $this->assertEventError($events->get(0));
517         $this->assertEventEquals('startTag', 'span', $events->get(1));
518         $this->assertEventError($events->get(2));
519         $this->assertEventEquals('text', '>02', $events->get(3));
520         $this->assertEventEquals('endTag', 'span', $events->get(4));
521         $this->assertEventEquals('eof', null, $events->get(5));
522
523         $events = $this->parse('<p</p>');
524         $this->assertEventError($events->get(0));
525         $this->assertEventEquals('startTag', 'p', $events->get(1));
526         $this->assertEventEquals('endTag', 'p', $events->get(2));
527         $this->assertEventEquals('eof', null, $events->get(3));
528
529         $events = $this->parse('<strong><WordPress</strong>');
530         $this->assertEventEquals('startTag', 'strong', $events->get(0));
531         $this->assertEventError($events->get(1));
532         $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
533         $this->assertEventEquals('endTag', 'strong', $events->get(3));
534         $this->assertEventEquals('eof', null, $events->get(4));
535
536         $events = $this->parse('<src=<a>');
537         $this->assertEventError($events->get(0));
538         $this->assertEventError($events->get(1));
539         $this->assertEventError($events->get(2));
540         $this->assertEventEquals('startTag', 'src', $events->get(3));
541         $this->assertEventEquals('startTag', 'a', $events->get(4));
542         $this->assertEventEquals('eof', null, $events->get(5));
543
544         $events = $this->parse('<br...<a>');
545         $this->assertEventError($events->get(0));
546         $this->assertEventEquals('startTag', 'br', $events->get(1));
547         $this->assertEventEquals('eof', null, $events->get(2));
548     }
549
550     public function testIllegalTagNames()
551     {
552         $cases = array(
553             '<li">' => 'li',
554             '<p">' => 'p',
555             '<b&nbsp; >' => 'b',
556             '<static*all>' => 'static',
557             '<h*0720/>' => 'h',
558             '<st*ATTRIBUTE />' => 'st',
559         );
560
561         foreach ($cases as $html => $expected) {
562             $events = $this->parse($html);
563             $this->assertEventError($events->get(0));
564             $this->assertEventEquals('startTag', $expected, $events->get(1));
565         }
566     }
567
568     /**
569      * @depends testCharacterReference
570      */
571     public function testTagAttributes()
572     {
573         // Opening tags.
574         $good = array(
575             '<foo bar="baz">' => array(
576                 'foo',
577                 array(
578                     'bar' => 'baz'
579                 ),
580                 false
581             ),
582             '<foo bar=" baz ">' => array(
583                 'foo',
584                 array(
585                     'bar' => ' baz '
586                 ),
587                 false
588             ),
589             "<foo bar=\"\nbaz\n\">" => array(
590                 'foo',
591                 array(
592                     'bar' => "\nbaz\n"
593                 ),
594                 false
595             ),
596             "<foo bar='baz'>" => array(
597                 'foo',
598                 array(
599                     'bar' => 'baz'
600                 ),
601                 false
602             ),
603             '<foo bar="A full sentence.">' => array(
604                 'foo',
605                 array(
606                     'bar' => 'A full sentence.'
607                 ),
608                 false
609             ),
610             "<foo a='1' b=\"2\">" => array(
611                 'foo',
612                 array(
613                     'a' => '1',
614                     'b' => '2'
615                 ),
616                 false
617             ),
618             "<foo ns:bar='baz'>" => array(
619                 'foo',
620                 array(
621                     'ns:bar' => 'baz'
622                 ),
623                 false
624             ),
625             "<foo a='blue&red'>" => array(
626                 'foo',
627                 array(
628                     'a' => 'blue&red'
629                 ),
630                 false
631             ),
632             "<foo a='blue&amp;red'>" => array(
633                 'foo',
634                 array(
635                     'a' => 'blue&red'
636                 ),
637                 false
638             ),
639             "<foo a='blue&&amp;&red'>" => array(
640                 'foo',
641                 array(
642                     'a' => 'blue&&&red'
643                 ),
644                 false
645             ),
646             "<foo a='blue&&amp;red'>" => array(
647                 'foo',
648                 array(
649                     'a' => 'blue&&red'
650                 ),
651                 false
652             ),
653             "<foo\nbar='baz'\n>" => array(
654                 'foo',
655                 array(
656                     'bar' => 'baz'
657                 ),
658                 false
659             ),
660             '<doe a deer>' => array(
661                 'doe',
662                 array(
663                     'a' => null,
664                     'deer' => null
665                 ),
666                 false
667             ),
668             '<foo bar=baz>' => array(
669                 'foo',
670                 array(
671                     'bar' => 'baz'
672                 ),
673                 false
674             ),
675
676             // Updated for 8.1.2.3
677             '<foo    bar   =   "baz"      >' => array(
678                 'foo',
679                 array(
680                     'bar' => 'baz'
681                 ),
682                 false
683             ),
684
685             // The spec allows an unquoted value '/'. This will not be a closing
686             // tag.
687             '<foo bar=/>' => array(
688                 'foo',
689                 array(
690                     'bar' => '/'
691                 ),
692                 false
693             ),
694             '<foo bar=baz/>' => array(
695                 'foo',
696                 array(
697                     'bar' => 'baz/'
698                 ),
699                 false
700             )
701         );
702         $this->isAllGood('startTag', 2, $good);
703
704         // Self-closing tags.
705         $withEnd = array(
706             '<foo bar="baz"/>' => array(
707                 'foo',
708                 array(
709                     'bar' => 'baz'
710                 ),
711                 true
712             ),
713             '<foo BAR="baz"/>' => array(
714                 'foo',
715                 array(
716                     'bar' => 'baz'
717                 ),
718                 true
719             ),
720             '<foo BAR="BAZ"/>' => array(
721                 'foo',
722                 array(
723                     'bar' => 'BAZ'
724                 ),
725                 true
726             ),
727             "<foo a='1' b=\"2\" c=3 d/>" => array(
728                 'foo',
729                 array(
730                     'a' => '1',
731                     'b' => '2',
732                     'c' => '3',
733                     'd' => null
734                 ),
735                 true
736             )
737         );
738         $this->isAllGood('startTag', 3, $withEnd);
739
740         // Cause a parse error.
741         $bad = array(
742             // This will emit an entity lookup failure for &+dark.
743             "<foo a='blue&+dark'>" => array(
744                 'foo',
745                 array(
746                     'a' => 'blue&+dark'
747                 ),
748                 false
749             ),
750             '<foo bar=>' => array(
751                 'foo',
752                 array(
753                     'bar' => null
754                 ),
755                 false
756             ),
757             '<foo bar="oh' => array(
758                 'foo',
759                 array(
760                     'bar' => 'oh'
761                 ),
762                 false
763             ),
764             '<foo bar=oh">' => array(
765                 'foo',
766                 array(
767                     'bar' => 'oh"'
768                 ),
769                 false
770             ),
771
772             // these attributes are ignored because of current implementation
773             // of method "DOMElement::setAttribute"
774             // see issue #23: https://github.com/Masterminds/html5-php/issues/23
775             '<foo b"="baz">' => array(
776                 'foo',
777                 array(),
778                 false
779             ),
780             '<foo 2abc="baz">' => array(
781                 'foo',
782                 array(),
783                 false
784             ),
785             '<foo ?="baz">' => array(
786                 'foo',
787                 array(),
788                 false
789             ),
790             '<foo foo?bar="baz">' => array(
791                 'foo',
792                 array(),
793                 false
794             )
795         )
796         ;
797         foreach ($bad as $test => $expects) {
798             $events = $this->parse($test);
799             $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
800             $this->assertEventError($events->get(0));
801             $this->assertEventEquals('startTag', $expects, $events->get(1));
802         }
803
804         // Cause multiple parse errors.
805         $reallyBad = array(
806             '<foo ="bar">' => array(
807                 'foo',
808                 array(
809                     '=' => null,
810                     '"bar"' => null
811                 ),
812                 false
813             ),
814             '<foo////>' => array(
815                 'foo',
816                 array(),
817                 true
818             ),
819             // character "&" in unquoted attribute shouldn't cause an infinite loop
820             '<foo bar=index.php?str=1&amp;id=29>' => array(
821                 'foo',
822                 array(
823                     'bar' => 'index.php?str=1&id=29'
824                 ),
825                 false
826             )
827         );
828         foreach ($reallyBad as $test => $expects) {
829             $events = $this->parse($test);
830             // fprintf(STDOUT, $test . print_r($events, true));
831             $this->assertEventError($events->get(0));
832             $this->assertEventError($events->get(1));
833             // $this->assertEventEquals('startTag', $expects, $events->get(1));
834         }
835
836         // Regression: Malformed elements should be detected.
837         // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), false),
838         $events = $this->parse('<foo baz="1" <bar></foo>');
839         $this->assertEventError($events->get(0));
840         $this->assertEventEquals('startTag', array(
841             'foo',
842             array(
843                 'baz' => '1'
844             ),
845             false
846         ), $events->get(1));
847         $this->assertEventEquals('startTag', array(
848             'bar',
849             array(),
850             false
851         ), $events->get(2));
852         $this->assertEventEquals('endTag', array(
853             'foo'
854         ), $events->get(3));
855     }
856
857     public function testRawText()
858     {
859         $good = array(
860             '<script>abcd efg hijk lmnop</script>     ' => 'abcd efg hijk lmnop',
861             '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
862             '<script><<<<<<<<</script>' => '<<<<<<<<',
863             '<script>hello</script</script>' => 'hello</script',
864             "<script>\nhello</script\n</script>" => "\nhello</script\n",
865             '<script>&amp;</script>' => '&amp;',
866             '<script><!--not a comment--></script>' => '<!--not a comment-->',
867             '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>'
868         );
869         foreach ($good as $test => $expects) {
870             $events = $this->parse($test);
871             $this->assertEventEquals('startTag', 'script', $events->get(0));
872             $this->assertEventEquals('text', $expects, $events->get(1));
873             $this->assertEventEquals('endTag', 'script', $events->get(2));
874         }
875
876         $bad = array(
877             '<script>&amp;</script' => '&amp;</script',
878             '<script>Hello world' => 'Hello world'
879         );
880         foreach ($bad as $test => $expects) {
881             $events = $this->parse($test);
882             $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, true));
883             $this->assertEventEquals('startTag', 'script', $events->get(0));
884             $this->assertEventError($events->get(1));
885             $this->assertEventEquals('text', $expects, $events->get(2));
886         }
887
888         // Testing case sensitivity
889         $events = $this->parse('<TITLE>a test</TITLE>');
890         $this->assertEventEquals('startTag', 'title', $events->get(0));
891         $this->assertEventEquals('text', 'a test', $events->get(1));
892         $this->assertEventEquals('endTag', 'title', $events->get(2));
893
894         // Testing end tags with whitespaces
895         $events = $this->parse('<title>Whitespaces are tasty</title >');
896         $this->assertEventEquals('startTag', 'title', $events->get(0));
897         $this->assertEventEquals('text', 'Whitespaces are tasty', $events->get(1));
898         $this->assertEventEquals('endTag', 'title', $events->get(2));
899     }
900
901     public function testRcdata()
902     {
903         list ($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
904         $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
905         $tok->parse();
906         $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
907     }
908
909     public function testText()
910     {
911         $events = $this->parse('a<br>b');
912         $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, true));
913         $this->assertEventEquals('text', 'a', $events->get(0));
914         $this->assertEventEquals('startTag', 'br', $events->get(1));
915         $this->assertEventEquals('text', 'b', $events->get(2));
916
917         $events = $this->parse('<a>Test</a>');
918         $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, true));
919         $this->assertEventEquals('startTag', 'a', $events->get(0));
920         $this->assertEventEquals('text', 'Test', $events->get(1));
921         $this->assertEventEquals('endTag', 'a', $events->get(2));
922
923         $events = $this->parse('<p>0</p><p>1</p>');
924         $this->assertEquals(7, $events->depth(), "Events: " . print_r($events, true));
925
926         $this->assertEventEquals('startTag', 'p', $events->get(0));
927         $this->assertEventEquals('text', '0', $events->get(1));
928         $this->assertEventEquals('endTag', 'p', $events->get(2));
929
930         $this->assertEventEquals('startTag', 'p', $events->get(3));
931         $this->assertEventEquals('text', '1', $events->get(4));
932         $this->assertEventEquals('endTag', 'p', $events->get(5));
933
934
935         $events = $this->parse('a<![CDATA[test]]>b');
936         $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, true));
937         $this->assertEventEquals('text', 'a', $events->get(0));
938         $this->assertEventEquals('cdata', 'test', $events->get(1));
939         $this->assertEventEquals('text', 'b', $events->get(2));
940
941         $events = $this->parse('a<!--test-->b');
942         $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, true));
943         $this->assertEventEquals('text', 'a', $events->get(0));
944         $this->assertEventEquals('comment', 'test', $events->get(1));
945         $this->assertEventEquals('text', 'b', $events->get(2));
946
947         $events = $this->parse('a&amp;b');
948         $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, true));
949         $this->assertEventEquals('text', 'a&b', $events->get(0));
950
951         $events = $this->parse('a&sup2;b');
952         $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, true));
953         $this->assertEventEquals('text', 'a²b', $events->get(0));
954     }
955
956     // ================================================================
957     // Utility functions.
958     // ================================================================
959     protected function createTokenizer($string, $debug = false)
960     {
961         $eventHandler = new EventStack();
962         $stream = new StringInputStream($string);
963         $scanner = new Scanner($stream);
964
965         $scanner->debug = $debug;
966
967         return array(
968             new Tokenizer($scanner, $eventHandler),
969             $eventHandler
970         );
971     }
972
973     public function parse($string, $debug = false)
974     {
975         list ($tok, $events) = $this->createTokenizer($string, $debug);
976         $tok->parse();
977
978         return $events;
979     }
980 }