4 * The rules for generating output in the serializer.
6 * These output rules are likely to generate output similar to the document that
7 * was parsed. It is not intended to output exactly the document that was parsed.
9 namespace Masterminds\HTML5\Serializer;
11 use Masterminds\HTML5\Elements;
14 * Generate the output html5 based on element rules.
16 class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
19 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
21 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
23 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
25 const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
27 const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
29 const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
31 const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
34 * Holds the namespace URIs that are implicit in HTML5 output and are therefore not written as namespace attributes
38 protected $implicitNamespaces = array(
41 self::NAMESPACE_MATHML,
43 self::NAMESPACE_XMLNS,
50 const IM_IN_MATHML = 3;
53 * Used as a cache recording whether ENT_HTML5 is available
56 private $hasHTML5 = false;
60 protected $encode = false;
64 protected $outputMode;
68 protected $nonBooleanAttributes = array(
71 'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
72 'attrNamespace'=>'http://www.w3.org/1999/xhtml',
74 'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
75 'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
79 'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
80 'attrName' => array('href',
153 'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
154 'xpath' => 'starts-with(local-name(), \'data-\')',
158 const DOCTYPE = '<!DOCTYPE html>';
// Set up the output rules.
// $output is kept in $this->out, which the write helpers hand to fwrite(),
// so it is expected to be a writable stream resource.
// $options may carry 'encode_entities'; when present it is stored in
// $this->encode, which enc() consults to choose between escaping and
// entity encoding.
160 public function __construct($output, $options = array())
162 if (isset($options['encode_entities'])) {
163 $this->encode = $options['encode_entities'];
// Start in HTML mode; element() switches the mode for svg and math elements.
166 $this->outputMode = static::IM_IN_HTML;
167 $this->out = $output;
169 // If HHVM, see https://github.com/facebook/hhvm/issues/2727
// Record once whether ENT_HTML5 can be used: the constant must exist and
// the runtime must not define HHVM_VERSION.
170 $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION');
// Register an additional rule for attributes that must be written with an
// explicit value. The rule is appended to $nonBooleanAttributes, the list
// that nonBooleanAttribute() iterates over.
172 public function addRule(array $rule)
174 $this->nonBooleanAttributes[] = $rule;
// Attach the traverser this object calls back into: it is asked to walk
// nodes and child lists and to decide whether an element is "local".
177 public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
179 $this->traverser = $traverser;
// Serialize a document by handing each of its top-level child nodes to the
// traverser.
184 public function document($dom)
// Child nodes are only walked when the document has a root element.
187 if ($dom->documentElement) {
188 foreach ($dom->childNodes as $node) {
189 $this->traverser->node($node);
// Write the document type declaration. The DOCTYPE constant is resolved
// through static::, so a subclass can supply a different declaration.
195 protected function doctype()
197 $this->wr(static::DOCTYPE);
// Serialize one element: opening tag, content, and (unless it is a void
// element) the closing tag. Entering an svg or math element switches the
// output mode, which changes how tags and attributes are written.
201 public function element($ele)
// Default to the qualified tag name.
203 $name = $ele->tagName;
206 // If the element has a declared namespace in the HTML, MathML or
207 // SVG namespaces, we use the lname instead of the tagName.
208 if ($this->traverser->isLocalElement($ele)) {
209 $name = $ele->localName;
212 // If we are in SVG or MathML there is special handling.
213 // Using if/elseif instead of switch because it's faster in PHP.
214 if ($name == 'svg') {
215 $this->outputMode = static::IM_IN_SVG;
216 $name = Elements::normalizeSvgElement($name);
217 } elseif ($name == 'math') {
218 $this->outputMode = static::IM_IN_MATHML;
221 $this->openTag($ele);
// Raw-text elements: character data is written as-is (no enc()), and any
// child element is serialized by recursing into this method.
222 if (Elements::isA($name, Elements::TEXT_RAW)) {
223 foreach ($ele->childNodes as $child) {
224 if ($child instanceof \DOMCharacterData) {
225 $this->wr($child->data);
226 } elseif ($child instanceof \DOMElement) {
227 $this->element($child);
// Child nodes are delegated to the traverser.
232 if ($ele->hasChildNodes()) {
233 $this->traverser->children($ele->childNodes);
236 // Close out the SVG or MathML special handling.
// NOTE(review): the mode is reset straight to HTML, so an svg/math element
// nested inside another one would put the rest of the outer element back in
// HTML mode - confirm such nesting is not expected.
237 if ($name == 'svg' || $name == 'math') {
238 $this->outputMode = static::IM_IN_HTML;
242 // If not unary, add a closing tag.
243 if (! Elements::isA($name, Elements::VOID_TAG)) {
244 $this->closeTag($ele);
251 * @param \DOMText $ele
252 * The text node to write.
254 public function text($ele)
// Text whose parent is a raw-text element (Elements::TEXT_RAW) is written
// verbatim. The isset() checks guard against a missing parent or a parent
// without a tagName.
256 if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
257 $this->wr($ele->data);
261 // FIXME: This probably needs some flags set.
// Text is passed through enc() (non-attribute mode) before being written.
262 $this->wr($this->enc($ele->data));
// Write a CDATA section using the owner document's XML serialization of the
// node.
265 public function cdata($ele)
267 // This encodes CDATA.
268 $this->wr($ele->ownerDocument->saveXML($ele));
// Write a comment node using the owner document's XML serialization of the
// node.
271 public function comment($ele)
273 // These produce identical output.
274 // $this->wr('<!--')->wr($ele->data)->wr('-->');
275 $this->wr($ele->ownerDocument->saveXML($ele));
278 public function processorInstruction($ele)
287 * Write the namespace attributes
290 * @param \DOMNode $ele
291 * The element being written.
293 protected function namespaceAttrs($ele)
// Reuse the cached DOMXPath unless none exists yet or it is bound to a
// different document than the element's.
295 if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument){
296 $this->xpath = new \DOMXPath($ele->ownerDocument);
// The query selects the namespace nodes of $ele whose URI does not equal any
// namespace URI in scope on its parent, i.e. declarations that are new at
// this element.
299 foreach( $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele ) as $nsNode ) {
// Namespaces listed in $implicitNamespaces are never written.
300 if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) {
// NOTE(review): the namespace URI is written without going through enc();
// confirm it cannot contain a double quote.
301 $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"');
307 * Write the opening tag.
309 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
310 * qualified name (8.3).
312 * @param \DOMNode $ele
313 * The element being written.
315 protected function openTag($ele)
// Local elements are written by local name; everything else keeps its
// qualified tag name.
317 $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);
321 $this->namespaceAttrs($ele);
// How the tag is terminated depends on the output mode and, outside HTML
// mode, on whether the element has children.
324 if ($this->outputMode == static::IM_IN_HTML) {
326 } // If we are not in html mode we are in SVG, MathML, or XML embedded content.
328 if ($ele->hasChildNodes()) {
330 } // If there are no children this is self closing.
// Write the attributes of an element, each preceded by a single space.
337 protected function attrs($ele)
339 // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
340 if (! $ele->hasAttributes()) {
344 // TODO: Currently, this always writes name="value", and does not do
345 // value-less attributes.
// NOTE(review): the TODO above looks outdated - the ="value" part is only
// written when the condition at the end of the loop body holds.
346 $map = $ele->attributes;
348 for ($i = 0; $i < $len; ++ $i) {
349 $node = $map->item($i);
// Encode the value in attribute mode (second argument true).
350 $val = $this->enc($node->value, true);
352 // XXX: The spec says that we need to ensure that anything in
353 // the XML, XMLNS, or XLink NS's should use the canonical
354 // prefix. It seems that DOM does this for us already, but there
355 // may be exceptions.
356 $name = $node->nodeName;
358 // Special handling for attributes in SVG and MathML.
359 // Using if/elseif instead of switch because it's faster in PHP.
360 if ($this->outputMode == static::IM_IN_SVG) {
361 $name = Elements::normalizeSvgAttribute($name);
362 } elseif ($this->outputMode == static::IM_IN_MATHML) {
363 $name = Elements::normalizeMathMlAttribute($name);
366 $this->wr(' ')->wr($name);
// The value is written when it is non-empty, or when a rule in
// $nonBooleanAttributes says this attribute needs an explicit value;
// otherwise only the bare attribute name is emitted.
368 if ((isset($val) && $val !== '') || $this->nonBooleanAttribute($node)) {
369 $this->wr('="')->wr($val)->wr('"');
// Decide whether an attribute has to be serialized with an explicit value
// even when that value is empty (see the caller in attrs()). Each entry of
// $nonBooleanAttributes is a rule made of optional constraints; the checks
// below test the constraints a rule contains against the attribute and its
// owner element.
375 protected function nonBooleanAttribute(\DOMAttr $attr)
377 $ele = $attr->ownerElement;
378 foreach($this->nonBooleanAttributes as $rule){
// Constraint: namespace URI of the owner element.
380 if(isset($rule['nodeNamespace']) && $rule['nodeNamespace']!==$ele->namespaceURI){
// NOTE(review): this reads the key 'attNamespace', whereas the rule keys
// spelled elsewhere in this class use 'attrNamespace'; a rule using the
// latter spelling is not tested here. Confirm the intended spelling before
// changing it - aligning the key would start enforcing a constraint that is
// currently ignored.
383 if(isset($rule['attNamespace']) && $rule['attNamespace']!==$attr->namespaceURI){
// Constraint: owner element local name, given as a single string ...
386 if(isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName']!==$ele->localName){
// ... or as a list of accepted names (strict comparison).
389 if(isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)){
// Constraint: attribute local name, single string or list, same scheme.
392 if(isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName']!==$attr->localName){
395 if(isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)){
// Constraint: an XPath expression evaluated with the attribute as context
// node. An optional 'prefixes' map (prefix => namespace URI) is registered
// on the XPath object first so the expression can use those prefixes.
398 if(isset($rule['xpath'])){
400 $xp = $this->getXPath($attr);
401 if(isset($rule['prefixes'])){
402 foreach($rule['prefixes'] as $nsPrefix => $ns){
403 $xp->registerNamespace($nsPrefix, $ns);
406 if(!$xp->evaluate($rule['xpath'], $attr)){
// Provide the DOMXPath object used for rule evaluation. The instance is
// created for the document that owns $node and kept in $this->xpath.
417 private function getXPath(\DOMNode $node){
419 $this->xpath = new \DOMXPath($node->ownerDocument);
425 * Write the closing tag.
427 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
428 * qualified name (8.3).
430 * @param \DOMNode $ele
431 * The element being written.
433 protected function closeTag($ele)
// In HTML mode a closing tag is always written. In the other modes it is
// written only when the element has children, since openTag() treats a
// childless element there as self closing.
435 if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
// Same name selection as in openTag(): local name for local elements,
// qualified tag name otherwise.
436 $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>');
441 * Write to the output.
443 * @param string $text
444 * The string to put into the output.
446 * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
448 protected function wr($text)
// All output goes through this single fwrite() on the stream stored in
// $this->out by the constructor.
450 fwrite($this->out, $text);
455 * Write a new line character.
457 * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
459 protected function nl()
// PHP_EOL is the line ending of the platform PHP runs on, so the emitted
// newline is platform dependent.
461 fwrite($this->out, PHP_EOL);
468 * When encode is set to false, the default value, the text passed in is
469 * escaped per section 8.3 of the html5 spec. For details on how text is
470 * escaped see the escape() method.
472 * When encoding is set to true the text is converted to named character
473 * references where appropriate. Section 8.1.4 Character references of the
474 * html5 spec refers to using named character references. This is useful for
475 * characters that can't otherwise legally be used in the text.
477 * The named character references are listed in section 8.5.
479 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references True encoding will turn all named character references into their entities.
480 * This includes such characters as +.# and many other common ones. By default
481 * encoding here will just escape &'<>".
483 * Note, PHP 5.4+ has better html5 encoding.
485 * @todo Use the Entities class in php 5.3 to have html5 entities.
487 * @param string $text
489 * @param boolean $attribute
490 * True if we are encoding an attribute, false otherwise
492 * @return string The encoded text.
494 protected function enc($text, $attribute = false)
497 // Escape the text rather than convert to named character references.
// $attribute only matters on this path; it is forwarded to escape().
498 if (! $this->encode) {
499 return $this->escape($text, $attribute);
502 // If we are in PHP 5.4+ we can use the native html5 entity functionality to
503 // convert the named character references.
505 if ($this->hasHTML5) {
// ENT_QUOTES converts both quote styles, ENT_SUBSTITUTE replaces invalid
// byte sequences instead of returning an empty string, and the final false
// turns off double encoding of entities already present in $text.
506 return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
507 } // If a version earlier than 5.4 html5 entities are not entirely handled.
508 // This manually handles them.
// Fallback: translate characters using the lookup table in
// HTML5Entities::$map.
510 return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
517 * According to the html5 spec section 8.3 Serializing HTML fragments, text
518 * within tags that are not style, script, xmp, iframe, noembed, and noframes
519 * need to be properly escaped.
521 * The & should be converted to &amp;, no breaking space unicode characters
522 * converted to &nbsp;, when in attribute mode the " should be converted to
523 * &quot;, and when not in attribute mode the < and > should be converted to
526 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
528 * @param string $text
530 * @param boolean $attribute
531 * True if we are escaping an attribute, false otherwise
533 protected function escape($text, $attribute = false)
536 // Not using htmlspecialchars because, while it does escaping, it doesn't
537 // match the requirements of section 8.5. For example, it doesn't handle
538 // non-breaking spaces.
// "\xc2\xa0" is the UTF-8 byte sequence of U+00A0 (no-break space).
// NOTE(review): the replacement shown for it is plain whitespace, while the
// method's documentation says non-breaking spaces must be converted to a
// character reference - confirm the literal holds the intended entity.
543 "\xc2\xa0" => ' '
550 "\xc2\xa0" => ' '
// strtr() with an array does not rescan text it has already replaced, so
// the output of one replacement is never escaped a second time.
554 return strtr($text, $replace);