'http://www.w3.org/1999/xhtml', 'attrNamespace'=>'http://www.w3.org/1999/xhtml', 'nodeName'=>'img', 'nodeName'=>array('img', 'a'), 'attrName'=>'alt', 'attrName'=>array('title', 'alt'), ), */
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'attrName' => array(
                'href', 'hreflang', 'http-equiv', 'icon', 'id', 'keytype', 'kind', 'label', 'lang', 'language',
                'list', 'maxlength', 'media', 'method', 'name', 'placeholder', 'rel', 'rows', 'rowspan', 'sandbox',
                'spellcheck', 'scope', 'seamless', 'shape', 'size', 'sizes', 'span', 'src', 'srcdoc', 'srclang',
                'srcset', 'start', 'step', 'style', 'summary', 'tabindex', 'target', 'title', 'type', 'value',
                'width', 'border', 'charset', 'cite', 'class', 'code', 'codebase', 'color', 'cols', 'colspan',
                'content', 'coords', 'data', 'datetime', 'default', 'dir', 'dirname', 'enctype', 'for', 'form',
                'formaction', 'headers', 'height', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
                'alt', 'bgcolor',
            ),
        ),
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'xpath' => 'starts-with(local-name(), \'data-\')',
        ),
    );

    /**
     * Doctype declaration written by doctype() at the top of a serialized document.
     */
    const DOCTYPE = '<!DOCTYPE html>';

    /**
     * Set up the output rules.
     *
     * @param resource $output
     *            The stream that all output is written to with fwrite().
     * @param array $options
     *            Optional settings. 'encode_entities' selects whether text is
     *            converted to named character references (see enc()).
     */
    public function __construct($output, $options = array())
    {
        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }

        $this->outputMode = static::IM_IN_HTML;
        $this->out = $output;

        // If HHVM, see https://github.com/facebook/hhvm/issues/2727
        $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION');
    }

    /**
     * Register an additional non-boolean attribute rule.
     *
     * Attributes matching a rule are always written as name="value", even
     * when the value is empty.
     *
     * @param array $rule
     *            A rule in the same shape as the entries of $nonBooleanAttributes.
     */
    public function addRule(array $rule)
    {
        $this->nonBooleanAttributes[] = $rule;
    }

    /**
     * Set the traverser that walks the DOM and calls back into these rules.
     *
     * @param \Masterminds\HTML5\Serializer\Traverser $traverser
     *
     * @return $this
     */
    public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }

    /**
     * Write a whole document: the doctype, then every top-level node.
     *
     * @param \DOMDocument $dom
     *            The document to write.
     */
    public function document($dom)
    {
        $this->doctype();

        if ($dom->documentElement) {
            foreach ($dom->childNodes as $node) {
                $this->traverser->node($node);
            }
            $this->nl();
        }
    }

    /**
     * Write the doctype declaration followed by a new line.
     */
    protected function doctype()
    {
        $this->wr(static::DOCTYPE);
        $this->nl();
    }

    /**
     * Write an element: opening tag, children, and (unless void) closing tag.
     *
     * @param \DOMElement $ele
     *            The element to write.
     */
    public function element($ele)
    {
        $name = $ele->tagName;

        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        if ($this->traverser->isLocalElement($ele)) {
            $name = $ele->localName;
        }

        // If we are in SVG or MathML there is special handling.
        // Using if/elseif instead of switch because it's faster in PHP.
        if ($name === 'svg') {
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
        } elseif ($name === 'math') {
            $this->outputMode = static::IM_IN_MATHML;
        }

        $this->openTag($ele);

        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw-text elements: character data is written verbatim (no
            // escaping); nested elements are still serialized normally.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } else {
            // Handle children.
            if ($ele->hasChildNodes()) {
                $this->traverser->children($ele->childNodes);
            }

            // Close out the SVG or MathML special handling.
            if ($name === 'svg' || $name === 'math') {
                $this->outputMode = static::IM_IN_HTML;
            }
        }

        // If not unary, add a closing tag.
        if (!Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }
    }

    /**
     * Write a text node.
     *
     * @param \DOMText $ele
     *            The text node to write.
     */
    public function text($ele)
    {
        // Text whose parent is a raw-text element is written without escaping.
        if (isset($ele->parentNode)
            && isset($ele->parentNode->tagName)
            && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)
        ) {
            $this->wr($ele->data);

            return;
        }

        // FIXME: This probably needs some flags set.
        $this->wr($this->enc($ele->data));
    }

    /**
     * Write a CDATA section.
     *
     * @param \DOMCdataSection $ele
     *            The CDATA node to write.
     */
    public function cdata($ele)
    {
        // This encodes CDATA.
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a comment node.
     *
     * @param \DOMComment $ele
     *            The comment node to write.
     */
    public function comment($ele)
    {
        // The node is serialized by its owner document rather than being
        // assembled by hand from the comment delimiters and $ele->data.
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a processing instruction as <?target data?>.
     *
     * @param \DOMProcessingInstruction $ele
     *            The processing instruction to write.
     */
    public function processorInstruction($ele)
    {
        $this->wr('<?')
            ->wr($ele->target)
            ->wr(' ')
            ->wr($ele->data)
            ->wr('?>');
    }

    /**
     * Write the namespace attributes
     *
     *
     * @param \DOMNode $ele
     *            The element being written.
*/
    protected function namespaceAttrs($ele)
    {
        // getXPath() hands back an evaluator bound to this element's document.
        $xp = $this->getXPath($ele);

        // Select only the namespace declarations that the parent element
        // does not already carry.
        foreach ($xp->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
            if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) {
                $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"');
            }
        }
    }

    /**
     * Write the opening tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * @param \DOMNode $ele
     *            The element being written.
     */
    protected function openTag($ele)
    {
        $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        if ($this->outputMode == static::IM_IN_HTML) {
            $this->wr('>');

            return;
        }

        // If we are not in html mode we are in SVG, MathML, or XML embedded
        // content, where an element without children is self closing.
        $this->wr($ele->hasChildNodes() ? '>' : ' />');
    }

    /**
     * Write the attributes of an element.
     *
     * An attribute is written as a bare name when its value is empty and no
     * non-boolean rule matches it; otherwise it is written as name="value".
     *
     * @param \DOMNode $ele
     *            The element whose attributes are written.
     *
     * @return $this
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (!$ele->hasAttributes()) {
            return $this;
        }

        foreach ($ele->attributes as $node) {
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            // Using if/elseif instead of switch because it's faster in PHP.
            if ($this->outputMode == static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode == static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            if ((isset($val) && $val !== '') || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        return $this;
    }

    /**
     * Decide whether an attribute must always be written with a value.
     *
     * The attribute is tested against every rule in $nonBooleanAttributes. A
     * rule matches when all of the constraints it declares hold:
     * 'nodeNamespace', 'attrNamespace', 'nodeName', 'attrName' (each name may
     * be a string or an array of strings) and 'xpath' (optionally with a
     * 'prefixes' map of prefix => namespace URI to register first).
     *
     * @param \DOMAttr $attr
     *            The attribute being written.
     *
     * @return bool True if at least one rule matches.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;

        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // The example rule on $nonBooleanAttributes spells this key
            // 'attrNamespace'; 'attNamespace' is still honoured because it is
            // the spelling this method used to read.
            $attrNamespace = null;
            if (isset($rule['attrNamespace'])) {
                $attrNamespace = $rule['attrNamespace'];
            } elseif (isset($rule['attNamespace'])) {
                $attrNamespace = $rule['attNamespace'];
            }
            if ($attrNamespace !== null && $attrNamespace !== $attr->namespaceURI) {
                continue;
            }

            if (isset($rule['nodeName']) && !$this->ruleNameMatches($rule['nodeName'], $ele->localName)) {
                continue;
            }

            if (isset($rule['attrName']) && !$this->ruleNameMatches($rule['attrName'], $attr->localName)) {
                continue;
            }

            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);

                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }

                // The expression is evaluated with the attribute as context node.
                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Compare a name against a rule constraint given as a string or a list.
     *
     * @param string|array $expected
     *            A single name or an array of allowed names.
     * @param string $actual
     *            The local name found in the DOM.
     *
     * @return bool
     */
    private function ruleNameMatches($expected, $actual)
    {
        if (is_array($expected)) {
            return in_array($actual, $expected, true);
        }

        return $expected === $actual;
    }

    /**
     * Get an XPath evaluator bound to the document that owns the given node.
     *
     * The evaluator is cached and only rebuilt when a node from a different
     * document is passed in, so one instance of this class can serialize
     * several documents.
     *
     * @param \DOMNode $node
     *            A node whose owner document should be queried.
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
            $this->xpath = new \DOMXPath($node->ownerDocument);
        }

        return $this->xpath;
    }

    /**
     * Write the closing tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * @param \DOMNode $ele
     *            The element being written.
     */
    protected function closeTag($ele)
    {
        // Outside HTML mode a childless element was already written in its
        // self-closing form by openTag(), so it gets no closing tag.
        if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
            $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>');
        }
    }

    /**
     * Write to the output.
*
     * @param string $text
     *            The string to put into the output.
     *
     * @return $this So calls can be chained.
     */
    protected function wr($text)
    {
        fwrite($this->out, $text);

        return $this;
    }

    /**
     * Write a new line character.
     *
     * @return $this So calls can be chained.
     */
    protected function nl()
    {
        fwrite($this->out, PHP_EOL);

        return $this;
    }

    /**
     * Encode text.
     *
     * When encode is set to false, the default value, the text passed in is
     * escaped per section 8.3 of the html5 spec. For details on how text is
     * escaped see the escape() method.
     *
     * When encoding is set to true the text is converted to named character
     * references where appropriate. Section 8.1.4 Character references of the
     * html5 spec refers to using named character references. This is useful for
     * characters that can't otherwise legally be used in the text.
     *
     * The named character references are listed in section 8.5.
     *
     * True encoding will turn all named character references into their
     * entities. This includes such characters as +.# and many other common
     * ones. By default only the characters handled by escape() are replaced.
     *
     * Note, PHP 5.4+ has better html5 encoding.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
     *
     * @todo Use the Entities class in php 5.3 to have html5 entities.
     *
     * @param string $text
     *            Text to encode.
     * @param bool $attribute
     *            True if we are encoding an attribute, false otherwise.
     *
     * @return string The encoded text.
     */
    protected function enc($text, $attribute = false)
    {
        // Escape the text rather than convert to named character references.
        if (!$this->encode) {
            return $this->escape($text, $attribute);
        }

        // If we are in PHP 5.4+ we can use the native html5 entity functionality to
        // convert the named character references.
        if ($this->hasHTML5) {
            return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
        }

        // If a version earlier than 5.4 html5 entities are not entirely handled.
        // This manually handles them.
        return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
    }

    /**
     * Escape text.
     *
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
     * need to be properly escaped.
     *
     * The & should be converted to &amp;, no breaking space unicode characters
     * converted to &nbsp;, when in attribute mode the " should be converted to
     * &quot;, and when not in attribute mode the < and > should be converted to
     * &lt; and &gt;.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
     *
     * @param string $text
     *            Text to escape.
     * @param bool $attribute
     *            True if we are escaping an attribute, false otherwise.
     *
     * @return string The escaped text.
     */
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the requirements of the spec. For example, it doesn't handle
        // non-breaking spaces.
        if ($attribute) {
            $replace = array(
                '"' => '&quot;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        } else {
            $replace = array(
                '<' => '&lt;',
                '>' => '&gt;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        }

        // strtr() with a map never rescans text it has already replaced, so
        // the '&' that starts each inserted entity is not escaped again.
        return strtr($text, $replace);
    }
}