tag. The time of Java applets has passed. * - The attribute 'icon' is omitted, because no browser implements the * tag anymore. * See https://developer.mozilla.org/en-US/docs/Web/HTML/Element/command. * - The 'manifest' attribute is omitted because it only exists for the * tag. That tag only makes sense in a HTML-served-as-HTML context, in which * case relative URLs are guaranteed to work. * * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes * @see https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value * * @var string[] */ protected static $uriAttributes = ['href', 'poster', 'src', 'cite', 'data', 'action', 'formaction', 'srcset', 'about']; /** * Prepares a string for use as a valid class name. * * Do not pass one string containing multiple classes as they will be * incorrectly concatenated with dashes, i.e. "one two" will become "one-two". * * @param mixed $class * The class name to clean. It can be a string or anything that can be cast * to string. * * @return string * The cleaned class name. */ public static function getClass($class) { $class = (string) $class; if (!isset(static::$classes[$class])) { static::$classes[$class] = static::cleanCssIdentifier(mb_strtolower($class)); } return static::$classes[$class]; } /** * Prepares a string for use as a CSS identifier (element, class, or ID name). * * Link below shows the syntax for valid CSS identifiers (including element * names, classes, and IDs in selectors). * * @see http://www.w3.org/TR/CSS21/syndata.html#characters * * @param string $identifier * The identifier to clean. * @param array $filter * An array of string replacements to use on the identifier. * * @return string * The cleaned identifier. */ public static function cleanCssIdentifier($identifier, array $filter = [ ' ' => '-', '_' => '-', '/' => '-', '[' => '-', ']' => '', ]) { // We could also use strtr() here but its much slower than str_replace(). In // order to keep '__' to stay '__' we first replace it with a different // placeholder after checking that it is not defined as a filter. $double_underscore_replacements = 0; if (!isset($filter['__'])) { $identifier = str_replace('__', '##', $identifier, $double_underscore_replacements); } $identifier = str_replace(array_keys($filter), array_values($filter), $identifier); // Replace temporary placeholder '##' with '__' only if the original // $identifier contained '__'. if ($double_underscore_replacements > 0) { $identifier = str_replace('##', '__', $identifier); } // Valid characters in a CSS identifier are: // - the hyphen (U+002D) // - a-z (U+0030 - U+0039) // - A-Z (U+0041 - U+005A) // - the underscore (U+005F) // - 0-9 (U+0061 - U+007A) // - ISO 10646 characters U+00A1 and higher // We strip out any character not in the above list. $identifier = preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier); // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit. $identifier = preg_replace([ '/^[0-9]/', '/^(-[0-9])|^(--)/', ], ['_', '__'], $identifier); return $identifier; } /** * Sets if this request is an Ajax request. * * @param bool $is_ajax * TRUE if this request is an Ajax request, FALSE otherwise. */ public static function setIsAjax($is_ajax) { static::$isAjax = $is_ajax; } /** * Prepares a string for use as a valid HTML ID and guarantees uniqueness. * * This function ensures that each passed HTML ID value only exists once on * the page. By tracking the already returned ids, this function enables * forms, blocks, and other content to be output multiple times on the same * page, without breaking (X)HTML validation. * * For already existing IDs, a counter is appended to the ID string. * Therefore, JavaScript and CSS code should not rely on any value that was * generated by this function and instead should rely on manually added CSS * classes or similarly reliable constructs. * * Two consecutive hyphens separate the counter from the original ID. To * manage uniqueness across multiple Ajax requests on the same page, Ajax * requests POST an array of all IDs currently present on the page, which are * used to prime this function's cache upon first invocation. * * To allow reverse-parsing of IDs submitted via Ajax, any multiple * consecutive hyphens in the originally passed $id are replaced with a * single hyphen. * * @param string $id * The ID to clean. * * @return string * The cleaned ID. */ public static function getUniqueId($id) { // If this is an Ajax request, then content returned by this page request // will be merged with content already on the base page. The HTML IDs must // be unique for the fully merged content. Therefore use unique IDs. if (static::$isAjax) { return static::getId($id) . '--' . Crypt::randomBytesBase64(8); } // @todo Remove all that code once we switch over to random IDs only, // see https://www.drupal.org/node/1090592. if (!isset(static::$seenIdsInit)) { static::$seenIdsInit = []; } if (!isset(static::$seenIds)) { static::$seenIds = static::$seenIdsInit; } $id = static::getId($id); // Ensure IDs are unique by appending a counter after the first occurrence. // The counter needs to be appended with a delimiter that does not exist in // the base ID. Requiring a unique delimiter helps ensure that we really do // return unique IDs and also helps us re-create the $seen_ids array during // Ajax requests. if (isset(static::$seenIds[$id])) { $id = $id . '--' . ++static::$seenIds[$id]; } else { static::$seenIds[$id] = 1; } return $id; } /** * Prepares a string for use as a valid HTML ID. * * Only use this function when you want to intentionally skip the uniqueness * guarantee of self::getUniqueId(). * * @param string $id * The ID to clean. * * @return string * The cleaned ID. * * @see self::getUniqueId() */ public static function getId($id) { $id = str_replace([' ', '_', '[', ']'], ['-', '-', '-', ''], mb_strtolower($id)); // As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs can // only contain letters, digits ([0-9]), hyphens ("-"), underscores ("_"), // colons (":"), and periods ("."). We strip out any character not in that // list. Note that the CSS spec doesn't allow colons or periods in identifiers // (http://www.w3.org/TR/CSS21/syndata.html#characters), so we strip those two // characters as well. $id = preg_replace('/[^A-Za-z0-9\-_]/', '', $id); // Removing multiple consecutive hyphens. $id = preg_replace('/\-+/', '-', $id); return $id; } /** * Resets the list of seen IDs. */ public static function resetSeenIds() { static::$seenIds = NULL; } /** * Normalizes an HTML snippet. * * This function is essentially \DOMDocument::normalizeDocument(), but * operates on an HTML string instead of a \DOMDocument. * * @param string $html * The HTML string to normalize. * * @return string * The normalized HTML string. */ public static function normalize($html) { $document = static::load($html); return static::serialize($document); } /** * Parses an HTML snippet and returns it as a DOM object. * * This function loads the body part of a partial (X)HTML document and returns * a full \DOMDocument object that represents this document. * * Use \Drupal\Component\Utility\Html::serialize() to serialize this * \DOMDocument back to a string. * * @param string $html * The partial (X)HTML snippet to load. Invalid markup will be corrected on * import. * * @return \DOMDocument * A \DOMDocument that represents the loaded (X)HTML snippet. */ public static function load($html) { $document = << !html EOD; // PHP's \DOMDocument serialization adds extra whitespace when the markup // of the wrapping document contains newlines, so ensure we remove all // newlines before injecting the actual HTML body to be processed. $document = strtr($document, ["\n" => '', '!html' => $html]); $dom = new \DOMDocument(); // Ignore warnings during HTML soup loading. @$dom->loadHTML($document); return $dom; } /** * Converts the body of a \DOMDocument back to an HTML snippet. * * The function serializes the body part of a \DOMDocument back to an (X)HTML * snippet. The resulting (X)HTML snippet will be properly formatted to be * compatible with HTML user agents. * * @param \DOMDocument $document * A \DOMDocument object to serialize, only the tags below the first * node will be converted. * * @return string * A valid (X)HTML snippet, as a string. */ public static function serialize(\DOMDocument $document) { $body_node = $document->getElementsByTagName('body')->item(0); $html = ''; if ($body_node !== NULL) { foreach ($body_node->getElementsByTagName('script') as $node) { static::escapeCdataElement($node); } foreach ($body_node->getElementsByTagName('style') as $node) { static::escapeCdataElement($node, '/*', '*/'); } foreach ($body_node->childNodes as $node) { $html .= $document->saveXML($node); } } return $html; } /** * Adds comments around a childNodes as $child_node) { if ($child_node instanceof \DOMCdataSection) { $embed_prefix = "\n{$comment_end}\n"; // Prevent invalid cdata escaping as this would throw a DOM error. // This is the same behavior as found in libxml2. // Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection // Fix explanation: http://wikipedia.org/wiki/CDATA#Nesting $data = str_replace(']]>', ']]]]>', $child_node->data); $fragment = $node->ownerDocument->createDocumentFragment(); $fragment->appendXML($embed_prefix . $data . $embed_suffix); $node->appendChild($fragment); $node->removeChild($child_node); } } } /** * Decodes all HTML entities including numerical ones to regular UTF-8 bytes. * * Double-escaped entities will only be decoded once ("<" becomes * "<", not "<"). Be careful when using this function, as it will revert * previous sanitization efforts (<script> will become