Version 1
[yaffs-website] / vendor / ezyang / htmlpurifier / library / HTMLPurifier / Strategy / RemoveForeignElements.php
diff --git a/vendor/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php b/vendor/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/RemoveForeignElements.php
new file mode 100644 (file)
index 0000000..1a8149e
--- /dev/null
@@ -0,0 +1,207 @@
+<?php
+
+/**
+ * Removes all unrecognized tags from the list of tokens.
+ *
+ * This strategy iterates through all the tokens and removes unrecognized
+ * tokens. If a token is not recognized but a TagTransform is defined for
+ * that element, the element will be transformed accordingly.
+ */
+
+class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
+{
+
+    /**
+     * @param HTMLPurifier_Token[] $tokens
+     * @param HTMLPurifier_Config $config
+     * @param HTMLPurifier_Context $context
+     * @return array|HTMLPurifier_Token[]
+     */
+    public function execute($tokens, $config, $context)
+    {
+        $definition = $config->getHTMLDefinition();
+        $generator = new HTMLPurifier_Generator($config, $context);
+        $result = array();
+
+        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
+        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');
+
+        // currently only used to determine if comments should be kept
+        $trusted = $config->get('HTML.Trusted');
+        $comment_lookup = $config->get('HTML.AllowedComments');
+        $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
+        $check_comments = $comment_lookup !== array() || $comment_regexp !== null;
+
+        $remove_script_contents = $config->get('Core.RemoveScriptContents');
+        $hidden_elements = $config->get('Core.HiddenElements');
+
+        // remove script contents compatibility
+        if ($remove_script_contents === true) {
+            $hidden_elements['script'] = true;
+        } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
+            unset($hidden_elements['script']);
+        }
+
+        $attr_validator = new HTMLPurifier_AttrValidator();
+
+        // removes tokens until it reaches a closing tag with its value
+        $remove_until = false;
+
+        // converts comments into text tokens when this is equal to a tag name
+        $textify_comments = false;
+
+        $token = false;
+        $context->register('CurrentToken', $token);
+
+        $e = false;
+        if ($config->get('Core.CollectErrors')) {
+            $e =& $context->get('ErrorCollector');
+        }
+
+        foreach ($tokens as $token) {
+            if ($remove_until) {
+                if (empty($token->is_tag) || $token->name !== $remove_until) {
+                    continue;
+                }
+            }
+            if (!empty($token->is_tag)) {
+                // DEFINITION CALL
+
+                // before any processing, try to transform the element
+                if (isset($definition->info_tag_transform[$token->name])) {
+                    $original_name = $token->name;
+                    // there is a transformation for this tag
+                    // DEFINITION CALL
+                    $token = $definition->
+                        info_tag_transform[$token->name]->transform($token, $config, $context);
+                    if ($e) {
+                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
+                    }
+                }
+
+                if (isset($definition->info[$token->name])) {
+                    // mostly everything's good, but
+                    // we need to make sure required attributes are in order
+                    if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
+                        $definition->info[$token->name]->required_attr &&
+                        ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
+                    ) {
+                        $attr_validator->validateToken($token, $config, $context);
+                        $ok = true;
+                        foreach ($definition->info[$token->name]->required_attr as $name) {
+                            if (!isset($token->attr[$name])) {
+                                $ok = false;
+                                break;
+                            }
+                        }
+                        if (!$ok) {
+                            if ($e) {
+                                $e->send(
+                                    E_ERROR,
+                                    'Strategy_RemoveForeignElements: Missing required attribute',
+                                    $name
+                                );
+                            }
+                            continue;
+                        }
+                        $token->armor['ValidateAttributes'] = true;
+                    }
+
+                    if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
+                        $textify_comments = $token->name;
+                    } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
+                        $textify_comments = false;
+                    }
+
+                } elseif ($escape_invalid_tags) {
+                    // invalid tag, generate HTML representation and insert in
+                    if ($e) {
+                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
+                    }
+                    $token = new HTMLPurifier_Token_Text(
+                        $generator->generateFromToken($token)
+                    );
+                } else {
+                    // check if we need to destroy all of the tag's children
+                    // CAN BE GENERICIZED
+                    if (isset($hidden_elements[$token->name])) {
+                        if ($token instanceof HTMLPurifier_Token_Start) {
+                            $remove_until = $token->name;
+                        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
+                            // do nothing: we're still looking
+                        } else {
+                            $remove_until = false;
+                        }
+                        if ($e) {
+                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
+                        }
+                    } else {
+                        if ($e) {
+                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
+                        }
+                    }
+                    continue;
+                }
+            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
+                // textify comments in script tags when they are allowed
+                if ($textify_comments !== false) {
+                    $data = $token->data;
+                    $token = new HTMLPurifier_Token_Text($data);
+                } elseif ($trusted || $check_comments) {
+                    // always cleanup comments
+                    $trailing_hyphen = false;
+                    if ($e) {
+                        // perform check whether or not there's a trailing hyphen
+                        if (substr($token->data, -1) == '-') {
+                            $trailing_hyphen = true;
+                        }
+                    }
+                    $token->data = rtrim($token->data, '-');
+                    $found_double_hyphen = false;
+                    while (strpos($token->data, '--') !== false) {
+                        $found_double_hyphen = true;
+                        $token->data = str_replace('--', '-', $token->data);
+                    }
+                    if ($trusted || !empty($comment_lookup[trim($token->data)]) ||
+                        ($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) {
+                        // OK good
+                        if ($e) {
+                            if ($trailing_hyphen) {
+                                $e->send(
+                                    E_NOTICE,
+                                    'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
+                                );
+                            }
+                            if ($found_double_hyphen) {
+                                $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
+                            }
+                        }
+                    } else {
+                        if ($e) {
+                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
+                        }
+                        continue;
+                    }
+                } else {
+                    // strip comments
+                    if ($e) {
+                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
+                    }
+                    continue;
+                }
+            } elseif ($token instanceof HTMLPurifier_Token_Text) {
+            } else {
+                continue;
+            }
+            $result[] = $token;
+        }
+        if ($remove_until && $e) {
+            // we removed tokens until the end, throw error
+            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
+        }
+        $context->destroy('CurrentToken');
+        return $result;
+    }
+}
+
+// vim: et sw=4 sts=4