<?php

/**
 * Removes all unrecognized tags from the list of tokens.
 *
 * This strategy iterates through all the tokens and removes unrecognized
 * tokens. If a token is not recognized but a TagTransform is defined for
 * that element, the element will be transformed accordingly.
 */

class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list down to the elements the HTML definition knows.
     *
     * Per token, in order:
     *  - Tag tokens are first run through a matching tag transform, if the
     *    definition has one for that tag name.
     *  - A tag whose (possibly transformed) name is in the definition is
     *    kept; start/empty tags of elements with required attributes are
     *    validated first and dropped when a required attribute is missing.
     *  - A tag that is not in the definition is either replaced by a text
     *    token holding its generated HTML (Core 'EscapeInvalidTags' on) or
     *    dropped. Dropping the start tag of a "hidden" element also drops
     *    everything up to the next tag with the same name.
     *  - Comments are dropped, unless a kept hidden element is currently
     *    open, in which case they become text tokens.
     *  - Text tokens are kept; any other kind of token is dropped.
     *
     * While running, the token being processed is exposed through the
     * context as 'CurrentToken'; the entry is destroyed before returning.
     *
     * @param array  $tokens  Token objects to filter (iterated with foreach).
     * @param object $config  Configuration; must provide getHTMLDefinition()
     *                        and get($namespace, $key).
     * @param object $context Context; must provide register(), get() and
     *                        destroy(). 'ErrorCollector' is only read when
     *                        Core 'CollectErrors' is enabled.
     * @return array The surviving tokens, in their original order.
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $generator  = new HTMLPurifier_Generator();
        $kept       = array();

        $escape_foreign = $config->get('Core', 'EscapeInvalidTags');
        $strict_img     = $config->get('Core', 'RemoveInvalidImg');

        $script_flag = $config->get('Core', 'RemoveScriptContents');
        $hidden      = $config->get('Core', 'HiddenElements');

        // Compatibility: an explicit RemoveScriptContents value overrides
        // whatever HiddenElements says about 'script'.
        if ($script_flag === true) {
            $hidden['script'] = true;
        } elseif ($script_flag === false && isset($hidden['script'])) {
            unset($hidden['script']);
        }

        $validator = new HTMLPurifier_AttrValidator();

        // Tag name we are discarding tokens up to, or false when not skipping.
        $skip_until = false;

        // Name of the kept hidden element whose comments become text, or false.
        $textify_in = false;

        // $token is bound by reference into the context, so every assignment
        // to it below is immediately visible as 'CurrentToken'.
        $token = false;
        $context->register('CurrentToken', $token);

        $collector = false;
        if ($config->get('Core', 'CollectErrors')) {
            $collector =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {

            // Skip mode: ignore everything except tags carrying the awaited name.
            if ($skip_until && (empty($token->is_tag) || $token->name !== $skip_until)) {
                continue;
            }

            // ---- non-tag tokens -------------------------------------------
            if (empty($token->is_tag)) {
                if ($token instanceof HTMLPurifier_Token_Comment) {
                    if ($textify_in === false) {
                        // plain comment: strip it
                        if ($collector) {
                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }
                    // comment inside a kept hidden element: keep it as text
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                    continue;
                }
                $kept[] = $token;
                continue;
            }

            // ---- tag tokens -----------------------------------------------

            // Apply the tag transform, if any, before looking the tag up.
            if (isset($definition->info_tag_transform[$token->name])) {
                $original_name = $token->name;
                $transform = $definition->info_tag_transform[$original_name];
                $token = $transform->transform($token, $config, $context);
                if ($collector) {
                    $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                }
            }

            // Foreign element: escape it or drop it.
            if (!isset($definition->info[$token->name])) {
                if ($escape_foreign) {
                    // Report first so the collector still sees the tag token.
                    if ($collector) {
                        $collector->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                    $kept[] = $token;
                    continue;
                }

                if (isset($hidden[$token->name])) {
                    // Hidden element: its children go too.
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $skip_until = $token->name;
                    } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                        // closing tag reached: stop skipping
                        $skip_until = false;
                    }
                    if ($collector) {
                        $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                    }
                } elseif ($collector) {
                    $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                }
                continue;
            }

            // Known element: make sure required attributes are present.
            $is_opening = $token instanceof HTMLPurifier_Token_Start
                || $token instanceof HTMLPurifier_Token_Empty;
            if (
                $is_opening &&
                $definition->info[$token->name]->required_attr &&
                ($token->name != 'img' || $strict_img) // img is only checked when configured
            ) {
                $validator->validateToken($token, $config, $context);
                foreach ($definition->info[$token->name]->required_attr as $attr_name) {
                    if (!isset($token->attr[$attr_name])) {
                        if ($collector) {
                            $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $attr_name);
                        }
                        // drop this token and move on to the next one
                        continue 2;
                    }
                }
                $token->armor['ValidateAttributes'] = true;
            }

            // Track whether we are inside a kept hidden element.
            if (isset($hidden[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                $textify_in = $token->name;
            } elseif ($token->name === $textify_in && $token instanceof HTMLPurifier_Token_End) {
                $textify_in = false;
            }

            $kept[] = $token;
        }

        if ($skip_until) {
            if ($collector) {
                // the awaited closing tag never came: everything after was dropped
                $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
            }
        }

        $context->destroy('CurrentToken');

        return $kept;
    }

}