<?php

/**
 * Takes tokens and makes them well-formed (balances end tags, etc.)
 *
 * While walking the token stream the strategy also drives the configured
 * injectors: text tokens are offered to handleText(), start/empty tags to
 * handleElement(), and every end tag written to the output is announced
 * through notifyEnd().
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{

    /**
     * Locally shared variable references. They are only populated while
     * execute() is running and are unset again before it returns.
     */
    protected $inputTokens, $inputIndex, $outputTokens, $currentNesting,
        $currentInjector, $injectors;

    /**
     * Rebalances a token stream so that every start tag has a matching
     * end tag in the right place.
     *
     * @param array $tokens  Tokens to process. The local copy is modified
     *                       in place when a token is replaced by an array
     *                       of tokens.
     * @param object $config  Configuration; must provide getHTMLDefinition(),
     *                        get() and getBatch().
     * @param object $context Context; CurrentNesting, InputIndex, InputTokens
     *                        and CurrentToken are registered for the duration
     *                        of the main loop and destroyed afterwards.
     * @return array The well-formed token list.
     */
    public function execute($tokens, $config, $context) {

        $definition = $config->getHTMLDefinition();

        // local variables
        $result = array();
        $generator = new HTMLPurifier_Generator();
        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
        $e = $context->get('ErrorCollector', true);

        // member variables; input and output are bound by reference so that
        // processToken() operates on the very same arrays as this method
        $this->currentNesting = array();
        $this->inputIndex     = false;
        $this->inputTokens    =& $tokens;
        $this->outputTokens   =& $result;

        // context variables (must exist before the injectors are prepared)
        $context->register('CurrentNesting', $this->currentNesting);
        $context->register('InputIndex', $this->inputIndex);
        $context->register('InputTokens', $tokens);

        $this->loadInjectors($config, $context);

        $token = false;
        $context->register('CurrentToken', $token);

        // isset is in loop because $tokens size changes during loop exec
        for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {

            // if all goes well, this token will be passed through unharmed
            $token = $tokens[$this->inputIndex];

            // every token consumed uses up one unit of each pending skip
            foreach ($this->injectors as $injector) {
                if ($injector->skip > 0) $injector->skip--;
            }

            // quick-check: if it's not a tag, no need to process
            if (empty($token->is_tag)) {
                if ($token instanceof HTMLPurifier_Token_Text) {
                    // injector handler code; duplicated for performance reasons
                    foreach ($this->injectors as $i => $injector) {
                        if (!$injector->skip) $injector->handleText($token);
                        if (is_array($token)) {
                            // remember who performed the substitution
                            $this->currentInjector = $i;
                            break;
                        }
                    }
                }
                $this->processToken($token, $config, $context);
                continue;
            }

            $child_def = $definition->info[$token->name]->child;

            // quick tag checks: anything that's *not* an end tag
            $ok = false;
            if ($child_def->type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
                // test if it claims to be a start tag but is empty
                $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                $ok = true;
            } elseif ($child_def->type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
                // claims to be empty but really is a start tag
                $token = array(
                    new HTMLPurifier_Token_Start($token->name, $token->attr),
                    new HTMLPurifier_Token_End($token->name)
                );
                $ok = true;
            } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                // real empty token
                $ok = true;
            } elseif ($token instanceof HTMLPurifier_Token_Start) {
                // start tag

                // ...unless they also have to close their parent
                if (!empty($this->currentNesting)) {

                    $parent = array_pop($this->currentNesting);
                    $parent_info = $definition->info[$parent->name];

                    // this can be replaced with a more general algorithm:
                    // if the token is not allowed by the parent, auto-close
                    // the parent
                    if (!isset($parent_info->child->elements[$token->name])) {
                        if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                        // close the parent, then re-loop to reprocess token
                        $result[] = $closing = new HTMLPurifier_Token_End($parent->name);
                        // keep injectors informed, exactly as for every other
                        // end tag this strategy writes to the output
                        $this->notifyInjectorsOfEnd($closing);
                        $this->inputIndex--;
                        continue;
                    }

                    $this->currentNesting[] = $parent; // undo the pop
                }
                $ok = true;
            }

            // injector handler code; duplicated for performance reasons
            if ($ok) {
                // NOTE(review): when the empty-to-start/end conversion above
                // produced an array, the first non-skipping injector still
                // receives that array here and is recorded as the current
                // injector -- confirm this is intended.
                foreach ($this->injectors as $i => $injector) {
                    if (!$injector->skip) $injector->handleElement($token);
                    if (is_array($token)) {
                        $this->currentInjector = $i;
                        break;
                    }
                }
                $this->processToken($token, $config, $context);
                continue;
            }

            // sanity check: we should be dealing with a closing tag
            if (!$token instanceof HTMLPurifier_Token_End) continue;

            // make sure that we have something open
            if (empty($this->currentNesting)) {
                if ($escape_invalid_tags) {
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
                continue;
            }

            // first, check for the simplest case: everything closes neatly
            $current_parent = array_pop($this->currentNesting);
            if ($current_parent->name === $token->name) {
                $result[] = $token;
                $this->notifyInjectorsOfEnd($token);
                continue;
            }

            // okay, so we're trying to close the wrong tag

            // undo the previous pop
            $this->currentNesting[] = $current_parent;

            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($this->currentNesting);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($i = $size - 2; $i >= 0; $i--) {
                if ($this->currentNesting[$i]->name === $token->name) {
                    // current nesting is modified: everything from the
                    // matching element onwards is cut off and kept aside
                    $skipped_tags = array_splice($this->currentNesting, $i);
                    break;
                }
            }

            // we still didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
                continue;
            }

            // okay, we found it, close all the skipped tags
            // note that skipped tags contains the element we need closed
            for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
                // index 0 is the element the end tag really belongs to, so
                // no "closed by element end" notice is sent for it
                if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
                }
                $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
                $this->notifyInjectorsOfEnd($new_token);
            }

        }

        $context->destroy('CurrentNesting');
        $context->destroy('InputTokens');
        $context->destroy('InputIndex');
        $context->destroy('CurrentToken');

        // we're at the end now, fix all still unclosed tags (this is
        // duplicated from the end of the loop with some slight modifications)
        // not using $skipped_tags since it would invariably be all of them
        if (!empty($this->currentNesting)) {
            for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
                if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
                }
                $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
                $this->notifyInjectorsOfEnd($new_token);
            }
        }

        // drop the per-run state (and the reference bindings with it)
        unset($this->outputTokens, $this->injectors, $this->currentInjector,
            $this->currentNesting, $this->inputTokens, $this->inputIndex);

        return $result;
    }

    /**
     * Instantiates and prepares the injectors for the current run, storing
     * the usable ones in $this->injectors.
     *
     * Every truthy entry of the 'AutoFormat' batch other than 'Custom' is
     * mapped to the class HTMLPurifier_Injector_<key>; entries of 'Custom'
     * may be such name suffixes or ready-made injector objects. An injector
     * whose prepare() returns a truthy value is left out and a warning is
     * triggered instead.
     *
     * @param object $config  Configuration, passed on to each prepare().
     * @param object $context Context, passed on to each prepare().
     */
    protected function loadInjectors($config, $context) {
        $candidates = array();

        $injectors = $config->getBatch('AutoFormat');
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // special case
        foreach ($injectors as $injector => $b) {
            if (!$b) continue;
            $class = "HTMLPurifier_Injector_$injector";
            $candidates[] = new $class;
        }
        foreach ($custom_injectors as $injector) {
            if (is_string($injector)) {
                $class = "HTMLPurifier_Injector_$injector";
                $injector = new $class;
            }
            $candidates[] = $injector;
        }

        // array index of the injector that resulted in an array
        // substitution. This enables processToken() to know which
        // injectors are affected by the added tokens and which are
        // not (namely, the ones after the current injector are not
        // affected)
        $this->currentInjector = false;

        // give the injectors references to the definition and context
        // variables for performance reasons. The list of usable injectors
        // is rebuilt rather than spliced while iterating: splicing renumbers
        // the list, which made later removals hit the wrong index.
        $this->injectors = array();
        foreach ($candidates as $injector) {
            $error = $injector->prepare($config, $context);
            if ($error) {
                trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
                continue;
            }
            $this->injectors[] = $injector;
        }
    }

    /**
     * Announces an end tag that has been written to the output to every
     * active injector.
     *
     * @param HTMLPurifier_Token_End $token The end tag that was emitted.
     */
    protected function notifyInjectorsOfEnd($token) {
        foreach ($this->injectors as $injector) {
            $injector->notifyEnd($token);
        }
    }

    /**
     * Commits the outcome of handling the current token.
     *
     * An array replaces the current input token and is re-processed from its
     * first element; any other truthy token is appended to the output and,
     * for start/end tags, applied to the nesting stack. Falsy values are
     * dropped.
     *
     * @param mixed  $token   Token object, array of replacement tokens, or a
     *                        falsy value.
     * @param object $config  Unused here; kept for interface compatibility.
     * @param object $context Unused here; kept for interface compatibility.
     */
    public function processToken($token, $config, $context) {
        if (is_array($token)) {
            // the original token was overloaded by an injector, time
            // to do some fancy acrobatics

            // $this->inputIndex is decremented so that the entire set gets
            // re-processed
            array_splice($this->inputTokens, $this->inputIndex--, 1, $token);

            // adjust the injector skips based on the array substitution
            if ($this->injectors) {
                $offset = count($token);
                for ($i = 0; $i <= $this->currentInjector; $i++) {
                    // because of the skip back, we need to add one more
                    // for uninitialized injectors. I'm not exactly
                    // sure why this is the case, but I think it has to
                    // do with the fact that we're decrementing skips
                    // before re-checking text
                    if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
                    $this->injectors[$i]->skip += $offset;
                }
            }
        } elseif ($token) {
            // regular case
            $this->outputTokens[] = $token;
            if ($token instanceof HTMLPurifier_Token_Start) {
                $this->currentNesting[] = $token;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                array_pop($this->currentNesting); // not actually used
            }
        }
    }

}