| [21] | 1 | <?php |
|---|
| 2 | |
|---|
| 3 | /** |
|---|
| 4 | * Definition of the purified HTML that describes allowed children, |
|---|
| 5 | * attributes, and many other things. |
|---|
| 6 | * |
|---|
| 7 | * Conventions: |
|---|
| 8 | * |
|---|
| 9 | * All member variables that are prefixed with info |
|---|
| 10 | * (including the main $info array) are used by HTML Purifier internals |
|---|
| 11 | * and should not be directly edited when customizing the HTMLDefinition. |
|---|
| 12 | * They can usually be set via configuration directives or custom |
|---|
| 13 | * modules. |
|---|
| 14 | * |
|---|
| 15 | * On the other hand, member variables without the info prefix are used |
|---|
| 16 | * internally by the HTMLDefinition and MUST NOT be used by other HTML |
|---|
| 17 | * Purifier internals. Many of them, however, are public, and may be |
|---|
| 18 | * edited by userspace code to tweak the behavior of HTMLDefinition. |
|---|
| 19 | * |
|---|
| 20 | * @note This class is inspected by Printer_HTMLDefinition; please |
|---|
| 21 | * update that class if things here change. |
|---|
| 22 | * |
|---|
| 23 | * @warning Directives that change this object's structure must be in |
|---|
| 24 | * the HTML or Attr namespace! |
|---|
| 25 | */ |
|---|
| 26 | class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition |
|---|
| 27 | { |
|---|
| 28 | |
|---|
| 29 | // FULLY-PUBLIC VARIABLES --------------------------------------------- |
|---|
| 30 | |
|---|
/**
 * Element definitions, keyed by element name; every value is an
 * HTMLPurifier_ElementDef.
 */
public $info = array();

/**
 * Global attribute definitions, keyed by attribute name.
 */
public $info_global_attr = array();

/**
 * Name of the parent element that the HTML will be going into.
 */
public $info_parent = 'div';

/**
 * Definition of the parent element. It is kept separately so that the
 * parent may be a tag that is not allowed inside the HTML fragment.
 */
public $info_parent_def;

/**
 * Name of the element used to wrap inline elements in block context.
 * @note This is rarely used except for BLOCKQUOTEs in strict mode
 */
public $info_block_wrapper = 'p';

/**
 * HTMLPurifier_TagTransform objects, keyed by the deprecated tag name
 * each one handles.
 */
public $info_tag_transform = array();

/**
 * Indexed list of HTMLPurifier_AttrTransform objects that are run
 * before validation.
 */
public $info_attr_transform_pre = array();

/**
 * Indexed list of HTMLPurifier_AttrTransform objects that are run
 * after validation.
 */
public $info_attr_transform_post = array();

/**
 * Two-level lookup: content set name (Block, Inline) => element name =>
 * whether or not the element belongs to that content set.
 */
public $info_content_sets = array();

/**
 * Doctype object
 */
public $doctype;
|---|
| 83 | |
|---|
| 84 | |
|---|
| 85 | |
|---|
| 86 | // RAW CUSTOMIZATION STUFF -------------------------------------------- |
|---|
| 87 | |
|---|
/**
 * Registers a custom attribute on an element via the anonymous module.
 * @note This is strictly convenience, and does not have a corresponding
 *       method in HTMLPurifier_HTMLModule
 * @param $element_name String name of the element that gets the attribute
 * @param $attr_name String name of the attribute
 * @param $def Attribute definition, either a string or an object; see
 *             HTMLPurifier_AttrTypes for details
 */
public function addAttribute($element_name, $attr_name, $def) {
    $anon = $this->getAnonymousModule();
    // Reuse the anonymous module's entry for this element when it has
    // one; otherwise ask the module for a blank element to attach to.
    $target = isset($anon->info[$element_name])
        ? $anon->info[$element_name]
        : $anon->addBlankElement($element_name);
    $target->attr[$attr_name] = $def;
}
|---|
| 106 | |
|---|
/**
 * Registers a custom element with the HTML definition by forwarding
 * the call to the anonymous module.
 * @note See HTMLPurifier_HTMLModule::addElement for detailed
 *       parameter and return value descriptions.
 */
public function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
    // The element is taken on trust: anything the caller registers here
    // is assumed to be safe, which may not be a good idea.
    return $this->getAnonymousModule()->addElement(
        $element_name,
        $type,
        $contents,
        $attr_collections,
        $attributes
    );
}
|---|
| 119 | |
|---|
/**
 * Registers a blank element with the HTML definition, for overriding
 * existing behavior; the call is forwarded to the anonymous module.
 * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
 *       parameter and return value descriptions.
 */
public function addBlankElement($element_name) {
    return $this->getAnonymousModule()->addBlankElement($element_name);
}
|---|
| 131 | |
|---|
/**
 * Returns the anonymous module, creating it on first use, so advanced
 * features can be used without writing a dedicated module.
 */
public function getAnonymousModule() {
    if ($this->_anonModule) {
        return $this->_anonModule;
    }
    // First request: build the module and remember it for later calls.
    $module = new HTMLPurifier_HTMLModule();
    $module->name = 'Anonymous';
    $this->_anonModule = $module;
    return $module;
}

/**
 * Lazily created module (named 'Anonymous') that collects user-specific
 * changes; see getAnonymousModule().
 */
private $_anonModule;
|---|
| 146 | |
|---|
| 147 | |
|---|
| 148 | // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- |
|---|
| 149 | |
|---|
/** Type string of this definition; set to 'HTML' here. */
public $type = 'HTML';

/** Instance of HTMLPurifier_HTMLModuleManager */
public $manager;

/**
 * Performs low-cost, preliminary initialization: only the module
 * manager is created here.
 */
public function __construct() {
    $manager = new HTMLPurifier_HTMLModuleManager();
    $this->manager = $manager;
}
|---|
| 159 | |
|---|
/**
 * Runs the full setup: pulls information out of the modules, applies
 * the configuration, then discards data that is only needed while
 * setting up.
 * @param $config Configuration object handed on to processModules()
 *                and setupConfigStuff()
 */
protected function doSetup($config) {
    $this->processModules($config);
    $this->setupConfigStuff($config);

    // The manager is not needed once both steps above have run.
    unset($this->manager);

    // Strip the content model fields from every element definition.
    foreach (array_keys($this->info) as $name) {
        unset(
            $this->info[$name]->content_model,
            $this->info[$name]->content_model_type
        );
    }
}
|---|
| 171 | |
|---|
/**
 * Extracts the information held by the module manager into this
 * definition: the doctype, the tag and attribute transforms, the
 * element definitions and the content set lookup.
 * @param $config Configuration object passed on to the manager's setup()
 */
protected function processModules($config) {

    if ($this->_anonModule) {
        // for user specific changes
        // this is late-loaded so we don't have to deal with PHP4
        // reference wonky-ness
        $this->manager->addModule($this->_anonModule);
        // Reset rather than unset(): unsetting a declared property makes
        // every later read of it (a second call of this method, or
        // getAnonymousModule()) raise an undefined-property error.
        $this->_anonModule = null;
    }

    $this->manager->setup($config);
    $this->doctype = $this->manager->doctype;

    // Each transform list is merged the same way: modules are visited in
    // the manager's order, an entry set to false removes whatever an
    // earlier module registered under that key, and any other value
    // adds or overrides the entry.
    $transform_lists = array(
        'info_tag_transform',
        'info_attr_transform_pre',
        'info_attr_transform_post',
    );
    foreach ($this->manager->modules as $module) {
        foreach ($transform_lists as $list) {
            foreach ($module->{$list} as $k => $v) {
                if ($v === false) {
                    unset($this->{$list}[$k]);
                } else {
                    $this->{$list}[$k] = $v;
                }
            }
        }
    }

    $this->info = $this->manager->getElements();
    $this->info_content_sets = $this->manager->contentSets->lookup;

}
|---|
| 207 | |
|---|
/**
 * Sets up stuff based on config. We need a better way of doing this.
 *
 * Reads, in this order, the HTML directives BlockWrapper, Parent,
 * AllowedElements, AllowedAttributes (falling back to Allowed when
 * neither of the two is an array), ForbiddenElements and
 * ForbiddenAttributes, and prunes $info accordingly. Problems with the
 * configuration are reported through trigger_error().
 *
 * @note This uses $this->manager, so it has to run before doSetup()
 *       unsets the manager.
 * @param $config Configuration object; values are read with
 *                $config->get('HTML', <directive>)
 */
protected function setupConfigStuff($config) {

    // The block wrapper must be a member of the Block content set;
    // otherwise the current value of $info_block_wrapper is kept.
    $block_wrapper = $config->get('HTML', 'BlockWrapper');
    if (isset($this->info_content_sets['Block'][$block_wrapper])) {
        $this->info_block_wrapper = $block_wrapper;
    } else {
        trigger_error('Cannot use non-block element as block wrapper',
            E_USER_ERROR);
    }

    $parent = $config->get('HTML', 'Parent');
    $def = $this->manager->getElement($parent, true);
    if ($def) {
        $this->info_parent = $parent;
        $this->info_parent_def = $def;
    } else {
        trigger_error('Cannot use unrecognized element as parent',
            E_USER_ERROR);
        // Only reached when an error handler lets execution continue:
        // fall back to the definition of the current parent name.
        $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
    }

    // support template text
    $support = "(for information on implementing this, see the ".
        "support forums) ";

    // setup allowed elements -----------------------------------------

    $allowed_elements = $config->get('HTML', 'AllowedElements');
    $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); // retrieve early

    // The combined HTML.Allowed list is only consulted when neither of
    // the two dedicated directives yielded an array.
    if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
        $allowed = $config->get('HTML', 'Allowed');
        if (is_string($allowed)) {
            list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
        }
    }

    if (is_array($allowed_elements)) {
        foreach ($this->info as $name => $d) {
            if (!isset($allowed_elements[$name])) {
                unset($this->info[$name]);
            }
            // Tick the name off; whatever is left afterwards was
            // requested but is not a known element.
            unset($allowed_elements[$name]);
        }
        // emit errors
        foreach ($allowed_elements as $element => $d) {
            $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
            trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
        }
    }

    // setup allowed attributes ---------------------------------------

    // Working copy from which every matched key is removed; the keys
    // that remain at the end matched nothing and are reported.
    $allowed_attributes_mutable = $allowed_attributes; // by copy!
    if (is_array($allowed_attributes)) {

        // This actually doesn't do anything, since we went away from
        // global attributes. It's possible that userland code uses
        // it, but HTMLModuleManager doesn't!
        foreach ($this->info_global_attr as $attr => $x) {
            $keys = array($attr, "*@$attr", "*.$attr");
            $delete = true;
            foreach ($keys as $key) {
                if ($delete && isset($allowed_attributes[$key])) {
                    $delete = false;
                }
                if (isset($allowed_attributes_mutable[$key])) {
                    unset($allowed_attributes_mutable[$key]);
                }
            }
            if ($delete) {
                unset($this->info_global_attr[$attr]);
            }
        }

        foreach ($this->info as $tag => $info) {
            foreach ($info->attr as $attr => $x) {
                // An attribute survives if any of these spellings is
                // present in the allowed list.
                $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) {
                    unset($this->info[$tag]->attr[$attr]);
                }
            }
        }
        // emit errors
        foreach ($allowed_attributes_mutable as $elattr => $d) {
            $bits = preg_split('/[.@]/', $elattr, 2);
            $c = count($bits);
            switch ($c) {
                case 2:
                    if ($bits[0] !== '*') {
                        $element = htmlspecialchars($bits[0]);
                        $attribute = htmlspecialchars($bits[1]);
                        // Look the element up by its raw name; the
                        // escaped copies are for the messages only.
                        if (!isset($this->info[$bits[0]])) {
                            // No level is given, so this one is raised
                            // at PHP's default level, E_USER_NOTICE.
                            trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
                        } else {
                            trigger_error("Attribute '$attribute' in element '$element' not supported $support",
                                E_USER_WARNING);
                        }
                        break;
                    }
                    // otherwise fall through
                case 1:
                    // For a '*' key that fell through, $bits[0] is the
                    // literal '*', which is what the message then names.
                    $attribute = htmlspecialchars($bits[0]);
                    trigger_error("Global attribute '$attribute' is not ".
                        "supported in any elements $support",
                        E_USER_WARNING);
                    break;
            }
        }

    }

    // setup forbidden elements ---------------------------------------

    $forbidden_elements = $config->get('HTML', 'ForbiddenElements');
    $forbidden_attributes = $config->get('HTML', 'ForbiddenAttributes');

    foreach ($this->info as $tag => $info) {
        if (isset($forbidden_elements[$tag])) {
            unset($this->info[$tag]);
            continue;
        }
        foreach ($info->attr as $attr => $x) {
            if (
                isset($forbidden_attributes["$tag@$attr"]) ||
                isset($forbidden_attributes["*@$attr"]) ||
                isset($forbidden_attributes[$attr])
            ) {
                unset($this->info[$tag]->attr[$attr]);
                continue;
            } // this segment might get removed eventually
            elseif (isset($forbidden_attributes["$tag.$attr"])) {
                // $tag.$attr are not user supplied, so no worries!
                trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
            }
        }
    }
    // Warn about every forbidden-attribute key written as "*.attr".
    foreach ($forbidden_attributes as $key => $v) {
        if (strlen($key) < 2) continue;
        if ($key[0] !== '*') continue;
        if ($key[1] === '.') {
            trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
        }
    }

}
|---|
| 360 | |
|---|
/**
 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 * separate lists for processing. Format is element[attr1|attr2],element2...
 * Entries may be separated by commas or line breaks; spaces and tabs
 * are ignored.
 * @warning Although it's largely drawn from TinyMCE's implementation,
 *          it is different, and you'll probably have to modify your lists
 * @param $list String list to parse
 * @return array($allowed_elements, $allowed_attributes): two lookup
 *         arrays whose values are all true. Elements are keyed by
 *         element name, attributes by "element.attribute". The
 *         wildcard element "*" is never added to the element lookup,
 *         but its attributes are recorded under "*.attribute".
 * @todo Give this its own class, probably static interface
 */
public function parseTinyMCEAllowedList($list) {

    $list = str_replace(array(' ', "\t"), '', $list);

    $elements = array();
    $attributes = array();

    $chunks = preg_split('/(,|[\n\r]+)/', $list);
    foreach ($chunks as $chunk) {
        if (empty($chunk)) continue;
        // strpos() gives false when the chunk has no '[' and 0 when it
        // starts with one; in both cases the whole chunk is taken as the
        // element name and there is no attribute list.
        if (!strpos($chunk, '[')) {
            $element = $chunk;
            $attr = false;
        } else {
            list($element, $attr) = explode('[', $chunk);
        }
        if ($element !== '*') $elements[$element] = true;
        if (!$attr) continue;
        // Drop the closing bracket, but only if it is really there, so
        // that a malformed entry does not lose the last character of
        // its final attribute name.
        if (substr($attr, -1) === ']') {
            $attr = substr($attr, 0, -1);
        }
        foreach (explode('|', $attr) as $key) {
            // "a[]" or a stray "|" yields empty names; these are not
            // attributes and must not end up as "element." keys.
            if ($key === '') continue;
            $attributes["$element.$key"] = true;
        }
    }

    return array($elements, $attributes);

}
|---|
| 399 | |
|---|
| 400 | |
|---|
| 401 | } |
|---|
| 402 | |
|---|
| 403 | |
|---|