| 1 | <?php |
|---|
| 2 | |
|---|
/**
 * Keeps track of the HTML modules available to the purifier: it registers
 * the built-in doctypes, resolves module names to instances, activates the
 * modules required by the configured doctype and merges their element
 * definitions on request.
 */
class HTMLPurifier_HTMLModuleManager
{

    /**
     * Instance of HTMLPurifier_DoctypeRegistry
     */
    public $doctypes;

    /**
     * Instance of current doctype
     */
    public $doctype;

    /**
     * Instance of HTMLPurifier_AttrTypes
     */
    public $attrTypes;

    /**
     * Active instances of modules for the specified doctype are
     * indexed, by name, in this array.
     */
    public $modules = array();

    /**
     * Array of recognized HTMLPurifier_Module instances, indexed by
     * module's class name. This array is usually lazy loaded, but a
     * user can overload a module by pre-emptively registering it.
     */
    public $registeredModules = array();

    /**
     * List of extra modules that were added by the user using addModule().
     * These get unconditionally merged into the current doctype, whatever
     * it may be.
     */
    public $userModules = array();

    /**
     * Associative array of element name to list of modules that have
     * definitions for the element; this array is rebuilt by setup().
     */
    public $elementLookup = array();

    /** List of prefixes we should use for registering small names */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
    public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */

    /** If set to true, unsafe elements and attributes will be allowed */
    public $trusted = false;

    /**
     * Creates the attribute type and doctype registries and registers the
     * five built-in doctypes together with their module lists.
     */
    public function __construct() {

        // editable internal objects
        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes  = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute', 'Scripting', 'Object'
        );
        $transitional = array('Legacy', 'Target');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional', false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict', false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional', true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        // NOTE(review): 'Tidy_Strict' appears twice in the next two tidy
        // lists. If this list is what setup() later iterates as the
        // doctype's tidyModules, that module is processed (and its
        // construct() invoked, when present) twice. Left as-is -- confirm
        // whether the repetition is intentional before removing it.
        $this->doctypes->register(
            'XHTML 1.0 Strict', true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1', true,
            array_merge($common, $xml, array('Ruby')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );

    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     * @param $module Mixed: string module name, with or without
     *                HTMLPurifier_HTMLModule prefix, or instance of
     *                subclass of HTMLPurifier_HTMLModule.
     * @param $overload Boolean whether or not to overload previous modules.
     *                  If this is not set, and you do overload a module,
     *                  HTML Purifier will complain with a warning.
     * @note The module is stored under its own $name property, which is
     *       not necessarily the string that was passed in.
     * @note If a string is passed as a module name, different variants
     *       will be tested in this order:
     *          - Check all prefixes with $name in order they were added
     *            (HTMLPurifier_HTMLModule_ is the first prefix by default)
     *          - Check for literal object name
     *          - Raise an E_USER_ERROR
     *       If your object name collides with an internal class, specify
     *       your module manually.
     */
    public function registerModule($module, $overload = false) {
        if (is_string($module)) {
            // attempt to load the module
            $original_module = $module;
            $ok = false;
            foreach ($this->prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) {
                    $ok = true;
                    break;
                }
            }
            if (!$ok) {
                // no prefixed class matched: fall back to the literal name
                $module = $original_module;
                if (!class_exists($module)) {
                    trigger_error($original_module . ' module does not exist',
                        E_USER_ERROR);
                    return;
                }
            }
            $module = new $module();
        }
        if (empty($module->name)) {
            // get_class() only accepts objects; describe anything else by
            // its type so that the error path itself cannot fail
            $label = is_object($module) ? get_class($module) : gettype($module);
            trigger_error('Module instance of ' . $label . ' must have name');
            return;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then tacking it on to the list of user modules that setup()
     * merges into the active doctype.
     * @param $module Mixed: string module name or module instance
     */
    public function addModule($module) {
        $this->registerModule($module);
        if (is_object($module)) {
            if (empty($module->name)) {
                // registerModule() has already reported the nameless
                // instance; do not queue an unusable entry
                return;
            }
            $module = $module->name;
        }
        $this->userModules[] = $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class
     * @param $prefix String class name prefix
     */
    public function addPrefix($prefix) {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements()
     * @param $config Instance of HTMLPurifier_Config
     */
    public function setup($config) {

        $this->trusted = $config->get('HTML', 'Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML', 'AllowedModules');
        $special_cases = $config->get('HTML', 'CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                if (isset($special_cases[$m])) continue;
                if (!isset($lookup[$m])) unset($modules[$k]);
            }
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        // add proprietary module (this gets special treatment because
        // it is completely removed from doctypes, etc.)
        if ($config->get('HTML', 'Proprietary')) {
            $modules[] = 'Proprietary';
        }

        foreach ($modules as $module) {
            $this->processModule($module);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            // processModule() leaves no entry behind when activation
            // failed, so check before touching the instance
            if (isset($this->modules[$module]) &&
                method_exists($this->modules[$module], 'construct')) {
                $this->modules[$module]->construct($config);
            }
        }

        // setup lookup table based on all valid modules; it is rebuilt
        // from scratch so that calling setup() again does not append
        // every module name a second time
        $this->elementLookup = array();
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     * @param $module Mixed: string module name, or module instance (which
     *                is activated under its $name property)
     * @note If no module ends up registered under the resolved name, a
     *       warning is raised and the active collection is left untouched.
     */
    public function processModule($module) {
        if (is_object($module)) {
            // objects cannot be array keys: track the instance by name
            $name = empty($module->name) ? null : $module->name;
            $already_registered =
                $name !== null &&
                isset($this->registeredModules[$name]) &&
                $this->registeredModules[$name] === $module;
            if (!$already_registered) {
                $this->registerModule($module);
            }
        } elseif (is_string($module)) {
            $name = $module;
            if (!isset($this->registeredModules[$name])) {
                $this->registerModule($module);
            }
        } else {
            $name = null;
        }

        if ($name === null || !isset($this->registeredModules[$name])) {
            // registration failed, or the class registered itself under
            // a different name than the one requested
            $label = ($name !== null) ? $name : gettype($module);
            trigger_error('Module ' . $label . ' could not be activated because it is not registered under that name', E_USER_WARNING);
            return;
        }

        $this->modules[$name] = $this->registeredModules[$name];
    }

    /**
     * Retrieves merged element definitions.
     * @return Array of HTMLPurifier_ElementDef, indexed by element name
     */
    public function getElements() {

        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) continue;
            foreach ($module->info as $name => $v) {
                if (isset($elements[$name])) continue;
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) unset($elements[$n]);
        }

        return $elements;

    }

    /**
     * Retrieves a single merged element definition
     * @param $name Name of element
     * @param $trusted Boolean trusted overriding parameter: set to true
     *                 if you want the full version of an element
     * @return Merged HTMLPurifier_ElementDef, or false when the element is
     *         unknown or no usable (safe, standalone) definition exists
     * @note Modules are iterated over twice: getElements() walks every
     *       active module to discover element names, while this method
     *       walks only the modules listed for $name in $elementLookup in
     *       order to merge their definitions.
     */
    public function getElement($name, $trusted = null) {

        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) $trusted = $this->trusted;

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {

            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // Every contributing module was skipped (unsafe), or none of them
        // supplied a standalone definition to merge into: there is nothing
        // to return, and getElements() filters these false values out.
        if (!$def) {
            return false;
        }

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }

        return $def;

    }

}
|---|
| 374 | |
|---|
| 375 | |
|---|