| 1 | <?php |
|---|
| 2 | |
|---|
| 3 | /** |
|---|
| 4 | * Represents an XHTML 1.1 module, with information on elements, tags |
|---|
| 5 | * and attributes. |
|---|
| 6 | * @note Even though this is technically XHTML 1.1, it is also used for |
|---|
| 7 | * regular HTML parsing. We are using modularization as a convenient |
|---|
| 8 | * way to represent the internals of HTMLDefinition, and our |
|---|
| 9 | * implementation is by no means conforming and does not directly |
|---|
| 10 | * use the normative DTDs or XML schemas. |
|---|
| 11 | * @note The public variables in a module should almost directly |
|---|
| 12 | * correspond to the variables in HTMLPurifier_HTMLDefinition. |
|---|
| 13 | * However, the prefix info carries no special meaning in these |
|---|
| 14 | * objects (include it anyway if that's the correspondence though). |
|---|
| 15 | * @todo Consider making some member functions protected |
|---|
| 16 | */ |
|---|
| 17 | |
|---|
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     */
    public $name;

    /**
     * Informally, a list of elements this module changes. Not used in
     * any significant way. addElement() and addBlankElement() append to it.
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * Each value is a string of element names separated by ' | '
     * (see addElementToContentSet()).
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     */
    public $info_attr_transform_post = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     *
     * This base implementation always returns false; a subclass that
     * overrides it should also set $defines_child_def to true.
     *
     * @param $def HTMLPurifier_ElementDef instance
     * @return HTMLPurifier_ChildDef subclass, or false in this base class
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param $element Name of element to add
     * @param $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     *              A non-string value is assigned directly as the
     *              element's child definition instead of being parsed.
     * @param $attr_includes What attribute collections to register to
     *              element?
     * @param $attr What unique attributes does the element define?
     * @note See ElementDef for in-depth descriptions of these parameters.
     * @return Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // parse content_model
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions (stored under key 0 of $attr)
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) {
            $this->info[$element]->child = $contents;
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. If a definition for the element already exists in this
     * module, an error is triggered and the existing definition is
     * returned unchanged.
     * @param $element Name of element to create
     * @return Created (or already existing) element definition
     */
    public function addBlankElement($element)
    {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * The element name is appended to the set's string, separated from
     * any previous entries by ' | '.
     * @param $element Element to register
     * @param $type Name of content set (warning: case sensitive, usually
     *              upper-case first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        if (!isset($this->content_sets[$type])) {
            $this->content_sets[$type] = '';
        } else {
            $this->content_sets[$type] .= ' | ';
        }
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     *                  The shorthands 'Empty', 'Inline' and 'Flow' are
     *                  also accepted.
     * @note If contents is not a string (e.g. an object), an array of two
     *       nulls will be returned, and the caller needs to take the
     *       original $contents and use it directly.
     * @note A string without a colon is treated as a content model type
     *       with an empty content model.
     * @return Array of (lower-cased content model type, content model)
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            return array(null, null); // defer to the caller
        }
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Split on the first colon only, so a colon inside the content model
        // is preserved; pad to two parts so a string without any colon gives
        // an empty content model rather than an undefined-offset error.
        $parts = array_pad(explode(':', $contents, 2), 2, '');
        $content_model_type = strtolower(trim($parts[0]));
        $content_model = trim($parts[1]);
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array. The includes are stored under key 0 of $attr,
     * replacing whatever was there.
     * @param $attr Reference to attr array to modify
     * @param $attr_includes Array of includes / string include to merge in.
     *                       An empty non-array value means no includes.
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) {
                $attr_includes = array();
            } else {
                $attr_includes = array($attr_includes);
            }
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument; this form is used whenever
     *       the first argument is a string.
     * @note Null values in the list are skipped.
     * @return Lookup array equivalent of list
     */
    public function makeLookup($list)
    {
        if (is_string($list)) {
            $list = func_get_args();
        }
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) {
                continue;
            }
            $ret[$value] = true;
        }
        return $ret;
    }
}
|---|
| 226 | |
|---|