| 1 | <?php |
|---|
| 2 | |
|---|
| 3 | /** |
|---|
| 4 | * Generates HTML from tokens. |
|---|
| 5 | * @todo Refactor interface so that configuration/context is determined |
|---|
| 6 | * upon instantiation, no need for messy generateFromTokens() calls |
|---|
| 7 | * @todo Make some of the more internal functions protected, and have |
|---|
| 8 | * unit tests work around that |
|---|
| 9 | */ |
|---|
class HTMLPurifier_Generator
{

    /**
     * Whether or not the generator should produce XML-style output.
     * When true, empty elements are closed with " />"; when false,
     * namespaced attributes are dropped and minimizable attributes are
     * written in their short form. Overwritten by the constructor from
     * the HTML definition's doctype.
     */
    private $_xhtml = true;

    /**
     * :HACK: Whether or not the generator should comment the insides of
     * <script> tags. Read from the Output.CommentScriptContents directive.
     */
    private $_scriptFix = false;

    /**
     * Cache of HTMLDefinition during HTML output to determine whether or
     * not attributes should be minimized.
     */
    private $_def;

    /**
     * Configuration for the generator
     */
    protected $config;

    /**
     * @param $config Instance of HTMLPurifier_Config; when omitted (or
     *        otherwise falsy) HTMLPurifier_Config::createDefault() is used.
     * @param $context Instance of HTMLPurifier_Context. Accepted for
     *        interface compatibility; this constructor does not read it.
     */
    public function __construct($config = null, $context = null) {
        if (!$config) $config = HTMLPurifier_Config::createDefault();
        $this->config     = $config;
        $this->_scriptFix = $config->get('Output', 'CommentScriptContents');
        $this->_def       = $config->getHTMLDefinition();
        $this->_xhtml     = $this->_def->doctype->xml;
    }

    /**
     * Generates HTML from an array of tokens.
     *
     * After concatenating the per-token output, the result is optionally
     * reformatted by the tidy extension (only when the extension is loaded
     * and Output.TidyFormat is enabled) and every "\n" is replaced by the
     * configured Output.Newline value (PHP_EOL when that directive is null).
     *
     * @param $tokens Array of HTMLPurifier_Token
     * @return Generated HTML; an empty string when $tokens is empty
     */
    public function generateFromTokens($tokens) {
        if (!$tokens) return '';

        // Basic algorithm
        $html = '';
        for ($i = 0, $size = count($tokens); $i < $size; $i++) {
            // Script special case: only an opening <script> tag may start
            // it. Without the Start check, a closing </script> (which has
            // the same name) followed by "text, end tag" would wrongly have
            // that unrelated text wrapped in the comment/CDATA hack.
            if ($this->_scriptFix
                && $tokens[$i] instanceof HTMLPurifier_Token_Start
                && $tokens[$i]->name === 'script'
                && $i + 2 < $size
                && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
                // the contents of the script block must be ONE token
                // for this to work.
                $html .= $this->generateFromToken($tokens[$i++]);
                $html .= $this->generateScriptFromToken($tokens[$i++]);
                // $i now points at the end tag, emitted below
            }
            $html .= $this->generateFromToken($tokens[$i]);
        }

        // Tidy cleanup
        if (extension_loaded('tidy') && $this->config->get('Output', 'TidyFormat')) {
            $tidy = new Tidy;
            $tidy->parseString($html, array(
                'indent'         => true,
                'output-xhtml'   => $this->_xhtml,
                'show-body-only' => true,
                'indent-spaces'  => 2,
                'wrap'           => 68,
            ), 'utf8');
            $tidy->cleanRepair();
            $html = (string) $tidy; // explicit cast necessary
        }

        // Normalize newlines to system defined value
        $nl = $this->config->get('Output', 'Newline');
        if ($nl === null) $nl = PHP_EOL;
        if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
        return $html;
    }

    /**
     * Generates HTML from a single token.
     *
     * Start, end and empty tags are rendered with their attributes, text
     * is escaped without quote conversion, and comment data is emitted
     * verbatim between the comment delimiters.
     *
     * @param $token HTMLPurifier_Token object.
     * @return Generated HTML. An empty string is returned for unrecognised
     *         token subclasses, and (together with an E_USER_WARNING) for
     *         values that are not HTMLPurifier_Token instances.
     */
    public function generateFromToken($token) {
        if (!($token instanceof HTMLPurifier_Token)) {
            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
            return '';

        } elseif ($token instanceof HTMLPurifier_Token_Start) {
            $attr = $this->generateAttributes($token->attr, $token->name);
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';

        } elseif ($token instanceof HTMLPurifier_Token_End) {
            return '</' . $token->name . '>';

        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
            $attr = $this->generateAttributes($token->attr, $token->name);
            return '<' . $token->name . ($attr ? ' ' : '') . $attr .
                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
                . '>';

        } elseif ($token instanceof HTMLPurifier_Token_Text) {
            return $this->escape($token->data, ENT_NOQUOTES);

        } elseif ($token instanceof HTMLPurifier_Token_Comment) {
            return '<!--' . $token->data . '-->';
        } else {
            return '';

        }
    }

    /**
     * Special case processor for the contents of script tags.
     *
     * Text tokens have a trailing "//" (plus any whitespace after it)
     * removed, are trimmed, and are wrapped in a comment/CDATA envelope.
     * Any other token is passed through generateFromToken() unchanged.
     *
     * @warning This runs into problems if there's already a literal
     *          --> somewhere inside the script contents.
     * @param $token HTMLPurifier_Token object found inside a script element.
     * @return Generated HTML
     */
    public function generateScriptFromToken($token) {
        if (!($token instanceof HTMLPurifier_Token_Text)) return $this->generateFromToken($token);
        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
        $data = preg_replace('#//\s*$#', '', $token->data);
        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
    }

    /**
     * Generates attribute declarations from attribute array.
     * @note This does not include the leading or trailing space.
     * @param $assoc_array_of_attributes Attribute array (name => value)
     * @param $element Name of element attributes are for, used to check
     *        attribute minimization.
     * @return Generated HTML fragment for insertion.
     */
    public function generateAttributes($assoc_array_of_attributes, $element = false) {
        $html = '';
        foreach ($assoc_array_of_attributes as $key => $value) {
            if (!$this->_xhtml) {
                // Remove namespaced attributes
                if (strpos($key, ':') !== false) continue;
                // Check if we should minimize the attribute: val="val" -> val
                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
                    $html .= $key . ' ';
                    continue;
                }
            }
            $html .= $key.'="'.$this->escape($value).'" ';
        }
        // every entry was appended with a trailing space; drop the last one
        return rtrim($html);
    }

    /**
     * Escapes raw text data.
     * @todo This really ought to be protected, but until we have a facility
     *       for properly generating HTML here w/o using tokens, it stays
     *       public.
     * @param $string String data to escape for HTML.
     * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
     *        permissible for non-attribute output.
     * @return String escaped data.
     */
    public function escape($string, $quote = ENT_COMPAT) {
        return htmlspecialchars($string, $quote, 'UTF-8');
    }

}
|---|
| 176 | |
|---|