| 1 | <?php |
|---|
| 2 | |
|---|
/**
 * Validates contents based on NMTOKENS attribute type.
 *
 * The value is treated as a whitespace-separated list of tokens; tokens
 * that do not match the accepted pattern are dropped and the remaining
 * ones are returned joined by single spaces.
 *
 * @note The only current use for this is the class attribute in HTML
 * @note Could have some functionality factored out into Nmtoken class
 * @warning We cannot assume this class will be used only for 'class'
 *          attributes. Not sure how to hook in magic behavior, then.
 */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{

    /**
     * Filters a whitespace-separated token list down to its valid tokens.
     *
     * @param string $string  Raw attribute value.
     * @param mixed  $config  Not read by this implementation.
     * @param mixed  $context Not read by this implementation.
     * @return string|false Valid tokens joined by single spaces, or false
     *                      when the value is empty, is '0', or contains no
     *                      valid token.
     */
    public function validate($string, $config, $context) {

        $string = trim($string);

        // Early abort: '' and '0' (the strings that convert to false)
        // can never yield a valid token.
        if (!$string) {
            return false;
        }

        // OPTIMIZABLE!
        // U+00A1-and-up codepoints and escapes are not supported: they are
        // awkward to express in the regexp, would complicate optimization
        // efforts, and practically never appear anyway.

        // A token must be preceded by whitespace or the string start ...
        $leadingBoundary = '(?:(?<=\s)|\A)';
        // ... start with '--', or an optional '-' and a letter/underscore,
        // then continue with letters, digits, underscores or dashes ...
        $tokenBody = '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)';
        // ... and be followed by whitespace or the string end.
        $trailingBoundary = '(?:(?=\s)|\z)';

        $captured = array();
        preg_match_all(
            '/' . $leadingBoundary . $tokenBody . $trailingBoundary . '/',
            $string,
            $captured
        );

        // Capture group 1 holds every accepted token, in input order.
        if (empty($captured[1])) {
            return false;
        }

        // Rebuild the attribute value with single-space separators.
        return implode(' ', $captured[1]);

    }

}
|---|
| 47 | |
|---|