root/afridex/plugins/Flutter/purifier_lib/HTMLPurifier/EntityParser.php

Revision 21, 4.8 kB (checked in by admin, 18 years ago)
Line 
1<?php
2
3// If we want to implement error collecting here, we'll need to use some sort
4// of global data (probably trigger_error), because it's impossible to pass
5// $config or $context to the callback functions.
6
/**
 * Handles referencing and dereferencing character entities.
 *
 * A single regular expression recognises hexadecimal (&#x..;), decimal
 * (&#..;) and named (&name;) entities. Two public entry points then decide
 * which of the matched entities get replaced:
 *
 *  - substituteNonSpecialEntities() replaces everything except the entities
 *    listed in the "special" tables below;
 *  - substituteSpecialEntities() replaces only those special entities.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Reference to entity lookup table.
     *
     * Filled in lazily from HTMLPurifier_EntityLookup::instance() the first
     * time a named, non-special entity has to be resolved.
     */
    protected $_entity_lookup;

    /**
     * Callback regex string for parsing entities.
     *
     * Exactly one of the capture groups is non-empty for any given match:
     * group 1 holds hex digits, group 2 decimal digits (leading zeros are
     * consumed outside the group), group 3 an XML-style entity name. The
     * trailing semicolon is optional.
     */
    protected $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
//     1. hex             2. dec      3. string (XML style)


    /**
     * Decimal to parsed string conversion table for special entities.
     */
    protected $_special_dec2str =
            array(
                    34 => '"',
                    38 => '&',
                    39 => "'",
                    60 => '<',
                    62 => '>'
            );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     *
     * Every value in this table is also a key of $_special_dec2str.
     */
    protected $_special_ent2dec =
            array(
                    'quot' => 34,
                    'amp'  => 38,
                    'lt'   => 60,
                    'gt'   => 62
            );

    /**
     * Substitutes non-special entities with their parsed equivalents. Doing
     * this every time a piece of character data is parsed would be wasteful,
     * so it is meant to be run once, before everything else.
     *
     * @param $string String to have non-special entities parsed.
     * @returns Parsed string.
     */
    public function substituteNonSpecialEntities($string) {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
            );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * Special entities (numeric or named) are returned untouched. Other
     * numeric entities are converted with HTMLPurifier_Encoder::unichr();
     * other named entities are looked up in the entity lookup table and left
     * untouched when the name is unknown.
     *
     * @param $matches  PCRE matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @returns Replacement string.
     */
    protected function nonSpecialEntityCallback($matches) {
        // replaces all but big five
        $entity = $matches[0];

        // The regex guarantees at least one character after '&', so offset 1
        // always exists and no error suppression is needed.
        if ($entity[1] === '#') {
            $code = $this->numericEntityCode($matches);

            // abort for special characters
            if (isset($this->_special_dec2str[$code])) return $entity;

            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) return $entity;

        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        return $entity;
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @param $string String to have special entities parsed.
     * @returns Parsed string.
     */
    public function substituteSpecialEntities($string) {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string);
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     *
     * @param $matches  PCRE-style matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @returns Replacement string: the literal character for a special
     *          entity, or the original entity text for anything else.
     */
    protected function specialEntityCallback($matches) {
        $entity = $matches[0];

        if ($entity[1] === '#') {
            $code = $this->numericEntityCode($matches);
            return isset($this->_special_dec2str[$code]) ?
                $this->_special_dec2str[$code] :
                $entity;
        }

        $name = $matches[3];
        if (!isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        // Map name -> code point -> character. Returning the code point
        // itself would insert its decimal digits (e.g. "38" for &amp;) into
        // the output instead of the character.
        return $this->_special_dec2str[$this->_special_ent2dec[$name]];
    }

    /**
     * Extracts the code point from a match of a numeric entity.
     *
     * Must only be called when the match starts with "&#"; the regex then
     * guarantees at least one more character, so offset 2 always exists.
     *
     * @param $matches  PCRE matches array as passed to the callbacks.
     * @returns Code point as produced by hexdec() (hex form) or an integer
     *          cast (decimal form).
     */
    private function numericEntityCode($matches) {
        $is_hex = ($matches[0][2] === 'x');
        return $is_hex ? hexdec($matches[1]) : (int) $matches[2];
    }

}
143
Note: See TracBrowser for help on using the browser.