| 1 | <?php |
|---|
| 2 | |
|---|
/**
 * HTML Purifier's internal representation of a URI.
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{

    /**
     * Individual URI components. A component that is absent is stored as
     * null; toString() skips null components when rebuilding the URI.
     */
    public $scheme, $userinfo, $host, $port, $path, $query, $fragment;

    /**
     * @note Automatically normalizes scheme and port: a scheme that is not
     *       already all-lowercase is lowercased, and a non-null port is
     *       cast to an integer. All other components are stored untouched.
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
        // Parentheses make the intended grouping explicit: only call
        // strtolower() when the scheme is present and not yet lowercase.
        $this->scheme = (is_null($scheme) || ctype_lower($scheme)) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        $this->port = is_null($port) ? $port : (int) $port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return Scheme object appropriate for validating this URI, or false
     *         if the registry could not supply one. When the default
     *         scheme cannot be loaded an E_USER_WARNING is raised as well.
     */
    public function getSchemeObj($config, $context) {
        $registry = HTMLPurifier_URISchemeRegistry::instance();

        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) return false; // invalid scheme, clean it out
            return $scheme_obj;
        }

        // no scheme: retrieve the default one
        $def = $config->getDefinition('URI');
        $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
        if (!$scheme_obj) {
            // something funky happened to the default scheme object
            trigger_error(
                'Default scheme object "' . $def->defaultScheme . '" was not readable',
                E_USER_WARNING
            );
            return false;
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return True if validation/filtering succeeds, false if failure
     *         (this generic pass always returns true; offending components
     *         are repaired or dropped instead)
     */
    public function validate($config, $context) {

        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate scheme (MUST BE FIRST!)
        // The scheme is dropped when there is no host and it is identical
        // to the configured default scheme.
        if (!is_null($this->scheme) && is_null($this->host)) {
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate host; a host rejected by the host definition is removed
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) $this->host = null;
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port; out-of-range ports are removed
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) $this->port = null;
        }

        // validate path
        // A missing path is treated as the empty path so that the offset
        // checks below never index into null.
        if (is_null($this->path)) $this->path = '';

        if ($this->path !== '') {
            $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
            if (!is_null($this->host)) {
                // path-abempty (hier and relative)
                $this->path = $segments_encoder->encode($this->path);
            } elseif ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This shouldn't ever happen! Without a host, a leading
                    // "//" would be re-read as an authority, so discard it.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // The first segment is encoded with a set that omits ':'
                // so it cannot be mistaken for a scheme separator.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        }
        // else: path-empty (hier and relative), nothing to encode

        // validate query and fragment
        // RFC 3986: query = fragment = *( pchar / "/" / "?" ). These are
        // written out verbatim by toString(), so they are escaped here
        // like every other component.
        if (!is_null($this->query) || !is_null($this->fragment)) {
            $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
            if (!is_null($this->query)) {
                $this->query = $qf_encoder->encode($this->query);
            }
            if (!is_null($this->fragment)) {
                $this->fragment = $qf_encoder->encode($this->fragment);
            }
        }

        return true;

    }

    /**
     * Convert URI back to string
     * @return String URI appropriate for output
     */
    public function toString() {
        $result = '';

        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }

        // The authority ("//userinfo@host:port") is only emitted when a
        // host is present; userinfo and port are ignored without one.
        if (!is_null($this->host)) {
            $result .= '//';
            if (!is_null($this->userinfo)) {
                $result .= $this->userinfo . '@';
            }
            $result .= $this->host;
            if (!is_null($this->port)) {
                $result .= ':' . $this->port;
            }
        }

        $result .= $this->path;

        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

}
|---|
| 161 | |
|---|