<?php

/**
 * Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
 *
 * Three forms are recognised, tried in this order: a bracketed IPv6 literal,
 * an IPv4 address, and a plain ASCII domain name. Internationalized domain
 * names are not supported and must be supplied in Punycode.
 */
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{

    /**
     * Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
     * @var HTMLPurifier_AttrDef_URI_IPv4
     */
    protected $ipv4;

    /**
     * Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
     * @var HTMLPurifier_AttrDef_URI_IPv6
     */
    protected $ipv6;

    /**
     * Creates the IPv4 and IPv6 sub-validators that validate() delegates to.
     */
    public function __construct() {
        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
    }

    /**
     * Validates the host component of a URI.
     *
     * @param string $string  Host to validate.
     * @param mixed  $config  Passed through unchanged to the sub-validators.
     * @param mixed  $context Passed through unchanged to the sub-validators.
     * @return string|bool The accepted host, or false when it is invalid.
     *                     An empty host is accepted and returned as ''.
     *                     For an IPv6 literal the value produced by the
     *                     IPv6 sub-validator is re-wrapped in brackets; for
     *                     IPv4 the sub-validator's result is returned as-is;
     *                     a domain name is returned unchanged.
     */
    public function validate($string, $config, $context) {
        if ($string === '') return '';

        $length = strlen($string);
        if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') {
            // IPv6 literal: validate the part between the brackets.
            $ip = substr($string, 1, $length - 2);
            $valid = $this->ipv6->validate($ip, $config, $context);
            if ($valid === false) return false;
            return '[' . $valid . ']';
        }

        // need to do checks on unusual encodings too
        $ipv4 = $this->ipv4->validate($string, $config, $context);
        if ($ipv4 !== false) return $ipv4;

        // A regular domain name.

        // This breaks I18N domain names, but we don't have proper IRI support,
        // so force users to insert Punycode. If there's complaining we'll
        // try to fix things into an international friendly form.

        // The productions describing this are:
        $a   = '[a-z]';     // alpha
        $an  = '[a-z0-9]';  // alphanum
        $and = '[a-z0-9-]'; // alphanum | "-"
        // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
        $domainlabel = $an . '(?:' . $and . '*' . $an . ')?';
        // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
        $toplabel = $a . '(?:' . $and . '*' . $an . ')?';
        // hostname    = *( domainlabel "." ) toplabel [ "." ]
        //
        // The D modifier (PCRE_DOLLAR_ENDONLY) makes "$" match only at the
        // very end of the subject. Without it "$" also matches just before a
        // trailing newline, so "example.com\n" would be accepted and returned
        // with the line break still attached.
        $pattern = '/^(?:' . $domainlabel . '\.)*' . $toplabel . '\.?$/iD';

        // preg_match() yields 0 on no match and false on a PCRE error; both
        // are treated as an invalid host.
        if (!preg_match($pattern, $string)) return false;

        return $string;
    }

}