| <?php |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Sanitizes feed-supplied strings before they are returned to the caller.
 *
 * Depending on the construct type passed to sanitize(), the data is treated
 * as plain text, an IRI, or (X)HTML. Markup is loaded into a DOMDocument and
 * then has comments, tags and attributes stripped, attributes added, URLs
 * made absolute (and optionally upgraded to https), and images optionally
 * rewritten to point at a caching image handler.
 *
 * All configuration lives in public properties that are normally set through
 * the setter methods below.
 */
class SimplePie_Sanitize
{
    /** @var string Base URL for replace_urls(); assigned from sanitize()'s $base argument. */
    public $base;

    /** @var bool Whether sanitize() drops the wrapping <div> from its HTML output. */
    public $remove_div = true;

    /** @var string|false Prefix prepended to the cache key of each cached image; ''/false disables it. */
    public $image_handler = '';

    /** @var array|false Tag names removed (or encoded) by sanitize(). */
    public $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');

    /** @var bool When true, stripped tags are emitted as visible text instead of being dropped. */
    public $encode_instead_of_strip = false;

    /** @var array|false Attribute names removed from every element. */
    public $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');

    /** @var array|false Map of tag name => (attribute => value) forced onto matching elements. */
    public $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'));

    /** @var bool Whether comment nodes are removed from markup. */
    public $strip_comments = false;

    /** @var string Encoding of the returned data; anything other than 'UTF-8' triggers a conversion. */
    public $output_encoding = 'UTF-8';

    /** @var bool Image caching only runs when this is true. */
    public $enable_cache = true;

    /** @var string Location handed to the cache handler. */
    public $cache_location = './cache';

    /** @var callable Function used to turn an image URL into a cache key. */
    public $cache_name_function = 'md5';

    /** @var int|string Timeout handed to the file fetcher when caching images. */
    public $timeout = 10;

    /** @var string User agent handed to the file fetcher when caching images. */
    public $useragent = '';

    /** @var bool Handed to the file fetcher as its "force fsockopen" argument. */
    public $force_fsockopen = false;

    /** @var array|null Map of element name => attribute name(s) whose values are URLs. */
    public $replace_url_attributes = null;

    /** @var SimplePie_Registry Registry used to reach the Misc, Cache and File services. */
    public $registry;

    /**
     * Tree of domains whose http:// URLs are rewritten to https://.
     *
     * Built by set_https_domains(): keys are domain labels from right to left
     * ("com" => "example" => ...); a value of true marks a registered domain.
     *
     * @var array
     */
    public $https_domains = array();

    /**
     * Installs the default element/attribute map for URL replacement.
     */
    public function __construct()
    {
        $this->set_url_replacements(null);
    }

    /**
     * Chooses whether the wrapping <div> is removed from sanitized HTML.
     *
     * @param bool $enable
     */
    public function remove_div($enable = true)
    {
        $this->remove_div = (bool) $enable;
    }

    /**
     * Sets the image handler prefix, or disables image caching when falsy.
     *
     * @param string|false $page
     */
    public function set_image_handler($page = false)
    {
        $this->image_handler = $page ? (string) $page : false;
    }

    /**
     * Injects the registry used to resolve the Misc, Cache and File services.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Receives the cache configuration used for image caching.
     *
     * Falsy $cache_location / $cache_name_function values leave the current
     * setting untouched.
     *
     * @param bool   $enable_cache
     * @param string $cache_location
     * @param string $cache_name_function
     * @param string $cache_class Not used by this method; kept for interface compatibility.
     */
    public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
    {
        if (isset($enable_cache))
        {
            $this->enable_cache = (bool) $enable_cache;
        }

        if ($cache_location)
        {
            $this->cache_location = (string) $cache_location;
        }

        if ($cache_name_function)
        {
            $this->cache_name_function = (string) $cache_name_function;
        }
    }

    /**
     * Receives the fetch configuration used when downloading images to cache.
     *
     * Falsy arguments leave the corresponding setting untouched.
     *
     * @param string $file_class Not used by this method; kept for interface compatibility.
     * @param int    $timeout
     * @param string $useragent
     * @param bool   $force_fsockopen
     */
    public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
    {
        if ($timeout)
        {
            $this->timeout = (string) $timeout;
        }

        if ($useragent)
        {
            $this->useragent = (string) $useragent;
        }

        if ($force_fsockopen)
        {
            // This is a flag, so keep it boolean rather than the string "1".
            $this->force_fsockopen = (bool) $force_fsockopen;
        }
    }

    /**
     * Sets the tags to strip; a falsy value disables tag stripping.
     *
     * @param array|string|false $tags Array of tag names or a comma-separated string.
     */
    public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
    {
        $this->strip_htmltags = $this->list_or_false($tags);
    }

    /**
     * Chooses between encoding stripped tags as text and dropping them.
     *
     * @param bool $encode
     */
    public function encode_instead_of_strip($encode = false)
    {
        $this->encode_instead_of_strip = (bool) $encode;
    }

    /**
     * Sets the attributes to strip; a falsy value disables attribute stripping.
     *
     * @param array|string|false $attribs Array of attribute names or a comma-separated string.
     */
    public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
    {
        $this->strip_attributes = $this->list_or_false($attribs);
    }

    /**
     * Sets the attributes forced onto elements; a falsy value disables this.
     *
     * NOTE(review): a comma-separated string is split into a plain list, which
     * does not match the tag => attributes map that sanitize() iterates.
     * Confirm whether string input is ever intended here.
     *
     * @param array|string|false $attribs Map of tag name => (attribute => value).
     */
    public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')))
    {
        $this->add_attributes = $this->list_or_false($attribs);
    }

    /**
     * Chooses whether comment nodes are removed from markup.
     *
     * @param bool $strip
     */
    public function strip_comments($strip = false)
    {
        $this->strip_comments = (bool) $strip;
    }

    /**
     * Sets the encoding of the data returned by sanitize().
     *
     * @param string $encoding
     */
    public function set_output_encoding($encoding = 'UTF-8')
    {
        $this->output_encoding = (string) $encoding;
    }

    /**
     * Sets which element attributes hold URLs that must be made absolute.
     *
     * @param array|null $element_attribute Map of element name => attribute name or list of
     *                                      attribute names; null restores the built-in defaults.
     */
    public function set_url_replacements($element_attribute = null)
    {
        if ($element_attribute === null)
        {
            $element_attribute = array(
                'a' => 'href',
                'area' => 'href',
                'blockquote' => 'cite',
                'del' => 'cite',
                'form' => 'action',
                'img' => array(
                    'longdesc',
                    'src'
                ),
                'input' => 'src',
                'ins' => 'cite',
                'q' => 'cite'
            );
        }
        $this->replace_url_attributes = (array) $element_attribute;
    }

    /**
     * Registers the domains whose http:// URLs should be upgraded to https://.
     *
     * Each domain is stored in a tree keyed by its labels from right to left,
     * so registering "example.com" also covers every subdomain of it.
     * Domains are lower-cased because host names are case-insensitive.
     *
     * @param array $domains List of domain names.
     */
    public function set_https_domains($domains)
    {
        $this->https_domains = array();
        foreach ($domains as $domain)
        {
            $domain = strtolower(trim($domain, ". \t\n\r\0\x0B"));
            $node =& $this->https_domains;
            foreach (array_reverse(explode('.', $domain)) as $segment)
            {
                if ($node === true)
                {
                    // A parent domain is already registered and covers this one.
                    break;
                }
                if (!isset($node[$segment]))
                {
                    $node[$segment] = array();
                }
                $node =& $node[$segment];
            }
            $node = true;
            // Drop the reference so the next iteration cannot write through it.
            unset($node);
        }
    }

    /**
     * Tells whether a host is a registered https domain or a subdomain of one.
     *
     * @param string $domain Host name to look up.
     * @return bool
     */
    protected function is_https_domain($domain)
    {
        // Normalise exactly as set_https_domains() does so lookups agree with storage.
        $domain = strtolower(trim($domain, ". \t\n\r\0\x0B"));
        $node = $this->https_domains;
        foreach (array_reverse(explode('.', $domain)) as $segment)
        {
            if (!is_array($node) || !isset($node[$segment]))
            {
                break;
            }
            $node = $node[$segment];
        }
        return $node === true;
    }

    /**
     * Rewrites an http:// URL to https:// when its host is a registered https domain.
     *
     * @param string $url
     * @return string The upgraded URL, or $url unchanged.
     */
    public function https_url($url)
    {
        if (strtolower(substr($url, 0, 7)) !== 'http://')
        {
            return $url;
        }

        // parse_url() yields false/null when the URL is malformed or has no host.
        $host = parse_url($url, PHP_URL_HOST);
        if (!is_string($host))
        {
            return $url;
        }

        return $this->is_https_domain($host) ? substr_replace($url, 's', 4, 0) : $url;
    }

    /**
     * Sanitizes one piece of feed data according to its construct type.
     *
     * @param string $data Raw data.
     * @param int    $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags.
     * @param string $base Base URL used to absolutize relative URLs.
     * @return string Sanitized data in the configured output encoding.
     * @throws SimplePie_Exception When markup must be processed but DOMDocument is unavailable.
     */
    public function sanitize($data, $type, $base = '')
    {
        $data = trim($data);

        // Empty data is returned untouched unless it is an IRI, which is still resolved against $base.
        if ($data === '' && !($type & SIMPLEPIE_CONSTRUCT_IRI))
        {
            return $data;
        }

        if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
        {
            // An entity reference or a closing tag is taken as evidence of HTML.
            if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
            {
                $type |= SIMPLEPIE_CONSTRUCT_HTML;
            }
            else
            {
                $type |= SIMPLEPIE_CONSTRUCT_TEXT;
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
        {
            $data = base64_decode($data);
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
        {
            if (!class_exists('DOMDocument'))
            {
                throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer');
            }
            $document = new DOMDocument();
            $document->encoding = 'UTF-8';

            $data = $this->preprocess($data, $type);

            // Parser warnings about malformed markup are silenced while loading.
            set_error_handler(array('SimplePie_Misc', 'silence_errors'));
            $document->loadHTML($data);
            restore_error_handler();

            $xpath = new DOMXPath($document);

            if ($this->strip_comments)
            {
                foreach ($xpath->query('//comment()') as $comment)
                {
                    $comment->parentNode->removeChild($comment);
                }
            }

            if ($this->strip_htmltags)
            {
                foreach ($this->strip_htmltags as $tag)
                {
                    $this->strip_tag($tag, $document, $xpath, $type);
                }
            }

            if ($this->strip_attributes)
            {
                foreach ($this->strip_attributes as $attrib)
                {
                    $this->strip_attr($attrib, $xpath);
                }
            }

            if ($this->add_attributes)
            {
                foreach ($this->add_attributes as $tag => $valuePairs)
                {
                    $this->add_attr($tag, $valuePairs, $document);
                }
            }

            // replace_urls() reads the base URL from $this->base.
            $this->base = $base;
            foreach ($this->replace_url_attributes as $element => $attributes)
            {
                $this->replace_urls($document, $element, $attributes);
            }

            if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
            {
                $this->cache_images($document);
            }

            // Serialize only the first child of <body> (the wrapper element).
            $div = $document->getElementsByTagName('body')->item(0)->firstChild;
            $data = trim($document->saveHTML($div));

            if ($this->remove_div)
            {
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                $data = preg_replace('/<\/div>$/', '', $data);
            }
            else
            {
                // Keep the wrapper but discard whatever attributes it carried.
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_IRI)
        {
            $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
            if ($absolute !== false)
            {
                $data = $absolute;
            }
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
        {
            $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
        }

        if ($this->output_encoding !== 'UTF-8')
        {
            $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
        }

        return $data;
    }

    /**
     * Wraps a markup fragment in a complete UTF-8 document for DOMDocument::loadHTML().
     *
     * Existing <html>/<body> tags are removed first. When $type has any flag
     * other than SIMPLEPIE_CONSTRUCT_XHTML set, the fragment is additionally
     * wrapped in a <div> and given an HTML5 doctype; otherwise an XHTML 1.0
     * Strict doctype is used.
     *
     * @param string $html Markup fragment.
     * @param int    $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags.
     * @return string Complete document source.
     */
    protected function preprocess($html, $type)
    {
        $ret = '';
        $html = preg_replace('%</?(?:html|body)[^>]*?' . '>%is', '', $html);
        if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
        {
            $html = '<div>' . $html . '</div>';
            $ret .= '<!DOCTYPE html>';
            $content_type = 'text/html';
        }
        else
        {
            $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
            $content_type = 'application/xhtml+xml';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
        $ret .= '</head><body>' . $html . '</body></html>';
        return $ret;
    }

    /**
     * Makes the URL attributes of every $tag element absolute against $this->base.
     *
     * Elements whose tag is in the strip list are skipped. Resolved URLs are
     * also passed through https_url().
     *
     * @param DOMDocument  $document
     * @param string       $tag        Element name.
     * @param string|array $attributes Attribute name or list of attribute names.
     */
    public function replace_urls($document, $tag, $attributes)
    {
        if (!is_array($attributes))
        {
            $attributes = array($attributes);
        }

        if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags))
        {
            return;
        }

        foreach ($document->getElementsByTagName($tag) as $element)
        {
            foreach ($attributes as $attribute)
            {
                if (!$element->hasAttribute($attribute))
                {
                    continue;
                }

                $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
                if ($value !== false)
                {
                    $element->setAttribute($attribute, $this->https_url($value));
                }
            }
        }
    }

    /**
     * Regex-callback form of tag stripping.
     *
     * Reads $match[0] (whole match), $match[1] (tag name), $match[2]
     * (attribute text), $match[3] and $match[4] (inner content) as produced
     * by the caller's pattern. NOTE(review): the pattern itself is not in
     * this file; confirm the group meanings against the caller.
     *
     * @param array $match Regex match groups.
     * @return string Replacement text.
     */
    public function do_strip_htmltags($match)
    {
        $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'));

        if ($this->encode_instead_of_strip)
        {
            if (!$keeps_content)
            {
                return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
            }

            $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
            $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
            return "<$match[1]$match[2]>$match[3]</$match[1]>";
        }

        return $keeps_content ? $match[4] : '';
    }

    /**
     * Removes every $tag element found under <body>.
     *
     * - With encode_instead_of_strip on, the element is replaced by its
     *   children, surrounded by its start and end tags as literal text
     *   (script/style get no literal tags, only their children).
     * - Otherwise script/style elements are deleted together with their content.
     * - Otherwise the element is replaced by its children.
     *
     * @param string      $tag      Element name.
     * @param DOMDocument $document
     * @param DOMXPath    $xpath
     * @param int         $type     Bitmask of SIMPLEPIE_CONSTRUCT_* flags.
     */
    protected function strip_tag($tag, $document, $xpath, $type)
    {
        $elements = $xpath->query('body//' . $tag);
        $is_raw_text = in_array($tag, array('script', 'style'));

        if ($this->encode_instead_of_strip)
        {
            foreach ($elements as $element)
            {
                $fragment = $document->createDocumentFragment();

                if (!$is_raw_text)
                {
                    $fragment->appendChild(new DOMText($this->start_tag_text($tag, $element, $type)));
                }

                $this->move_children($element, $fragment);

                if (!$is_raw_text)
                {
                    $fragment->appendChild(new DOMText('</' . $tag . '>'));
                }

                $element->parentNode->replaceChild($fragment, $element);
            }
            return;
        }

        if ($is_raw_text)
        {
            foreach ($elements as $element)
            {
                $element->parentNode->removeChild($element);
            }
            return;
        }

        foreach ($elements as $element)
        {
            $fragment = $document->createDocumentFragment();
            $this->move_children($element, $fragment);
            $element->parentNode->replaceChild($fragment, $element);
        }
    }

    /**
     * Removes the named attribute from every element that carries it.
     *
     * @param string   $attrib Attribute name.
     * @param DOMXPath $xpath
     */
    protected function strip_attr($attrib, $xpath)
    {
        foreach ($xpath->query('//*[@' . $attrib . ']') as $element)
        {
            $element->removeAttribute($attrib);
        }
    }

    /**
     * Sets the given attributes on every $tag element, overwriting existing values.
     *
     * @param string      $tag        Element name.
     * @param array       $valuePairs Map of attribute name => value.
     * @param DOMDocument $document
     */
    protected function add_attr($tag, $valuePairs, $document)
    {
        foreach ($document->getElementsByTagName($tag) as $element)
        {
            foreach ($valuePairs as $attrib => $value)
            {
                $element->setAttribute($attrib, $value);
            }
        }
    }

    /**
     * Normalizes a list-style setting: falsy => false, array => as is,
     * anything else => split on commas.
     *
     * @param mixed $value
     * @return array|false
     */
    private function list_or_false($value)
    {
        if (!$value)
        {
            return false;
        }
        return is_array($value) ? $value : explode(',', $value);
    }

    /**
     * Moves every child node of $from to the end of $to, preserving order.
     *
     * @param DOMNode $from
     * @param DOMNode $to
     */
    private function move_children($from, $to)
    {
        // appendChild() detaches the node from $from, so firstChild advances by itself.
        while ($from->firstChild !== null)
        {
            $to->appendChild($from->firstChild);
        }
    }

    /**
     * Builds the literal start-tag text ("<tag a="b">") shown when a stripped
     * element is encoded instead of removed.
     *
     * @param string     $tag     Element name.
     * @param DOMElement $element Element whose attributes are rendered.
     * @param int        $type    Bitmask of SIMPLEPIE_CONSTRUCT_* flags.
     * @return string
     */
    private function start_tag_text($tag, $element, $type)
    {
        $text = '<' . $tag;
        if ($element->hasAttributes())
        {
            $attrs = array();
            foreach ($element->attributes as $name => $attr)
            {
                $value = $attr->value;

                // Compare against '' rather than empty(): "0" is a real attribute value.
                if ($value === '')
                {
                    if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
                    {
                        // XHTML has no bare attributes, so the name is repeated as the value.
                        $value = $name;
                    }
                    elseif ($type & SIMPLEPIE_CONSTRUCT_HTML)
                    {
                        // HTML allows a bare attribute name.
                        $attrs[] = $name;
                        continue;
                    }
                }

                $attrs[] = $name . '="' . $value . '"';
            }
            $text .= ' ' . implode(' ', $attrs);
        }
        return $text . '>';
    }

    /**
     * Caches every <img src> through the cache handler and rewrites the
     * attribute to the image handler prefix followed by the cache key.
     *
     * Images that cannot be fetched keep their original src; a failed cache
     * write raises an E_USER_WARNING.
     *
     * @param DOMDocument $document
     */
    private function cache_images($document)
    {
        foreach ($document->getElementsByTagName('img') as $img)
        {
            if (!$img->hasAttribute('src'))
            {
                continue;
            }

            $src = $img->getAttribute('src');
            $image_url = call_user_func($this->cache_name_function, $src);
            $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));

            if ($cache->load())
            {
                $img->setAttribute('src', $this->image_handler . $image_url);
                continue;
            }

            // REMOTE_ADDR is not defined under CLI/cron; only forward it when present.
            $request_headers = array();
            if (isset($_SERVER['REMOTE_ADDR']))
            {
                $request_headers['X-FORWARDED-FOR'] = $_SERVER['REMOTE_ADDR'];
            }
            $file = $this->registry->create('File', array($src, $this->timeout, 5, $request_headers, $this->useragent, $this->force_fsockopen));

            // The parentheses matter: === binds tighter than &, so without them the mask test is lost.
            $usable = $file->success && (($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300));
            if (!$usable)
            {
                continue;
            }

            if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
            {
                $img->setAttribute('src', $this->image_handler . $image_url);
            }
            else
            {
                trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
            }
        }
    }
}
|
|