1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43:
44:
45: 46: 47: 48: 49: 50: 51: 52: 53:
/**
 * Cleans up markup and text taken from a feed.
 *
 * Depending on the construct type passed to sanitize(), the data is parsed
 * with DOMDocument, unwanted tags / attributes / comments are stripped (or
 * encoded), URL-carrying attributes are made absolute, image sources may be
 * redirected through a caching image handler, and the result is converted to
 * the configured output encoding.
 */
class SimplePie_Sanitize
{
    /** Base URL used by replace_urls(); overwritten on every HTML/XHTML sanitize() call. */
    public $base;

    /** Registry used to create Cache/File objects and to call Misc helpers; see set_registry(). */
    public $registry;

    /** Whether the outer <div> of an XHTML construct is removed from the output. */
    public $remove_div = true;

    /** Prefix prepended to the cache name of an image when rewriting <img src>; '' or false disables it. */
    public $image_handler = '';

    /** Tag names stripped from the body, or false to strip none. */
    public $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');

    /** When true, stripped tags are kept as visible (text) markup instead of being dropped. */
    public $encode_instead_of_strip = false;

    /** Attribute names removed from every element, or false to remove none. */
    public $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');

    /** Whether comment nodes are removed. */
    public $strip_comments = false;

    /** Target encoding; anything other than 'UTF-8' triggers a conversion at the end of sanitize(). */
    public $output_encoding = 'UTF-8';

    /** Image caching is only attempted when this is true (and an image handler is set). */
    public $enable_cache = true;

    /** Location handed to the Cache factory for cached images. */
    public $cache_location = './cache';

    /** Callable that turns an image URL into a cache name. */
    public $cache_name_function = 'md5';

    /** Timeout passed to the File object when fetching images. */
    public $timeout = 10;

    /** User agent passed to the File object when fetching images. */
    public $useragent = '';

    /** force_fsockopen flag passed to the File object when fetching images. */
    public $force_fsockopen = false;

    /** Map of element name => attribute name(s) whose values are made absolute. */
    public $replace_url_attributes = array(
        'a' => 'href',
        'area' => 'href',
        'blockquote' => 'cite',
        'del' => 'cite',
        'form' => 'action',
        'img' => array('longdesc', 'src'),
        'input' => 'src',
        'ins' => 'cite',
        'q' => 'cite'
    );

    /**
     * Choose whether the outer <div> of XHTML constructs is removed.
     *
     * @param bool $enable
     */
    public function remove_div($enable = true)
    {
        $this->remove_div = (bool) $enable;
    }

    /**
     * Set the image handler prefix; any falsy value disables image rewriting.
     *
     * @param string|false $page
     */
    public function set_image_handler($page = false)
    {
        $this->image_handler = $page ? (string) $page : false;
    }

    /**
     * Store the registry used for Cache/File creation and Misc calls.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry &$registry)
    {
        $this->registry = $registry;
    }

    /**
     * Copy cache settings into this object.
     *
     * A null $enable_cache and falsy $cache_location / $cache_name_function
     * leave the corresponding current setting untouched. $cache_class is
     * accepted for signature compatibility and is not used here.
     *
     * @param bool|null $enable_cache
     * @param string $cache_location
     * @param string $cache_name_function
     * @param string $cache_class
     */
    public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
    {
        if ($enable_cache !== null)
        {
            $this->enable_cache = (bool) $enable_cache;
        }

        if ($cache_location)
        {
            $this->cache_location = (string) $cache_location;
        }

        if ($cache_name_function)
        {
            $this->cache_name_function = (string) $cache_name_function;
        }
    }

    /**
     * Copy file-fetching settings into this object.
     *
     * Falsy arguments leave the corresponding current setting untouched.
     * $file_class is accepted for signature compatibility and is not used here.
     *
     * @param string $file_class
     * @param int $timeout
     * @param string $useragent
     * @param bool $force_fsockopen
     */
    public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
    {
        if ($timeout)
        {
            // Keep the type of the declared default (int), not a string.
            $this->timeout = (int) $timeout;
        }

        if ($useragent)
        {
            $this->useragent = (string) $useragent;
        }

        if ($force_fsockopen)
        {
            // Keep the type of the declared default (bool), not a string.
            $this->force_fsockopen = (bool) $force_fsockopen;
        }
    }

    /**
     * Set the tags to strip.
     *
     * @param array|string|false $tags Array of names, comma-separated string, or a falsy value to disable stripping
     */
    public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
    {
        if (!$tags)
        {
            $this->strip_htmltags = false;
            return;
        }

        $this->strip_htmltags = is_array($tags) ? $tags : explode(',', $tags);
    }

    /**
     * Choose whether stripped tags are shown as text instead of being removed.
     *
     * @param bool $encode
     */
    public function encode_instead_of_strip($encode = false)
    {
        $this->encode_instead_of_strip = (bool) $encode;
    }

    /**
     * Set the attributes to strip.
     *
     * @param array|string|false $attribs Array of names, comma-separated string, or a falsy value to disable stripping
     */
    public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
    {
        if (!$attribs)
        {
            $this->strip_attributes = false;
            return;
        }

        $this->strip_attributes = is_array($attribs) ? $attribs : explode(',', $attribs);
    }

    /**
     * Choose whether comments are removed.
     *
     * @param bool $strip
     */
    public function strip_comments($strip = false)
    {
        $this->strip_comments = (bool) $strip;
    }

    /**
     * Set the encoding of the data returned by sanitize().
     *
     * @param string $encoding
     */
    public function set_output_encoding($encoding = 'UTF-8')
    {
        $this->output_encoding = (string) $encoding;
    }

    /**
     * Set which element attributes carry URLs that must be made absolute.
     *
     * @param array $element_attribute Element name => attribute name, or array of attribute names
     */
    public function set_url_replacements($element_attribute = array('a' => 'href', 'area' => 'href', 'blockquote' => 'cite', 'del' => 'cite', 'form' => 'action', 'img' => array('longdesc', 'src'), 'input' => 'src', 'ins' => 'cite', 'q' => 'cite'))
    {
        $this->replace_url_attributes = (array) $element_attribute;
    }

    /**
     * Sanitize a piece of feed data.
     *
     * @param string $data Raw data
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @param string $base Base URL for relative URLs
     * @return string Sanitized data, converted to the configured output encoding
     */
    public function sanitize($data, $type, $base = '')
    {
        $data = trim($data);

        // Empty data is returned untouched, except for IRIs (an empty IRI still resolves against the base).
        if ($data === '' && !($type & SIMPLEPIE_CONSTRUCT_IRI))
        {
            return $data;
        }

        if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
        {
            // An entity or a closing tag is taken as evidence of HTML.
            if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
            {
                $type |= SIMPLEPIE_CONSTRUCT_HTML;
            }
            else
            {
                $type |= SIMPLEPIE_CONSTRUCT_TEXT;
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
        {
            $data = base64_decode($data);
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
        {
            $document = new DOMDocument();
            $document->encoding = 'UTF-8';
            $data = $this->preprocess($data, $type);

            // Parser warnings about sloppy markup are silenced for the duration of the load.
            set_error_handler(array('SimplePie_Misc', 'silence_errors'));
            $document->loadHTML($data);
            restore_error_handler();

            if ($this->strip_comments)
            {
                $xpath = new DOMXPath($document);
                $comments = $xpath->query('//comment()');

                foreach ($comments as $comment)
                {
                    $comment->parentNode->removeChild($comment);
                }
            }

            if ($this->strip_htmltags)
            {
                foreach ($this->strip_htmltags as $tag)
                {
                    $this->strip_tag($tag, $document, $type);
                }
            }

            if ($this->strip_attributes)
            {
                foreach ($this->strip_attributes as $attrib)
                {
                    $this->strip_attr($attrib, $document);
                }
            }

            // Make URL-carrying attributes absolute against the supplied base.
            $this->base = $base;
            foreach ($this->replace_url_attributes as $element => $attributes)
            {
                $this->replace_urls($document, $element, $attributes);
            }

            // Route images through the caching handler when one is configured.
            if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
            {
                $this->cache_images($document);
            }

            // Drop the doctype that preprocess() added.
            if ($document->firstChild instanceof DOMDocumentType)
            {
                $document->removeChild($document->firstChild);
            }

            // Promote the first node of <body> to the document root and serialize it.
            // A missing or empty body (e.g. everything was stripped) yields an empty string.
            $body = $document->getElementsByTagName('body')->item(0);
            $real_body = ($body !== null) ? $body->firstChild : null;
            if ($real_body !== null && $document->firstChild !== null)
            {
                $document->replaceChild($real_body, $document->firstChild);
                $data = trim($document->saveHTML());
            }
            else
            {
                $data = '';
            }

            // NOTE(review): only XHTML constructs get their outer <div> handled here; the
            // wrapper <div> that preprocess() adds to other constructs stays in the output.
            if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
            {
                if ($this->remove_div)
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                    $data = preg_replace('/<\/div>$/', '', $data);
                }
                else
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
                }
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_IRI)
        {
            $data = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
        {
            $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
        }

        if ($this->output_encoding !== 'UTF-8')
        {
            $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
        }

        return $data;
    }

    /**
     * Point every <img src> at the image handler, fetching and caching the
     * image first when it is not in the cache yet.
     *
     * Images that cannot be fetched keep their original src.
     *
     * @param DOMDocument $document
     */
    protected function cache_images($document)
    {
        // REMOTE_ADDR is not defined outside a web request (e.g. CLI); forward it only when present.
        $request_headers = array();
        if (isset($_SERVER['REMOTE_ADDR']))
        {
            $request_headers['X-FORWARDED-FOR'] = $_SERVER['REMOTE_ADDR'];
        }

        foreach ($document->getElementsByTagName('img') as $img)
        {
            if (!$img->hasAttribute('src'))
            {
                continue;
            }

            $src = $img->getAttribute('src');
            $image_url = call_user_func($this->cache_name_function, $src);
            $cache = $this->registry->call('Cache', 'create', array($this->cache_location, $image_url, 'spi'));

            if ($cache->load())
            {
                $img->setAttribute('src', $this->image_handler . $image_url);
                continue;
            }

            $file = $this->registry->create('File', array($src, $this->timeout, 5, $request_headers, $this->useragent, $this->force_fsockopen));

            // The bit test needs its own parentheses: === binds tighter than &.
            $is_local = ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0;
            $status_ok = $file->status_code === 200 || ($file->status_code > 206 && $file->status_code < 300);
            if (!$file->success || !($is_local || $status_ok))
            {
                continue;
            }

            if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
            {
                $img->setAttribute('src', $this->image_handler . $image_url);
            }
            else
            {
                trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
            }
        }
    }

    /**
     * Wrap a fragment in a minimal UTF-8 document so DOMDocument::loadHTML()
     * can parse it.
     *
     * Any construct carrying flags other than XHTML is additionally wrapped in
     * a <div>, so the body always has a single element to promote afterwards.
     *
     * @param string $html
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @return string Complete document source
     */
    protected function preprocess($html, $type)
    {
        $ret = '';
        if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
        {
            $html = '<div>' . $html . '</div>';
            $ret .= '<!DOCTYPE html>';
            $content_type = 'text/html';
        }
        else
        {
            $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
            $content_type = 'application/xhtml+xml';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
        $ret .= '</head><body>' . $html . '</body></html>';
        return $ret;
    }

    /**
     * Make the given attribute(s) of every $tag element absolute against $this->base.
     *
     * Nothing is done for tags that are in the strip list.
     *
     * @param DOMDocument $document
     * @param string $tag Element name
     * @param string|array $attributes Attribute name or list of attribute names
     */
    public function replace_urls(&$document, $tag, $attributes)
    {
        if (!is_array($attributes))
        {
            $attributes = array($attributes);
        }

        if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags))
        {
            return;
        }

        foreach ($document->getElementsByTagName($tag) as $element)
        {
            foreach ($attributes as $attribute)
            {
                if ($element->hasAttribute($attribute))
                {
                    $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
                    $element->setAttribute($attribute, $value);
                }
            }
        }
    }

    /**
     * Produce the replacement for one regex match of a tag to strip.
     *
     * Reads $match[0] (whole match), [1] (tag name), [2] (attribute text),
     * [3] and [4] (inner content as used below). Not called from within this
     * class.
     *
     * @param array $match
     * @return string
     */
    public function do_strip_htmltags($match)
    {
        $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'));

        if ($this->encode_instead_of_strip)
        {
            if ($keeps_content)
            {
                $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
                $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
                return "<$match[1]$match[2]>$match[3]</$match[1]>";
            }

            return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
        }

        return $keeps_content ? $match[4] : '';
    }

    /**
     * Strip (or encode) every $tag element found below <body>.
     *
     * - Encoding enabled: the element is replaced by its children; for tags
     *   other than script/style the start and end tag are kept as text nodes.
     * - Encoding disabled: script/style elements are removed with their
     *   content, any other element is replaced by its children.
     *
     * @param string $tag Element name
     * @param DOMDocument $document
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     */
    protected function strip_tag($tag, &$document, $type)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('body//' . $tag);
        if (!$elements)
        {
            // query() returns false for an expression it cannot evaluate.
            return;
        }

        $is_raw_text = in_array($tag, array('script', 'style'));

        if (!$this->encode_instead_of_strip && $is_raw_text)
        {
            foreach ($elements as $element)
            {
                $element->parentNode->removeChild($element);
            }
            return;
        }

        $show_markup = $this->encode_instead_of_strip && !$is_raw_text;

        foreach ($elements as $element)
        {
            if ($element->parentNode === null)
            {
                continue;
            }

            $fragment = $document->createDocumentFragment();

            if ($show_markup)
            {
                $fragment->appendChild($document->createTextNode($this->encoded_start_tag($tag, $element, $type)));
            }

            // childNodes is a live list: moving nodes while iterating it skips every
            // other child, so drain the element from the front instead.
            while ($element->firstChild !== null)
            {
                $fragment->appendChild($element->firstChild);
            }

            if ($show_markup)
            {
                $fragment->appendChild($document->createTextNode('</' . $tag . '>'));
            }

            if ($fragment->hasChildNodes())
            {
                $element->parentNode->replaceChild($fragment, $element);
            }
            else
            {
                // Nothing to put in its place (empty or void element).
                $element->parentNode->removeChild($element);
            }
        }
    }

    /**
     * Build the textual start tag shown in place of an encoded element.
     *
     * Empty attribute values are written as name="name" for XHTML constructs
     * and as a bare name for HTML constructs.
     *
     * @param string $tag Element name
     * @param DOMElement $element
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
     * @return string
     */
    protected function encoded_start_tag($tag, $element, $type)
    {
        $text = '<' . $tag;

        if ($element->hasAttributes())
        {
            $attrs = array();
            foreach ($element->attributes as $name => $attr)
            {
                $value = $attr->value;

                // Compare against '' explicitly: empty() would also catch the value "0".
                if ($value === '')
                {
                    if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
                    {
                        $value = $name;
                    }
                    elseif ($type & SIMPLEPIE_CONSTRUCT_HTML)
                    {
                        $attrs[] = $name;
                        continue;
                    }
                }

                $attrs[] = $name . '="' . $value . '"';
            }
            $text .= ' ' . implode(' ', $attrs);
        }

        return $text . '>';
    }

    /**
     * Remove the attribute $attrib from every element that carries it.
     *
     * @param string $attrib Attribute name
     * @param DOMDocument $document
     */
    protected function strip_attr($attrib, &$document)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('//*[@' . $attrib . ']');
        if (!$elements)
        {
            // query() returns false for an expression it cannot evaluate.
            return;
        }

        foreach ($elements as $element)
        {
            $element->removeAttribute($attrib);
        }
    }
}
525: