1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43:
44:
45: 46: 47: 48: 49: 50: 51: 52:
/**
 * Event-driven XML parser that turns a feed document into a nested array tree.
 *
 * Each element becomes a node of the form
 * array('data' => string, 'attribs' => array, 'xml_base' => string,
 *       'xml_base_explicit' => bool, 'xml_lang' => string, 'child' => array)
 * stored under $parent['child'][namespace][local name][]. The tree is
 * retrieved with get_data() after parse() has run.
 *
 * Two back ends are supported: the expat-style ext/xml parser (preferred) and
 * XMLReader (fallback used when ext/xml fails the sanity probe).
 */
class SimplePie_Parser
{
    /** Error code recorded by parse() when the document could not be parsed. */
    public $error_code;

    /** Human-readable message matching $error_code. */
    public $error_string;

    /** Line number reported by the XML back end after parsing (or at the error). */
    public $current_line;

    /** Column number reported by the XML back end after parsing (or at the error). */
    public $current_column;

    /** Byte index reported by ext/xml after parsing; not set by the XMLReader back end. */
    public $current_byte;

    /** String placed between a namespace URI and the local name in qualified names. */
    public $separator = ' ';

    /** Stack of namespace URIs of the currently open elements (pushed in tag_open, popped in tag_close). */
    public $namespace = array('');

    /** Stack of local names of the currently open elements. */
    public $element = array('');

    /** Stack of effective xml:base values of the currently open elements. */
    public $xml_base = array('');

    /** Stack of flags: true once an xml:base attribute was seen on the element or an ancestor. */
    public $xml_base_explicit = array(false);

    /** Stack of effective xml:lang values of the currently open elements. */
    public $xml_lang = array('');

    /** Reference to the node currently being filled; refers to the root again once all elements are closed. */
    public $data = array();

    /** Stack of references to the ancestors of $data. */
    public $datas = array(array());

    /**
     * -1 while outside an inline XHTML/XML text construct; otherwise the
     * nesting depth inside it (0 = directly inside the construct element).
     */
    public $current_xhtml_construct = -1;

    /** Encoding handed to the XML back end and to htmlspecialchars(). */
    public $encoding;

    /** Registry used to create the XML declaration parser and to call Misc helpers. */
    protected $registry;

    /**
     * Inject the registry used to resolve collaborating classes.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry &$registry)
    {
        $this->registry = &$registry;
    }

    /**
     * Parse an XML document and build the node tree.
     *
     * @param string $data     Raw document. Modified in place: a leading byte
     *                         order mark is removed and an XML declaration, if
     *                         present, is rewritten using $encoding.
     * @param string $encoding Encoding of $data.
     * @return bool true on success, false on failure (see get_error_string()).
     */
    public function parse(&$data, $encoding)
    {
        // Every US-ASCII character is also a valid UTF-8 character, so UTF-8
        // is used for the back end when US-ASCII is requested.
        if (strtoupper($encoding) === 'US-ASCII')
        {
            $this->encoding = 'UTF-8';
        }
        else
        {
            $this->encoding = $encoding;
        }

        $data = $this->strip_bom($data);

        // Rebuild the XML declaration so that it carries the caller-supplied
        // encoding instead of whatever the document declared.
        $has_declaration = substr($data, 0, 5) === '<?xml'
            && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20")
            && ($pos = strpos($data, '?>')) !== false;
        if ($has_declaration)
        {
            $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
            if (!$declaration->parse())
            {
                $this->error_string = 'SimplePie bug! Please report this!';
                return false;
            }
            $standalone = $declaration->standalone ? 'yes' : 'no';
            $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . $standalone . '"?>' . substr($data, $pos + 2);
        }

        if ($this->xml_extension_is_sane())
        {
            return $this->parse_with_expat($data);
        }

        return $this->parse_with_xmlreader($data);
    }

    /**
     * Remove a leading UTF-32, UTF-16 or UTF-8 byte order mark.
     *
     * @param string $data
     * @return string $data without the mark (unchanged when none is present).
     */
    private function strip_bom($data)
    {
        // Order matters: the UTF-32LE mark starts with the UTF-16LE mark, so
        // the four-byte marks have to be tested first.
        $marks = array(
            "\x00\x00\xFE\xFF", // UTF-32 big endian
            "\xFF\xFE\x00\x00", // UTF-32 little endian
            "\xFE\xFF",         // UTF-16 big endian
            "\xFF\xFE",         // UTF-16 little endian
            "\xEF\xBB\xBF",     // UTF-8
        );
        foreach ($marks as $mark)
        {
            $length = strlen($mark);
            if (substr($data, 0, $length) === $mark)
            {
                return substr($data, $length);
            }
        }
        return $data;
    }

    /**
     * Probe once per process whether ext/xml reports the character data
     * produced by an entity reference.
     *
     * The probe document must be well-formed: the ampersand is written as
     * "&amp;". A bare "&" is a parse error, yields no value at all and would
     * make the probe report false on every installation.
     * NOTE(review): presumably this guards against libxml builds that drop
     * entities in ext/xml - confirm against upstream history.
     *
     * @return bool
     */
    private function xml_extension_is_sane()
    {
        static $xml_is_sane = null;
        if ($xml_is_sane === null)
        {
            $values = array();
            $parser_check = xml_parser_create();
            xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
            $this->free_parser($parser_check);
            $xml_is_sane = isset($values[0]['value']);
        }
        return $xml_is_sane;
    }

    /**
     * Release an ext/xml parser.
     *
     * xml_parser_free() has no effect as of PHP 8.0 (parsers are objects),
     * so it is only called on older versions.
     *
     * @param mixed $parser Parser created by xml_parser_create*().
     */
    private function free_parser($parser)
    {
        if (version_compare(PHP_VERSION, '8.0.0', '<'))
        {
            xml_parser_free($parser);
        }
    }

    /**
     * Parse $data with the namespace-aware ext/xml parser.
     *
     * @param string $data
     * @return bool false when xml_parse() reports an error.
     */
    private function parse_with_expat($data)
    {
        $xml = xml_parser_create_ns($this->encoding, $this->separator);
        xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
        xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
        // Object/method callables replace xml_set_object() plus string
        // handler names, which newer PHP versions deprecate.
        xml_set_character_data_handler($xml, array($this, 'cdata'));
        xml_set_element_handler($xml, array($this, 'tag_open'), array($this, 'tag_close'));

        $succeeded = true;
        if (!xml_parse($xml, $data, true))
        {
            $this->error_code = xml_get_error_code($xml);
            $this->error_string = xml_error_string($this->error_code);
            $succeeded = false;
        }
        $this->current_line = xml_get_current_line_number($xml);
        $this->current_column = xml_get_current_column_number($xml);
        $this->current_byte = xml_get_current_byte_index($xml);
        $this->free_parser($xml);

        return $succeeded;
    }

    /**
     * Parse $data with XMLReader, replaying its nodes through the same
     * tag_open()/cdata()/tag_close() handlers the ext/xml back end uses.
     *
     * @param string $data
     * @return bool false when libxml recorded an error.
     */
    private function parse_with_xmlreader($data)
    {
        libxml_clear_errors();
        $xml = new XMLReader();
        $xml->xml($data);
        // Warnings from malformed input are suppressed on purpose; the
        // failure is picked up through libxml_get_last_error() below.
        while (@$xml->read())
        {
            switch ($xml->nodeType)
            {
                case XMLReader::END_ELEMENT:
                    $this->tag_close(null, $this->qualified_name($xml));
                    break;

                case XMLReader::ELEMENT:
                    // Read these before the cursor moves onto the attributes.
                    $empty = $xml->isEmptyElement;
                    $tagName = $this->qualified_name($xml);
                    $attributes = array();
                    while ($xml->moveToNextAttribute())
                    {
                        $attributes[$this->qualified_name($xml)] = $xml->value;
                    }
                    $this->tag_open(null, $tagName, $attributes);
                    // Empty elements produce no END_ELEMENT node.
                    if ($empty)
                    {
                        $this->tag_close(null, $tagName);
                    }
                    break;

                case XMLReader::TEXT:
                case XMLReader::CDATA:
                    $this->cdata(null, $xml->value);
                    break;
            }
        }

        $error = libxml_get_last_error();
        $xml->close();

        if ($error)
        {
            $this->error_code = $error->code;
            $this->error_string = $error->message;
            $this->current_line = $error->line;
            $this->current_column = $error->column;
            return false;
        }

        return true;
    }

    /**
     * Build the "namespace + separator + local name" form of the node the
     * reader is currently positioned on (just the local name when the node
     * has no namespace).
     *
     * @param XMLReader $reader
     * @return string
     */
    private function qualified_name($reader)
    {
        if ($reader->namespaceURI !== '')
        {
            return $reader->namespaceURI . $this->separator . $reader->localName;
        }
        return $reader->localName;
    }

    /**
     * @return mixed Error code of the last failed parse, if any.
     */
    public function get_error_code()
    {
        return $this->error_code;
    }

    /**
     * @return mixed Error message of the last failed parse, if any.
     */
    public function get_error_string()
    {
        return $this->error_string;
    }

    /**
     * @return mixed Line number recorded by parse().
     */
    public function get_current_line()
    {
        return $this->current_line;
    }

    /**
     * @return mixed Column number recorded by parse().
     */
    public function get_current_column()
    {
        return $this->current_column;
    }

    /**
     * @return mixed Byte index recorded by parse() (ext/xml back end only).
     */
    public function get_current_byte()
    {
        return $this->current_byte;
    }

    /**
     * @return array The node tree built so far.
     */
    public function get_data()
    {
        return $this->data;
    }

    /**
     * Start-element handler.
     *
     * @param mixed  $parser     Parser handle (unused; null from the XMLReader back end).
     * @param string $tag        Qualified element name.
     * @param array  $attributes Qualified attribute name => value.
     */
    public function tag_open($parser, $tag, $attributes)
    {
        $name = $this->split_ns($tag);
        $this->namespace[] = $name[0];
        $this->element[] = $name[1];

        // Group attributes as $attribs[namespace][local name].
        $attribs = array();
        foreach ($attributes as $attribute_name => $value)
        {
            list($attrib_namespace, $attribute) = $this->split_ns($attribute_name);
            $attribs[$attrib_namespace][$attribute] = $value;
        }

        // xml:base is resolved against the inherited base; otherwise inherit.
        if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
        {
            $this->xml_base[] = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base)));
            $this->xml_base_explicit[] = true;
        }
        else
        {
            $this->xml_base[] = end($this->xml_base);
            $this->xml_base_explicit[] = end($this->xml_base_explicit);
        }

        // xml:lang overrides the inherited language; otherwise inherit.
        if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
        {
            $this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
        }
        else
        {
            $this->xml_lang[] = end($this->xml_lang);
        }

        if ($this->current_xhtml_construct >= 0)
        {
            // Inside an inline construct: serialise XHTML elements back to
            // markup inside the construct's 'data' instead of creating nodes.
            $this->current_xhtml_construct++;
            if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML)
            {
                $this->data['data'] .= '<' . end($this->element);
                if (isset($attribs['']))
                {
                    foreach ($attribs[''] as $attribute_name => $value)
                    {
                        $this->data['data'] .= ' ' . $attribute_name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
                    }
                }
                $this->data['data'] .= '>';
            }
        }
        else
        {
            // Remember the parent, then re-point $this->data at a fresh
            // child slot. The statement order is significant because both
            // assignments are by reference.
            $this->datas[] =& $this->data;
            $this->data =& $this->data['child'][end($this->namespace)][end($this->element)][];
            $this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));

            $is_atom_03_xml = end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03
                && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content'), true)
                && isset($attribs['']['mode'])
                && $attribs['']['mode'] === 'xml';
            $is_atom_10_xhtml = end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10
                && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content'), true)
                && isset($attribs['']['type'])
                && $attribs['']['type'] === 'xhtml';
            if ($is_atom_03_xml || $is_atom_10_xhtml)
            {
                $this->current_xhtml_construct = 0;
            }
        }
    }

    /**
     * Character-data handler: appends text to the current node, escaping it
     * when it is being collected as inline markup.
     *
     * @param mixed  $parser Parser handle (unused).
     * @param string $cdata  Text chunk.
     */
    public function cdata($parser, $cdata)
    {
        if ($this->current_xhtml_construct >= 0)
        {
            $this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
        }
        else
        {
            $this->data['data'] .= $cdata;
        }
    }

    /**
     * End-element handler.
     *
     * @param mixed  $parser Parser handle (unused).
     * @param string $tag    Qualified element name (unused; the stacks are authoritative).
     */
    public function tag_close($parser, $tag)
    {
        if ($this->current_xhtml_construct >= 0)
        {
            $this->current_xhtml_construct--;
            // Elements in this list get no closing tag in the serialised markup.
            $void_elements = array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param');
            if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), $void_elements, true))
            {
                $this->data['data'] .= '</' . end($this->element) . '>';
            }
        }
        // Either a regular element ended, or the construct element itself
        // just ended: step back up to the parent node.
        if ($this->current_xhtml_construct === -1)
        {
            $this->data =& $this->datas[count($this->datas) - 1];
            array_pop($this->datas);
        }

        array_pop($this->element);
        array_pop($this->namespace);
        array_pop($this->xml_base);
        array_pop($this->xml_base_explicit);
        array_pop($this->xml_lang);
    }

    /**
     * Split a qualified name into namespace URI and local name.
     *
     * The iTunes namespace is matched case-insensitively and the known
     * misspelt Media RSS namespaces are mapped to the canonical one.
     *
     * @param string $string Qualified name ("namespace + separator + local name" or a bare name).
     * @return array array(namespace, local name); namespace is '' for bare names.
     */
    public function split_ns($string)
    {
        // The cache is keyed by separator as well as by name, so instances
        // using different separators cannot read each other's results.
        static $cache = array();

        $separator = $this->separator;
        if (isset($cache[$separator][$string]))
        {
            return $cache[$separator][$string];
        }

        $pos = strpos($string, $separator);
        // A separator at offset 0 (empty namespace) is treated like no
        // separator at all.
        if ($pos !== false && $pos > 0)
        {
            $namespace = substr($string, 0, $pos);
            $local_name = substr($string, $pos + strlen($separator));

            if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
            {
                $namespace = SIMPLEPIE_NAMESPACE_ITUNES;
            }

            $wrong_mediarss = array(
                SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG,
                SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2,
                SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3,
                SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4,
                SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5,
            );
            if (in_array($namespace, $wrong_mediarss, true))
            {
                $namespace = SIMPLEPIE_NAMESPACE_MEDIARSS;
            }

            $result = array($namespace, $local_name);
        }
        else
        {
            $result = array('', $string);
        }

        $cache[$separator][$string] = $result;
        return $result;
    }
}
400: