1: <?php
2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43:
44:
45: 46: 47: 48: 49: 50: 51: 52:
/**
 * Finds a feed starting from an already-fetched document.
 *
 * If the document itself is a feed it is returned as-is. Otherwise its body
 * is parsed as HTML and searched, in order, for autodiscovery links
 * (<link>/<a>/<area> with a feed-like "rel") and for anchor URLs that look
 * like feeds, either by file extension or by keywords in the URL.
 *
 * At most $max_checked_feeds candidate URLs are fetched per locator instance.
 */
class SimplePie_Locator
{
    /** User agent passed to every File created for a candidate URL. */
    var $useragent;
    /** Timeout passed to every File created for a candidate URL. */
    var $timeout;
    /** The document being inspected (reads ->body, ->url, ->method). */
    var $file;
    /** Anchor URLs on the same authority as the document (see get_links()). */
    var $local = array();
    /** Anchor URLs on a different authority (see get_links()). */
    var $elsewhere = array();
    var $cached_entities = array();
    /** URL of the document itself; base for links that precede <base>. */
    var $http_base;
    /** Effective base URL: the first <base href> if present, else $http_base. */
    var $base;
    /** Line number of the <base> element that set $base (0 when none). */
    var $base_location = 0;
    /** Number of candidate URLs fetched so far. */
    var $checked_feeds = 0;
    /** Upper bound for $checked_feeds. */
    var $max_checked_feeds = 10;
    /** DOMDocument holding the parsed body (empty when the body is empty). */
    var $dom;
    protected $registry;

    /**
     * @param object      $file              Fetched document; kept by reference.
     * @param int         $timeout           Timeout handed to candidate fetches.
     * @param string|null $useragent         User agent handed to candidate fetches.
     * @param int         $max_checked_feeds Maximum number of candidates to fetch.
     */
    public function __construct(&$file, $timeout = 10, $useragent = null, $max_checked_feeds = 10)
    {
        $this->file =& $file;
        $this->useragent = $useragent;
        $this->timeout = $timeout;
        $this->max_checked_feeds = $max_checked_feeds;

        $this->dom = new DOMDocument();

        // DOMDocument::loadHTML() rejects an empty source (ValueError on
        // PHP 8), so an empty body simply leaves the document empty and every
        // later getElementsByTagName() lookup yields no elements.
        $body = isset($this->file->body) ? $this->file->body : '';
        if ($body !== '')
        {
            // Real-world HTML is rarely valid; silence libxml's warnings.
            set_error_handler(array('SimplePie_Misc', 'silence_errors'));
            $this->dom->loadHTML($body);
            restore_error_handler();
        }
    }

    /**
     * Injects the registry used to create Files/sniffers and to call Misc helpers.
     */
    public function set_registry(SimplePie_Registry &$registry)
    {
        $this->registry = &$registry;
    }

    /**
     * Runs the discovery strategies selected by $type.
     *
     * @param int   $type    Bitmask of SIMPLEPIE_LOCATOR_* flags.
     * @param mixed $working Receives the last strategy result: the list of
     *                       autodiscovered feeds, or the single feed found by
     *                       link scanning.
     * @return object|null The document itself when it is a feed, the first
     *                     autodiscovered feed, a feed found by link scanning,
     *                     or null when nothing was found.
     */
    public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working = null)
    {
        if ($this->is_feed($this->file))
        {
            return $this->file;
        }

        // A remote document that is not HTML has nothing to scan.
        if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
        {
            $sniffer = $this->registry->create('Content_Type_Sniffer', array($this->file));
            if ($sniffer->get_type() !== 'text/html')
            {
                return null;
            }
        }

        if ($type & ~SIMPLEPIE_LOCATOR_NONE)
        {
            $this->get_base();
        }

        if ($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery())
        {
            return $working[0];
        }

        $link_strategies = SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY;
        if ($type & $link_strategies && $this->get_links())
        {
            if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local))
            {
                return $working;
            }

            if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local))
            {
                return $working;
            }

            if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere))
            {
                return $working;
            }

            if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere))
            {
                return $working;
            }
        }
        return null;
    }

    /**
     * Tells whether a file should be treated as a feed.
     *
     * Remote files are sniffed and must have one of the known feed/XML media
     * types; local files are always accepted; anything else is rejected.
     *
     * @param object $file
     * @return bool
     */
    public function is_feed(&$file)
    {
        if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
        {
            $sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
            $sniffed = $sniffer->get_type();
            return in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml'));
        }

        if ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL)
        {
            return true;
        }

        return false;
    }

    /**
     * Initialises $http_base/$base from the document URL and the first
     * <base> element carrying an href, remembering that element's line.
     */
    public function get_base()
    {
        $this->http_base = $this->file->url;
        $this->base = $this->http_base;
        $elements = $this->dom->getElementsByTagName('base');
        foreach ($elements as $element)
        {
            if ($element->hasAttribute('href'))
            {
                $this->base = $this->registry->call('Misc', 'absolutize_url', array(trim($element->getAttribute('href')), $this->http_base));
                $this->base_location = $element->getLineNo();
                break;
            }
        }
    }

    /**
     * Collects feeds advertised through <link>, <a> and <area> elements.
     *
     * @return array|null List of fetched feed files, or null when none was found.
     */
    public function autodiscovery()
    {
        $done = array();
        $feeds = array();
        $feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
        $feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
        $feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));

        if (!empty($feeds))
        {
            return array_values($feeds);
        }

        return null;
    }

    /**
     * Fetches the feeds advertised by all elements with the given tag name.
     *
     * @param string $name  Tag name to scan.
     * @param array  $done  URLs already examined; every examined URL is appended.
     * @param array  $feeds Feeds found so far, keyed by URL.
     * @return array $feeds plus any newly found feeds, keyed by URL.
     */
    protected function search_elements_by_tag($name, &$done, $feeds)
    {
        $links = $this->dom->getElementsByTagName($name);
        foreach ($links as $link)
        {
            if ($this->checked_feeds === $this->max_checked_feeds)
            {
                break;
            }
            if ($link->hasAttribute('href') && $link->hasAttribute('rel'))
            {
                $rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel')))));
                $href = $this->resolve_href($link);

                // The "already seen" checks apply to both kinds of link, so a
                // URL is fetched (and counted against the budget) only once.
                if (!in_array($href, $done) && !isset($feeds[$href]) && $this->advertises_feed($link, $rel))
                {
                    $this->checked_feeds++;
                    $feed = $this->fetch_feed($href);
                    if ($feed !== null)
                    {
                        $feeds[$href] = $feed;
                    }
                }
                $done[] = $href;
            }
        }

        return $feeds;
    }

    /**
     * Sorts every http(s)/feed/scheme-less anchor URL into $local or
     * $elsewhere depending on whether it shares the document's authority.
     *
     * @return true|null true when at least one link was collected, else null.
     */
    public function get_links()
    {
        // Loop-invariant: the authority every anchor is compared against.
        $current = $this->registry->call('Misc', 'parse_url', array($this->file->url));

        $links = $this->dom->getElementsByTagName('a');
        foreach ($links as $link)
        {
            if (!$link->hasAttribute('href'))
            {
                continue;
            }

            $href = trim($link->getAttribute('href'));
            $parsed = $this->registry->call('Misc', 'parse_url', array($href));
            // "https?" (not "http(s)") so that plain http links are kept too.
            if ($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme']))
            {
                $href = $this->resolve_href($link);

                if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
                {
                    $this->local[] = $href;
                }
                else
                {
                    $this->elsewhere[] = $href;
                }
            }
        }
        $this->local = array_unique($this->local);
        $this->elsewhere = array_unique($this->elsewhere);
        if (!empty($this->local) || !empty($this->elsewhere))
        {
            return true;
        }
        return null;
    }

    /**
     * Tries the URLs whose last dot-suffix is a typical feed extension.
     *
     * URLs that were fetched but turned out not to be usable feeds are
     * removed from $array.
     *
     * @param array $array Candidate URLs (modified in place).
     * @return object|null The first usable feed file, or null.
     */
    public function extension(&$array)
    {
        foreach ($array as $key => $value)
        {
            if ($this->checked_feeds === $this->max_checked_feeds)
            {
                break;
            }
            if (in_array(strtolower(strrchr($value, '.')), array('.rss', '.rdf', '.atom', '.xml')))
            {
                $this->checked_feeds++;
                $feed = $this->fetch_feed($value);
                if ($feed !== null)
                {
                    return $feed;
                }
                unset($array[$key]);
            }
        }
        return null;
    }

    /**
     * Tries the URLs that contain "rss", "rdf", "atom" or "xml" anywhere.
     *
     * URLs that were fetched but turned out not to be usable feeds are
     * removed from $array.
     *
     * @param array $array Candidate URLs (modified in place).
     * @return object|null The first usable feed file, or null.
     */
    public function body(&$array)
    {
        foreach ($array as $key => $value)
        {
            if ($this->checked_feeds === $this->max_checked_feeds)
            {
                break;
            }
            if (preg_match('/(rss|rdf|atom|xml)/i', $value))
            {
                $this->checked_feeds++;
                // Sends the same Accept header as the other strategies.
                $feed = $this->fetch_feed($value);
                if ($feed !== null)
                {
                    return $feed;
                }
                unset($array[$key]);
            }
        }
        return null;
    }

    /**
     * Absolutizes an element's href against $base when the element comes
     * after the <base> element, and against the document URL otherwise.
     */
    private function resolve_href($link)
    {
        $relative_to = ($this->base_location < $link->getLineNo()) ? $this->base : $this->http_base;
        return $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $relative_to));
    }

    /**
     * Tells whether a link's rel tokens mark it as a feed: either rel="feed",
     * or a non-stylesheet rel="alternate" whose type is an RSS/Atom media type.
     */
    private function advertises_feed($link, $rel)
    {
        if (in_array('feed', $rel))
        {
            return true;
        }

        return in_array('alternate', $rel)
            && !in_array('stylesheet', $rel)
            && $link->hasAttribute('type')
            && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('application/rss+xml', 'application/atom+xml'));
    }

    /**
     * Fetches a candidate URL and returns the file only if it is a usable feed.
     *
     * A file is usable when the fetch succeeded, it is either not remote or
     * answered with status 200 or 207-299, and is_feed() accepts it.
     *
     * @param string $url
     * @return object|null
     */
    private function fetch_feed($url)
    {
        $headers = array(
            'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
        );
        $feed = $this->registry->create('File', array($url, $this->timeout, 5, $headers, $this->useragent));

        if (!$feed->success)
        {
            return null;
        }

        // Parentheses matter: "===" binds tighter than "&", so without them
        // the "not remote" test is always false.
        $not_remote = ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0;
        $status_ok = $feed->status_code === 200 || ($feed->status_code > 206 && $feed->status_code < 300);

        if (($not_remote || $status_ok) && $this->is_feed($feed))
        {
            return $feed;
        }
        return null;
    }
}
337:
338: