Commit | Line | Data |
---|---|---|
bc13d5d6 H |
1 | <?php |
2 | /* | |
3 | * Project: MagpieRSS: a simple RSS integration tool | |
4 | * File: rss_fetch.inc, a simple functional interface | |
5 | to fetching and parsing RSS files, via the | |
6 | function fetch_rss() | |
7 | * Author: Kellan Elliott-McCrea <kellan@protest.net> | |
8 | * Version: 0.3 | |
9 | * License: GPL | |
10 | * | |
11 | * The latest version of MagpieRSS can be obtained from: | |
12 | * http://magpierss.sourceforge.net | |
13 | * | |
14 | * For questions, help, comments, discussion, etc., please join the | |
15 | * Magpie mailing list: | |
16 | * magpierss-general@lists.sourceforge.net | |
17 | * | |
18 | */ | |
19 | ||
20 | // Setup MAGPIE_DIR for use on hosts that don't include | |
21 | // the current path in include_path. | |
22 | // with thanks to rajiv and smarty | |
23 | define('DIR_SEP', DIRECTORY_SEPARATOR); | |
24 | ||
25 | if (!defined('MAGPIE_DIR')) { | |
26 | define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP); | |
27 | } | |
28 | ||
29 | require_once( MAGPIE_DIR . 'rss_parse.inc' ); | |
30 | require_once( MAGPIE_DIR . 'rss_cache.inc' ); | |
31 | ||
32 | // for including 3rd party libraries | |
33 | define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP); | |
34 | require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc'); | |
35 | ||
36 | ||
37 | /* | |
38 | * CONSTANTS - redefine these in your script to change the | |
39 | * behaviour of fetch_rss(); currently, most options affect the cache | |
40 | * | |
41 | * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? | |
42 | * For me a built in cache was essential to creating a "PHP-like" | |
43 | * feel to Magpie, see rss_cache.inc for rationale | |
44 | * | |
45 | * | |
46 | * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects? | |
47 | * This should be a location that the webserver can write to. If this | |
48 | * directory does not already exist Magpie will try to be smart and create | |
49 | * it. This will often fail for permissions reasons. | |
50 | * | |
51 | * | |
52 | * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds. | |
53 | * | |
54 | * | |
55 | * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error | |
56 | * instead of returning stale object? | |
57 | * | |
58 | * MAGPIE_DEBUG - Display debugging notices? | |
59 | * | |
60 | */ | |
61 | ||
62 | ||
/*=======================================================================*\
    Function:   fetch_rss
    Purpose:    return the parsed RSS object for the given url,
                maintaining the cache along the way
    Input:      $url  - url of the RSS file
                $hash - (optional) md5 of a previously seen response body.
                        It is only consulted when the cache is off, where
                        it is handed on to _response_to_rss().
    Output:     parsed RSS object (see rss_parse.inc), or false on failure

    NOTES ON CACHING:
        If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the
        cache and return the cached object when it is still fresh.

    NOTES ON RETRIEVING REMOTE FILES:
        When a stale cached object carries an ETag and/or Last-Modified
        value, a conditional GET is sent.  On a 304 the cached object is
        written back to the cache (refreshing its age) and returned.

    NOTES ON FAILED REQUESTS:
        If there is an HTTP error while fetching an RSS object, or the
        response cannot be parsed, the cached version will be returned,
        if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off).
\*=======================================================================*/
function fetch_rss ($url, $hash=false) {
    // initialize constants
    init();

    // cache is disabled: fetch the file and parse it, nothing else
    if ( !MAGPIE_CACHE_ON ) {
        $resp = _fetch_remote_file( $url, array() );
        // $resp is false when the fetch itself failed, so test it
        // before looking at the status
        if ( $resp && is_success( $resp->status ) ) {
            return _response_to_rss( $resp, $hash );
        }

        trigger_error("MagpieRSS: failed to fetch $url. Cache is off");
        return false;
    }

    // cache is ON
    // Flow
    // 1. check cache
    // 2. if there is a hit, make sure its fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if ( MAGPIE_DEBUG and $cache->ERROR ) {
        trigger_error($cache->ERROR, E_USER_WARNING);
    }

    $cache_status    = false;    // response of check_cache
    $request_headers = array();  // HTTP headers to send with fetch
    $rss             = false;    // cached RSS object, if any
    $errormsg        = '';       // errors, if any

    if ( !$cache->ERROR ) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $url );
    }

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $url );
        if ( $rss ) {
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1 ) {
                trigger_error("Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get: send whichever validators the
    // stale object has.  $cache->get() may fail, so guard $rss first.
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $url );
        if ( $rss ) {
            if ( !empty($rss->etag) ) {
                $request_headers['If-None-Match'] = $rss->etag;
            }
            if ( !empty($rss->last_modified) ) {
                // the request header paired with Last-Modified is
                // If-Modified-Since
                $request_headers['If-Modified-Since'] = $rss->last_modified;
            }
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if ( $resp ) {
        if ( $resp->status == '304' && $rss ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1 ) {
                $msg = "Got 304 for $url";
                trigger_error($msg, E_USER_NOTICE);
            }
            // write the object back so the cache counts as fresh again
            $cache->set( $url, $rss );
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            // keep the stale object in $rss so it survives a parse failure
            $fresh = _response_to_rss( $resp );
            if ( $fresh ) {
                if ( MAGPIE_DEBUG > 1 ) {
                    trigger_error("Fetch successful", E_USER_NOTICE);
                }
                // add object to cache
                $cache->set( $url, $fresh );
                return $fresh;
            }
            $errormsg = "Failed to parse RSS file from $url.";
        }
        else {
            $errormsg = "Failed to fetch $url. ";
            if ( $resp->error ) {
                $errormsg .= "Error: " . $resp->error;
            }
            else {
                $errormsg .= "Response: " . $resp->response_code;
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // fetch failed: fall back to the stale object unless the caller
    // asked for fresh data only
    if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            trigger_error("Returning STALE object for $url", E_USER_NOTICE);
        }
        return $rss;
    }

    // else we totally failed
    trigger_error( "MagpieRSS: " . $errormsg );

    return false;
} // end fetch_rss()
202 | ||
/*=======================================================================*\
    Function:   _fetch_remote_file
    Purpose:    retrieve an arbitrary remote file
    Input:      $url     - url of the remote file
                $headers - (optional) array of header name => value to
                           send along with the request
    Output:     the Snoopy client object holding the HTTP response
                (see Snoopy.class.inc), or false if the fetch failed
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = array()) {
    // Snoopy is an HTTP client in PHP
    $client = new Snoopy();
    $client->agent        = MAGPIE_USER_AGENT;
    $client->read_timeout = MAGPIE_FETCH_TIME_OUT;

    // only override the client's raw headers when there is something to send
    if ( !empty($headers) ) {
        $client->rawheaders = $headers;
    }

    if ( $client->fetch($url) ) {
        return $client;
    }

    return false;
}
227 | ||
/*=======================================================================*\
    Function:   _response_to_rss
    Purpose:    parse an HTTP response object into an RSS object
    Input:      $resp - an HTTP response object (see Snoopy); its
                        ->results and ->headers members are read
                $hash - (optional) md5 of a previously seen body; when it
                        equals the md5 of $resp->results nothing is parsed
    Output:     parsed RSS object (see rss_parse), or false when the body
                is unchanged or could not be parsed
\*=======================================================================*/
function _response_to_rss ($resp, $hash=false) {

    // Body unchanged since the caller last saw it.  The comparison is
    // strict so two different numeric-looking hashes ("0e...") can never
    // compare equal.
    if ( is_string($hash) && md5($resp->results) === $hash ) {
        // NOTE(review): this writes straight to the output stream; kept
        // as-is because callers may rely on it.
        echo "zadne nove data";
        return false;
    }

    $rss = new MagpieRSS( $resp->results );

    // if RSS parsed successfully
    if ( $rss and !$rss->ERROR ) {

        // find ETag and Last-Modified
        foreach ( (array) $resp->headers as $h ) {
            // skip lines that are not "Name: value" (e.g. a status line)
            if ( strpos($h, ': ') === false ) {
                continue;
            }

            list($field, $val) = explode(': ', $h, 2);
            // drop surrounding whitespace, including any line terminator
            // the HTTP client may have left on the header line
            $val = trim($val);

            // header names are case-insensitive
            if ( strcasecmp($field, 'ETag') == 0 ) {
                $rss->etag = $val;
            }
            elseif ( strcasecmp($field, 'Last-Modified') == 0 ) {
                $rss->last_modified = $val;
            }
        }

        return $rss;
    }

    // else construct error message (the url is not known at this level)
    $errormsg = "MagpieRSS: failed to parse RSS response.";

    if ( $rss and $rss->ERROR ) {
        $errormsg .= " " . $rss->ERROR;
    }
    trigger_error($errormsg);

    return false;
}
270 | ||
/*=======================================================================*\
    Function:   init
    Purpose:    give every Magpie constant a default value, leaving any
                constant the calling script already defined untouched.
                Runs its body only once per request.
\*=======================================================================*/
function init () {
    // the marker constant turns every later call into a no-op
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', 1);

    // constant name => default value
    $defaults = array(
        'MAGPIE_CACHE_ON'         => 1,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,   // one hour, in seconds
        'MAGPIE_CACHE_FRESH_ONLY' => 0,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_USER_AGENT'       => 'MagpieRSS/0.3 (+http://magpierss.sf.net)',
        'MAGPIE_FETCH_TIME_OUT'   => 5,       // read timeout of 5
    );

    foreach ( $defaults as $constant => $fallback ) {
        // user overrides win: only fill in what is still missing
        if ( !defined($constant) ) {
            define($constant, $fallback);
        }
    }
}
312 | ||
313 | // NOTE: the following code should really be in Snoopy, or at least | |
314 | // somewhere other than rss_fetch! | |
315 | ||
316 | /*=======================================================================*\ | |
317 | HTTP STATUS CODE PREDICATES | |
318 | These functions attempt to classify an HTTP status code | |
319 | based on RFC 2616 and RFC 2518. | |
320 | ||
321 | All of them take an HTTP status code as input, and return true or false | |
322 | ||
323 | All this code is adapted from LWP's HTTP::Status. | |
324 | \*=======================================================================*/ | |
325 | ||
326 | ||
/*=======================================================================*\
    Function:   is_info
    Purpose:    true when the status code is Informational (100 to 199)
\*=======================================================================*/
function is_info ($sc) {
    // lower bound first; the upper bound only matters once it holds
    return ($sc >= 100) ? ($sc < 200) : false;
}
334 | ||
/*=======================================================================*\
    Function:   is_success
    Purpose:    true when the status code is Successful (200 to 299)
\*=======================================================================*/
function is_success ($sc) {
    // lower bound first; the upper bound only matters once it holds
    return ($sc >= 200) ? ($sc < 300) : false;
}
342 | ||
/*=======================================================================*\
    Function:   is_redirect
    Purpose:    true when the status code is a Redirection (300 to 399)
\*=======================================================================*/
function is_redirect ($sc) {
    // lower bound first; the upper bound only matters once it holds
    return ($sc >= 300) ? ($sc < 400) : false;
}
350 | ||
/*=======================================================================*\
    Function:   is_error
    Purpose:    true when the status code is an Error, client or server
                (400 to 599)
\*=======================================================================*/
function is_error ($sc) {
    // lower bound first; the upper bound only matters once it holds
    return ($sc >= 400) ? ($sc < 600) : false;
}
358 | ||
/*=======================================================================*\
    Function:   is_client_error
    Purpose:    true when the status code is a client-side Error
                (400 to 499)
\*=======================================================================*/
function is_client_error ($sc) {
    // lower bound first; the upper bound only matters once it holds
    return ($sc >= 400) ? ($sc < 500) : false;
}
366 | ||
/*=======================================================================*\
    Function:   is_server_error
    Purpose:    true when the status code is a server-side Error
                (500 to 599)
\*=======================================================================*/
function is_server_error ($sc) {
    // lower bound first; the upper bound only matters once it holds
    return ($sc >= 500) ? ($sc < 600) : false;
}
374 | ||
375 | ?> |