3 * Project: MagpieRSS: a simple RSS integration tool
4 * File: rss_fetch.inc, a simple functional interface
5 to fetching and parsing RSS files, via the
7 * Author: Kellan Elliott-McCrea <kellan@protest.net>
11 * The latest version of MagpieRSS can be obtained from:
12 * http://magpierss.sourceforge.net
14 * For questions, help, comments, discussion, etc., please join the
15 * Magpie mailing list:
16 * magpierss-general@lists.sourceforge.net
// Setup MAGPIE_DIR for use on hosts that don't include
// the current path in include_path.
// with thanks to rajiv and smarty
//
// Each constant is only defined when the host application has not
// already defined it, so a pre-existing DIR_SEP / MAGPIE_DIR /
// MAGPIE_EXTLIB is respected instead of raising a redefinition warning.
if (!defined('DIR_SEP')) {
    define('DIR_SEP', DIRECTORY_SEPARATOR);
}

if (!defined('MAGPIE_DIR')) {
    define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);
}

require_once( MAGPIE_DIR . 'rss_parse.inc' );
require_once( MAGPIE_DIR . 'rss_cache.inc' );

// for including 3rd party libraries
if (!defined('MAGPIE_EXTLIB')) {
    define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
}
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
38 * CONSTANTS - redefine these in your script to change the
39 * behaviour of fetch_rss(). Currently, most options affect the cache
41 * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
42 * For me a built in cache was essential to creating a "PHP-like"
43 * feel to Magpie, see rss_cache.inc for rationale
46 * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
47 * This should be a location that the webserver can write to. If this
48 * directory does not already exist Magpie will try to be smart and create
49 * it. This will often fail for permissions reasons.
52 * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
55 * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
56 * instead of returning stale object?
58 * MAGPIE_DEBUG - Display debugging notices?
/*=======================================================================*\
    Function:   fetch_rss
    Purpose:    return RSS object for the given url
    Input:      url of RSS file
                hash (optional) - md5 of a feed body the caller already
                holds; it is forwarded to _response_to_rss() only when
                the cache is off
    Output:     parsed RSS object (see rss_parse.inc), or false on failure

    NOTES ON CACHING:
    If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.

    NOTES ON RETRIEVING REMOTE FILES:
    When a stale cached object carries an ETag and/or Last-Modified value,
    fetch_rss sends them as If-None-Match / If-Modified-Since and returns
    the cached object upon receiving a 304.

    NOTES ON FAILED REQUESTS:
    If there is an HTTP error while fetching an RSS object, the cached
    version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY
    is off)
\*=======================================================================*/
function fetch_rss ($url, $hash=false) {
    // initialize constants
    init();

    // if cache is disabled
    if ( !MAGPIE_CACHE_ON ) {
        // fetch file, and parse it
        // (headers passed explicitly: this call sends no conditional headers)
        $resp = _fetch_remote_file( $url, array() );
        if ( $resp and is_success( $resp->status ) ) {
            return _response_to_rss( $resp, $hash );
        }

        trigger_error("MagpieRSS: failed to fetch $url. Cache is off");
        return false;
    }

    // else cache is ON
    // Flow
    // 1. check cache
    // 2. if there is a hit, make sure its fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if ( MAGPIE_DEBUG and $cache->ERROR ) {
        trigger_error($cache->ERROR, E_USER_WARNING);
    }

    // Explicit initial values: none of these may be read while undefined.
    $cache_status    = '';        // response of check_cache
    $request_headers = array();   // HTTP headers to send with fetch
    $rss             = false;     // parsed RSS object (cached copy, if any)
    $errormsg        = '';        // errors, if any

    if ( !$cache->ERROR ) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $url );
    }

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status == 'HIT' ) {
        $rss = $cache->get( $url );
        if ( $rss ) {
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1 ) {
                trigger_error("Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get

    // setup headers
    if ( $cache_status == 'STALE' ) {
        $rss = $cache->get( $url );
        // $rss may be falsy if the cache read failed; each validator is
        // sent on its own because a server may supply only one of them.
        if ( $rss ) {
            if ( !empty($rss->etag) ) {
                $request_headers['If-None-Match'] = $rss->etag;
            }
            if ( !empty($rss->last_modified) ) {
                $request_headers['If-Modified-Since'] = $rss->last_modified;
            }
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if ( $resp ) {
        if ( $resp->status == '304' and $rss ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1 ) {
                $msg = "Got 304 for $url";
                trigger_error($msg, E_USER_NOTICE);
            }
            // TODO: not sure if I should re-set the cache here or not
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            // Parse into a separate variable so a parse failure does not
            // discard the stale copy held in $rss.
            // NOTE(review): $hash is not forwarded on this path, matching
            // the call as originally written - confirm that is intended.
            $fresh = _response_to_rss( $resp );
            if ( $fresh ) {
                if ( MAGPIE_DEBUG > 1 ) {
                    trigger_error("Fetch successful", E_USER_NOTICE);
                }
                // add object to cache
                $cache->set( $url, $fresh );
                return $fresh;
            }
            $errormsg = "Failed to parse $url.";
        }
        else {
            $errormsg = "Failed to fetch $url. ";
            if ( $resp->error ) {
                $errormsg .= "Error: " . $resp->error;
            }
            else {
                $errormsg .= "Response: " . $resp->response_code;
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return cached object, unless only fresh data is acceptable
    if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            trigger_error("Returning STALE object for $url", E_USER_NOTICE);
        }
        return $rss;
    }

    // else we totally failed
    trigger_error( "MagpieRSS: " . $errormsg );

    return false;
} // end fetch_rss()
/*=======================================================================*\
    Function:   _fetch_remote_file
    Purpose:    retrieve an arbitrary remote file
    Input:      url of the remote file
                headers to send along with the request (optional,
                array of header name => value)
    Output:     an HTTP response object (see Snoopy.class.inc),
                or false when the Snoopy fetch() call reports failure
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = array()) {
    // Snoopy is an HTTP client in PHP
    $client = new Snoopy();
    $client->agent = MAGPIE_USER_AGENT;
    $client->read_timeout = MAGPIE_FETCH_TIME_OUT;

    // $headers defaults to an empty array so callers that have no extra
    // headers to send may pass the url alone.
    if ( !empty($headers) ) {
        $client->rawheaders = $headers;
    }

    if ( $client->fetch($url) ) {
        return $client;
    }

    return false;
}
/*=======================================================================*\
    Function:   _response_to_rss
    Purpose:    parse an HTTP response object into an RSS object
    Input:      an HTTP response object (see Snoopy)
                hash (optional) - md5 of a body the caller already has;
                when it equals the md5 of this response body nothing is
                parsed
    Output:     parsed RSS object (see rss_parse), or false
\*=======================================================================*/
function _response_to_rss ($resp, $hash=false) {
    // Strict comparison: a non-string $hash (the default false, true, null)
    // can never be mistaken for a match.
    if ( is_string($hash) and md5($resp->results) === $hash ) {
        echo "zadne nove data";   // Czech: "no new data"
        // NOTE(review): returning false (this function's "no RSS object"
        // value) is assumed for the unchanged-body case - confirm.
        return false;
    }

    $rss = new MagpieRSS( $resp->results );

    // if RSS parsed successfully
    if ( $rss and !$rss->ERROR ) {

        // find Etag, and Last-Modified
        foreach ( (array) $resp->headers as $h ) {
            // Lines without a "name: value" shape (e.g. the status line)
            // would leave $val undefined, so skip them.
            if ( strpos($h, ': ') === false ) {
                continue;
            }
            list($field, $val) = explode(': ', $h, 2);
            // Surrounding whitespace / line endings are not part of the value.
            $val = trim($val);

            // HTTP header names are case-insensitive.
            if ( strcasecmp($field, 'ETag') == 0 ) {
                $rss->etag = $val;
            }
            elseif ( strcasecmp($field, 'Last-Modified') == 0 ) {
                $rss->last_modified = $val;
            }
        }

        return $rss;
    }

    // else construct error message
    // (the url is not known inside this function, so it is not named)
    $errormsg = "MagpieRSS: failed to parse RSS file.";

    if ( $rss ) {
        $errormsg .= " " . $rss->ERROR;
    }
    trigger_error($errormsg);

    return false;
}
/*=======================================================================*\
    Function:   init
    Purpose:    setup constants with default values
                check for user overrides
    Notes:      runs its body once per request; any constant the calling
                script defined beforehand keeps the caller's value
\*=======================================================================*/
function init () {
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    // (sic) the constant name is kept as spelled: it is runtime-visible
    define('MAGPIE_INITALIZED', 1);

    $defaults = array(
        'MAGPIE_CACHE_ON'         => 1,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,   // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => 0,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_USER_AGENT'       => 'MagpieRSS/0.3 (+http://magpierss.sf.net)',
        'MAGPIE_FETCH_TIME_OUT'   => 5,       // 5 second timeout
    );

    // Only fill in what the user has not already defined.
    foreach ( $defaults as $name => $value ) {
        if ( !defined($name) ) {
            define($name, $value);
        }
    }
}
313 // NOTE: the following code should really be in Snoopy, or at least
314 // somewhere other than rss_fetch!
316 /*=======================================================================*\
317 HTTP STATUS CODE PREDICATES
318 These functions attempt to classify an HTTP status code
319 based on RFC 2616 and RFC 2518.
321 All of them take an HTTP status code as input, and return true or false
323 All this code is adapted from LWP's HTTP::Status.
324 \*=======================================================================*/
/*=======================================================================*\
    Function:   is_info
    Purpose:    return true if Informational status code (100-199)
    Input:      HTTP status code (integer or numeric string)
\*=======================================================================*/
function is_info ($sc) {
    // Normalise to int so a non-numeric string is never compared
    // character-by-character against the range bounds.
    $code = (int) $sc;
    return $code >= 100 && $code < 200;
}
/*=======================================================================*\
    Function:   is_success
    Purpose:    return true if Successful status code (200-299)
    Input:      HTTP status code (integer or numeric string)
\*=======================================================================*/
function is_success ($sc) {
    // Normalise to int so a non-numeric string is never compared
    // character-by-character against the range bounds.
    $code = (int) $sc;
    return $code >= 200 && $code < 300;
}
/*=======================================================================*\
    Function:   is_redirect
    Purpose:    return true if Redirection status code (300-399)
    Input:      HTTP status code (integer or numeric string)
\*=======================================================================*/
function is_redirect ($sc) {
    // Normalise to int so a non-numeric string is never compared
    // character-by-character against the range bounds.
    $code = (int) $sc;
    return $code >= 300 && $code < 400;
}
/*=======================================================================*\
    Function:   is_error
    Purpose:    return true if Error status code (400-599)
    Input:      HTTP status code (integer or numeric string)
\*=======================================================================*/
function is_error ($sc) {
    // Normalise to int so a non-numeric string is never compared
    // character-by-character against the range bounds.
    $code = (int) $sc;
    return $code >= 400 && $code < 600;
}
/*=======================================================================*\
    Function:   is_client_error
    Purpose:    return true if Error status code, and it's a client
                error (400-499)
    Input:      HTTP status code (integer or numeric string)
\*=======================================================================*/
function is_client_error ($sc) {
    // Normalise to int so a non-numeric string is never compared
    // character-by-character against the range bounds.
    $code = (int) $sc;
    return $code >= 400 && $code < 500;
}
/*=======================================================================*\
    Function:   is_server_error
    Purpose:    return true if Error status code, and it's a server
                error (500-599)
    Input:      HTTP status code (integer or numeric string)
\*=======================================================================*/
function is_server_error ($sc) {
    // Normalise to int so a non-numeric string is never compared
    // character-by-character against the range bounds.
    $code = (int) $sc;
    return $code >= 500 && $code < 600;
}