Kyberia v1.0
[mirrors/Kyberia-bloodline.git] / inc / rss / rss_fetch.inc
1 <?php
2 /*
3 * Project: MagpieRSS: a simple RSS integration tool
4 * File: rss_fetch.inc, a simple functional interface
5 to fetching and parsing RSS files, via the
6 function fetch_rss()
7 * Author: Kellan Elliott-McCrea <kellan@protest.net>
8 * Version: 0.3
9 * License: GPL
10 *
11 * The latest version of MagpieRSS can be obtained from:
12 * http://magpierss.sourceforge.net
13 *
14 * For questions, help, comments, discussion, etc., please join the
15 * Magpie mailing list:
16 * magpierss-general@lists.sourceforge.net
17 *
18 */
19
// Setup MAGPIE_DIR for use on hosts that don't include
// the current path in include_path.
// with thanks to rajiv and smarty
//
// Every constant below is only defined when it does not exist yet:
// define() on an already-defined constant raises an error-level message
// and leaves the old value in place, which would happen if the host
// script (or another library loaded on the same page) had already
// defined DIR_SEP or MAGPIE_EXTLIB.
if (!defined('DIR_SEP')) {
    define('DIR_SEP', DIRECTORY_SEPARATOR);
}

if (!defined('MAGPIE_DIR')) {
    define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);
}

require_once( MAGPIE_DIR . 'rss_parse.inc' );
require_once( MAGPIE_DIR . 'rss_cache.inc' );

// for including 3rd party libraries
if (!defined('MAGPIE_EXTLIB')) {
    define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
}
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
35
36
37 /*
38 * CONSTANTS - redefine these in your script to change the
39 * behaviour of fetch_rss(). Currently, most options affect the cache.
40 *
41 * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
42 * For me a built in cache was essential to creating a "PHP-like"
43 * feel to Magpie, see rss_cache.inc for rationale
44 *
45 *
46 * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
47 * This should be a location that the webserver can write to. If this
48 * directory does not already exist Magpie will try to be smart and create
49 * it. This will often fail for permissions reasons.
50 *
51 *
52 * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
53 *
54 *
55 * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
56 * instead of returning stale object?
57 *
58 * MAGPIE_DEBUG - Display debugging notices?
59 *
60 */
61
62
/*=======================================================================*\
    Function:   fetch_rss
    Purpose:    return the parsed RSS object for the given url and
                maintain the cache
    Input:      $url  - url of the RSS file
                $hash - (optional) md5 hash of a feed body the caller has
                        already seen.  It is only forwarded to
                        _response_to_rss() when the cache is disabled;
                        with the cache on it is ignored.
    Output:     parsed RSS object (see rss_parse.inc), or false on failure

    NOTES ON CACHING:
    If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache
    and return the cached object straight away on a fresh HIT.

    NOTES ON RETRIEVING REMOTE FILES:
    When the cached object is STALE and carries an ETag and/or a
    Last-Modified value, a conditional GET is sent (If-None-Match /
    If-Modified-Since).  On a 304 the cached object is stored again and
    returned.

    NOTES ON FAILED REQUESTS:
    If the fetch or the parse fails, the cached version is returned if it
    exists and MAGPIE_CACHE_FRESH_ONLY is off.  Otherwise an error is
    triggered and false is returned.
\*=======================================================================*/
function fetch_rss ($url, $hash=false) {
    // initialize constants
    init();

    // if cache is disabled
    if ( !MAGPIE_CACHE_ON ) {
        // fetch file, and parse it
        $resp = _fetch_remote_file( $url, array() );

        // $resp is false when the fetch itself failed, so it has to be
        // tested before its status code is read
        if ( $resp and is_success( $resp->status ) ) {
            return _response_to_rss( $resp, $hash );
        }

        trigger_error("MagpieRSS: failed to fetch $url. Cache is off");
        return false;
    }

    // else cache is ON
    // Flow
    // 1. check cache
    // 2. if there is a hit, make sure its fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if ( MAGPIE_DEBUG and $cache->ERROR ) {
        trigger_error($cache->ERROR, E_USER_WARNING);
    }

    $cache_status    = '';       // response of check_cache
    $request_headers = array();  // HTTP headers to send with fetch
    $rss             = false;    // parsed RSS object (cached copy, if any)
    $errormsg        = '';       // errors, if any

    if ( !$cache->ERROR ) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $url );
    }

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status == 'HIT' ) {
        $rss = $cache->get( $url );
        if ( $rss ) {
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1 ) {
                trigger_error("Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get

    // setup headers; $rss may be false if the cache entry could not be
    // read back, in which case an unconditional request is made
    if ( $cache_status == 'STALE' ) {
        $rss = $cache->get( $url );
        if ( $rss ) {
            if ( !empty($rss->etag) ) {
                $request_headers['If-None-Match'] = $rss->etag;
            }
            if ( !empty($rss->last_modified) ) {
                // the validator header for Last-Modified is
                // If-Modified-Since ("If-Last-Modified" does not exist)
                $request_headers['If-Modified-Since'] = $rss->last_modified;
            }
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if ( $resp ) {
        if ( $resp->status == '304' ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1 ) {
                $msg = "Got 304 for $url";
                trigger_error($msg, E_USER_NOTICE);
            }
            if ( $rss ) {
                // store the object again so the cache entry is renewed
                $cache->set( $url, $rss );
                return $rss;
            }
            $errormsg = "Got 304 for $url, but there is no cached object.";
        }
        elseif ( is_success( $resp->status ) ) {
            // keep the stale object in $rss so it can still be returned
            // below when the new document fails to parse
            $fresh = _response_to_rss( $resp );
            if ( $fresh ) {
                if ( MAGPIE_DEBUG > 1 ) {
                    trigger_error("Fetch successful", E_USER_NOTICE);
                }
                // add object to cache
                $cache->set( $url, $fresh );
                return $fresh;
            }
            $errormsg = "Failed to parse RSS file fetched from $url.";
        }
        else {
            $errormsg = "Failed to fetch $url. ";
            if ( $resp->error ) {
                $errormsg .= "Error: " . $resp->error;
            }
            else {
                $errormsg .= "Response: " . $resp->response_code;
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return cached object, unless the caller asked for
    // fresh objects only
    if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            trigger_error("Returning STALE object for $url", E_USER_NOTICE);
        }
        return $rss;
    }

    // else we totally failed
    trigger_error( "MagpieRSS: " . $errormsg );

    return false;
} // end fetch_rss()
202
/*=======================================================================*\
    Function:   _fetch_remote_file
    Purpose:    retrieve an arbitrary remote file
    Input:      $url     - url of the remote file
                $headers - (optional) array of extra request headers,
                           header name => value; defaults to none
    Output:     the Snoopy client object holding the HTTP response
                (see Snoopy.class.inc), or false if the fetch failed
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = array()) {
    // Snoopy is an HTTP client in PHP
    $client = new Snoopy();
    $client->agent        = MAGPIE_USER_AGENT;
    $client->read_timeout = MAGPIE_FETCH_TIME_OUT;

    // only override the client's own headers when there is something to send
    if ( !empty($headers) ) {
        $client->rawheaders = $headers;
    }

    if ( $client->fetch($url) ) {
        return $client;
    }

    return false;
}
227
/*=======================================================================*\
    Function:   _response_to_rss
    Purpose:    parse an HTTP response object into an RSS object
    Input:      $resp - an HTTP response object (see Snoopy); its
                        ->results (body) and ->headers (array of raw
                        header lines) are read
                $hash - (optional) md5 hex digest of a body the caller has
                        already processed; when the new body has the same
                        digest nothing is parsed
    Output:     parsed RSS object (see rss_parse), with ->etag and
                ->last_modified filled in from the response headers when
                present; false if the body is unchanged or fails to parse
\*=======================================================================*/
function _response_to_rss ($resp, $hash=false) {

    // Strict comparison on purpose: with == two digests of the form
    // "0e<digits>" are both numeric strings and compare equal as floats.
    if ( is_string($hash) and md5($resp->results) === $hash ) {
        // unchanged body; the message below reads roughly "no new data"
        echo "zadne nove data";
        return false;
    }

    $rss = new MagpieRSS( $resp->results );

    // if RSS parsed successfully
    if ( $rss and !$rss->ERROR ) {

        // find ETag and Last-Modified
        foreach ( $resp->headers as $h ) {
            $parts = explode(':', $h, 2);
            if ( count($parts) < 2 ) {
                // not a "name: value" line (e.g. the HTTP status line)
                continue;
            }

            // trim so that neither padding nor a trailing line terminator
            // ends up in the stored value (it is sent back later as a
            // request header by fetch_rss)
            $field = trim($parts[0]);
            $val   = trim($parts[1]);

            // header field names are case-insensitive
            if ( strcasecmp($field, 'ETag') == 0 ) {
                $rss->etag = $val;
            }
            elseif ( strcasecmp($field, 'Last-Modified') == 0 ) {
                $rss->last_modified = $val;
            }
        }

        return $rss;
    }

    // else construct error message
    $errormsg = "MagpieRSS: failed to parse RSS file.";

    if ( $rss ) {
        $errormsg .= " " . $rss->ERROR;
    }
    trigger_error($errormsg);

    return false;
}
270
/*=======================================================================*\
    Function:   init
    Purpose:    give every MAGPIE_* option a default value, leaving any
                constant the calling script has already defined untouched.
                Runs its body once only; later calls return immediately
                because MAGPIE_INITALIZED is then defined.
\*=======================================================================*/
function init () {
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', 1);

    // option name => default used when the script did not define it
    $defaults = array(
        'MAGPIE_CACHE_ON'         => 1,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,    // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => 0,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_USER_AGENT'       => 'MagpieRSS/0.3 (+http://magpierss.sf.net)',
        'MAGPIE_FETCH_TIME_OUT'   => 5,        // 5 second timeout
    );

    foreach ( $defaults as $option => $fallback ) {
        if ( !defined($option) ) {
            define($option, $fallback);
        }
    }
}
312
313 // NOTE: the following code should really be in Snoopy, or at least
314 // somewhere other than rss_fetch!
315
316 /*=======================================================================*\
317 HTTP STATUS CODE PREDICATES
318 These functions attempt to classify an HTTP status code
319 based on RFC 2616 and RFC 2518.
320
321 All of them take an HTTP status code as input, and return true or false
322
323 All this code is adapted from LWP's HTTP::Status.
324 \*=======================================================================*/
325
326
/*=======================================================================*\
    Function:   is_info
    Purpose:    true when the status code is Informational (100-199)
\*=======================================================================*/
function is_info ($sc) {
    if ( !($sc >= 100) ) {
        return false;
    }
    return $sc < 200;
}
334
/*=======================================================================*\
    Function:   is_success
    Purpose:    true when the status code is Successful (200-299)
\*=======================================================================*/
function is_success ($sc) {
    if ( !($sc >= 200) ) {
        return false;
    }
    return $sc < 300;
}
342
/*=======================================================================*\
    Function:   is_redirect
    Purpose:    true when the status code is a Redirection (300-399)
\*=======================================================================*/
function is_redirect ($sc) {
    if ( !($sc >= 300) ) {
        return false;
    }
    return $sc < 400;
}
350
/*=======================================================================*\
    Function:   is_error
    Purpose:    true when the status code is an Error, client or server
                (400-599)
\*=======================================================================*/
function is_error ($sc) {
    if ( !($sc >= 400) ) {
        return false;
    }
    return $sc < 600;
}
358
/*=======================================================================*\
    Function:   is_client_error
    Purpose:    true when the status code is a client Error (400-499)
\*=======================================================================*/
function is_client_error ($sc) {
    if ( !($sc >= 400) ) {
        return false;
    }
    return $sc < 500;
}
366
/*=======================================================================*\
    Function:   is_server_error
    Purpose:    true when the status code is a server Error (500-599)
\*=======================================================================*/
function is_server_error ($sc) {
    if ( !($sc >= 500) ) {
        return false;
    }
    return $sc < 600;
}
374
375 ?>
This page took 0.391396 seconds and 4 git commands to generate.