3 * Project: MagpieRSS: a simple RSS integration tool
4 * File: rss_parse.inc includes code for parsing
5 * RSS, and returning an RSS object
6 * Author: Kellan Elliott-McCrea <kellan@protest.net>
10 * The latest version of MagpieRSS can be obtained from:
11 * http://magpierss.sourceforge.net
13 * For questions, help, comments, discussion, etc., please join the
14 * Magpie mailing list:
15 * magpierss-general@lists.sourceforge.net
21 * NOTES ON RSS PARSING PHILOSOPHY (moderately important):
22 * MagpieRSS parses all versions of RSS, with a few limitations (mod_content and
23 * mod_taxonomy support is shaky), into a simple object with 2 fields:
24 * the hash 'channel', and the array 'items'.
26 * MagpieRSS is a forgiving and inclusive parser. It currently makes no
27 * attempt to enforce the validity of an RSS feed. It will include any
28 * properly formatted tags it finds, allowing you to mix RSS 0.93 with RSS
29 * 1.0, and with tags of your own imagining. This sort of witches' brew is a
30 * bad idea! But Magpie is less pedantic than I am.
32 * RSS validators are readily available on the web at:
33 * http://feeds.archive.org/validator/
34 * http://www.ldodds.com/rss_validator/1.0/validator.html
39 * EXAMPLE PARSE RESULTS:
41 * Magpie tries to parse RSS into easy-to-use PHP data structures.
43 * For example, Magpie, on encountering this RSS 1.0 item entry:
45 * <item rdf:about="http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257">
46 * <title>Weekly Peace Vigil</title>
47 * <link>http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257</link>
48 * <description>Wear a white ribbon</description>
49 * <dc:subject>Peace</dc:subject>
50 * <ev:startdate>2002-06-01T11:00:00</ev:startdate>
51 * <ev:location>Northampton, MA</ev:location>
52 * <ev:enddate>2002-06-01T12:00:00</ev:enddate>
53 * <ev:type>Protest</ev:type>
56 * would transform it into the following associative array, and push it
57 * onto the array $rss->items
60 * title => 'Weekly Peace Vigil',
62 * 'http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257',
63 * description => 'Wear a white ribbon',
68 * startdate => '2002-06-01T11:00:00',
69 * enddate => '2002-06-01T12:00:00',
71 * location => 'Northampton, MA'
79 * Hybrid parser, and object. (probably a bad idea! :)
83 * $some_rss = "<?xml version="1.0"......
85 * $rss = new MagpieRSS( $some_rss );
87 * // print rss channel title
88 * echo $rss->channel['title'];
90 * // print the title of each item
91 * foreach ($rss->items as $item ) {
95 * see rss_fetch.inc for a simpler interface
100 var $current_item = array(); // item currently being parsed
101 var $items = array(); // collection of parsed items
102 var $channel = array(); // hash of channel fields
103 var $textinput = array(); // fields collected while inside a <textinput> element
104 var $image = array(); // fields collected while inside an <image> element
106 var $parent_field = array('RDF'); // stack of enclosing container elements, innermost at index 0
107 var $current_field = ''; // lowercased name (prefix removed) of the element currently open, '' when none
108 var $current_namespace = false; // namespace prefix of the current element ('rdf' is ignored), or false
112 /*======================================================================*\
114 Purpose: Constructor, sets up XML parser, parses source,
115 and populates object.
116 Input: String containing the RSS to be parsed
117 \*======================================================================*/
/**
 * Constructor: creates an XML parser, feeds it $source and lets the
 * start_element / end_element / cdata handlers of this object fill
 * $this->channel, $this->items, $this->textinput and $this->image.
 *
 * When xml_parse() reports failure, the parser's error string is handed
 * to $this->error() as "RSS parse failure: ...".
 *
 * @param string $source the RSS document to be parsed
 */
function __construct ($source) {
    $this->parser = xml_parser_create();

    # pass in parser, and this object. xml_set_object() declares its object
    # argument by reference itself, so no call-time "&" is used (call-time
    # pass-by-reference is a fatal error since PHP 5.4)
    xml_set_object( $this->parser, $this );
    xml_set_element_handler( $this->parser, 'start_element', 'end_element' );
    xml_set_character_data_handler( $this->parser, 'cdata' );

    $status = xml_parse( $this->parser, $source );

    # only report an error when the parser says the parse failed
    if ( ! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        $errormsg  = xml_error_string( $errorcode );
        $this->error( "RSS parse failure: $errormsg" );
    }

    xml_parser_free( $this->parser );
}

/**
 * PHP 4 style constructor. PHP 8 no longer treats a method named after
 * the class as a constructor, so the work lives in __construct(); this
 * wrapper keeps code that calls MagpieRSS() explicitly working.
 *
 * @param string $source the RSS document to be parsed
 */
function MagpieRSS ($source) {
    $this->__construct( $source );
}
/**
 * XML parser callback for an opening tag.
 *
 * Lowercases the tag name, splits an optional "prefix:" off it, records
 * the result as the current field / namespace, and pushes container
 * elements onto the front of $this->parent_field so that cdata() knows
 * where text belongs.
 *
 * @param resource $p       the XML parser (unused)
 * @param string   $element tag name as reported by the parser
 * @param array    $attrs   tag attributes (unused)
 */
function start_element ($p, $element, &$attrs) {
    $element   = strtolower( $element );
    $namespace = false;

    # check for a namespace, and split if found. A colon in first position
    # makes strpos() return 0, so such a name is left untouched.
    # explode() replaces split(), which was removed in PHP 7.
    if ( strpos( $element, ':' ) ) {
        list($namespace, $element) = explode( ':', $element, 2 );
    }

    $this->current_field = $element;

    # the rdf prefix is never recorded as a namespace
    if ( $namespace and $namespace != 'rdf' ) {
        $this->current_namespace = $namespace;
    }

    # container elements become the innermost parent for following text
    $containers = array( 'channel', 'items', 'item', 'textinput', 'image' );
    if ( in_array( $element, $containers, true ) ) {
        array_unshift( $this->parent_field, $element );
    }
}
/**
 * XML parser callback for a closing tag.
 *
 * A closing "item" stores the item collected so far and starts a fresh
 * one; any closing container (item, channel, items, textinput, image)
 * removes the innermost entry of $this->parent_field. The current field
 * and namespace are cleared for every closing tag.
 *
 * NOTE(review): the comparison uses the full lowercased tag name, while
 * start_element() compares the name with its prefix removed, so a
 * prefixed container is pushed there but not popped here. Confirm
 * whether that is intended.
 *
 * @param resource $p       the XML parser (unused)
 * @param string   $element tag name as reported by the parser
 */
function end_element ($p, $element) {
    $tag = strtolower( $element );

    switch ( $tag ) {
        case 'item':
            # finished item: keep it and reset the scratch buffer
            $this->items[]      = $this->current_item;
            $this->current_item = array();
            # deliberate fall-through, an item is a container as well
        case 'channel':
        case 'items':
        case 'textinput':
        case 'image':
            array_shift( $this->parent_field );
            break;
    }

    $this->current_namespace = false;
    $this->current_field     = '';
}
/**
 * XML parser callback for character data.
 *
 * Appends $text to the entry for the current field in the array that
 * belongs to the innermost container: $this->channel, $this->current_item,
 * $this->textinput or $this->image. When a namespace is active the text
 * goes one level deeper, under [namespace][field].
 *
 * Text is ignored when no field is open, when the open field is the
 * container itself, or when the innermost container is none of the four
 * listed above.
 *
 * @param resource $p    the XML parser (unused)
 * @param string   $text chunk of character data; may be one of several
 *                       chunks for the same element, hence the appending
 */
function cdata ($p, $text) {
    $parent = $this->parent_field[0];
    $field  = $this->current_field;

    # skip item, channel, items first time we see them
    if ( ! $field or $parent == $field ) {
        return;
    }

    # container name => property that collects its fields
    $targets = array(
        'channel'   => 'channel',
        'item'      => 'current_item',
        'textinput' => 'textinput',
        'image'     => 'image'
    );
    if ( ! isset( $targets[$parent] ) ) {
        return;
    }
    $property  = $targets[$parent];
    $namespace = $this->current_namespace;

    # create the entry before appending, so the first chunk of a field
    # does not raise an undefined index notice
    if ( $namespace ) {
        if ( ! isset( $this->{$property}[$namespace][$field] ) ) {
            $this->{$property}[$namespace][$field] = '';
        }
        $this->{$property}[$namespace][$field] .= $text;
    }
    else {
        if ( ! isset( $this->{$property}[$field] ) ) {
            $this->{$property}[$field] = '';
        }
        $this->{$property}[$field] .= $text;
    }
}
/**
 * Records an error message in $this->ERROR and reports it.
 *
 * With MAGPIE_DEBUG defined and true the message is raised through
 * trigger_error() at level $lvl; otherwise it is written with
 * error_log(). An undefined MAGPIE_DEBUG counts as "off".
 *
 * @param string $errormsg text of the error
 * @param int    $lvl      level passed to trigger_error()
 */
function error ($errormsg, $lvl=E_USER_ERROR) {
    // append PHP's error message if track_errors enabled; empty() keeps
    // this silent when the variable does not exist (track_errors off, or
    // PHP 8 where the feature was removed)
    if ( ! empty( $php_errormsg ) ) {
        $errormsg .= " ($php_errormsg)";
    }

    $this->ERROR = $errormsg;

    // reading an undefined constant throws an Error in PHP 8, so check first
    if ( defined( 'MAGPIE_DEBUG' ) and MAGPIE_DEBUG ) {
        trigger_error( $errormsg, $lvl );
    }
    else {
        error_log( $errormsg, 0 );
    }
}
248 /*======================================================================*\
249 EVERYTHING BELOW HERE IS FOR DEBUGGING PURPOSES
250 \*======================================================================*/
/**
 * Debugging aid: echoes "<li>" followed by the show_item() output for
 * every entry of $this->items.
 */
function show_list () {
    $entries = $this->items;

    foreach ( $entries as $entry ) {
        echo "<li>";
        # show_item() prints the entry itself; its return value is echoed too
        echo $this->show_item( $entry );
    }
}
/**
 * Debugging aid: echoes every channel field as "<li> key: value".
 *
 * Namespaced channel fields are stored as nested arrays; they are listed
 * the same way show_item() lists them instead of being printed as the
 * literal string "Array". Values are echoed unescaped.
 */
function show_channel () {
    # foreach replaces while/list/each; each() was removed in PHP 8
    foreach ( $this->channel as $key => $value ) {
        if ( is_array( $value ) ) {
            echo "<br><b>$key</b>";
            foreach ( $value as $ns_key => $ns_value ) {
                echo "<li>$ns_key: $ns_value";
            }
        }
        else {
            echo "<li> $key: $value";
        }
    }
}
/**
 * Debugging aid: echoes one parsed item.
 *
 * Prints "item: <title>", then every field as "<li> key: value". A field
 * holding an array (a namespace) is printed as a bold heading followed by
 * its own "<li>key: value" entries. Values are echoed unescaped.
 *
 * @param array $item one entry of $this->items
 */
function show_item ($item) {
    # an item without a title prints an empty title instead of a notice
    $title = isset( $item['title'] ) ? $item['title'] : '';
    echo "item: $title";

    # foreach replaces while/list/each; each() was removed in PHP 8
    foreach ( $item as $key => $value ) {
        if ( is_array( $value ) ) {
            echo "<br><b>$key</b>";
            foreach ( $value as $ns_key => $ns_value ) {
                echo "<li>$ns_key: $ns_value";
            }
        }
        else {
            echo "<li> $key: $value";
        }
    }
}
287 /*======================================================================*\
288 END DEBUGGING FUNCTIONS
289 \*======================================================================*/