| 1 | <?php |
| 2 | /* |
| 3 | * Project: MagpieRSS: a simple RSS integration tool |
| 4 | * File: rss_parse.inc includes code for parsing |
| 5 | * RSS, and returning an RSS object |
| 6 | * Author: Kellan Elliott-McCrea <kellan@protest.net> |
| 7 | * Version: 0.3 |
| 8 | * License: GPL |
| 9 | * |
| 10 | * The latest version of MagpieRSS can be obtained from: |
| 11 | * http://magpierss.sourceforge.net |
| 12 | * |
| 13 | * For questions, help, comments, discussion, etc., please join the |
| 14 | * Magpie mailing list: |
| 15 | * magpierss-general@lists.sourceforge.net |
| 16 | * |
| 17 | */ |
| 18 | |
| 19 | |
| 20 | /* |
| 21 | * NOTES ON RSS PARSING PHILOSOPHY (moderately important): |
| 22 | * MagpieRSS parses all versions of RSS, with a few limitations (mod_content and |
| 23 | * mod_taxonomy support is shaky), into a simple object with 2 fields: |
| 24 | * the hash 'channel', and the array 'items'. |
| 25 | * |
| 26 | * MagpieRSS is a forgiving and inclusive parser. It currently makes no |
| 27 | * attempt to enforce the validity of an RSS feed. It will include any |
| 28 | * properly formatted tags it finds, allowing you to mix RSS 0.93 with RSS |
| 29 | * 1.0, with tags of your own imagining. This sort of witches' brew is a bad, |
| 30 | * bad idea! But Magpie is less pedantic than I am. |
| 31 | * |
| 32 | * RSS validators are readily available on the web at: |
| 33 | * http://feeds.archive.org/validator/ |
| 34 | * http://www.ldodds.com/rss_validator/1.0/validator.html |
| 35 | * |
| 36 | */ |
| 37 | |
| 38 | /* |
| 39 | * EXAMPLE PARSE RESULTS: |
| 40 | * |
| 41 | * Magpie tries to parse RSS into easy-to-use PHP data structures. |
| 42 | * |
| 43 | * For example, Magpie on encountering RSS 1.0 item entry: |
| 44 | * |
| 45 | * <item rdf:about="http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257"> |
| 46 | * <title>Weekly Peace Vigil</title> |
| 47 | * <link>http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257</link> |
| 48 | * <description>Wear a white ribbon</description> |
| 49 | * <dc:subject>Peace</dc:subject> |
| 50 | * <ev:startdate>2002-06-01T11:00:00</ev:startdate> |
| 51 | * <ev:location>Northampton, MA</ev:location> |
| 52 | * <ev:enddate>2002-06-01T12:00:00</ev:enddate> |
| 53 | * <ev:type>Protest</ev:type> |
| 54 | * </item> |
| 55 | * |
| 56 | * Would transform it into the following associative array, and push it |
| 57 | * onto the array $rss->items |
| 58 | * |
| 59 | * array( |
| 60 | * title => 'Weekly Peace Vigil', |
| 61 | * link => |
| 62 | * 'http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257', |
| 63 | * description => 'Wear a white ribbon', |
| 64 | * dc => array ( |
| 65 | * subject => 'Peace' |
| 66 | * ), |
| 67 | * ev => array ( |
| 68 | * startdate => '2002-06-01T11:00:00', |
| 69 | * enddate => '2002-06-01T12:00:00', |
| 70 | * type => 'Protest', |
| 71 | * location => 'Northampton, MA' |
| 72 | * ) |
| 73 | * ) |
| 74 | * |
| 75 | */ |
| 76 | |
class MagpieRSS {
    /*
     * Hybrid parser and result object.
     *
     * Constructing an instance parses the given RSS string immediately and
     * fills in the public fields declared below.
     *
     * Usage example:
     *
     *   $rss = new MagpieRSS( $some_rss_string );
     *
     *   // print the channel title
     *   echo $rss->channel['title'];
     *
     *   // print the title of each item
     *   foreach ( $rss->items as $item ) {
     *       echo $item['title'];
     *   }
     *
     * Elements carrying a namespace prefix other than "rdf" are stored one
     * level down, keyed by the lower-cased prefix, e.g. $item['dc']['subject'].
     *
     * When parsing fails the message is kept in $rss->ERROR.
     *
     * see rss_fetch.inc for a simpler interface
     */

    public $parser;                     // XML parser handle; null once parsing has finished

    public $current_item = array();     // item currently being parsed
    public $items        = array();     // collection of parsed items
    public $channel      = array();     // hash of channel fields
    public $textinput    = array();     // hash of textinput fields
    public $image        = array();     // hash of image fields

    // Stack of enclosing structural elements, innermost at index 0.
    public $parent_field      = array('RDF');
    public $current_field     = '';     // local name of the element being read
    public $current_namespace = false;  // prefix of the element being read, or false

    public $ERROR = "";                 // last error message, empty if none

    /**
     * Sets up the XML parser, parses $source and populates this object.
     *
     * @param string $source the RSS document to be parsed
     */
    public function __construct($source) {
        $this->parser = xml_parser_create();

        // Handlers are given as array callables bound to this object; this
        // replaces xml_set_object( $parser, &$this ), whose call-time
        // pass-by-reference syntax no longer compiles.
        xml_set_element_handler(
            $this->parser,
            array($this, 'start_element'),
            array($this, 'end_element')
        );
        xml_set_character_data_handler($this->parser, array($this, 'cdata'));

        $status = xml_parse($this->parser, $source);

        if (!$status) {
            $errorcode = xml_get_error_code($this->parser);
            $errormsg  = xml_error_string($errorcode);
            $this->error("RSS parse failure: $errormsg");
        }

        // Only resource-type handles need an explicit free; object handles
        // are released when the last reference to them goes away.
        if (is_resource($this->parser)) {
            xml_parser_free($this->parser);
        }
        $this->parser = null;
    }

    /**
     * Legacy (class-named) constructor, kept so existing code that calls
     * MagpieRSS() explicitly keeps working.
     *
     * @param string $source the RSS document to be parsed
     */
    public function MagpieRSS($source) {
        self::__construct($source);
    }

    /**
     * Tells whether an element name is one of the containers tracked on the
     * parent_field stack.
     *
     * @param string $element lower-cased element name
     * @return bool
     */
    private function is_structural($element) {
        return in_array(
            $element,
            array('channel', 'items', 'item', 'textinput', 'image'),
            true
        );
    }

    /**
     * XML start-tag handler: records the current field / namespace and
     * pushes structural elements onto the parent_field stack.
     *
     * @param mixed  $p       parser handle (unused)
     * @param string $element element name as reported by the parser
     * @param array  $attrs   element attributes (unused)
     */
    public function start_element($p, $element, $attrs) {
        $element = strtolower($element);

        // check for a namespace prefix, and split if found; a colon in the
        // very first position is not treated as a prefix separator
        $namespace = false;
        $colon     = strpos($element, ':');
        if ($colon !== false && $colon > 0) {
            list($namespace, $element) = explode(':', $element, 2);
        }

        $this->current_field = $element;
        if ($namespace !== false && $namespace !== 'rdf') {
            $this->current_namespace = $namespace;
        }

        // Only un-prefixed containers are pushed: end_element() matches on
        // the full (prefixed) name, so pushing e.g. <content:items> here
        // would leave an entry on the stack that is never popped.
        if ($namespace === false && $this->is_structural($element)) {
            array_unshift($this->parent_field, $element);
        }
    }

    /**
     * XML end-tag handler: closes structural elements and, for </item>,
     * moves the finished item into $items.
     *
     * @param mixed  $p       parser handle (unused)
     * @param string $element element name as reported by the parser
     */
    public function end_element($p, $element) {
        $element = strtolower($element);

        if ($this->is_structural($element)) {
            if ($element === 'item') {
                $this->items[]      = $this->current_item;
                $this->current_item = array();
            }
            array_shift($this->parent_field);
        }

        $this->current_field     = '';
        $this->current_namespace = false;
    }

    /**
     * XML character-data handler: appends text to the field currently being
     * read, inside whichever container is on top of the parent_field stack.
     *
     * @param mixed  $p    parser handle (unused)
     * @param string $text chunk of character data; one element's text may
     *                     arrive in several chunks, hence the appending
     */
    public function cdata($p, $text) {
        $parent = isset($this->parent_field[0]) ? $this->parent_field[0] : '';

        // skip text that sits directly inside a container (item, channel,
        // ...) or between elements
        if (!$this->current_field || $parent === $this->current_field) {
            return;
        }

        switch ($parent) {
            case 'channel':
                $this->append_text($this->channel, $text);
                break;
            case 'item':
                $this->append_text($this->current_item, $text);
                break;
            case 'textinput':
                $this->append_text($this->textinput, $text);
                break;
            case 'image':
                $this->append_text($this->image, $text);
                break;
        }
    }

    /**
     * Appends $text to the current field of $store, nesting under the
     * current namespace prefix when there is one. Missing keys are created
     * first so that appending never touches an undefined index.
     *
     * @param array  $store one of channel / current_item / textinput / image
     * @param string $text  text to append
     */
    private function append_text(&$store, $text) {
        $field = $this->current_field;

        if ($this->current_namespace) {
            $ns = $this->current_namespace;
            if (!isset($store[$ns])) {
                $store[$ns] = array();
            } elseif (!is_array($store[$ns])) {
                // a plain field already uses the prefix as its name; leave
                // it alone rather than fail on a string offset
                return;
            }
            if (!isset($store[$ns][$field])) {
                $store[$ns][$field] = '';
            }
            $store[$ns][$field] .= $text;
            return;
        }

        if (!isset($store[$field])) {
            $store[$field] = '';
        } elseif (is_array($store[$field])) {
            // the name is already in use as a namespace bucket; do not
            // clobber it with text
            return;
        }
        $store[$field] .= $text;
    }

    /**
     * Records an error message in $ERROR and reports it: via trigger_error()
     * when the MAGPIE_DEBUG constant is defined and true, otherwise via
     * error_log().
     *
     * @param string $errormsg message to record
     * @param int    $lvl      error level handed to trigger_error()
     */
    public function error($errormsg, $lvl = E_USER_ERROR) {
        $this->ERROR = $errormsg;

        // defined() first: the constant is declared outside this file and
        // may be absent when the class is used on its own
        if (defined('MAGPIE_DEBUG') && MAGPIE_DEBUG) {
            trigger_error($errormsg, $lvl);
        }
        else {
            error_log($errormsg, 0);
        }
    }


    /*======================================================================*\
        EVERYTHING BELOW HERE IS FOR DEBUGGING PURPOSES

        NOTE(review): these helpers echo feed content without HTML escaping;
        do not use them to render untrusted feeds to a browser.
    \*======================================================================*/

    /**
     * Prints all parsed items as an HTML ordered list.
     */
    public function show_list() {
        echo "<ol>\n";
        foreach ($this->items as $item) {
            echo "<li>";
            $this->show_item($item);
        }
        echo "</ol>";
    }

    /**
     * Prints the channel fields as an HTML unordered list.
     */
    public function show_channel() {
        echo "channel:<br>";
        echo "<ul>";
        foreach ($this->channel as $key => $value) {
            echo "<li> $key: $value";
        }
        echo "</ul>";
    }

    /**
     * Prints one item as an HTML unordered list; namespaced fields are
     * printed as a nested list under their prefix.
     *
     * @param array $item one entry of $items
     */
    public function show_item($item) {
        $title = isset($item['title']) ? $item['title'] : '';
        echo "item: $title";
        echo "<ul>";
        foreach ($item as $key => $value) {
            if (is_array($value)) {
                echo "<br><b>$key</b>";
                echo "<ul>";
                foreach ($value as $ns_key => $ns_value) {
                    echo "<li>$ns_key: $ns_value";
                }
                echo "</ul>";
            }
            else {
                echo "<li> $key: $value";
            }
        }
        echo "</ul>";
    }

    /*======================================================================*\
        END DEBUGGING FUNCTIONS
    \*======================================================================*/

} # end class RSS
| 294 | ?> |