Commit | Line | Data |
---|---|---|
bc13d5d6 H |
1 | <?php |
2 | /* | |
3 | * Project: MagpieRSS: a simple RSS integration tool | |
4 | * File: rss_parse.inc includes code for parsing | |
5 | * RSS, and returning an RSS object | |
6 | * Author: Kellan Elliott-McCrea <kellan@protest.net> | |
7 | * Version: 0.3 | |
8 | * License: GPL | |
9 | * | |
10 | * The latest version of MagpieRSS can be obtained from: | |
11 | * http://magpierss.sourceforge.net | |
12 | * | |
13 | * For questions, help, comments, discussion, etc., please join the | |
14 | * Magpie mailing list: | |
15 | * magpierss-general@lists.sourceforge.net | |
16 | * | |
17 | */ | |
18 | ||
19 | ||
20 | /* | |
21 | * NOTES ON RSS PARSING PHILOSOPHY (moderately important): | |
22 | * MagpieRSS parses all versions of RSS, with a few limitations (mod_content and | |
23 | * mod_taxonomy support is shaky), into a simple object with 2 fields: | |
24 | * the hash 'channel', and the array 'items'. | |
25 | * | |
26 | * MagpieRSS is a forgiving and inclusive parser. It currently makes no | |
27 | * attempt to enforce the validity of an RSS feed. It will include any | |
28 | * properly formatted tags it finds, allowing you to mix RSS 0.93 with RSS | |
29 | * 1.0, with tags of your own imagining. This sort of witches' brew is a bad, | |
30 | * bad idea! But Magpie is less pedantic than I am. | |
31 | * | |
32 | * RSS validators are readily available on the web at: | |
33 | * http://feeds.archive.org/validator/ | |
34 | * http://www.ldodds.com/rss_validator/1.0/validator.html | |
35 | * | |
36 | */ | |
37 | ||
38 | /* | |
39 | * EXAMPLE PARSE RESULTS: | |
40 | * | |
41 | * Magpie tries to parse RSS into easy-to-use PHP data structures. | |
42 | * | |
43 | * For example, Magpie on encountering RSS 1.0 item entry: | |
44 | * | |
45 | * <item rdf:about="http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257"> | |
46 | * <title>Weekly Peace Vigil</title> | |
47 | * <link>http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257</link> | |
48 | * <description>Wear a white ribbon</description> | |
49 | * <dc:subject>Peace</dc:subject> | |
50 | * <ev:startdate>2002-06-01T11:00:00</ev:startdate> | |
51 | * <ev:location>Northampton, MA</ev:location> | |
52 | * <ev:enddate>2002-06-01T12:00:00</ev:enddate> | |
53 | * <ev:type>Protest</ev:type> | |
54 | * </item> | |
55 | * | |
56 | * Would transform it into the following associative array, and push it | |
57 | * onto the array $rss->items | |
58 | * | |
59 | * array( | |
60 | * title => 'Weekly Peace Vigil', | |
61 | * link => | |
62 | * 'http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257', | |
63 | * description => 'Wear a white ribbon', | |
64 | * dc => array ( | |
65 | * subject => 'Peace' | |
66 | * ), | |
67 | * ev => array ( | |
68 | * startdate => '2002-06-01T11:00:00', | |
69 | * enddate => '2002-06-01T12:00:00', | |
70 | * type => 'Protest', | |
71 | * location => 'Northampton, MA' | |
72 | * ) | |
73 | * ) | |
74 | * | |
75 | */ | |
76 | ||
class MagpieRSS {
    /*
     * Hybrid parser, and object. (probably a bad idea! :)
     *
     * Usage example:
     *
     *   $some_rss = "<?xml version="1.0"......
     *
     *   $rss = new MagpieRSS( $some_rss );
     *
     *   // print rss channel title
     *   echo $rss->channel['title'];
     *
     *   // print the title of each item
     *   foreach ($rss->items as $item ) {
     *       echo $item['title'];
     *   }
     *
     * After construction:
     *   $channel, $textinput, $image  hashes of field => text; fields that
     *                                 carried a namespace prefix are nested
     *                                 one level down under that prefix
     *   $items                        list of item hashes, same layout
     *   $ERROR                        '' on success, otherwise the message
     *                                 of the parse failure
     *
     * see rss_fetch.inc for a simpler interface
     */

    var $parser;                        // XML parser handle; null once parsing is done

    var $current_item = array();        // item currently being parsed
    var $items = array();               // collection of parsed items
    var $channel = array();             // hash of channel fields
    var $textinput = array();           // hash of textinput fields
    var $image = array();               // hash of image fields

    var $parent_field = array('RDF');   // stack of open container elements, innermost first
    var $current_field = '';            // element whose text is being collected ('' = none)
    var $current_namespace = false;     // namespace prefix of that element, or false

    var $ERROR = "";                    // last error message, '' if none

    /*======================================================================*\
        Function:   __construct
        Purpose:    Constructor, sets up XML parser, parses source,
                    and populates object.
        Input:      String containing the RSS to be parsed
        Notes:      A parse failure does not abort; it is reported through
                    error() and recorded in $this->ERROR.
    \*======================================================================*/
    function __construct ($source) {
        $this->parser = xml_parser_create();

        # register the handlers as (object, method) callables; this replaces
        # the old xml_set_object( $parser, &$this ) call, whose call-time
        # pass-by-reference no longer compiles
        #
        xml_set_element_handler(
            $this->parser,
            array( $this, 'start_element' ),
            array( $this, 'end_element' )
        );
        xml_set_character_data_handler( $this->parser, array( $this, 'cdata' ) );

        $status = xml_parse( $this->parser, $source );

        if ( ! $status ) {
            $errorcode = xml_get_error_code( $this->parser );
            $errormsg  = xml_error_string( $errorcode );
            $this->error( "RSS parse failure: $errormsg" );
        }

        # only resource-based parsers (older PHP) need an explicit free
        #
        if ( is_resource( $this->parser ) ) {
            xml_parser_free( $this->parser );
        }

        # drop the handle: it is useless after parsing, and an object-based
        # parser left in this property would make the instance unserializable
        #
        $this->parser = null;
    }

    /*======================================================================*\
        Function:   MagpieRSS
        Purpose:    Old-style constructor name, kept so existing code that
                    calls it explicitly (e.g. parent::MagpieRSS( $source ))
                    keeps working. Simply delegates to __construct().
        Input:      String containing the RSS to be parsed
    \*======================================================================*/
    function MagpieRSS ($source) {
        $this->__construct( $source );
    }

    /*======================================================================*\
        Function:   split_element_name
        Purpose:    Lower-case a qualified element name and split an
                    optional "prefix:" off the front.
        Input:      Element name as handed over by the XML parser
        Output:     array( $namespace, $element ); $namespace is false when
                    the name carries no prefix
    \*======================================================================*/
    function split_element_name ($element) {
        $element   = strtolower( $element );
        $namespace = false;

        # strpos() is deliberately tested for truthiness: a colon at offset 0
        # means an empty prefix, which is treated as "no namespace"
        #
        if ( strpos( $element, ':' ) ) {
            list($namespace, $element) = explode( ':', $element, 2 );
        }

        return array( $namespace, $element );
    }

    /*======================================================================*\
        Function:   start_element
        Purpose:    XML start-tag handler. Records which field (and
                    namespace) following character data belongs to, and
                    pushes container elements onto the parent stack.
        Input:      $p        parser handle (unused)
                    $element  element name as reported by the parser
                    $attrs    element attributes (unused)
    \*======================================================================*/
    function start_element ($p, $element, &$attrs) {
        list($namespace, $element) = $this->split_element_name( $element );

        $this->current_field = $element;
        if ( $namespace and $namespace != 'rdf' ) {
            $this->current_namespace = $namespace;
        }

        $containers = array( 'channel', 'items', 'item', 'textinput', 'image' );
        if ( in_array( $element, $containers, true ) ) {
            array_unshift( $this->parent_field, $element );
        }
    }

    /*======================================================================*\
        Function:   end_element
        Purpose:    XML end-tag handler. Pops container elements off the
                    parent stack, stores a finished item, and clears the
                    current field/namespace.
        Input:      $p        parser handle (unused)
                    $element  element name as reported by the parser
    \*======================================================================*/
    function end_element ($p, $element) {
        # strip the prefix exactly like start_element() does, so that every
        # container pushed there is popped here even when it was prefixed
        #
        $parts   = $this->split_element_name( $element );
        $element = $parts[1];

        if ( $element == 'item' ) {
            $this->items[]      = $this->current_item;
            $this->current_item = array();
            array_shift( $this->parent_field );
        }
        elseif ( $element == 'channel' or $element == 'items' or
                 $element == 'textinput' or $element == 'image' ) {
            array_shift( $this->parent_field );
        }

        $this->current_field     = '';
        $this->current_namespace = false;
    }

    /*======================================================================*\
        Function:   cdata
        Purpose:    XML character-data handler. Appends the text to the
                    current field of whichever container is innermost.
        Input:      $p     parser handle (unused)
                    $text  chunk of character data; one element's text may
                           arrive in several chunks, hence the appending
    \*======================================================================*/
    function cdata ($p, $text) {
        $parent = $this->parent_field[0];

        # skip text sitting directly inside a container (item, channel, ...)
        # and text that belongs to no open field
        #
        if ( $parent == $this->current_field or ! $this->current_field ) {
            return;
        }

        if ( $parent == 'channel' ) {
            $this->append_text( $this->channel, $text );
        }
        elseif ( $parent == 'item' ) {
            $this->append_text( $this->current_item, $text );
        }
        elseif ( $parent == 'textinput' ) {
            $this->append_text( $this->textinput, $text );
        }
        elseif ( $parent == 'image' ) {
            $this->append_text( $this->image, $text );
        }
    }

    /*======================================================================*\
        Function:   append_text
        Purpose:    Append $text to the slot for the current field (nested
                    under the current namespace, if any) in $store.
        Input:      $store  hash to write into, passed by reference
                    $text   text to append
    \*======================================================================*/
    function append_text (&$store, $text) {
        $field = $this->current_field;
        $ns    = $this->current_namespace;

        # create the slot before appending so the first chunk does not read
        # a key that does not exist yet
        #
        if ( $ns ) {
            if ( ! isset( $store[$ns][$field] ) ) {
                $store[$ns][$field] = '';
            }
            $store[$ns][$field] .= $text;
        }
        else {
            if ( ! isset( $store[$field] ) ) {
                $store[$field] = '';
            }
            $store[$field] .= $text;
        }
    }

    /*======================================================================*\
        Function:   error
        Purpose:    Record an error in $this->ERROR and report it: via
                    trigger_error() when the MAGPIE_DEBUG constant is
                    defined and true, via error_log() otherwise.
        Input:      $errormsg  message to record
                    $lvl       error level handed to trigger_error()
    \*======================================================================*/
    function error ($errormsg, $lvl=E_USER_ERROR) {
        // append PHP's error message if one is available; $php_errormsg is
        // only ever populated by the legacy track_errors setting, so guard
        // the read with isset()
        if ( isset( $php_errormsg ) and $php_errormsg ) {
            $errormsg .= " ($php_errormsg)";
        }
        $this->ERROR = $errormsg;

        // MAGPIE_DEBUG is not defined in this file; treat "undefined" as off
        if ( defined( 'MAGPIE_DEBUG' ) and MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl );
        }
        else {
            error_log( $errormsg, 0 );
        }
    }


    /*======================================================================*\
        EVERYTHING BELOW HERE IS FOR DEBUGGING PURPOSES

        NOTE(review): these helpers echo feed content without HTML escaping;
        do not expose their output for feeds you do not trust.
    \*======================================================================*/

    /* Print all parsed items as an HTML ordered list. */
    function show_list () {
        echo "<ol>\n";
        foreach ( $this->items as $item ) {
            echo "<li>";
            $this->show_item( $item );
        }
        echo "</ol>";
    }

    /* Print the channel fields as an HTML list. */
    function show_channel () {
        echo "channel:<br>";
        $this->show_fields( $this->channel );
    }

    /* Print one item hash (as stored in $this->items) as an HTML list. */
    function show_item ($item) {
        $title = isset( $item['title'] ) ? $item['title'] : '';
        echo "item: $title";
        $this->show_fields( $item );
    }

    /*
     * Print a field hash as an HTML list; array values (namespaced fields)
     * are printed as a nested list under the namespace name.
     */
    function show_fields ($fields) {
        echo "<ul>";
        foreach ( $fields as $key => $value ) {
            if ( is_array( $value ) ) {
                echo "<br><b>$key</b>";
                echo "<ul>";
                foreach ( $value as $ns_key => $ns_value ) {
                    echo "<li>$ns_key: $ns_value";
                }
                echo "</ul>";
            }
            else {
                echo "<li> $key: $value";
            }
        }
        echo "</ul>";
    }

    /*======================================================================*\
        END DEBUGGING FUNCTIONS
    \*======================================================================*/

} # end class MagpieRSS
294 | ?> |