51ff3226 |
1 | <?php |
2 | /* |
3 | ====================================================================== |
4 | lastRSS 0.6 |
5 | |
6 | Simple yet powerful PHP class to parse RSS files. |
7 | |
8 | by Vojtech Semecky, webmaster@webdot.cz |
9 | |
10 | Latest version, features, manual and examples: |
11 | http://lastrss.webdot.cz/ |
12 | |
13 | ---------------------------------------------------------------------- |
14 | TODO |
15 | - Do not run iconv over the whole document, only over TITLE and DESCRIPTION (for items and for the channel) |
16 | ---------------------------------------------------------------------- |
17 | LICENSE |
18 | |
19 | This program is free software; you can redistribute it and/or |
20 | modify it under the terms of the GNU General Public License (GPL) |
21 | as published by the Free Software Foundation; either version 2 |
22 | of the License, or (at your option) any later version. |
23 | |
24 | This program is distributed in the hope that it will be useful, |
25 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
27 | GNU General Public License for more details. |
28 | |
29 | To read the license please visit http://www.gnu.org/copyleft/gpl.html |
30 | ====================================================================== |
31 | */ |
32 | |
class lastRSS {
    // -------------------------------------------------------------------
    // Settings
    // -------------------------------------------------------------------
    // Tag names that Parse() extracts from <channel>, from every <item>,
    // from <image> and from <textinput>.
    var $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'pubDate', 'lastBuildDate', 'rating', 'docs');
    var $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
    var $imagetags = array('title', 'url', 'link', 'width', 'height');
    var $textinputtags = array('title', 'description', 'name', 'link');

    // Directory for cache files; an empty string disables caching in Get().
    var $cache_dir = '';
    // Maximum age (in seconds) of a cache file before Get() parses again.
    var $cache_time = 0;
    // Target code page; when not empty, Parse() converts the feed to it.
    var $cp = '';
    // Value passed to mb_substitute_character(); when empty, plain iconv()
    // is used for the conversion.
    var $subs_char = '';

    // -------------------------------------------------------------------
    // Parse RSS file and return an associative array.
    //
    // $rss_url - path or URL handed to fopen() by Parse().
    // Returns the array built by Parse() with an extra 'cached' key
    // (1 = served from the cache file, 0 = freshly parsed), or false when
    // the feed could not be opened or the cache file could not be decoded.
    // -------------------------------------------------------------------
    function Get ($rss_url) {
        // CACHE DISABLED >> load and parse the file directly
        if ($this->cache_dir == '') {
            $result = $this->Parse($rss_url);
            if ($result) $result['cached'] = 0;
            return $result;
        }

        // CACHE ENABLED
        $cache_file = $this->cache_dir . '/rsscache_' . md5($rss_url);
        // filemtime() returns false (and warns) when the file does not exist
        $mtime = @filemtime($cache_file);
        $is_fresh = ($mtime !== false) && ((time() - $mtime) < $this->cache_time);

        if ($is_fresh) {
            // cached file is fresh enough, return cached array
            $raw = @file_get_contents($cache_file);
            $result = ($raw === false) ? false : unserialize($raw);
            // set 'cached' to 1 only if cached file is correct
            if ($result) $result['cached'] = 1;
            return $result;
        }

        // cached file is missing or too old, create new
        // NOTE(review): a failed parse (false) is written to the cache as
        // well, exactly as before - confirm whether that is intended.
        $result = $this->Parse($rss_url);
        $serialized = serialize($result);
        if ($f = @fopen($cache_file, 'w')) {
            fwrite($f, $serialized, strlen($serialized));
            fclose($f);
        }
        if ($result) $result['cached'] = 0;
        return $result;
    }

    // -------------------------------------------------------------------
    // Modification of preg_match(); returns the trimmed field with index 1
    // from the 'classic' preg_match() array output, or an empty string
    // when the pattern does not match.
    // -------------------------------------------------------------------
    function my_preg_match ($pattern, $subject) {
        if (preg_match($pattern, (string) $subject, $out) && isset($out[1])) {
            return trim($out[1]);
        }
        return '';
    }

    // -------------------------------------------------------------------
    // Replace HTML entities &something; by real characters
    // -------------------------------------------------------------------
    function unhtmlentities ($string) {
        // Flip the character => entity table into entity => character
        $entity_to_char = array_flip(get_html_translation_table(HTML_ENTITIES));
        return strtr($string, $entity_to_char);
    }

    // -------------------------------------------------------------------
    // Encoding conversion function.
    // Returns the converted string, or false when the conversion failed.
    // -------------------------------------------------------------------
    function MyConvertEncoding($in_charset, $out_charset, $string) {
        // No substitute character >> iconv() straight to $out_charset
        if (!$this->subs_char) {
            return iconv($in_charset, $out_charset, $string);
        }

        // Substitute character set >> iconv() to UTF-8 first, then let
        // mb_convert_encoding() produce $out_charset
        $utf = iconv($in_charset, 'UTF-8', $string);
        if ($utf === false) {
            return false;
        }
        // This changes the substitute character for the whole script
        mb_substitute_character($this->subs_char);
        return mb_convert_encoding($utf, $out_charset, 'UTF-8');
    }

    // -------------------------------------------------------------------
    // Parse() is private method used by Get() to load and parse RSS file.
    // Don't use Parse() in your scripts - use Get($rss_file) instead.
    //
    // Returns an array with 'encoding', the non-empty channel tags,
    // 'textinput_*' and 'image_*' entries, 'items_count' and 'items',
    // or false when $rss_url cannot be opened.
    // -------------------------------------------------------------------
    function Parse ($rss_url) {
        // Open and load RSS file
        $f = @fopen($rss_url, 'r');
        if (!$f) {
            // Error in opening, return false
            return false;
        }
        $rss_content = '';
        while (!feof($f)) {
            $chunk = fgets($f, 4096);
            // Stop on a read error; feof() alone may never become true
            if ($chunk === false) break;
            $rss_content .= $chunk;
        }
        fclose($f);

        $result = array();

        // Parse document encoding
        $result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);

        // If code page is set convert character encoding to required
        if ($this->cp != '') {
            // XML without an encoding declaration defaults to UTF-8
            $source_cp = ($result['encoding'] != '') ? $result['encoding'] : 'UTF-8';
            $converted = $this->MyConvertEncoding($source_cp, $this->cp, $rss_content);
            // Keep the unconverted document when the conversion failed
            if (is_string($converted)) $rss_content = $converted;
        }

        // Parse CHANNEL info
        $channel_body = '';
        if (preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel)) {
            $channel_body = $out_channel[1];
        }
        foreach ($this->channeltags as $channeltag) {
            $temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $channel_body);
            if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty
        }

        // Parse TEXTINPUT info
        // This slightly strange regexp means:
        // Look for tag <textinput> with or without any attributes, but skip
        // the self-closed version <textinput /> (it's not a beginning tag)
        if (preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo)
            && !empty($out_textinfo[2])) {
            foreach ($this->textinputtags as $textinputtag) {
                $temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
                if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty
            }
        }

        // Parse IMAGE info
        if (preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo)
            && !empty($out_imageinfo[1])) {
            foreach ($this->imagetags as $imagetag) {
                $temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
                if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty
            }
        }

        // Parse ITEMS
        preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
        $rss_items = isset($items[2]) ? $items[2] : array();
        $result['items_count'] = count($rss_items);
        $result['items'] = array(); // create array even if there are no items
        $i = 0;
        foreach ($rss_items as $rss_item) {
            // Parse one item
            $item = array();
            foreach ($this->itemtags as $itemtag) {
                $temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
                if ($temp != '') $item[$itemtag] = $temp; // Set only if not empty
            }
            // Strip HTML tags and decode entities in DESCRIPTION (if present)
            if (!empty($item['description'])) {
                $item['description'] = strip_tags($this->unhtmlentities(strip_tags($item['description'])));
            }
            // An item without any recognised tag is not stored, so its
            // index stays unused (same as before)
            if ($item) $result['items'][$i] = $item;
            // Item counter
            $i++;
        }
        return $result;
    }
}
184 | |
b3399798 |
185 | ?> |