Commit | Line | Data |
---|---|---|
b42b2bf9 H |
1 | <?php |
2 | /* | |
3 | ====================================================================== | |
4 | lastRSS 0.6 | |
5 | ||
6 | Simple yet powerful PHP class to parse RSS files. | |
7 | ||
8 | by Vojtech Semecky, webmaster@webdot.cz | |
9 | ||
10 | Latest version, features, manual and examples: | |
11 | http://lastrss.webdot.cz/ | |
12 | ||
13 | ---------------------------------------------------------------------- | |
14 | TODO | |
15 | - Do not run iconv on the whole document, only on TITLE and DESCRIPTION (for items as well as for the channel) | |
16 | ---------------------------------------------------------------------- | |
17 | LICENSE | |
18 | ||
19 | This program is free software; you can redistribute it and/or | |
20 | modify it under the terms of the GNU General Public License (GPL) | |
21 | as published by the Free Software Foundation; either version 2 | |
22 | of the License, or (at your option) any later version. | |
23 | ||
24 | This program is distributed in the hope that it will be useful, | |
25 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
27 | GNU General Public License for more details. | |
28 | ||
29 | To read the license please visit http://www.gnu.org/copyleft/gpl.html | |
30 | ====================================================================== | |
31 | */ | |
32 | ||
class lastRSS {
	// -------------------------------------------------------------------
	// Settings
	// -------------------------------------------------------------------
	// Tag names extracted from <channel>, <item>, <image> and <textinput>.
	var $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'pubDate', 'lastBuildDate', 'rating', 'docs');
	var $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
	var $imagetags = array('title', 'url', 'link', 'width', 'height');
	var $textinputtags = array('title', 'description', 'name', 'link');

	// Directory for cache files; an empty string disables caching.
	var $cache_dir = '';
	// Maximum age (in seconds) of a cache file that is still served.
	var $cache_time = 0;
	// Target code page for the parsed feed; an empty string disables conversion.
	var $cp = '';
	// Value handed to mb_substitute_character() for unconvertible characters;
	// an empty value selects plain iconv() conversion instead.
	var $subs_char = '';

	// -------------------------------------------------------------------
	// Fetch a feed and return it as an associative array.
	//
	// $rss_url  URL or path of the RSS document.
	//
	// Returns the array built by Parse() with an extra 'cached' key
	// (1 = served from cache, 0 = freshly parsed), or False when the
	// feed could not be opened or the cache file could not be decoded.
	// -------------------------------------------------------------------
	function Get ($rss_url) {
		// CACHE DISABLED >> load and parse the file directly
		if ($this->cache_dir == '') {
			$result = $this->Parse($rss_url);
			if ($result) $result['cached'] = 0;
			return $result;
		}

		// CACHE ENABLED
		$cache_file = $this->cache_dir . '/rsscache_' . md5($rss_url);
		// filemtime() yields False for a missing file; treat that as "no cache".
		$mtime = @filemtime($cache_file);
		if ($mtime !== false && (time() - $mtime) < $this->cache_time) {
			// Cached file is fresh enough, return the cached array.
			// NOTE(review): unserialize() trusts the cache file completely;
			// keep cache_dir writable only by this application.
			$raw = @file_get_contents($cache_file);
			$result = ($raw === false) ? false : unserialize($raw);
			// set 'cached' to 1 only if the cached file is correct
			if ($result) $result['cached'] = 1;
		} else {
			// Cached file is missing or too old, create a new one.
			$result = $this->Parse($rss_url);
			$serialized = serialize($result);
			if ($f = @fopen($cache_file, 'w')) {
				fwrite($f, $serialized);
				fclose($f);
			}
			if ($result) $result['cached'] = 0;
		}
		return $result;
	}

	// -------------------------------------------------------------------
	// Variant of preg_match(): returns the trimmed first capture group,
	// or an empty string when the pattern does not match.
	// -------------------------------------------------------------------
	function my_preg_match ($pattern, $subject) {
		// Cast guards against a null subject (e.g. a section that was not found).
		if (preg_match($pattern, (string) $subject, $out) && isset($out[1])) {
			return trim($out[1]);
		}
		return '';
	}

	// -------------------------------------------------------------------
	// Replace HTML entities &something; by real characters, using the
	// inverse of PHP's HTML_ENTITIES translation table.
	// -------------------------------------------------------------------
	function unhtmlentities ($string) {
		$trans_tbl = get_html_translation_table (HTML_ENTITIES);
		$trans_tbl = array_flip ($trans_tbl);
		return strtr ($string, $trans_tbl);
	}

	// -------------------------------------------------------------------
	// Encoding conversion function.
	//
	// Converts $string from $in_charset to $out_charset. Returns the
	// converted string, or False when iconv() fails.
	// -------------------------------------------------------------------
	function MyConvertEncoding($in_charset, $out_charset, $string) {
		if (!$this->subs_char) {
			// No substitute character requested: iconv() straight to $out_charset
			return iconv($in_charset, $out_charset, $string);
		}
		// iconv() to UTF-8 first, then mb_convert_encoding() to $out_charset so
		// that characters missing in the target are replaced by subs_char.
		$utf = iconv($in_charset, 'UTF-8', $string);
		if ($utf === false) {
			return false;
		}
		mb_substitute_character($this->subs_char);
		return mb_convert_encoding ($utf, $out_charset, 'UTF-8');
	}

	// -------------------------------------------------------------------
	// Parse() is private method used by Get() to load and parse RSS file.
	// Don't use Parse() in your scripts - use Get($rss_file) instead.
	//
	// Returns an associative array with 'encoding', the channel fields,
	// 'textinput_*' and 'image_*' fields, 'items_count' and 'items',
	// or False when $rss_url cannot be opened.
	// -------------------------------------------------------------------
	function Parse ($rss_url) {
		// Open and load RSS file
		$f = @fopen($rss_url, 'r');
		if (!$f) {
			// Error in opening return False
			return False;
		}
		$rss_content = '';
		while (!feof($f)) {
			$line = fgets($f, 4096);
			// A read error never sets EOF; stop instead of looping forever.
			if ($line === false) break;
			$rss_content .= $line;
		}
		fclose($f);

		$result = array();

		// Parse document encoding
		$result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);

		// If code page is set convert character encoding to required
		if ($this->cp != '') {
			// A document without an encoding declaration is treated as UTF-8.
			$source_cp = ($result['encoding'] != '') ? $result['encoding'] : 'UTF-8';
			$converted = $this->MyConvertEncoding($source_cp, $this->cp, $rss_content);
			// Keep the unconverted document rather than discarding it on failure.
			if ($converted !== false) $rss_content = $converted;
		}

		// Parse CHANNEL info
		preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel);
		$channel_content = isset($out_channel[1]) ? $out_channel[1] : '';
		foreach ($this->channeltags as $channeltag) {
			$temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $channel_content);
			if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty
		}

		// Parse TEXTINPUT info
		// This a little strange regexp means:
		// Look for tag <textinput> with or without any attributes, but skip
		// the self-closing version <textinput /> (it's not a beginning tag)
		preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
		if (!empty($out_textinfo[2])) {
			foreach ($this->textinputtags as $textinputtag) {
				$temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
				if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty
			}
		}

		// Parse IMAGE info
		preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
		if (!empty($out_imageinfo[1])) {
			foreach ($this->imagetags as $imagetag) {
				$temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
				if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty
			}
		}

		// Parse ITEMS
		preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
		$rss_items = $items[2];
		$result['items_count'] = count($items[1]);
		$result['items'] = array(); // create array even if there are no items
		foreach ($rss_items as $i => $rss_item) {
			// Parse one item
			foreach ($this->itemtags as $itemtag) {
				$temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
				if ($temp != '') $result['items'][$i][$itemtag] = $temp; // Set only if not empty
			}
			// Strip HTML tags from DESCRIPTION (if a description is present):
			// once as-is, then again after decoding entity-escaped markup.
			if (!empty($result['items'][$i]['description'])) {
				$result['items'][$i]['description'] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i]['description'])));
			}
		}
		return $result;
	}
}
184 | ||
185 | ?> |