| 1 | <?php |
| 2 | /* |
| 3 | ====================================================================== |
| 4 | lastRSS 0.6 |
| 5 | |
| 6 | Simple yet powerful PHP class to parse RSS files. |
| 7 | |
| 8 | by Vojtech Semecky, webmaster@webdot.cz |
| 9 | |
| 10 | Latest version, features, manual and examples: |
| 11 | http://lastrss.webdot.cz/ |
| 12 | |
| 13 | ---------------------------------------------------------------------- |
| 14 | TODO |
| 15 | - Do not apply iconv to the whole document, only to TITLE and DESCRIPTION (both per item and channel-wide) |
| 16 | ---------------------------------------------------------------------- |
| 17 | LICENSE |
| 18 | |
| 19 | This program is free software; you can redistribute it and/or |
| 20 | modify it under the terms of the GNU General Public License (GPL) |
| 21 | as published by the Free Software Foundation; either version 2 |
| 22 | of the License, or (at your option) any later version. |
| 23 | |
| 24 | This program is distributed in the hope that it will be useful, |
| 25 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 27 | GNU General Public License for more details. |
| 28 | |
| 29 | To read the license please visit http://www.gnu.org/copyleft/gpl.html |
| 30 | ====================================================================== |
| 31 | */ |
| 32 | |
class lastRSS {
    // -------------------------------------------------------------------
    // Settings
    // -------------------------------------------------------------------
    // Names of the child tags extracted from <channel>, <item>, <image>
    // and <textinput> respectively.
    var $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'pubDate', 'lastBuildDate', 'rating', 'docs');
    var $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
    var $imagetags = array('title', 'url', 'link', 'width', 'height');
    var $textinputtags = array('title', 'description', 'name', 'link');

    // -------------------------------------------------------------------
    // Options set by the caller before calling Get().
    // They are declared here so that reading them never touches an
    // undefined property.
    // -------------------------------------------------------------------
    // Directory holding the cache files; empty string disables caching.
    var $cache_dir = '';
    // Maximum age (in seconds) of a cache file that is still served.
    var $cache_time = 0;
    // Target character set; empty string means "do not convert".
    var $cp = '';
    // Value handed to mb_substitute_character(); leave empty to convert
    // with iconv() only.
    var $subs_char = '';

    // -------------------------------------------------------------------
    // Parse RSS file and returns associative array.
    //
    // $rss_url  path or URL of the feed
    // Returns the array built by Parse() plus the key 'cached'
    // (1 = served from the cache, 0 = freshly parsed), or False when the
    // feed could not be loaded.
    // -------------------------------------------------------------------
    function Get ($rss_url) {
        // If CACHE DISABLED >> load and parse the file directly
        if ($this->cache_dir == '') {
            $result = $this->Parse($rss_url);
            if ($result) $result['cached'] = 0;
            return $result;
        }

        // If CACHE ENABLED
        $cache_file = $this->cache_dir . '/rsscache_' . md5($rss_url);
        $cache_mtime = file_exists($cache_file) ? @filemtime($cache_file) : false;
        if ($cache_mtime !== false && (time() - $cache_mtime) < $this->cache_time) {
            // Cached file is fresh enough, try to return the cached array.
            // NOTE(review): unserialize() is acceptable here only because the
            // cache files are written by this class; cache_dir must not be
            // writable by untrusted users.
            $raw = @file_get_contents($cache_file);
            $cached = ($raw === false) ? false : @unserialize($raw);
            if (is_array($cached)) {
                $cached['cached'] = 1;
                return $cached;
            }
            // Unreadable or corrupted cache file: fall through and rebuild it.
        }

        // Cached file is missing, too old or broken >> create a new one
        $result = $this->Parse($rss_url);
        if ($result) {
            // Only a successful parse is stored; a failed download must not
            // be served as a "fresh" cache hit for the next cache_time seconds.
            $serialized = serialize($result);
            if ($f = @fopen($cache_file, 'w')) {
                fwrite ($f, $serialized, strlen($serialized));
                fclose($f);
            }
            $result['cached'] = 0;
        }
        return $result;
    }

    // -------------------------------------------------------------------
    // Modification of preg_match(); returns the trimmed capture group 1,
    // or an empty string when the pattern does not match.
    // -------------------------------------------------------------------
    function my_preg_match ($pattern, $subject) {
        if (preg_match($pattern, (string) $subject, $out) && isset($out[1])) {
            return trim($out[1]);
        }
        return '';
    }

    // -------------------------------------------------------------------
    // Replace HTML entities &something; by real characters.
    // Only the entities listed in PHP's HTML_ENTITIES translation table
    // are replaced.
    // -------------------------------------------------------------------
    function unhtmlentities ($string) {
        $trans_tbl = get_html_translation_table (HTML_ENTITIES);
        $trans_tbl = array_flip ($trans_tbl);
        return strtr ($string, $trans_tbl);
    }

    // -------------------------------------------------------------------
    // Encoding conversion function.
    //
    // Converts $string from $in_charset to $out_charset. When subs_char
    // is set, the text goes through UTF-8 and mb_convert_encoding() so
    // that subs_char is applied as the mbstring substitute character;
    // otherwise iconv() converts directly.
    // Returns the converted string, or False when iconv() fails.
    // -------------------------------------------------------------------
    function MyConvertEncoding($in_charset, $out_charset, $string) {
        // Without a substitute character a single iconv() call is enough
        if (!$this->subs_char) {
            return iconv($in_charset, $out_charset, $string);
        }
        // Iconv() to UTF-8. mb_convert_encoding() to $out_charset
        $utf = iconv($in_charset, 'UTF-8', $string);
        if ($utf === false) {
            // Do not feed a failed conversion into mbstring
            return false;
        }
        mb_substitute_character($this->subs_char);
        return mb_convert_encoding ($utf, $out_charset, 'UTF-8');
    }

    // -------------------------------------------------------------------
    // Parse() is private method used by Get() to load and parse RSS file.
    // Don't use Parse() in your scripts - use Get($rss_file) instead.
    //
    // Returns an array with the keys 'encoding', the non-empty channel
    // tags, 'textinput_*', 'image_*', 'items_count' and 'items', or
    // False when $rss_url cannot be read.
    // -------------------------------------------------------------------
    function Parse ($rss_url) {
        // Open and load RSS file
        $rss_content = @file_get_contents($rss_url);
        if ($rss_content === false) {
            // Error in opening return False
            return False;
        }

        $result = array();

        // Parse document encoding
        $result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);

        // If code page is set convert character encoding to required
        if ($this->cp != '') {
            // XML without an encoding declaration defaults to UTF-8
            $source_cp = ($result['encoding'] != '') ? $result['encoding'] : 'UTF-8';
            $converted = $this->MyConvertEncoding($source_cp, $this->cp, $rss_content);
            // Keep the original text when the conversion fails instead of
            // throwing the whole document away
            if ($converted !== false) $rss_content = $converted;
        }

        // Parse CHANNEL info
        $channel = '';
        if (preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel)) {
            $channel = $out_channel[1];
        }
        foreach ($this->channeltags as $tag) {
            $temp = $this->my_preg_match("'<$tag.*?>(.*?)</$tag>'si", $channel);
            if ($temp != '') $result[$tag] = $temp; // Set only if not empty
        }

        // Parse TEXTINPUT info
        // This a little strange regexp means:
        // Look for tag <textinput> with or without any attributes, but skip
        // truncated version <textinput /> (it's not beginning tag)
        if (preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo)
            && !empty($out_textinfo[2])) {
            foreach ($this->textinputtags as $tag) {
                $temp = $this->my_preg_match("'<$tag.*?>(.*?)</$tag>'si", $out_textinfo[2]);
                if ($temp != '') $result['textinput_'.$tag] = $temp; // Set only if not empty
            }
        }

        // Parse IMAGE info
        if (preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo)
            && !empty($out_imageinfo[1])) {
            foreach ($this->imagetags as $tag) {
                $temp = $this->my_preg_match("'<$tag.*?>(.*?)</$tag>'si", $out_imageinfo[1]);
                if ($temp != '') $result['image_'.$tag] = $temp; // Set only if not empty
            }
        }

        // Parse ITEMS
        preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
        $rss_items = isset($items[2]) ? $items[2] : array();
        $result['items_count'] = count($rss_items);
        $result['items'] = array(); // create array even if there are no items
        $i = 0;
        foreach ($rss_items as $rss_item) {
            // Parse one item
            $item = array();
            foreach ($this->itemtags as $tag) {
                $temp = $this->my_preg_match("'<$tag.*?>(.*?)</$tag>'si", $rss_item);
                if ($temp != '') $item[$tag] = $temp; // Set only if not empty
            }
            // Strip HTML tags from DESCRIPTION (if description is present):
            // once before and once after decoding the entities, so escaped
            // markup (&lt;b&gt;) is removed as well
            if (!empty($item['description'])) {
                $item['description'] = strip_tags($this->unhtmlentities(strip_tags($item['description'])));
            }
            // An item without any recognised tag leaves no entry, but it
            // still consumes its index
            if ($item) $result['items'][$i] = $item;
            // Item counter
            $i++;
        }
        return $result;
    }
}
| 184 | |
| 185 | ?> |