| 1 | <? |
| 2 | error_reporting(0); |
| 3 | // This program is public domain. Do with this what you want. |
| 4 | // |
| 5 | // Disclaimer. Don't expect this to be here, to work, or to get fixed. |
| 6 | // But if you have a question or comment, email: mailto:julian_bond@voidstar.com |
| 7 | // |
| 8 | // If you're using Gnews2rss you presumably find it useful. |
| 9 | // Please email Google (news-feedback@google.com) asking them to produce RSS |
| 10 | // directly out of Google News Search. |
| 11 | // |
| 12 | // And why not host it yourself to save my bandwidth costs. |
| 13 | |
| 14 | // Note: This version requires curl support in your php installation |
| 15 | |
| 16 | $q=$_GET['q']; |
| 17 | |
| 18 | if ($q) { |
| 19 | parse_html($q); |
| 20 | } else { |
| 21 | show_form(); |
| 22 | } |
| 23 | |
| 24 | //**************** |
| 25 | function show_form() { |
| 26 | |
| 27 | $server = getenv("SERVER_NAME"); |
| 28 | $request = getenv("REQUEST_URI"); |
| 29 | ?> |
| 30 | |
| 31 | <center><B>Gnews2RSS at VoidStar.com</B></center> |
| 32 | <p>An experimental convertor that takes a Google News search and turns it |
| 33 | into RSS</p> |
| 34 | |
| 35 | <form action="<? print "http://" . $server . $request; ?>"> |
| 36 | <br />Number of entries to return: <select name="num"> |
| 37 | <option name="5">5</option> |
| 38 | <option name="5">10</option> |
| 39 | <option name="5" selected>15</option> |
| 40 | <option name="5">20</option> |
| 41 | <option name="5">25</option> |
| 42 | <option name="5">50</option> |
| 43 | <option name="5">75</option> |
| 44 | <option name="5">100</option> |
| 45 | </select> |
| 46 | <br />Search Query:<input type="text" name="q" size=50> |
| 47 | <br /><input type="submit" value="Create RSS"> |
| 48 | </form> |
| 49 | <br /><b>Usage</b>: |
| 50 | http://www.voidstar.com/gnews2rss.php?num=number_of_items&q=your_query |
| 51 | <br /> |
| 52 | <br /><b>Example</b>: <a |
| 53 | href="http://www.voidstar.com/gnews2rss.php?q=wifi+OR+WLAN+OR+80211">http://www.voidstar.com/gnews2rss.php?q=wifi+OR+WLAN+OR+80211</a> |
| 54 | <br /> |
| 55 | <br /><b>Disclaimer</b> Don't expect this to be here, to work, or to get |
| 56 | fixed. But if you have a question or comment, email: <a |
| 57 | href="mailto:julian_bond@voidstar.com">julian_bond@voidstar.com</a></li> |
| 58 | <br /> |
| 59 | <br /><b>Notes:</b> |
| 60 | <ul> |
| 61 | <li>num defaults to 15</li> |
| 62 | <li>Source <a |
| 63 | href="http://www.voidstar.com/gnews2rss.php.txt">http://www.voidstar.com/gnews2rss.php.txt</a> |
| 64 | If you have access to PHP, why not run your own. </li> |
| 65 | <li>Source <a |
| 66 | href="http://www.voidstar.com/gnews2rss.php.curl.txt">http://www.voidstar.com/gnews2rss.php.curl.txt</a> |
| 67 | A version using curl. </li> |
| 68 | <li>If you're using Gnews2rss you presumably find it useful. Please <a |
| 69 | href="mailto:news-feedback@google.com">email Google</a> asking them to |
| 70 | produce RSS directly out of Google News Search. And why not <a |
| 71 | href="http://www.voidstar.com/gnews2rss.php.txt">host it yourself</a> to |
| 72 | save my bandwidth costs.</li> |
| 73 | <li>Note: Some people have had trouble making it work on their servers |
| 74 | where no results seem to be found. This seems to be related to fopen(), |
| 75 | certain versions of php and some check that Google does. |
| 76 | If you get this, read the source and try uncommenting the lines just |
| 77 | after the data is collected to see what google returns. |
| 78 | Alternatively try the cURL version of the source, for which of course you |
| 79 | will need cURL support in PHP.</li> |
| 80 | <li>I'm happy to talk on email about all this, but I can't get involved |
| 81 | in debugging your installation. Go back and read the disclaimer!</li> |
| 82 | </ul> |
| 83 | <? |
| 84 | } |
| 85 | |
| 86 | //**************** |
| 87 | function parse_html($q){ |
| 88 | |
| 89 | header("Cache-Control: public"); |
| 90 | |
| 91 | $itemregexp = "%<a class=y href=\"(.+?)\">(.+?)<br><font size=-1><font color=#6f6f6f>(.+?)</font><br></table>%is"; |
| 92 | $allowable_tags = "<A><B><BR><BLOCKQUOTE><CENTER><DD><DL><DT><HR><I><IMG><LI><OL><P><PRE><U><UL>"; |
| 93 | |
| 94 | $num = ($num) ? $num+1 : 16 ; |
| 95 | |
| 96 | $url = |
| 97 | "http://news.google.com/news?hl=en&num=$num&scoring=d&q=".urlencode($q); |
| 98 | |
| 99 | if ($fp = @fopen($url, "r")) { |
| 100 | while (!feof($fp)) $data .= fgets($fp, 128); |
| 101 | fclose($fp); |
| 102 | } |
| 103 | |
| 104 | // ******************* |
| 105 | // Some people seem to have problems with google not returning anything |
| 106 | // uncomment the following lines and comment out the content-type header |
| 107 | // to see what google is returning. |
| 108 | |
| 109 | // print "<html>"; |
| 110 | // print "<pre>"; |
| 111 | // print htmlentities($data); |
| 112 | header("Content-Type: text/xml"); |
| 113 | |
| 114 | $data = strstr($data,"Sorted by date</b>"); |
| 115 | |
| 116 | eregi("<title>(.*)</title>", $data, $title); |
| 117 | $channel_title = $title[1]; |
| 118 | |
| 119 | $match_count = preg_match_all($itemregexp, $data, $items); |
| 120 | $match_count = ($match_count > 25) ? 25 : $match_count; |
| 121 | |
| 122 | $output .= "<?xml version=\"1.0\" encoding=\"iso-8859-1\" ?>\n"; |
| 123 | $output .= "<!-- generator=\"gnews2rss/1.0\" -->\n"; |
| 124 | $output .= "<!DOCTYPE rss >\n"; |
| 125 | |
| 126 | $output .= "<rss version=\"2.0\">\n"; |
| 127 | $output .= " <channel>\n"; |
| 128 | $output .= " <title>Google News Search: $q</title>\n"; |
| 129 | $output .= " <link>". htmlentities($url) ."</link>\n"; |
| 130 | $output .= " <description>Google News Search: $q</description>\n"; |
| 131 | $output .= " <webMaster>julian_bond@voidstar.com</webMaster>\n"; |
| 132 | $output .= " <language>en-us</language>\n"; |
| 133 | $output .= " <generator><a href=\"http://www.voidstar.com/gnews2rss.php\">GNews2Rss</a></generator>\n"; |
| 134 | |
| 135 | $day = date("d"); |
| 136 | if ($day == 1 || $day == 11 || $day == 21) { |
| 137 | $output .= " <item>\n"; |
| 138 | $output .= " <title>". date("d-M-y"). " Do you find Gnews2RSS useful?</title>\n"; |
| 139 | $output .= " <link>http://www.voidstar.com/gnews2rss.php</link>\n"; |
| 140 | $output .= " <description>If you're using Gnews2rss you presumably find it useful. Please <a href=\"mailto:news-feedback@google.com\">email Google</a> asking them to produce RSS directly out of Google News Search. And why not <a href=\"http://www.voidstar.com/gnews2rss.php.txt\">host it yourself</a> to save my bandwidth costs.</description>\n"; |
| 141 | $output .= " </item>\n"; |
| 142 | } |
| 143 | |
| 144 | for ($i=0; $i< $match_count; $i++) { |
| 145 | |
| 146 | $item_url = $items[1][$i]; |
| 147 | $title = $items[2][$i]; |
| 148 | $title = strip_tags($title); |
| 149 | $desc = $items[3][$i]; |
| 150 | |
| 151 | $desc = eregi_replace(" - .* ago</font><br>", "<br>", $desc); |
| 152 | $desc = strip_tags($desc, $allowable_tags); |
| 153 | $desc = htmlspecialchars($desc); |
| 154 | |
| 155 | $output .= " <item>\n"; |
| 156 | $output .= " <title>". htmlspecialchars($title) ."</title>\n"; |
| 157 | $output .= " <link>". htmlspecialchars($item_url) ."</link>\n"; |
| 158 | $output .= " <description>". $desc ."</description>\n"; |
| 159 | $output .= " </item>\n"; |
| 160 | } |
| 161 | |
| 162 | $output .= " </channel>\n"; |
| 163 | $output .= "</rss>\n"; |
| 164 | |
| 165 | print $output; |
| 166 | |
| 167 | //**************** |
| 168 | // More debug stuff |
| 169 | // print "<pre>"; |
| 170 | // print htmlentities($output); |
| 171 | // print "</pre>"; |
| 172 | |
| 173 | } |
| 174 | |
| 175 | |
| 176 | ?> |