Commit | Line | Data |
---|---|---|
b42b2bf9 H |
<?php
// All PHP diagnostics are silenced so that notices/warnings can never be
// written into the RSS output.
error_reporting(0);
// This program is public domain. Do with this what you want.
//
// Disclaimer. Don't expect this to be here, to work, or to get fixed.
// But if you have a question or comment, email: mailto:julian_bond@voidstar.com
//
// If you're using Gnews2rss you presumably find it useful.
// Please email Google (news-feedback@google.com) asking them to produce RSS
// directly out of Google News Search.
//
// And why not host it yourself to save my bandwidth costs.

// Note: This version fetches the Google page with fopen() on a URL, so it
// needs the URL fopen wrappers (allow_url_fopen) enabled in your php
// installation.

// Request dispatch: a non-empty "q" parameter produces the RSS feed,
// anything else shows the HTML form.
//
// Only a plain string is accepted: "q[]=x" would otherwise hand an array to
// urlencode(). The comparison against '' (rather than a truthiness test)
// keeps the perfectly valid query "0" from being treated as "no query".
$q = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';

if ($q !== '') {
    parse_html($q);
} else {
    show_form();
}
23 | ||
24 | //**************** | |
/**
 * Print the HTML landing page: the search form, usage notes, disclaimer and
 * links to the script's source.
 *
 * The form submits (GET) back to the URL the page was requested with, built
 * from the SERVER_NAME and REQUEST_URI environment variables. Output is
 * written directly; nothing is returned.
 */
function show_form() {

    $server = getenv("SERVER_NAME");
    $request = getenv("REQUEST_URI");

    // REQUEST_URI (and, depending on server configuration, SERVER_NAME) is
    // controlled by the client, so it must be escaped before being echoed
    // into the action attribute -- otherwise a crafted URL can break out of
    // the attribute and inject markup.
    $action = htmlspecialchars("http://" . $server . $request, ENT_QUOTES);
?>

<center><B>Gnews2RSS at VoidStar.com</B></center>
<p>An experimental convertor that takes a Google News search and turns it
into RSS</p>

<form action="<?php print $action; ?>">
<br />Number of entries to return: <select name="num">
<option value="5">5</option>
<option value="10">10</option>
<option value="15" selected>15</option>
<option value="20">20</option>
<option value="25">25</option>
<option value="50">50</option>
<option value="75">75</option>
<option value="100">100</option>
</select>
<br />Search Query:<input type="text" name="q" size=50>
<br /><input type="submit" value="Create RSS">
</form>
<br /><b>Usage</b>:
http://www.voidstar.com/gnews2rss.php?num=number_of_items&amp;q=your_query
<br />
<br /><b>Example</b>: <a
href="http://www.voidstar.com/gnews2rss.php?q=wifi+OR+WLAN+OR+80211">http://www.voidstar.com/gnews2rss.php?q=wifi+OR+WLAN+OR+80211</a>
<br />
<br /><b>Disclaimer</b> Don't expect this to be here, to work, or to get
fixed. But if you have a question or comment, email: <a
href="mailto:julian_bond@voidstar.com">julian_bond@voidstar.com</a>
<br />
<br /><b>Notes:</b>
<ul>
<li>num defaults to 15</li>
<li>Source <a
href="http://www.voidstar.com/gnews2rss.php.txt">http://www.voidstar.com/gnews2rss.php.txt</a>
If you have access to PHP, why not run your own. </li>
<li>Source <a
href="http://www.voidstar.com/gnews2rss.php.curl.txt">http://www.voidstar.com/gnews2rss.php.curl.txt</a>
A version using curl. </li>
<li>If you're using Gnews2rss you presumably find it useful. Please <a
href="mailto:news-feedback@google.com">email Google</a> asking them to
produce RSS directly out of Google News Search. And why not <a
href="http://www.voidstar.com/gnews2rss.php.txt">host it yourself</a> to
save my bandwidth costs.</li>
<li>Note: Some people have had trouble making it work on their servers
where no results seem to be found. This seems to be related to fopen(),
certain versions of php and some check that Google does.
If you get this, read the source and try uncommenting the lines just
after the data is collected to see what google returns.
Alternatively try the cURL version of the source, for which of course you
will need cURL support in PHP.</li>
<li>I'm happy to talk on email about all this, but I can't get involved
in debugging your installation. Go back and read the disclaimer!</li>
</ul>
<?php
}
85 | ||
86 | //**************** | |
/**
 * Fetch the Google News result page for a query, scrape the result entries
 * out of the HTML and print them as an RSS 2.0 document.
 *
 * Sends "Cache-Control" and "Content-Type: text/xml" headers and writes the
 * feed to the output; nothing is returned. If the page cannot be fetched, or
 * nothing matches the scraping pattern, a feed without result items is
 * produced.
 *
 * @param string   $q   Search query (raw, not yet URL- or XML-escaped).
 * @param int|null $num Number of entries wanted, 1..100. When omitted, the
 *                      "num" request parameter is used; a missing or
 *                      unusable value falls back to 15.
 */
function parse_html($q, $num = null) {

    header("Cache-Control: public");

    $itemregexp = "%<a class=y href=\"(.+?)\">(.+?)<br><font size=-1><font color=#6f6f6f>(.+?)</font><br></table>%is";
    $allowable_tags = "<A><B><BR><BLOCKQUOTE><CENTER><DD><DL><DT><HR><I><IMG><LI><OL><P><PRE><U><UL>";

    // The original read an undefined local $num here, so the form's "num"
    // field was silently ignored and 16 results were always requested.
    if ($num === null && isset($_GET['num'])) {
        $num = $_GET['num'];
    }
    $num = is_numeric($num) ? (int) $num : 15;
    if ($num < 1) {
        $num = 15;
    }
    if ($num > 100) {
        $num = 100;   // largest choice offered by the form
    }
    // One more result than asked for is requested from Google, as before.
    $fetch_count = $num + 1;

    $url =
        "http://news.google.com/news?hl=en&num=$fetch_count&scoring=d&q=".urlencode($q);

    $data = "";
    if ($fp = @fopen($url, "r")) {
        while (!feof($fp)) {
            $chunk = fgets($fp, 8192);
            if ($chunk === false) {
                // Read error without EOF: stop instead of looping forever.
                break;
            }
            $data .= $chunk;
        }
        fclose($fp);
    }

    // *******************
    // Some people seem to have problems with google not returning anything
    // uncomment the following lines and comment out the content-type header
    // to see what google is returning.

    // print "<html>";
    // print "<pre>";
    // print htmlentities($data);
    header("Content-Type: text/xml");

    // Only the part of the page from the "Sorted by date" marker onwards is
    // scanned for results; strstr() yields false when the marker is absent.
    $data = strstr($data, "Sorted by date</b>");
    if ($data === false) {
        $data = "";
    }

    // Never emit more items than were requested from Google. (This replaces
    // a fixed cap of 25, which contradicted the 50/75/100 form choices.)
    $match_count = (int) preg_match_all($itemregexp, $data, $items);
    if ($match_count > $fetch_count) {
        $match_count = $fetch_count;
    }

    $output = "<?xml version=\"1.0\" encoding=\"iso-8859-1\" ?>\n";
    $output .= "<!-- generator=\"gnews2rss/1.0\" -->\n";
    $output .= "<!DOCTYPE rss >\n";

    $output .= "<rss version=\"2.0\">\n";
    $output .= "  <channel>\n";
    // $q comes straight from the request and must be escaped, otherwise a
    // query containing "<" or "&" yields a malformed (or injected) feed.
    $output .= "    <title>Google News Search: ". gnews_xml_escape($q) ."</title>\n";
    $output .= "    <link>". gnews_xml_escape($url) ."</link>\n";
    $output .= "    <description>Google News Search: ". gnews_xml_escape($q) ."</description>\n";
    $output .= "    <webMaster>julian_bond@voidstar.com</webMaster>\n";
    $output .= "    <language>en-us</language>\n";
    $output .= "    <generator><a href=\"http://www.voidstar.com/gnews2rss.php\">GNews2Rss</a></generator>\n";

    // On the 1st, 11th and 21st of each month a promotional item is put at
    // the top of the feed.
    $day = (int) date("j");
    if ($day == 1 || $day == 11 || $day == 21) {
        $promo = "If you're using Gnews2rss you presumably find it useful. Please <a href=\"mailto:news-feedback@google.com\">email Google</a> asking them to produce RSS directly out of Google News Search. And why not <a href=\"http://www.voidstar.com/gnews2rss.php.txt\">host it yourself</a> to save my bandwidth costs.";

        $output .= "    <item>\n";
        $output .= "      <title>". date("d-M-y"). " Do you find Gnews2RSS useful?</title>\n";
        $output .= "      <link>http://www.voidstar.com/gnews2rss.php</link>\n";
        // Escaped like the scraped item descriptions below, so the HTML
        // travels as entity-encoded text rather than as stray child elements.
        $output .= "      <description>". gnews_xml_escape($promo) ."</description>\n";
        $output .= "    </item>\n";
    }

    for ($i = 0; $i < $match_count; $i++) {

        $item_url = $items[1][$i];
        $title = strip_tags($items[2][$i]);
        $desc = $items[3][$i];

        // Cut the " - ... ago" age suffix out of the source line.
        // preg_replace() stands in for eregi_replace(), which no longer
        // exists in PHP 7+; the /s flag lets "." span newlines.
        $trimmed = preg_replace("% - .* ago</font><br>%is", "<br>", $desc);
        if ($trimmed !== null) {
            $desc = $trimmed;
        }
        $desc = strip_tags($desc, $allowable_tags);

        $output .= "    <item>\n";
        $output .= "      <title>". gnews_xml_escape($title) ."</title>\n";
        $output .= "      <link>". gnews_xml_escape($item_url) ."</link>\n";
        $output .= "      <description>". gnews_xml_escape($desc) ."</description>\n";
        $output .= "    </item>\n";
    }

    $output .= "  </channel>\n";
    $output .= "</rss>\n";

    print $output;

    //****************
    // More debug stuff
    // print "<pre>";
    // print htmlentities($output);
    // print "</pre>";

}

/**
 * Escape a string for use as XML character data in the feed.
 *
 * The charset is passed explicitly to match the encoding named in the feed's
 * XML declaration; with the UTF-8 default of newer PHP versions,
 * htmlspecialchars() returns an empty string for byte sequences that are not
 * valid UTF-8.
 *
 * @param string $text Raw text.
 * @return string Text with &, <, > and both quote characters escaped.
 */
function gnews_xml_escape($text) {
    return htmlspecialchars((string) $text, ENT_QUOTES, 'ISO-8859-1');
}
174 | ||
175 | ||
176 | ?> |