Minor display cleanup
[mirrors/Kyberia-bloodline.git] / scripts / google2rss.php
1 <?
2 error_reporting(0);
3 // This program is public domain. Do with this what you want.
4 //
5 // Disclaimer. Don't expect this to be here, to work, or to get fixed.
6 // But if you have a question or comment, email: mailto:julian_bond@voidstar.com
7 //
8 // If you're using Gnews2rss you presumably find it useful.
9 // Please email Google (news-feedback@google.com) asking them to produce RSS
10 // directly out of Google News Search.
11 //
12 // And why not host it yourself to save my bandwidth costs.
13
14 // Note: This version requires curl support in your php installation
15
// Entry point: a non-empty string "q" request parameter produces the RSS feed;
// anything else (missing, empty, or an array such as q[]=x) shows the HTML form.
// The explicit checks avoid an undefined-index notice and let a query of "0"
// through, which a plain truthiness test would have treated as "no query".
$q = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';

if ($q !== '') {
    parse_html($q);
} else {
    show_form();
}
23
24 //****************
/**
 * Print the HTML landing page: a short description, the search form and the
 * usage notes.
 *
 * The form posts back to the current URL, built from the SERVER_NAME and
 * REQUEST_URI environment variables. Takes no parameters and returns nothing;
 * all output is written directly to the response.
 */
function show_form() {

    $server = getenv("SERVER_NAME");
    $request = getenv("REQUEST_URI");

    // REQUEST_URI is controlled by the client, so it must be escaped before
    // being placed inside the action="" attribute (otherwise a crafted URL
    // could close the attribute and inject markup). ISO-8859-1 is passed so
    // the call never rejects the input as invalid multi-byte text.
    $action = htmlspecialchars("http://" . $server . $request, ENT_QUOTES, 'ISO-8859-1');
?>

<center><B>Gnews2RSS at VoidStar.com</B></center>
<p>An experimental convertor that takes a Google News search and turns it
into RSS</p>

<form action="<?php print $action; ?>">
<br />Number of entries to return: <select name="num">
<option value="5">5</option>
<option value="10">10</option>
<option value="15" selected>15</option>
<option value="20">20</option>
<option value="25">25</option>
<option value="50">50</option>
<option value="75">75</option>
<option value="100">100</option>
</select>
<br />Search Query:<input type="text" name="q" size=50>
<br /><input type="submit" value="Create RSS">
</form>
<br /><b>Usage</b>:
http://www.voidstar.com/gnews2rss.php?num=number_of_items&amp;q=your_query
<br />
<br /><b>Example</b>: <a
href="http://www.voidstar.com/gnews2rss.php?q=wifi+OR+WLAN+OR+80211">http://www.voidstar.com/gnews2rss.php?q=wifi+OR+WLAN+OR+80211</a>
<br />
<br /><b>Disclaimer</b> Don't expect this to be here, to work, or to get
fixed. But if you have a question or comment, email: <a
href="mailto:julian_bond@voidstar.com">julian_bond@voidstar.com</a>
<br />
<br /><b>Notes:</b>
<ul>
<li>num defaults to 15</li>
<li>Source <a
href="http://www.voidstar.com/gnews2rss.php.txt">http://www.voidstar.com/gnews2rss.php.txt</a>
If you have access to PHP, why not run your own. </li>
<li>Source <a
href="http://www.voidstar.com/gnews2rss.php.curl.txt">http://www.voidstar.com/gnews2rss.php.curl.txt</a>
A version using curl. </li>
<li>If you're using Gnews2rss you presumably find it useful. Please <a
href="mailto:news-feedback@google.com">email Google</a> asking them to
produce RSS directly out of Google News Search. And why not <a
href="http://www.voidstar.com/gnews2rss.php.txt">host it yourself</a> to
save my bandwidth costs.</li>
<li>Note: Some people have had trouble making it work on their servers
where no results seem to be found. This seems to be related to fopen(),
certain versions of php and some check that Google does.
If you get this, read the source and try uncommenting the lines just
after the data is collected to see what google returns.
Alternatively try the cURL version of the source, for which of course you
will need cURL support in PHP.</li>
<li>I'm happy to talk on email about all this, but I can't get involved
in debugging your installation. Go back and read the disclaimer!</li>
</ul>
<?php
}
85
86 //****************
/**
 * Fetch a Google News search results page for $q and print it as an RSS 2.0
 * feed.
 *
 * Sends Cache-Control and Content-Type headers, downloads the results page
 * with fopen() (so allow_url_fopen must be enabled), scrapes the result items
 * with a regular expression and prints the XML. If the download fails or
 * nothing matches, a feed without result items is still printed.
 *
 * The number of items is taken from the "num" request parameter, clamped to
 * 1-100, and defaults to 15 when it is missing or not numeric.
 *
 * @param string $q Search query. It is URL-encoded for the request and
 *                  XML-escaped wherever it is echoed into the feed.
 * @return void
 */
function parse_html($q){

    header("Cache-Control: public");

    $itemregexp = "%<a class=y href=\"(.+?)\">(.+?)<br><font size=-1><font color=#6f6f6f>(.+?)</font><br></table>%is";
    $allowable_tags = "<A><B><BR><BLOCKQUOTE><CENTER><DD><DL><DT><HR><I><IMG><LI><OL><P><PRE><U><UL>";

    // The feed is declared as iso-8859-1 below; passing the same single-byte
    // charset to htmlspecialchars() keeps it from returning an empty string
    // when the scraped text is not valid UTF-8.
    $charset = 'ISO-8859-1';

    // "num" is not a parameter of this function, so it has to be read from the
    // request explicitly.
    $requested = 15;
    if (isset($_GET['num']) && is_numeric($_GET['num'])) {
        $requested = (int) $_GET['num'];
        if ($requested < 1) {
            $requested = 15;
        } elseif ($requested > 100) {
            $requested = 100;
        }
    }
    // One more result than requested is asked for, as the original code did
    // (presumably to compensate for a result the scraper does not match).
    $num = $requested + 1;

    $url =
        "http://news.google.com/news?hl=en&num=$num&scoring=d&q=".urlencode($q);

    $data = "";
    $fp = @fopen($url, "r");
    if ($fp) {
        while (!feof($fp)) {
            $chunk = fgets($fp, 128);
            if ($chunk === false) {
                // Read error without EOF: stop instead of looping forever.
                break;
            }
            $data .= $chunk;
        }
        fclose($fp);
    }

    // *******************
    // Some people seem to have problems with google not returning anything
    // uncomment the following lines and comment out the content-type header
    // to see what google is returning.

    // print "<html>";
    // print "<pre>";
    // print htmlentities($data);
    header("Content-Type: text/xml");

    // Only the part of the page from the "Sorted by date" marker onwards is
    // scanned for items; without the marker there is nothing to scan.
    $data = strstr($data, "Sorted by date</b>");
    if ($data === false) {
        $data = "";
    }

    $items = array();
    $match_count = (int) preg_match_all($itemregexp, $data, $items);
    if ($match_count > $requested) {
        $match_count = $requested;
    }

    $safe_q = htmlspecialchars($q, ENT_QUOTES, $charset);

    $output  = "<?xml version=\"1.0\" encoding=\"iso-8859-1\" ?>\n";
    $output .= "<!-- generator=\"gnews2rss/1.0\" -->\n";
    $output .= "<!DOCTYPE rss >\n";

    $output .= "<rss version=\"2.0\">\n";
    $output .= " <channel>\n";
    $output .= " <title>Google News Search: $safe_q</title>\n";
    $output .= " <link>". htmlspecialchars($url, ENT_QUOTES, $charset) ."</link>\n";
    $output .= " <description>Google News Search: $safe_q</description>\n";
    $output .= " <webMaster>julian_bond@voidstar.com</webMaster>\n";
    $output .= " <language>en-us</language>\n";
    $output .= " <generator>&lt;a href=\"http://www.voidstar.com/gnews2rss.php\">GNews2Rss&lt;/a></generator>\n";

    // On the 1st, 11th and 21st of the month an extra item is put at the top
    // of the feed asking readers to lobby Google for native RSS.
    $day = (int) date("d");
    if ($day == 1 || $day == 11 || $day == 21) {
        $output .= " <item>\n";
        $output .= " <title>". date("d-M-y"). " Do you find Gnews2RSS useful?</title>\n";
        $output .= " <link>http://www.voidstar.com/gnews2rss.php</link>\n";
        $output .= " <description>If you're using Gnews2rss you presumably find it useful. Please &lt;a href=\"mailto:news-feedback@google.com\">email Google&lt;/a> asking them to produce RSS directly out of Google News Search. And why not &lt;a href=\"http://www.voidstar.com/gnews2rss.php.txt\">host it yourself&lt;/a> to save my bandwidth costs.</description>\n";
        $output .= " </item>\n";
    }

    for ($i = 0; $i < $match_count; $i++) {

        $item_url = $items[1][$i];
        $title = strip_tags($items[2][$i]);
        $desc = $items[3][$i];

        // Drop the " - ... ago" trailer. PCRE replaces the removed POSIX
        // eregi_replace(); /i keeps it case-insensitive and /s lets "." span
        // newlines as the POSIX engine did.
        $cleaned = preg_replace("%&nbsp;-&nbsp;.* ago</font><br>%is", "<br>", $desc);
        if ($cleaned !== null) {
            $desc = $cleaned;
        }
        $desc = strip_tags($desc, $allowable_tags);
        $desc = htmlspecialchars($desc, ENT_QUOTES, $charset);

        $output .= " <item>\n";
        $output .= " <title>". htmlspecialchars($title, ENT_QUOTES, $charset) ."</title>\n";
        $output .= " <link>". htmlspecialchars($item_url, ENT_QUOTES, $charset) ."</link>\n";
        $output .= " <description>". $desc ."</description>\n";
        $output .= " </item>\n";
    }

    $output .= " </channel>\n";
    $output .= "</rss>\n";

    print $output;

    //****************
    // More debug stuff
    // print "<pre>";
    // print htmlentities($output);
    // print "</pre>";

}
174
175
176 ?>
This page took 0.464759 seconds and 4 git commands to generate.