Merged some nasty programs from softz.harvie.cz
[mirrors/Programs.git] / php / crawler / old / crawlboost.phps
1 #!/usr/bin/php
2 <?php
3 //CrawlB00st 0.1 (filter for crawlers)
4 //<-Harvie 2oo7
5 /*
6 *This thing will boost crawling of the web.
7 *Use it well...
8 */
9
/**
 * Extract absolute http:// link targets from one line of HTML.
 *
 * Looks for every case-insensitive occurrence of `href="http://`, takes the
 * text that follows up to the first `?`, `#`, `*` or `"`, decodes HTML
 * special-character entities, trims whitespace and prefixes `http://` again.
 * Query strings and fragments are therefore dropped on purpose.
 *
 * @param string $line One line of HTML markup.
 * @return array List of URLs (strings) in order of appearance; empty when the
 *               line contains no matching href.
 */
function crawlboost_extract_links($line)
{
    // preg_split() with the "i" flag replaces spliti(), which was removed in PHP 7.0.
    $pieces = preg_split('~href="http://~i', $line);
    if (!is_array($pieces) || count($pieces) < 2) {
        return array();
    }

    // Everything before the first href is not part of any link.
    array_shift($pieces);

    $links = array();
    foreach ($pieces as $piece) {
        // Cut at the first delimiter that ends the path part of the URL.
        $target = substr($piece, 0, strcspn($piece, '?#*"'));
        $target = trim(htmlspecialchars_decode($target));
        if ($target === '') {
            continue; // 'http://' alone is never a fetchable URL
        }
        $links[] = 'http://' . $target;
    }

    return $links;
}

// Filter loop: read one URL per line from stdin, echo it, then echo every
// http:// link found in the fetched document that can actually be opened.
$stdin = fopen('php://stdin', 'r');
if ($stdin === false) {
    fwrite(STDERR, "crawlboost: unable to open stdin\n");
} else {
    // Testing the fgets() result (rather than feof() up front) avoids handling
    // the final failed read as an empty URL.
    while (($input = fgets($stdin)) !== false) {
        $url = trim($input);
        if ($url === '') {
            continue; // skip blank input lines
        }
        echo("$url\n");

        // Fetch failures are expected while crawling; skip this URL and keep
        // filtering instead of aborting the whole run.
        $in = @file($url);
        if (!$in || !is_array($in)) {
            continue;
        }

        foreach ($in as $line) {
            foreach (crawlboost_extract_links($line) as $nurl) {
                // Only pass on links that can really be opened.
                $test = @fopen($nurl, 'r');
                if ($test) {
                    fclose($test); // release the probe connection immediately
                    echo("$nurl\n");
                }
            }
        }
    }
    fclose($stdin);
}
28
This page took 0.269778 seconds and 4 git commands to generate.