Commit | Line | Data |
---|---|---|
84aff5c8 H |
1 | #!/usr/bin/php |
2 | <?php | |
3 | //CrawlB00st 0.1 (filter for crawlers) | |
4 | //<-Harvie 2oo7 | |
5 | /* | |
6 | *This filter speeds up web crawling: it reads URLs from stdin, echoes each one, and prints the reachable http:// links found on that page. | |
7 | *Use it well... | |
8 | */ | |
9 | ||
// Main filter loop.
// Reads one URL per line from stdin, echoes it to stdout, downloads the page,
// and prints every absolute http:// link found in an href="..." attribute that
// can actually be opened. The output is again one URL per line, so the script
// can be chained with other crawler stages.
$stdin = fopen('php://stdin', 'r');
while (($row = fgets($stdin)) !== false) {
    $url = trim($row);
    if ($url === '') {
        continue; // blank input line: nothing to echo or fetch
    }
    echo("$url\n");

    // Best-effort fetch: warnings are suppressed on purpose so that they do
    // not end up mixed into the URL stream. An unreachable page only skips
    // this URL instead of terminating the whole filter.
    $in = @file($url);
    if (!$in || !is_array($in)) {
        continue;
    }

    foreach ($in as $line) {
        // Case-insensitive split on the start of each absolute http link;
        // every piece after the first begins with the host part of a link.
        $pieces = preg_split('~href="http://~i', $line);
        if (sizeof($pieces) > 1) {
            array_shift($pieces); // drop the text preceding the first link
            foreach ($pieces as $nurl) {
                // Cut the link at the query string, fragment, '*' or the
                // closing quote; only the leading part is kept.
                $nurl = preg_split('~[?#*"]~', $nurl, 2);
                $nurl = 'http://'.trim(htmlspecialchars_decode($nurl[0]));

                // Probe the link and print it only when it can be opened.
                $test = @fopen($nurl, 'r');
                if ($test) {
                    echo("$nurl\n");
                    fclose($test); // release the probe stream right away
                }
            }
        }
    }
}
fclose($stdin);
28 |