| 1 | #!/usr/bin/php |
| 2 | <?php |
| 3 | //CrawlB00st 0.1 (filter for crawlers) |
| 4 | //<-Harvie 2oo7 |
| 5 | /* |
| 6 | *This thing will boost crawling of web. |
| 7 | *Use it well... |
| 8 | */ |
| 9 | |
| 10 | $stdin = fopen('php://stdin', 'r'); |
| 11 | while(!feof($stdin)) { |
| 12 | $url = trim(fgets($stdin)); //echo($url); //Debug |
| 13 | echo("$url\n"); |
| 14 | $in = @file($url); if(!$in || !is_array($in)) return(1); |
| 15 | foreach($in as $line) { |
| 16 | $line = spliti('href="http://', $line); |
| 17 | if(sizeof($line) > 1) { |
| 18 | array_shift($line); //print_r($line); //Debug |
| 19 | foreach($line as $nurl) { |
| 20 | $nurl = spliti('(\?|#|\*|")', $nurl); |
| 21 | $nurl = 'http://'.trim(htmlspecialchars_decode($nurl[0])); //echo($nurl."\n"); //Debug |
| 22 | $test = @fopen($nurl, 'r'); |
| 23 | if($test) echo("$nurl\n"); |
| 24 | } |
| 25 | } |
| 26 | } |
| 27 | } |
| 28 | |