- #$urlqueue->enqueue($url);
- print "#$i:\tGET $url\n";
- $_ = LWP::Simple::get($url);
- my @urls;
- if($_) {
- @urls = /(http:\/\/[_a-zA-Z0-9\.\-]+\.[a-zA-Z]{2,4}\/{1}[-_~&=\ ?\.a-z0-9\/]*)/g; #urls
- #@urls = /(http:\/\/[^\/'" ]*)/g; #domains
- if($urlqueue->pending() < 1000) {
- #print("#$i:\tENQ: @urls\n");
- $urlqueue->enqueue(uniq(@urls));
- #while(uniq(@urls)) { $urlqueue->enqueue($_); }
- }
+ #print "#$i:\tGET $url\n";
+ my @urls = crawl_url($url, \%crawled);
+ $db->sync();
+ if($urlqueue->pending() < 1000) {
+ #print("#$i:\tENQ: @urls\n");
+ $urlqueue->enqueue(@urls);