7 use LWP
::Simple
qw($ua get); $ua->timeout(3);
10 print STDERR "This is libwww-perl-$LWP::VERSION\n";
14 tie my @queue, "BerkeleyDB::Recno", -Filename => 'queue.db', -Flags => DB_CREATE || die "Cannot open DB1!\n";
15 tie my %crawled, "BerkeleyDB::Hash", -Filename => 'urls.db', -Flags => DB_CREATE || die "Cannot open DB2!\n";
17 push(@queue, 'http://root.cz/');
18 push(@queue, 'http://blog.harvie.cz/');
27 @$i = grep(!$h{$_}++, @$i);
32 for(my $i=0; $i<$threads; $i++) {
33 my $thr = threads->create(sub {
34 print("[*] Worker #$i running!\n");
36 my $env = new BerkeleyDB::Env
38 -Flags => DB_CREATE| DB_INIT_CDB | DB_INIT_MPOOL
39 or die "cannot open environment: $BerkeleyDB::Error\n";
42 tie my @queue, "BerkeleyDB::Recno", -Filename => 'queue.db', -Flags => DB_CREATE, -Env => $env || die "Cannot open DB1!\n";
43 tie my %crawled, "BerkeleyDB::Hash", -Filename => 'urls.db', -Flags => DB_CREATE, -Env => $env || die "Cannot open DB2!\n";
46 print "omg ".shift(@queue)."\n";
47 if(length($_) < 3) { sleep(1); next; }
49 print "#$i:\tGET $_\n";
50 $_ = LWP::Simple::get($_);
54 @urls = /(http:\/\/[_a-zA-Z0-9\.\-]+\.[a-zA-Z]{2,4}\/{1}[-_~&=\ ?\.a-z0-9\/]*)/g; #urls
55 #@urls = /(http:\/\/[^\/'" ]*)/g; #domains
56 if($i && @queue < 1000) {
57 #print("#$i:\tENQ: @urls\n");
58 push(@queue, uniq(@urls));
59 #while(uniq(@urls)) { push(@queue, $_); }
64 print("[*] Worker #$i stopped!\n");
69 print '###: '.@queue."\n";
71 print '###: '.@queue."\n";
This page took 0.309781 seconds and 4 git commands to generate.