docs
[mirrors/Programs.git] / bash / bashcrawl / crawl.bash
CommitLineData
21c4e167
H
1#!/bin/bash
2
# Settings read by grab_urls.

# Executable used to download pages.
CURL_PATH='curl'
# Seconds; grab_urls hands this value to both --connect-timeout and --max-time.
CURL_TIMEOUT=1

# The remaining curl options (--get --location --compressed --stderr curl.debug)
# are spelled out inline in grab_urls rather than kept in a CURL_ARGS variable.
8
9
#######################################
# Download a page with curl and print every distinct http:// URL found in it.
# Globals:
#   CURL_PATH    (read) curl executable to run
#   CURL_TIMEOUT (read) seconds, used for --connect-timeout and --max-time
# Arguments:
#   $1 - URL of the page to download
# Outputs:
#   Matching URLs on stdout, one per line, sorted and de-duplicated.
#   curl's diagnostics are written to ./curl.debug (via --stderr).
# Returns:
#   Exit status of the final sort; 0 even when nothing matched or the
#   download failed (best effort, as before).
#######################################
grab_urls() {
  # ERE kept in a variable so bash hands it to the regex engine verbatim.
  # Backslash-escaped parentheses in an unquoted =~ pattern are literal
  # characters, which made the old pattern match only URLs written as
  # "(http://...)". The character classes are unchanged from the original
  # (the path class allows a space and lowercase letters only).
  local -r url_re='http://[_a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}/[-_~&= ?.a-z0-9/]*'
  local page

  # With such a short --max-time a truncated download is normal; whatever
  # curl managed to print is still scanned, so its exit status is ignored.
  page=$("$CURL_PATH" --url "${1-}" --get --location \
    --connect-timeout "$CURL_TIMEOUT" --max-time "$CURL_TIMEOUT" \
    --compressed --stderr curl.debug) || true

  # [[ =~ ]] reports only the leftmost match, so emit it and drop everything
  # up to and including that match, then search the remainder again.
  while [[ "$page" =~ $url_re ]]; do
    printf '%s\n' "${BASH_REMATCH[0]}"
    page=${page#*"${BASH_REMATCH[0]}"}
  done | sort -u
}
20
# Crawl the URL given as the first script argument; with no argument,
# fall back to the start page that used to be hard-coded here.
grab_urls "${1:-http://www.abclinuxu.cz/}"
This page took 0.191352 seconds and 4 git commands to generate.