From a213fc45d0b4dfca853d725e0c38d0a335836727 Mon Sep 17 00:00:00 2001 From: Hubert Kario Date: Thu, 8 May 2014 02:06:57 +0200 Subject: [PATCH] remove the folder/file part from url Some hostnames in the top-1m.csv file have a folder or file path appended to them; cut off everything from the first "/" before using them. --- top1m/testtop1m.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/top1m/testtop1m.sh b/top1m/testtop1m.sh index 46e7064..23a9775 100755 --- a/top1m/testtop1m.sh +++ b/top1m/testtop1m.sh @@ -54,7 +54,7 @@ count=$(wc -l top-1m.csv | awk '{print $1}') while [ $i -lt $count ] do echo processings sites $i to $((i + parallel)) - for t in $(tail -$(($count - $i)) top-1m.csv | head -$parallel |cut -d ',' -f 2) + for t in $(tail -$(($count - $i)) top-1m.csv | head -$parallel |cut -d ',' -f 2|cut -d "/" -f 1) do (scan_hostname $t)& done