2
0
mirror of https://github.com/mozilla/cipherscan.git synced 2024-11-22 14:23:41 +01:00

remove the folder/file part from the URL

Some hostnames in the top-1m.csv file have a folder or site specified
in them; cut it off before using them.
This commit is contained in:
Hubert Kario 2014-05-08 02:06:57 +02:00
parent 00b20a20ed
commit a213fc45d0

View File

@ -54,7 +54,7 @@ count=$(wc -l top-1m.csv | awk '{print $1}')
while [ $i -lt $count ] while [ $i -lt $count ]
do do
echo processings sites $i to $((i + parallel)) echo processings sites $i to $((i + parallel))
for t in $(tail -$(($count - $i)) top-1m.csv | head -$parallel |cut -d ',' -f 2) for t in $(tail -$(($count - $i)) top-1m.csv | head -$parallel |cut -d ',' -f 2|cut -d "/" -f 1)
do do
(scan_hostname $t)& (scan_hostname $t)&
done done