2
0
mirror of https://github.com/mozilla/cipherscan.git synced 2024-11-05 15:33:42 +01:00
cipherscan/top1m/testtop1m.sh

99 lines
2.6 KiB
Bash
Raw Normal View History

2014-01-09 17:52:17 +01:00
#!/usr/bin/env bash
#
# Runs ../cipherscan against the sites listed in top-1m.csv and stores the
# JSON output of every scan as one file under results/.

# Number of sites taken from top-1m.csv and launched per batch.
parallel=10
# Background-job count passed to wait_for_jobs after every batch.
max_bg=50
# Hard cap on background jobs; also used to size the process-limit check below.
absolute_max_bg=100
# Ceiling for the first field of /proc/loadavg used when throttling.
max_load_avg=50

# Each scan spawns several processes, so require headroom of 10 processes per
# allowed background job. "ulimit -u" prints "unlimited" when no limit is set,
# which must not be fed to an integer comparison.
max_user_procs=$(ulimit -u)
if [[ $max_user_procs != unlimited ]] \
  && ((max_user_procs < 10 * absolute_max_bg)); then
  echo "max user processes too low, use ulimit -u to increase" >&2
  exit 1
fi

# Every scan redirects its output into results/, so give up early when the
# directory cannot be created.
mkdir -p results || exit 1
#######################################
# Block until it is acceptable to start more background scans.
# Globals:   max_load_avg (read), absolute_max_bg (read)
# Arguments: $1 - number of background jobs tolerated before waiting
# Returns:   0 once more jobs may be started
#######################################
wait_for_jobs() {
  local running
  running=$(jobs | wc -l)
  while :; do
    # No need to wait: the job count is within the limit and the first field
    # of /proc/loadavg is below the ceiling.
    if ! [ "$running" -gt "$1" ] \
      && ! awk -v maxload="$max_load_avg" '{ if ($1 < maxload) exit 1 }' /proc/loadavg; then
      break
    fi
    # Over the limit, but the load is not above the ceiling and the hard cap
    # on background jobs has not been reached yet: let the caller continue.
    if awk -v maxload="$max_load_avg" '{ if ($1 > maxload) exit 1 }' /proc/loadavg \
      && [ "$running" -lt "$absolute_max_bg" ]; then
      return
    fi
    sleep 1
    running=$(jobs | wc -l)
  done
}
#######################################
# Scan one IP address of a site, passing the site name to cipherscan as the
# SNI server name.
# Arguments: $1 - server name to send via -servername
#            $2 - IP address to connect to on port 443
# Outputs:   writes cipherscan's JSON output to results/<name>@<ip>
# Returns:   0 when the host is skipped, otherwise cipherscan's status
#######################################
scan_host() {
  local result_file="results/$1@$2"

  # do not scan the same host multiple times
  # NOTE(review): the result file is created by the redirect below even when
  # cipherscan fails, so a failed scan is never retried on a later run.
  if [[ -e $result_file ]]; then
    return 0
  fi

  # Skip hosts that do not answer on port 443.
  # presumably -u is a timeout in microseconds (10 s) - confirm against tcping
  if ! tcping -u 10000000 "$2" 443; then
    return 0
  fi

  ../cipherscan -json -servername "$1" "$2:443" > "$result_file"
}
#######################################
# Scan a host without sending a server name (used for bare IP addresses).
# Arguments: $1 - host or IP address to connect to on port 443
# Outputs:   writes cipherscan's JSON output to results/<host>
# Returns:   0 when the host is skipped, otherwise cipherscan's status
#######################################
scan_host_no_sni() {
  local result_file="results/$1"

  # do not scan the same host multiple times
  # NOTE(review): the result file is created by the redirect below even when
  # cipherscan fails, so a failed scan is never retried on a later run.
  if [[ -e $result_file ]]; then
    return 0
  fi

  # Skip hosts that do not answer on port 443.
  # presumably -u is a timeout in microseconds (10 s) - confirm against tcping
  if ! tcping -u 10000000 "$1" 443; then
    return 0
  fi

  ../cipherscan -json "$1:443" > "$result_file"
}
# Succeeds when $1 is a dotted-quad IPv4 literal (four decimal octets 0-255).
_is_ipv4() {
  local octet
  [[ $1 =~ ^([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$ ]] || return 1
  for octet in "${BASH_REMATCH[@]:1}"; do
    # 10# forces base 10 so that octets such as "08" are not read as octal
    ((10#$octet <= 255)) || return 1
  done
  return 0
}

#######################################
# Scan a site by name: the bare name and its "www." variant are resolved with
# host(1) and one address of each is scanned with SNI. IP literals are scanned
# directly without SNI.
# Arguments: $1 - host name or IPv4 address
# Returns:   status of the last scan attempted
#######################################
scan_hostname() {
  local name=$1
  local host_ips www_ips www_only

  # check if the hostname isn't an IP address (since we can't put IP
  # addresses to SNI extension)
  if _is_ipv4 "$name"; then
    scan_host_no_sni "$name"
    return
  fi

  # One address per line, taken from the "... has address <ip>" lines of host
  host_ips=$(host "$name" | awk '/has address/ {print $4}')
  www_ips=$(host "www.$name" | awk '/has address/ {print $4}')

  # Only the first address of each name is scanned. stdin is detached so that
  # a scanner reading standard input sees end-of-file immediately.
  if [[ -n $host_ips ]]; then
    scan_host "$name" "${host_ips%%$'\n'*}" < /dev/null
  fi

  if [[ -n $www_ips ]]; then
    if [[ -n $host_ips ]]; then
      # list of IPs that are in www but not in host; -x compares whole lines
      # so that 1.2.3.4 does not also exclude 1.2.3.40
      www_only=$(grep -Fxv -- "$host_ips" <<< "$www_ips")
    else
      www_only=$www_ips
    fi
    if [[ -n $www_only ]]; then
      scan_host "www.$name" "${www_only%%$'\n'*}" < /dev/null
    fi
  fi
}
# Walk top-1m.csv in batches of $parallel sites. The site is the second
# comma-separated field with anything from the first "/" onwards removed.
# The list is streamed once through file descriptor 3 instead of re-reading
# the file for every batch, and does not depend on a trailing newline.
i=0
while mapfile -t -n "$parallel" -u 3 batch && ((${#batch[@]} > 0)); do
  echo processings sites $i to $((i + parallel))
  for t in "${batch[@]}"; do
    # lines without a site field have nothing to scan
    [[ -n $t ]] || continue
    (scan_hostname "$t") &
  done
  i=$((i + parallel))
  # throttle before launching the next batch
  wait_for_jobs "$max_bg"
done 3< <(cut -d ',' -f 2 top-1m.csv | cut -d '/' -f 1)
# let the scans that are still running finish
wait