diff --git a/top1m/Makefile b/top1m/Makefile new file mode 100644 index 0000000..469d11c --- /dev/null +++ b/top1m/Makefile @@ -0,0 +1,2 @@ +build: parse_CAs.c + gcc -o parse_CAs parse_CAs.c -lssl -lcrypto -ljson-c --std=gnu99 diff --git a/top1m/parse_CAs.c b/top1m/parse_CAs.c new file mode 100644 index 0000000..987d609 --- /dev/null +++ b/top1m/parse_CAs.c @@ -0,0 +1,553 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * Author: Hubert Kario - 2014 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? _a : _b; }) + +#ifndef X509_V_FLAG_TRUSTED_FIRST +/* + * OpenSSL implements the same chain building logic as does NSS but it doesn't + * use it by default, it's also not available in stock 1.0.1 but is backported + * for example on Fedora + */ +#warning "X509_V_FLAG_TRUSTED_FIRST not available, chain creation will be unreliable" +#define X509_V_FLAG_TRUSTED_FIRST 0 +#endif + +#define MAX_BUFFER_SIZE 8192 + +static char* CA_TRUSTED = "./ca_trusted"; +static char* CA_ALL = "./ca_files"; +static char* CERTS_DIR = "./certs"; + +/* SSL context that knows only about trust anchors */ +SSL_CTX *trusted_only; +/* SSL context that also has access to other CA certs */ +SSL_CTX *all_CAs; + +// load certificate from file to a OpenSSL object +X509 *load_cert(char *filename) +{ + BIO* f; + X509 *ret; + + f = BIO_new(BIO_s_file()); + BIO_read_filename(f, filename); + + ret = PEM_read_bio_X509_AUX(f, NULL, 0, NULL); + if (ret == NULL) + fprintf(stderr, "Unable to load file %s as X509 certificate\n", filename); + + BIO_free_all(f); + + return ret; +} + +// convert sha256 to a file name, if the file exists +// search in "all CAs" dir and "leaf certs" 
directories +char *hash_to_filename(const char *hash) +{ + char *tmp_f_name; + size_t n; + int ret; + + n = strlen(hash) + MAX(MAX(strlen(CA_TRUSTED), strlen(CA_ALL)), + strlen(CERTS_DIR)) + 1 + // slash in name + strlen(".pem") + 1; + + tmp_f_name = malloc(n); + if (!tmp_f_name) { + fprintf(stderr, "Out of memory (line %i)\n", __LINE__); + abort(); + } + + /* first check if the file is in directory with regular certs */ + ret = snprintf(tmp_f_name, n, "%s/%s.pem", CERTS_DIR, hash); + if (ret >= n) { + fprintf(stderr, "Out of buffer space (line %i)\n", __LINE__); + abort(); + } + if (access(tmp_f_name, F_OK) != -1) { + return tmp_f_name; + } + + ret = snprintf(tmp_f_name, n, "%s/%s.pem", CA_ALL, hash); + if (ret >= n) { + fprintf(stderr, "Out of buffer space (line %i)\n", __LINE__); + abort(); + } + if (access(tmp_f_name, F_OK) != -1) { + return tmp_f_name; + } + + // file not found + free(tmp_f_name); + return NULL; +} + +// take certificate hashes, check their validity and output json that +// will indicate which certificates were used for verification, whether +// the chain was trusted and if all certificates needed for verification +// (with the exception of root CA) were present in hashes +int process_chain(const char **cert_hashes) +{ + int ret; + int rc; // return code from function + char *f_name; + + X509 *cert; + X509 *x509; + + X509_STORE *store; + + X509_STORE_CTX *csc; + + STACK_OF(X509) *ustack; + STACK_OF(X509) *vstack; + + // load certificates to temp structures + + // first the end entity cert + // (EE cert needs to be passed separately to OpenSSL verification context) + f_name = hash_to_filename(cert_hashes[0]); + if (f_name == NULL) + return 1; + + cert = load_cert(f_name); + free(f_name); + if (cert == NULL) { + printf("can't load certificate!\n"); + return 1; + } + + // then the intermediate certificates + ustack = sk_X509_new_null(); + + for (int i=1; cert_hashes[i]!=NULL; i++) { + //printf(".\n"); + f_name = hash_to_filename(cert_hashes[i]); 
+ if (f_name == NULL) { + // file not found + continue; + } + x509 = load_cert(f_name); + if (x509 == NULL) { + // loading cert failed + continue; + } + sk_X509_push(ustack, x509); + free(f_name); + } + + // first try with just trusted certificates + + store = SSL_CTX_get_cert_store(trusted_only); + if (store == NULL) { + fprintf(stderr, "store init failed\n"); + return 1; + } + X509_STORE_set_flags(store, X509_V_FLAG_TRUSTED_FIRST); + + csc = X509_STORE_CTX_new(); + + ret = X509_STORE_CTX_init(csc, store, cert, ustack); + if (ret != 1) { + return 1; + } + + ret = X509_verify_cert(csc); + + if (ret != 1) { + // printf("%s\n", X509_verify_cert_error_string(csc->error)); + } else { + // chain is complete, output certificate hashes + printf("{\"chain\":\"complete\",\"certificates\":["); + vstack = X509_STORE_CTX_get_chain(csc); + for(int i=0; i= 0) { + lseek(fd, -1, SEEK_CUR); + } + + // parse the json object from the file + tok = json_tokener_new(); + do { + rc = read(fd, buffer, len); + if (rc < 0) + break; + obj = json_tokener_parse_ex(tok, buffer, rc); + } while ((jerr = json_tokener_get_error(tok)) == json_tokener_continue); + + if (jerr != json_tokener_success){ + fprintf(stderr, "error in file %s, line: %s\n", filename, buffer); + } + +tok_free: + json_tokener_free(tok); + +close_fd: + close(fd); + +err: + if (ret) { + fprintf(stderr, "error while reading file: %i", ret); + } + return obj; +} + +// process all ciphersuites one by one from a given host results file +int process_host_results(char *filename) +{ + int fd; + int ret = 0; + int rc; + size_t sz; + size_t alloc_size = 64 * 1024; + const char *str; + struct json_object *root; + struct json_object *ciphers; + struct json_object *current; + struct json_object *certificates; + + struct json_object **known_chains; + known_chains = malloc(sizeof(struct json_object*) * 1); + known_chains[0] = NULL; + + struct lh_table *table; + enum json_type obj_t; + json_bool j_rc; + + root = read_json_from_file(filename); 
+ if (root == NULL) { + ret = 1; + goto err; + } + + obj_t = json_object_get_type(root); + str = json_type_to_name(obj_t); + + j_rc = json_object_object_get_ex(root, "ciphersuite", &ciphers); + if (j_rc == FALSE) { + ret = 1; + goto json_free; + } + + // ok, we've got the ciphersuite part, we can print the json header for + // the host file + printf("{\"host\":\"%s\",\"chains\":[", filename); + + int first_printed=0; + for(int i=0; i < json_object_array_length(ciphers); i++) { + current = json_object_array_get_idx(ciphers, i); +#ifdef DEBUG + printf("\t[%i]:\n", i); +#endif + j_rc = json_object_object_get_ex(current, "certificates", &certificates); + if (j_rc == FALSE) + continue; + + const char** certs; + certs = calloc(sizeof(const char*), json_object_array_length(certificates) + 1); + int j; + for (j=0; j < json_object_array_length(certificates); j++) { + certs[j] = json_object_get_string(json_object_array_get_idx(certificates, j)); +#ifdef DEBUG + printf("\t\t\t%s\n", certs[j]); +#endif + } + rc = register_known_chains(&known_chains, certificates); +#ifdef DEBUG + printf("\t\t%i\n", rc); +#endif + + if (rc == 0 && j > 0) { + if (first_printed != 0) + printf(","); + if (process_chain(certs) != 0) { + fprintf(stderr, "error while processing chains!\n"); + } else { + first_printed = 1; + } + } + +#ifdef DEBUG + // print whole json "object" object + json_object_object_foreach(current, key, val) { + str = json_object_to_json_string(val); + printf("\t\t%s: %s\n", key, str); + } +#endif + + free(certs); + } + printf("]}"); + +json_free: + json_object_put(root); + +err: + free(known_chains); + return ret; +} + +int main(int argc, char** argv) +{ + int ret; + + DIR *dirp; + struct dirent *direntp; + + char buffer[MAX_BUFFER_SIZE] = {}; + + SSL_load_error_strings(); + SSL_library_init(); + + /* init trust stores with certificate locations */ + trusted_only = SSL_CTX_new(SSLv23_method()); + if (trusted_only == NULL) { + ERR_print_errors_fp(stderr); + return 1; + } + + ret 
= SSL_CTX_load_verify_locations(trusted_only, NULL, CA_TRUSTED); + if (ret != 1) { + ERR_print_errors_fp(stderr); + return 1; + } + + all_CAs = SSL_CTX_new(SSLv23_method()); + if (all_CAs == NULL) { + ERR_print_errors_fp(stderr); + return 1; + } + + ret = SSL_CTX_load_verify_locations(all_CAs, NULL, CA_ALL); + if (ret != 1) { + ERR_print_errors_fp(stderr); + return 1; + } + + /* traverse the result directory, check all files in turn */ + dirp=opendir("results"); + while((direntp=readdir(dirp)) != NULL) { + if (strcmp(direntp->d_name, ".") == 0) + continue; + if (strcmp(direntp->d_name, "..") == 0) + continue; + + ret = snprintf(buffer, MAX_BUFFER_SIZE-1, "results/%s", direntp->d_name); + if (ret >= MAX_BUFFER_SIZE-1) { + fprintf(stderr, "Out of buffer space (line %i)\n", __LINE__); + abort(); + } + + ret = process_host_results(buffer); + if (ret == 1) { + fprintf(stderr, "error while processing %s\n", buffer); + } + if (ret == 0) + printf("\n"); + } + closedir(dirp); + + /* clean up */ + SSL_CTX_free(trusted_only); + SSL_CTX_free(all_CAs); + all_CAs = NULL; + trusted_only = NULL; + + return ret; +} diff --git a/top1m/parse_CAs.py b/top1m/parse_CAs.py new file mode 100644 index 0000000..f3db0bf --- /dev/null +++ b/top1m/parse_CAs.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# Author: Hubert Kario - 2014 + +from __future__ import division + +path = "./results/" +ca_certs_path = "./ca_files" +certs_path = "./certs" + +""" only root CAs, no cached intermediate certs """ +trust_path = "./ca_trusted" + +import json +import sys +from collections import defaultdict +import os +from OpenSSL import crypto + +invocations = defaultdict(int) + +total = 0 +hosts = 0 +chains = defaultdict(int) +chain_len = defaultdict(int) +keysize = defaultdict(int) +keysize_per_chain = defaultdict(int) +root_CA = defaultdict(int) +sig_alg = defaultdict(int) +intermediate_CA = defaultdict(int) +effective_security = defaultdict(int) + +subject_hashes = {} +issuer_hashes = {} + +def get_cert_subject_name(cert): + subject = cert.get_subject() + commonName = None + organization = None + + for elem,val in subject.get_components(): + if elem == "CN" and commonName is None: + commonName = val + if elem == "O" and organization is None: + organization = val + + s_hash = "(" + ("%0.8X" % subject.hash()).lower() + ") " + + if commonName is not None: + return s_hash + commonName + elif organization is not None: + return s_hash + organization + else: + return s_hash + +def get_path_for_hash(cert_hash): + f_name = certs_path + '/' + cert_hash + '.pem' + if not os.path.exists(f_name): + f_name = ca_certs_path + '/' + cert_hash + '.pem' + if not os.path.exists(f_name): + #print "File with hash " + c_hash + " is missing!" 
+ return None + return f_name + +""" convert RSA and DSA key sizes to estimated Level of Security """ +def rsa_key_size_to_los(size): + if size < 760: + return 40 + elif size < 1020: + return 64 + elif size < 2040: + return 80 + elif size < 3068: + return 112 + elif size < 4094: + return 128 + elif size < 7660: + return 152 + elif size < 15300: + return 192 + else: + return 256 + +""" convert signature algorithm to estimated Level of Security """ +def sig_alg_to_los(name): + if 'MD5' in name.upper(): + return 64 + elif 'SHA1' in name.upper(): + return 80 + elif 'SHA224' in name.upper(): + return 112 + elif 'SHA256' in name.upper(): + return 128 + elif 'SHA384' in name.upper(): + return 192 + elif 'SHA512' in name.upper(): + return 256 + else: + raise UnknownSigAlgError + +def collect_key_sizes(file_names): + + tmp_keysize = {} + + """ don't collect signature alg for the self signed root """ + with open(file_names[-1]) as cert_file: + cert_pem = cert_file.read() + + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) + + pubkey = cert.get_pubkey() + if pubkey.type() == crypto.TYPE_RSA: + keysize['RSA ' + str(pubkey.bits())] += 1 + tmp_keysize['RSA ' + str(pubkey.bits())] = 1 + security_level = rsa_key_size_to_los(pubkey.bits()) + elif pubkey.type() == crypto.TYPE_DSA: + keysize['DSA ' + str(pubkey.bits())] += 1 + tmp_keysize['DSA ' + str(pubkey.bits())] = 1 + security_level = rsa_key_size_to_los(pubkey.bits()) + # following 408 should be crypto.TYPE_ECDSA, but even new(ish) versions + # of OpenSSL Python module don't define it + elif pubkey.type() == 408: + keysize['ECDSA ' + str(pubkey.bits())] += 1 + tmp_keysize['ECDSA ' + str(pubkey.bits())] = 1 + security_level = pubkey.bits()/2 + else: + keysize[str(pubkey.type()) + ' ' + str(pubkey.bits())] += 1 + security_level = 0 + + root_CA[get_cert_subject_name(cert)] += 1 + + """ exclude the self signed root and server cert from stats """ + for f_name in file_names[1:-1]: + with open(f_name) as cert_file: + 
cert_pem = cert_file.read() + + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) + + pubkey = cert.get_pubkey() + if pubkey.type() == crypto.TYPE_RSA: + keysize['RSA ' + str(pubkey.bits())] += 1 + tmp_keysize['RSA ' + str(pubkey.bits())] = 1 + c_key_level = rsa_key_size_to_los(pubkey.bits()) + elif pubkey.type() == crypto.TYPE_DSA: + keysize['DSA ' + str(pubkey.bits())] += 1 + tmp_keysize['DSA ' + str(pubkey.bits())] = 1 + c_key_level = rsa_key_size_to_los(pubkey.bits()) + elif pubkey.type() == 408: + keysize['ECDSA ' + str(pubkey.bits())] += 1 + tmp_keysize['ECDSA ' + str(pubkey.bits())] = 1 + c_key_level = pubkey.bits() / 2 + else: + keysize[str(pubkey.type()) + ' ' + str(pubkey.bits())] += 1 + c_key_level = 0 + + if security_level > c_key_level: + security_level = c_key_level + + sig_alg[cert.get_signature_algorithm()] += 1 + c_sig_level = sig_alg_to_los(cert.get_signature_algorithm()) + if security_level > c_sig_level: + security_level = c_sig_level + + intermediate_CA[get_cert_subject_name(cert)] += 1 + + for key_s in tmp_keysize: + keysize_per_chain[key_s] += 1 + + # XXX doesn't handle the situation in which the CA uses its certificate + # for a web server properly + with open(file_names[0]) as cert_file: + cert_pem = cert_file.read() + + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) + + pubkey = cert.get_pubkey() + if pubkey.type() == crypto.TYPE_RSA: + c_key_level = rsa_key_size_to_los(pubkey.bits()) + elif pubkey.type() == crypto.TYPE_DSA: + c_key_level = rsa_key_size_to_los(pubkey.bits()) + elif pubkey.type() == 408: + c_key_level = pubkey.bits() / 2 + else: + c_key_level = 0 + + if security_level > c_key_level: + security_level = c_key_level + + c_sig_level = sig_alg_to_los(cert.get_signature_algorithm()) + if security_level > c_sig_level: + security_level = c_sig_level + + effective_security[security_level] += 1 + + +with open("parsed") as res_file: + for line in res_file: + try: + res = json.loads(line) + except ValueError 
as e: + print "can't process line: " + line + continue + + f=res + + try: + server_chain_trusted = False + server_chain_complete = False + server_chains = [] + valid = False + + """ Keep certificates in memory for a given file """ + known_certs = {} + + if not "chains" in f: + continue + + results = f["chains"] + + """ discard hosts with empty results """ + if len(results) < 1: + continue + + """ loop over list of ciphers """ + for entry in results: + + """ skip invalid results """ + if not 'chain' in entry: + continue + + valid = True + + if entry['chain'] == "untrusted": + continue + + if entry['chain'] == "complete": + server_chain_complete = True + server_chain_trusted = True + + if entry['chain'] == "incomplete": + server_chain_trusted = True + + server_chains += [entry['certificates']] + + if server_chain_trusted: + if server_chain_complete: + chains["complete"] += 1 + print "complete: " + f['host'] + else: + chains["incomplete"] += 1 + print "incomplete: " + f['host'] + else: + chains["untrusted"] += 1 + print "untrusted: " + f['host'] + + if valid: + hosts += 1 + + for chain in server_chains: + f_names = [] + for hash in chain: + path = get_path_for_hash(hash) + f_names += [path] + + collect_key_sizes(f_names) + chain_len[str(len(chain))] += 1 + if len(chain) == 1: + sys.stderr.write("file with chain 1 long: " + line) + total += 1 + except TypeError as e: + + sys.stderr.write("can't process: " + line) + continue + +""" Display stats """ +#print "openssl invocations: " + str(invocations["openssl"]) + +print "Statistics from " + str(total) + " chains provided by " + str(hosts) + " hosts" + +print("\nServer provided chains Count Percent") +print("-------------------------+---------+-------") +for stat in sorted(chains): + percent = round(chains[stat] / hosts * 100, 4) + sys.stdout.write(stat.ljust(25) + " " + str(chains[stat]).ljust(10) + str(percent).ljust(4) + "\n") + +print("\nTrusted chain statistics") +print("========================") + + +print("\nChain 
length Count Percent") +print("-------------------------+---------+-------") +for stat in sorted(chain_len): + percent = round(chain_len[stat] / total * 100, 4) + sys.stdout.write(stat.ljust(25) + " " + str(chain_len[stat]).ljust(10) + str(percent).ljust(4) + "\n") + +print("\nCA key size in chains Count") +print("-------------------------+---------") +for stat in sorted(keysize): + sys.stdout.write(stat.ljust(25) + " " + str(keysize[stat]).ljust(10) + "\n") + +print("\nChains with CA key Count Percent") +print("-------------------------+---------+-------") +for stat in sorted(keysize_per_chain): + percent = round(keysize_per_chain[stat] / total * 100, 4) + sys.stdout.write(stat.ljust(25) + " " + str(keysize_per_chain[stat]).ljust(10) + str(percent).ljust(4) + "\n") + +print("\nSignature algorithm (ex. root) Count") +print("------------------------------+---------") +for stat in sorted(sig_alg): + sys.stdout.write(stat.ljust(30) + " " + str(sig_alg[stat]).ljust(10) + "\n") + +print("\nEff. 
host cert chain LoS Count Percent") +print("-------------------------+---------+-------") +for stat in sorted(effective_security): + percent = round(effective_security[stat] / total * 100, 4) + sys.stdout.write(str(stat).ljust(25) + " " + str(effective_security[stat]).ljust(10) + str(percent).ljust(4) + "\n") + +print("\nRoot CAs Count Percent") +print("---------------------------------------------+---------+-------") +for stat in sorted(root_CA): + percent = round(root_CA[stat] / total * 100, 4) + sys.stdout.write(stat.ljust(45)[0:45] + " " + str(root_CA[stat]).ljust(10) + str(percent).ljust(4) + "\n") + +print("\nIntermediate CA Count Percent") +print("---------------------------------------------+---------+-------") +for stat in sorted(intermediate_CA): + percent = round(intermediate_CA[stat] / total * 100, 4) + sys.stdout.write(stat.ljust(45)[0:45] + " " + str(intermediate_CA[stat]).ljust(10) + str(percent).ljust(4) + "\n") diff --git a/top1m/process-certificate-statistics.sh b/top1m/process-certificate-statistics.sh new file mode 100755 index 0000000..b9256d8 --- /dev/null +++ b/top1m/process-certificate-statistics.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +if [ ! -d ./ca_files ]; then + echo "Directory with collected CA certificates missing!" >&2 + exit 1 +fi + +if [ ! -d ./ca_trusted ]; then + echo "Directory with just trust anchors missing!" >&2 + exit 1 +fi + +if [ ! -d ./certs ]; then + echo "Directory with certificates missing!" >&2 + exit 1 +fi + +if ! ls -f ./ca_files/????????.? > /dev/null; then + echo "CA certificates directory not hashed properly (use c_rehash)" >&2 + exit 1 +fi + +if ! ls -f ./ca_trusted/????????.? > /dev/null; then + echo "Directory with trust anchors not hashed properly (use c_rehash)" >&2 + exit 1 +fi + +if [ ! -d ./results ]; then + echo "Directory with scan results missing!" >&2 + exit 1 +fi + +if [ ! 
-x ./parse_CAs ]; then + echo "Compiling parse_CAs script" + gcc -o parse_CAs parse_CAs.c -lssl -lcrypto -ljson-c --std=gnu99 + if [ $? -ne 0 ]; then + echo "Compilation failed, aborting" >&2 + exit 1 + fi +fi + +echo "Verifying certificate chains from results files" +./parse_CAs > parsed +echo "Calculating statistics for verified certificate chains" +python parse_CAs.py > trust_scan +echo "Done!" +echo "Results are in \"trust_scan\" file"