#!/usr/bin/env python2

# This script takes a naive approach to searching for possible security syncs
# from Debian. This is useful for identifying candidates for a
# security-fake-sync when Debian has not released a DSA/DLA, and thus the
# debian2ubuntu tool does not identify the sync.

from __future__ import print_function

import cve_lib
from functools import partial
import json
from multiprocessing.dummy import Pool as ThreadPool
import optparse
import re
import subprocess
import sys
from threading import Thread

# Load the optional table of sync opportunities that should not be reported.
# should_skip() reads it as a mapping of package name -> list of objects
# carrying "ubuntu-version" and "debian-version" keys.  Loading is
# best-effort: when the file is absent or unusable nothing is skipped.
try:
    # TODO: Make this file configurable
    with open("/home/msalvatore/scratch/skip_syncs.json") as fh:
        skip_dict = json.load(fh)
except (IOError, OSError):
    # No skip file (or it is unreadable): report every opportunity.
    skip_dict = dict()
except ValueError as err:
    # json raises ValueError (or a subclass of it) on malformed input.  Say
    # so instead of silently ignoring a file the user expected to be honoured.
    sys.stderr.write("Warning: ignoring malformed skip file: %s\n" % err)
    skip_dict = dict()

if not isinstance(skip_dict, dict):
    # should_skip() indexes skip_dict by package name, so any other JSON root
    # (list, string, number, ...) cannot be used.
    sys.stderr.write("Warning: ignoring skip file: top-level JSON value is not an object\n")
    skip_dict = dict()

def get_releases():
    """Return the entries of cve_lib.all_releases that are not end-of-life.

    A new list is built instead of removing entries from
    cve_lib.all_releases in place, so the library's module-level list is
    left untouched for any other code that reads it afterwards.

    Returns:
        A list of the releases from cve_lib.all_releases, in their original
        order, excluding every entry found in cve_lib.eol_releases.
    """
    eol_releases = cve_lib.eol_releases
    return [release for release in cve_lib.all_releases
            if release not in eol_releases]

def load_cve_pkg_data():
    """Collect every package name referenced by the CVE table.

    Calls cve_lib.get_cve_list() and feeds its result to
    cve_lib.load_table(); only the first element of the returned tuple (the
    table) is used.  The table is walked as a two-level mapping -- presumably
    CVE -> package -> data; verify against cve_lib.

    Returns:
        A dict whose keys are the second-level keys found in the table, each
        mapped to True.
    """
    cves, uems = cve_lib.get_cve_list()

    # Only the table is needed; the remaining results are unpacked and ignored.
    table, _priority, _cves, _namemap, _cveinfo = cve_lib.load_table(cves, uems)

    pkgs = {}
    for cve_pkgs in table.values():
        pkgs.update(dict.fromkeys(cve_pkgs.keys(), True))

    return pkgs

def get_release_information_from_umt_search(pkg, umt_results):
    """Record the Ubuntu and Debian versions of ``pkg`` reported by `umt search`.

    Runs ``umt search <pkg>`` and parses its output.  Blank lines delimit the
    sections of the output: lines seen after the second blank line are read
    as Ubuntu "release: version, ..." pairs and lines seen after the fourth
    blank line as Debian pairs.  Lines in those sections that do not have
    that shape are ignored rather than aborting the whole run.

    Packages whose output does not mention "universe" are not recorded.

    Args:
        pkg: Source package name passed to `umt search`.
        umt_results: Dict updated in place; on success ``umt_results[pkg]``
            is set to ``(ubuntu_releases, debian_releases)``, two dicts
            mapping release name to version string.

    Raises:
        subprocess.CalledProcessError: if `umt search` exits non-zero.
        OSError: if `umt` cannot be executed.
    """
    # Use `umt search` to get Ubuntu and Debian released versions of the package
    umt_search_result = subprocess.check_output(['umt', 'search', pkg])

    if "universe" not in umt_search_result:
        return

    release_version_regex = re.compile(r"(.*?): (.*?),.*$")
    ubuntu_releases = dict()
    debian_releases = dict()
    empty_line_count = 0
    # Parse output of `umt search` line by line
    for line in umt_search_result.splitlines():
        if not line.strip():
            empty_line_count += 1
            continue

        if empty_line_count == 2:
            # Ubuntu release/version pairs
            section = ubuntu_releases
        elif empty_line_count == 4:
            # Debian release/version pairs
            section = debian_releases
        else:
            continue

        matches = release_version_regex.search(line)
        if matches is None:
            # Not a "release: version, ..." line.  Skip it instead of raising
            # AttributeError, which would propagate out of the thread pool.
            continue
        section[matches.group(1)] = matches.group(2)

    umt_results[pkg] = (ubuntu_releases, debian_releases)

def should_skip(pkg, ubuntu_version, debian_version):
    """Tell whether this exact version pairing is listed in skip_dict.

    Args:
        pkg: Package name used as the key into the module-level skip_dict.
        ubuntu_version: Ubuntu version string of the candidate sync.
        debian_version: Debian version string of the candidate sync.

    Returns:
        True when some entry for ``pkg`` has both a matching
        "ubuntu-version" and a matching "debian-version"; False otherwise.
    """
    if pkg not in skip_dict:
        return False

    return any(
        entry["ubuntu-version"] == ubuntu_version and entry["debian-version"] == debian_version
        for entry in skip_dict[pkg]
    )

# Optional "~", then "build" and a dotted number, plus anything that follows.
build_suffix_regex = re.compile(r"~?build(\d+\.?)+.*$")
# Trailing purely numeric revision such as "-1" or "-1.2"; group 1 is the number.
numeric_revision_suffix_regex = re.compile(r"-(\d+(\.\d+)?)$")
# Trailing "+deb<N>u<M>" suffix; group 1 is the whole of N, group 2 is M.
security_suffix_regex = re.compile(r"\+deb(\d+)u(\d+)$")

def _revision_key(revision):
    """Turn a dotted numeric revision ("1", "1.10") into a tuple of ints."""
    return tuple(int(part) for part in revision.split("."))

def can_sync_versions(ubuntu_version, debian_version, debian_revision_check):
    """Decide heuristically whether ``debian_version`` could be synced over ``ubuntu_version``.

    The Ubuntu version is first stripped of any build suffix.  The result is
    True when any of these holds:

    1. The Debian version is a strict extension of the stripped Ubuntu
       version (same prefix, but longer).
    2. Both versions end in "+deb<N>u<M>", N is the same on both sides and
       Debian's M is numerically greater.
    3. ``debian_revision_check`` is true, both versions end in a numeric
       "-<revision>", everything before the revision is identical, and
       Debian's revision is greater.  Revisions are compared component-wise
       as integers, so "1.10" is newer than "1.9".

    Args:
        ubuntu_version: Version string of the package in Ubuntu.
        debian_version: Version string of the package in Debian.
        debian_revision_check: Enables rule 3 when true.

    Returns:
        True if the pair looks like a sync candidate, False otherwise.
    """
    ubuntu_modified_version = build_suffix_regex.sub("", ubuntu_version)

    if debian_version.startswith(ubuntu_modified_version) and len(debian_version) > len(ubuntu_modified_version):
        return True

    debian_security_suffix = security_suffix_regex.search(debian_version)
    ubuntu_security_suffix = security_suffix_regex.search(ubuntu_modified_version)
    # Compare as integers: as strings, "10" > "9" is False.
    if (debian_security_suffix and ubuntu_security_suffix
            and int(debian_security_suffix.group(1)) == int(ubuntu_security_suffix.group(1))
            and int(debian_security_suffix.group(2)) > int(ubuntu_security_suffix.group(2))):
        return True

    if debian_revision_check:
        debian_revision_suffix = numeric_revision_suffix_regex.search(debian_version)
        ubuntu_revision_suffix = numeric_revision_suffix_regex.search(ubuntu_modified_version)
        if debian_revision_suffix and ubuntu_revision_suffix:
            # Everything before the revision must match exactly.
            same_base = (debian_version[:debian_revision_suffix.start()]
                         == ubuntu_modified_version[:ubuntu_revision_suffix.start()])
            # Integer tuples rather than floats: float("1.10") < float("1.9").
            if (same_base
                    and _revision_key(debian_revision_suffix.group(1))
                    > _revision_key(ubuntu_revision_suffix.group(1))):
                return True

    return False

def find_sync_opportunities(pkg, ubuntu_releases, debian_releases, debian_revision_check=False):
    """Print every Debian -> Ubuntu sync candidate for ``pkg``.

    Each (Debian release, Ubuntu release) pair is tested with
    can_sync_versions(); pairs listed by should_skip() are left out.  The
    package name is printed once, before the first candidate, and candidates
    whose Debian version contains a "+deb<N>u<M>" marker are prefixed with
    "***".  A ready-to-run security-fake-sync command is printed for each
    candidate.

    Args:
        pkg: Package name.
        ubuntu_releases: Dict mapping Ubuntu release name to version string.
        debian_releases: Dict mapping Debian release name to version string.
        debian_revision_check: Passed through to can_sync_versions().
    """
    header_shown = False
    for debian_release, debian_version in debian_releases.items():
        for ubuntu_release, ubuntu_version in ubuntu_releases.items():
            if not can_sync_versions(ubuntu_version, debian_version, debian_revision_check):
                continue
            if should_skip(pkg, ubuntu_version, debian_version):
                continue

            # Print the package heading only once per package.
            if not header_shown:
                print(pkg)
                header_shown = True

            # Highlight Debian versions that carry a +debNuM marker.
            if re.search(r"\+deb\d+u\d+", debian_version):
                sys.stdout.write("***")

            print("\tPossible sync for package %s. Sync %s: %s with %s: %s"
                  % (pkg, ubuntu_release, ubuntu_version, debian_release, debian_version))
            print("\t\t $UST/build-tools/security-fake-sync -v %s -r %s %s"
                  % (debian_version, ubuntu_release, pkg))
            sys.stdout.flush()

# Command-line handling: the only option turns on Debian revision comparison.
parser = optparse.OptionParser()
parser.add_option(
    "-r", "--debian-revision",
    action="store_true",
    help="Compare the debian revision to look for syncs. This produces a lot of false positives but also finds sync opportunities that would be otherwise missed.",
)
(opt, args) = parser.parse_args()

releases = get_releases()
pkgs = load_cve_pkg_data()

## Search for possible syncs
# Filled in by the worker threads: package name -> (ubuntu_releases, debian_releases).
umt_results = dict()

print("Gathering version information with `umt`")
# Each lookup spawns a `umt search` subprocess, so the lookups are spread
# over a pool of 64 threads that all write into the shared umt_results dict.
pool = ThreadPool(64)
pool.map(
    partial(get_release_information_from_umt_search, umt_results=umt_results),
    pkgs.keys(),
)
pool.close()
pool.join()
print("Finished gathering version information with `umt`")
print()

# Check each package for which there is a CVE, in sorted order.
for pkg in sorted(pkgs.keys()):
    if pkg not in umt_results:
        # No version data was recorded for this package.
        continue
    (ubuntu_releases, debian_releases) = umt_results[pkg]
    find_sync_opportunities(pkg, ubuntu_releases, debian_releases, opt.debian_revision)
