#!/usr/bin/env python2

# Generate an overview over fixed and outstanding CVE issues on a mirror with
# packages and changelogs.
#
# Author: Martin Pitt <martin.pitt@ubuntu.com>
# (C) 2005 Canonical. Ltd.
#
# This script is distributed under the terms and conditions of the GNU General
# Public License, Version 2 or later. See http://www.gnu.org/copyleft/gpl.html
# for details.

import os, os.path, re, datetime, sys, gzip, pickle, urllib, cache_urllib, copy
import apt_pkg
import xml.sax, xml.sax.handler, xml.sax.xmlreader

# Locations of the local archive mirror and changelog tree that are scanned,
# and the public base URL used for changelog links in the generated pages.
rookery_archive_root = "/srv/archive.ubuntu.com/ubuntu"
rookery_changelog_base = "/srv/changelogs.ubuntu.com/www/changelogs/"
changelog_url_base = "http://changelogs.ubuntu.com/changelogs/"

# settings for automatic CVE database evaluation
cvexml_url = "http://cve.mitre.org/cve/downloads/allitems.xml"

# A CVE whose description contains one of these strings (and does not mention
# "Linux") is left out of the "unchecked" page.
ignore_strings = [
    "** REJECT **",
    "** RESERVED **",
    "Internet Explorer",
    "Windows 98",
    "Windows 2000",
    "Windows XP",
    "Windows Server 2003",
    "Windows NT",
    "Mercury Board",
    "ZeroBoard",
    "AntiVirus",
    "Microsoft",
    "SGI IRIX",
    "FreeBSD",
    "IBM AIX",
    "SCO",
    "OS X",
    "Mac OS",
    "Cisco",
    "ActiveX",
]

# releases which will not be shown on the unfixed page (they are also skipped
# when the archive is scanned)
obsolete_releases = ['warty', 'hoary', 'breezy']
LTS_releases = ['dapper']
oldest_nonLTS_release = 'edgy'
# set to non-zero to get a warning on stderr for every fix that only exists
# in the oldest non-LTS release
warn_about_soon_to_be_orphaned_cves = 0

###################

def merge_list(list, list2, intersection = None):
    """Write the union of list and list2 into list, in place.

    list: list that receives every element of list2 it does not contain yet
    list2: elements to merge in; it is not modified
    intersection: if not None, every element of list2 that is already in
        list at the time it is visited is appended to this list

    Elements are compared with "in", so they do not need to be hashable.
    Nothing is returned.
    """

    for e in list2:
        if e not in list:
            list.append(e)
        elif intersection is not None:
            # identity test: an empty intersection list must still be filled
            intersection.append(e)

###################

def subtract_list(list, list2):
    """Remove all elements from list which occur in list2.

    list is modified in place (the same list object is kept); every
    occurrence of a matching element is removed, not just the first one.
    list2 is not modified. Elements are compared with "in", so they do not
    need to be hashable. Nothing is returned.
    """

    # slice assignment keeps the caller's list object alive
    list[:] = [e for e in list if e not in list2]

###################

def _read_source_records(srcfile):
    """Return the (package, version, directory) triples of a Sources.gz file.

    Records are separated by blank lines. The process is terminated with exit
    code 1 if a record lacks one of the three fields or repeats one of them.
    """

    records = []
    currpkg = None
    currversion = None
    currdir = None

    sources = gzip.open(srcfile)
    try:
        for line in sources:
            line = line.strip()
            if not line:
                if (not currpkg) or (not currversion) or (not currdir):
                    print >> sys.stderr, "Error: end of package record without all data available"
                    sys.exit(1)
                records.append((currpkg, currversion, currdir))
                currpkg = None
                currversion = None
                currdir = None
                continue

            attr = line.split(":", 1)

            # continuation lines and other text without a "Field:" prefix
            if len(attr) < 2:
                continue

            if attr[0] == "Package":
                if currpkg:
                    print >> sys.stderr, "Error: read two Packages: lines in a row"
                    sys.exit(1)
                currpkg = attr[1].strip()

            elif attr[0] == "Version":
                if currversion:
                    print >> sys.stderr, "Error: read two Version: lines in a row"
                    sys.exit(1)
                currversion = attr[1].strip()
                # remove epochs
                colpos = currversion.find(':')
                if colpos >= 0:
                    currversion = currversion[colpos+1:]

            elif attr[0] == "Directory":
                if currdir:
                    print >> sys.stderr, "Error: read two Directory: lines in a row"
                    sys.exit(1)
                currdir = attr[1].strip()
    finally:
        sources.close()

    # A file may end without a trailing blank line; finish the record that is
    # still open instead of dropping it or letting it leak into the next file.
    if currpkg or currversion or currdir:
        if (not currpkg) or (not currversion) or (not currdir):
            print >> sys.stderr, "Error: end of package record without all data available"
            sys.exit(1)
        records.append((currpkg, currversion, currdir))

    return records

def get_archive_map(rootdir, cachefile = None):
    """Generate a archive map from a Debian-style package archive directory.

    rootdir: path to archive root path
    cachefile: path to a cache file (created if not existing)
    return: mapping: package -> release -> (component, version, path)

    If cachefile can be opened, its unpickled content is returned and rootdir
    is not scanned at all. Otherwise every <rootdir>/dists/<release>/<component>
    /source/Sources.gz is read, skipping releases whose base name (the part
    before the first "-") is in obsolete_releases. Epochs are stripped from
    the versions. Malformed package records terminate the process with exit
    code 1.
    """

    # read cache if it exists
    # NOTE: unpickling executes whatever the file contains; the cache file
    # must only be writable by trusted users.
    if cachefile:
        try:
            cache = open(cachefile, "rb")
            try:
                return pickle.load(cache)
            finally:
                cache.close()
        except IOError:
            pass

    archive = {}
    distsdir = rootdir + "/dists"

    for release in os.listdir(distsdir):
        if release.split('-')[0] in obsolete_releases:
            continue
        for comp in os.listdir(distsdir + "/" + release):
            compdir = distsdir + "/" + release + "/" + comp
            if not os.path.isdir(compdir):
                continue
            srcfile = compdir + "/source/Sources.gz"
            if not os.path.exists(srcfile):
                print >> sys.stderr, 'ERROR: Sources file %s does not exist, ignoring' % srcfile
                continue

            for (pkg, version, directory) in _read_source_records(srcfile):
                archive.setdefault(pkg, {})[release] = (comp, version, directory)

    # write cache; failing to write it is not fatal
    if cachefile:
        try:
            cache = open(cachefile, "wb")
            try:
                pickle.dump(archive, cache)
            finally:
                cache.close()
        except IOError:
            pass

    return archive

###################

# CVE identifiers, including the historic "CAN" candidate prefix. The sequence
# number has at least four digits but may have more (e.g. CVE-2014-123456);
# matching exactly four would silently turn such an ID into a different CVE.
_CVE_ID_RE = re.compile(r"((?:CAN|can|CVE|cve)-\d{4}-\d{4,})")

def parse_CVEs(url):
    """Return a list of all CVE numbers mentioned in the given URL.

    url: location handed to cache_urllib.urlopen
    return: list of identifiers in order of appearance, upper-cased and with
        a leading "CAN" replaced by "CVE"; an identifier that is mentioned
        several times occurs several times

    If the URL cannot be opened, a message is printed to stderr and an empty
    list is returned.
    """

    try:
        text = cache_urllib.urlopen(url).read()
    except IOError:
        print >> sys.stderr, "Could not open", url
        return []

    return [m.group().upper().replace('CAN', 'CVE', 1)
        for m in _CVE_ID_RE.finditer(text)]

###################

def parse_changelog_version(url):
    """Return the version of the first changelog header line found at url.

    A header line has the form "package (version) distribution; urgency=...".
    The string "n/a" is returned if no line matches or if the URL cannot be
    opened; in the latter case a message is printed to stderr as well.
    """

    pattern = re.compile("([a-z0-9-+\.]+)\s+\((.*)\)\s+([a-z-]+)\s*.*;\s*urgency=")

    try:
        stream = cache_urllib.urlopen(url)
        line = stream.readline()
        # an empty result from readline() means end of data
        while line:
            match = pattern.search(line)
            if match:
                return match.group(2)
            line = stream.readline()
    except IOError:
        print >> sys.stderr, "Could not open", url

    return "n/a"

###################

def get_changelog_cve_map(rootdir, archive_map, cachefile = None):
    """Generate a CVE map from a Debian-style changelog archive directory.

    rootdir: path to changelog archive root path
    archive_map: archive map (as generated by get_archive_map)
    cachefile: path to a cache file (created if not existing)
    return: mapping: CVE number -> package -> [release]

    If cachefile can be opened, its unpickled content is returned and rootdir
    is not scanned. Otherwise every directory below rootdir that is named
    <package>_<version> and contains a file "changelog" is considered; its
    CVE numbers are recorded for all releases whose archive version equals
    <version>. The current working directory is changed to rootdir during
    the scan and restored afterwards, even if the scan fails.
    """

    # read cache if it exists
    # NOTE: unpickling executes whatever the file contains; the cache file
    # must only be writable by trusted users.
    if cachefile:
        try:
            cache = open(cachefile, "rb")
            try:
                return pickle.load(cache)
            finally:
                cache.close()
        except IOError:
            pass

    cve_map = {}

    # The walk uses paths relative to rootdir, so these relative paths are
    # what parse_CVEs gets to see.
    orig_dir = os.getcwd()
    os.chdir(rootdir)
    try:
        for root, dirs, files in os.walk("."):
            if "changelog" not in files:
                continue

            # skip directories which are not named <package>_<version>
            parts = os.path.basename(root).split("_")
            if len(parts) != 2:
                continue
            (pkg, version) = parts

            try:
                pkgmap = archive_map[pkg]
            except KeyError:
                continue

            rel = [maprel for maprel, (mapcomp, mapver, mappath)
                in pkgmap.iteritems() if mapver == version]
            if not rel:
                continue

            for cve in parse_CVEs(root + "/changelog"):
                merge_list(cve_map.setdefault(cve, {}).setdefault(pkg, []), rel)
    finally:
        os.chdir(orig_dir)

    # write cache; failing to write it is not fatal
    if cachefile:
        try:
            cache = open(cachefile, "wb")
            try:
                pickle.dump(cve_map, cache)
            finally:
                cache.close()
        except IOError:
            pass

    return cve_map

###################

def get_manual_cve_map(file, archive_map):
    """Read a CVE map from a manual override file.

    file: path to file
    archive_map: archive map (as generated by get_archive_map)
    return: mapping: CVE number -> package -> [release]

    Each line has the form "<CVE> <package> [<release> ...]"; without
    releases, all releases of the package known to archive_map are assumed.
    Blank lines and lines whose first non-blank character is "#" are ignored.
    Lines without a package field and lines naming a package that is not in
    archive_map are reported on stderr and skipped.
    """
    cve_map = {}
    override = open(file)
    try:
        for line in override:
            fields = line.split()

            # allow blank lines and comments
            if not fields or fields[0].startswith('#'):
                continue

            if len(fields) < 2:
                print >> sys.stderr, '%s: ignoring malformed line "%s"' % (file, line.strip())
                continue

            cve = fields.pop(0)
            pkg = fields.pop(0)

            # make sure the pkg is known to the system
            if pkg not in archive_map:
                print >> sys.stderr, '%s: package "%s" not in archive' % (file,pkg)
                continue

            # if no releases are specified, assume all releases
            if len(fields) == 0:
                fields = archive_map[pkg].keys()
            merge_list(cve_map.setdefault(cve, {}).setdefault(pkg, []), fields)
    finally:
        override.close()

    return cve_map

###################

def merge_cve_map(cve_map, cve_map2, intersection = None):
    """Merge cve_map2 into cve_map. If intersection is not None, the
    intersection of cve_map and cve_map2 will be written into this.

    cve_map: mapping CVE number -> package -> [release]; modified in place
    cve_map2: mapping of the same shape; it is not modified
    intersection: optional dict which receives, per CVE number and package,
        the releases of cve_map2 that cve_map already contained
    """

    for cve, pkgmap in cve_map2.iteritems():
        for pkg, rels in pkgmap.iteritems():
            relintersect = []
            merge_list(cve_map.setdefault(cve, {}).setdefault(pkg, []), rels, relintersect)
            # identity test: an empty intersection dict must still be filled
            if intersection is not None and relintersect:
                intersection.setdefault(cve, {})[pkg] = relintersect

###################

def subtract_cve_map(cve_map, cve_map2):
    """Remove from cve_map all releases that are listed in cve_map2.

    Both maps have the shape CVE number -> package -> [release]. cve_map is
    modified in place; packages left without releases and CVE numbers left
    without packages are deleted from it. cve_map2 is not modified.
    """

    for cve, pkgmap in cve_map2.iteritems():
        for pkg, rels in pkgmap.iteritems():
            # nothing to subtract from
            if cve not in cve_map or pkg not in cve_map[cve]:
                continue

            remaining = cve_map[cve]
            subtract_list(remaining[pkg], rels)
            if not remaining[pkg]:
                del remaining[pkg]
            if not remaining:
                del cve_map[cve]

###################

def get_unfixed_map(archive_map, cve_map, nonvuln_map):
    """Generate a mapping of unfixed issues.

    archive_map: archive map (as generated by get_archive_map)
    cve_map: mapping CVE number -> package -> [release] of fixed issues
    nonvuln_map: mapping of the same shape with the releases in which a
        package is not vulnerable
    return: component -> release -> CVE -> [package]

    Only CVE/package pairs which occur in cve_map or nonvuln_map are
    examined. A release of such a package is reported if neither the release
    nor its "-security" variant is listed as fixed or nonvulnerable.
    "-security" releases themselves are never reported. cve_map and
    nonvuln_map are not modified.
    """

    # work on a copy so that the caller's cve_map stays untouched
    fixed_cves = copy.deepcopy(cve_map)
    merge_cve_map(fixed_cves, nonvuln_map)

    unfixed = {}
    for cve, pkgs in fixed_cves.iteritems():
        for pkg, fixedreleases in pkgs.iteritems():
            pkgmap = archive_map[pkg]
            for release in pkgmap.iterkeys():
                if release.find("-security") > 0:
                    continue

                if cve in nonvuln_map and pkg in nonvuln_map[cve] \
                    and release in nonvuln_map[cve][pkg]:
                    continue

                # ignore -updates versions which are superseded by
                # -security
                if release.endswith('-updates'):
                    sec_release = release.split('-')[0] + '-security'
                    # version '0' stands in for a missing -security entry
                    sec_version = pkgmap.get(sec_release, (None, '0'))[1]
                    if apt_pkg.version_compare (sec_version, pkgmap[release][1]) > 0:
                        #print >> sys.stderr, '%s: CVE %s is unfixed in %s (%s), but superseded by %s (%s)' % (
                        #    pkg, cve, release, pkgmap[release][1], sec_release, sec_version)
                        continue

                if release not in fixedreleases and (release+"-security") not in fixedreleases:
                    unfixed.setdefault(pkgmap[release][0], {}).setdefault(release, {}). \
                        setdefault(cve, []).append(pkg)

    return unfixed

###################

class CVEHandler(xml.sax.handler.ContentHandler):
    """SAX handler for processing mitre's CVE database XML.

    For every <item> element one HTML table row (CVE number linked to
    cve.mitre.org, and description) is printed to the output file, unless
    the item is filtered out by handle_cve().
    """

    def __init__(self, ignore, output):
        """ignore: collection of CVE numbers which must not be printed
        output: file object which receives the table rows"""

        xml.sax.handler.ContentHandler.__init__(self)
        self.curr_cve = None
        self.curr_desc = ""
        self.curr_chars = ""
        self.ignore = ignore
        self.output = output

    def startElement(self, name, attrs):
        if name == "item":
            self.curr_cve = attrs['name']
            # do not carry the previous item's description over to an item
            # which has no <desc> element
            self.curr_desc = ""
        if name == "desc":
            self.curr_chars = ""

    def characters(self, content):
        # text may arrive in several chunks; collect it until the element ends
        self.curr_chars += content

    def endElement(self, name):
        if name == "desc":
            # non-ASCII characters are replaced by "?"
            self.curr_desc = self.curr_chars.encode("ascii", "replace")

        if name == "item":
            self.handle_cve()

    def handle_cve(self):
        """Print the table row for the current item, unless it is filtered.

        Filtered are: CVE numbers in the ignore collection, numbers from
        years before 2005, and items whose description contains one of
        ignore_strings without mentioning "Linux".
        """
        from xml.sax.saxutils import escape

        if self.curr_cve in self.ignore:
            return

        parts = self.curr_cve.split("-")
        if int(parts[1]) < 2005:
            return

        if self.curr_desc.find("Linux") < 0:
            for s in ignore_strings:
                if self.curr_desc.find(s) >= 0:
                    return

        # The database is external input and descriptions regularly contain
        # markup such as "<script>"; escape everything that goes into HTML.
        name = escape(self.curr_cve, {'"': "&quot;"})
        print >> self.output, '<tr><td><a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=%s">%s</a></td><td>%s</td></tr>' % (
            name, name, escape(self.curr_desc))

###################

def begin_page(fname, title):
    """Create the file fname and write the standard page header to it.

    The header consists of the opening html/body tags, the navigation links
    to the four generated pages and the headings; title is inserted into the
    <title> element and the <h2> heading as it is.

    return: file handle, open for writing"""

    page = open(fname, "w")

    header_lines = [
        '<html>',
        '    <head>',
        '      <title>Ubuntu CVE status - %s</title>',
        '    </head>',
        '',
        '    <body>',
        # the trailing blank of the next line is part of the original output
        '      <p><a href="fixed.html">[Fixed CVEs]</a> ',
        '      <a href="unfixed.html">[Unfixed CVEs]</a>',
        '      <a href="unchecked.html">[Unchecked CVEs]</a>',
        '      <a href="nonvuln.html">[CVEs that do not apply to Ubuntu]</a></p>',
        '',
        '      <h1>Ubuntu CVE status</h1>',
        '      <h2>%s</h2>',
        '      <hr />',
        '    ',
    ]
    header = "\n".join(header_lines) % (title, title)
    page.write(header + "\n")

    return page

###################

def end_page(out):
    """Write the standard page footer (generator link, current local time,
    closing body/html tags) to the file object out and close it."""

    generated_by = ('<p>Automatically generated by <a '
        '        href="http://people.ubuntu.com/~pitti/bzr/ubuntu-cve">ubuntu-cve</a> on')
    timestamp = datetime.datetime.now().isoformat(' ')
    closing_tags = "</p>\n</body></html>"

    out.write(" ".join([generated_by, timestamp, closing_tags]) + "\n")
    out.close()

###################
## main
###################

apt_pkg.init_system()

mypath = os.path.abspath(os.path.dirname(sys.argv[0]))

archive_map = get_archive_map(rookery_archive_root, mypath+"/archive_map.cache")

cache_urllib.set_cache(mypath+"/changelogs.cache")

if len(sys.argv) < 2:
    print >> sys.stderr, "Usage:", sys.argv[0], "<destination directory>"
    sys.exit(1)

destdir = sys.argv[1]

#for pkg, map in archive_map.iteritems():
    #print pkg, ":", " / ".join(["%s->%s,%s" % (r,c,v) for r,(c,v,p) in map.iteritems()])
#sys.exit(0)

CVE = get_changelog_cve_map(rookery_changelog_base, archive_map, mypath+"/can_map.cache")
fixed_manual = get_manual_cve_map(mypath + "/cve-fixed.txt", archive_map)
nonvulns = get_manual_cve_map(mypath + "/cve-nonvulns.txt", archive_map)
redundantFixed = {}
merge_cve_map(CVE, fixed_manual, redundantFixed)

if redundantFixed:
    print "CVE numbers in cve-fixed.txt which are mentioned in the changelogs:"
    for cve, pkgmap in redundantFixed.iteritems():
        print cve, ":", pkgmap

subtract_cve_map(CVE, nonvulns)

# Fixed issues: fixed.html gets one row per CVE with the packages and releases
# that carry the fix; mappings.txt records the archive data used for them.

fixedout = begin_page(destdir + "/fixed.html", "Fixed issues")

print >> fixedout, """<table border="1">
    <tr><th>CVE number</th> <th>Source package (Fixed versions)</th></tr>"""

# Keep a record of the cve mappings
mappings = open(destdir + "/mappings.txt", "w")

# newest CVE numbers first
cvekeys = sorted(CVE.keys(), reverse=True)
for cve in cvekeys:
    pkgs = CVE[cve]
    print >> fixedout, '<tr><td><a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=%s">%s</a></td>' % (cve, cve)
    print >> fixedout, '<td><table>'

    for pkg, releases in pkgs.iteritems():
        pkgmap = archive_map[pkg]
        print >> mappings, cve, pkg, pkgmap

        # Build the changelog links; a release which the archive does not
        # know makes the whole package row invalid.
        links = []
        missing_release = None
        for r in releases:
            if r not in pkgmap:
                missing_release = r
                break
            (rel_comp, rel_version, rel_path) = pkgmap[r]
            links.append('<a href="%s%s/%s_%s/changelog">%s</a>/%s' % (
                changelog_url_base, rel_path, pkg, rel_version, r, rel_comp))

        if missing_release is None:
            print >> fixedout, "<tr><td>%s</td><td>(%s)</td></tr>" % (
                pkg, ", ".join(links))
        else:
            print >> sys.stderr, "fixed: %s/%s does not exist" % (pkg, missing_release)

        # Also, find packages that are fixed only in the oldest non-LTS
        # release, so that they can be added to the "fixed in previous
        # releases" list when it goes obsolete, in case a fix vanishes
        # from a changelog, or was only mentioned in the local cve-*txt files.
        if warn_about_soon_to_be_orphaned_cves != 0:
            fixed_releases = [r for r in releases
                if r.split('-')[0] not in LTS_releases]
            if len(fixed_releases)==1 and oldest_nonLTS_release == fixed_releases[0].split('-')[0]:
                print >> sys.stderr, "%s %s %s: fix will expire when distro goes obsolete" % (cve, pkg, fixed_releases[0])

    print >> fixedout, '</table></td></tr>'

print >> fixedout, "  </table>"

mappings.close()

end_page(fixedout)

# Unfixed issues
CVEIgnoreList = parse_CVEs(mypath + "/cve-ignore.txt")

unfixedout = begin_page(destdir + "/unfixed.html", "Unfixed issues")

# component -> release -> CVE -> [package], derived from the fixed map
unfixed = get_unfixed_map(archive_map, CVE, nonvulns)

#merge manual unfixed list
unfixed_manual = get_manual_cve_map(mypath + "/cve-vulns.txt", archive_map)

for cve, pkg_rel_map in unfixed_manual.iteritems():
    for pkg, rels in pkg_rel_map.iteritems():
        fixed_rels = CVE.get(cve,{}).get(pkg,[])
        for rel in rels:
            # -security releases are never listed as unfixed
            if rel.find("-security") > 0:
                continue

            if rel in fixed_rels or (rel + "-security") in fixed_rels:
                print >> sys.stderr, "%s: fixed in %s/%s, but occurs in cve-vulns.txt" % (cve, pkg, rel)
                continue

            try:
                comp = archive_map[pkg][rel][0]
            except KeyError:
                print >> sys.stderr, "%s: %s/%s does not exist, but occurs in cve-vulns.txt" % (cve, pkg, rel)
                continue

            # get_unfixed_map may have recorded this package already; a
            # second entry would only produce a duplicate row on the page
            unfixed_pkgs = unfixed.setdefault(comp, {}).setdefault(rel, {}).setdefault(cve, [])
            if pkg not in unfixed_pkgs:
                unfixed_pkgs.append(pkg)

# Render unfixed.html: one section per component (in sorted order), and within
# it one table per release with a row for every unfixed CVE.
components = unfixed.keys()
components.sort()

for comp in components:
    print >> unfixedout, "<h2>Unfixed issues in the", comp, "component</h2>"
    for rel in unfixed[comp].iterkeys():
        # releases whose base name is obsolete are not shown
        if rel.split('-')[0] in obsolete_releases:
            continue
        print >> unfixedout, """<table border="1">
        <tr><th colspan="2">Release: %s</th></tr>
        <tr><th>CVE number</th> <th>Source package</th></tr>
        """ % rel

        # CVE numbers in descending string order
        cvemap = unfixed[comp][rel]
        cvekeys = cvemap.keys()
        cvekeys.sort()
        cvekeys.reverse()
        for cve in cvekeys:
            # CVE numbers listed in cve-ignore.txt are not shown
            if cve in CVEIgnoreList:
                continue
            print >> unfixedout, '<tr><td><a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=%s">%s</a></td>' % (cve, cve)

            print >> unfixedout, "<td><table>"

            for pkg in cvemap[cve]:
                pkgmap = archive_map[pkg]
                # sanity check only: a mismatch is reported, the row is
                # printed nevertheless
                if pkgmap[rel][0] != comp:
                    print >> sys.stderr, "Inconsistency for package", pkg, "-", \
                        pkgmap[rel][0], "in archive map,", comp, "in unfixed map"
                # Look the package up in the Debian changelog: "fixed" if the
                # CVE is mentioned there, "n/a" if no version could be
                # parsed from it, "vulnerable" otherwise.
                debian_chlog_url = "http://changelogs.debian.net/" + pkg
                debianver = parse_changelog_version(debian_chlog_url)
                debiancves = parse_CVEs(debian_chlog_url)
                #print >> sys.stderr, pkg, "Debian: ", debianver, debiancves
                #debianver = 0
                #debiancves = []
                if cve in debiancves:
                    debian_status = "fixed"
                elif debianver == 'n/a':
                    debian_status = "n/a"
                else:
                    debian_status = "vulnerable"
                # Show the version from the release's -security entry if
                # there is one; the directory part of the link below is
                # always taken from the entry of the release itself.
                if pkgmap.has_key(rel+'-security'):
                    version = pkgmap[rel+'-security'][1]
                else:
                    version = pkgmap[rel][1]
                print >> unfixedout, '<tr><td>%s [Ubuntu: <a href="%s%s/%s_%s/changelog">%s</a>] [Debian: <a href="%s">%s</a>, %s]</td></tr>' % (
                    pkg, 
                    changelog_url_base, pkgmap[rel][2], pkg, version, version,
                    debian_chlog_url, debianver, debian_status
                )

            print >> unfixedout, "</table></td></tr>"

        print >> unfixedout, "</table><p>&nbsp;</p>"

end_page(unfixedout)

# Vulnerabilities that do not apply to Ubuntu: nonvuln.html gets one row per
# CVE from cve-nonvulns.txt with the packages and releases listed there.

nonvulnout = begin_page(destdir + "/nonvuln.html", "Vulnerabilities that do not apply to Ubuntu")

print >> nonvulnout, """
  <table border="1">
    <tr><th>CVE number</th> <th>Source package (Nonvulnerable releases)</th></tr>
"""

# newest CVE numbers first
cvekeys = sorted(nonvulns.keys(), reverse=True)
for cve in cvekeys:
    pkgs = nonvulns[cve]
    print >> nonvulnout, '<tr><td><a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=%s">%s</a></td>' % (cve, cve)
    print >> nonvulnout, '<td><table>'

    for pkg, releases in pkgs.iteritems():
        pkgmap = archive_map[pkg]

        # Build the changelog links; a release which the archive does not
        # know makes the whole package row invalid.
        links = []
        missing_release = None
        for r in releases:
            if r not in pkgmap:
                missing_release = r
                break
            (rel_comp, rel_version, rel_path) = pkgmap[r]
            links.append('<a href="%s%s/%s_%s/changelog">%s</a>/%s' % (
                changelog_url_base, rel_path, pkg, rel_version, r, rel_comp))

        if missing_release is None:
            print >> nonvulnout, "<tr><td>%s</td><td>(%s)</td></tr>" % (
                pkg, ", ".join(links))
        else:
            print >> sys.stderr, "nonvulns: %s/%s does not exist" % (pkg, missing_release)
    print >> nonvulnout, '</table></td></tr>'

print >> nonvulnout, "  </table>"

end_page(nonvulnout)

# CVE database review: unchecked.html lists every entry of the CVE database
# that none of the other lists has dealt with yet.

uncheckedout = begin_page(destdir + "/unchecked.html", "Unchecked CVEs from the CVE database")

# Everything that is fixed, nonvulnerable or manually marked as vulnerable
# counts as checked. The last source skips additional CVEs that are fixed,
# but in prior releases and missing from current changelogs.
already_checked = (
    CVE.keys(),
    nonvulns.keys(),
    unfixed_manual.keys(),
    parse_CVEs(mypath + "/cve-outdated.txt"),
)
for checked_cves in already_checked:
    merge_list(CVEIgnoreList, checked_cves)

print >> uncheckedout, '\n  <table border="1">\n'

# the handler prints one table row per remaining CVE while the database is
# being parsed
parser = xml.sax.make_parser()
unchecked_handler = CVEHandler(CVEIgnoreList, uncheckedout)
parser.setContentHandler(unchecked_handler)
cve_database = urllib.urlopen(cvexml_url)
parser.parse(cve_database)

print >> uncheckedout, "  </table>"

end_page(uncheckedout)
