#!/usr/bin/env python2

# This script finds the top 100 packages in universe by popularity

import cve_lib
import json
import operator
import optparse
import source_map
import sys
import time

# Unit conversion factors.
NANOSECONDS_PER_SECOND = 10 ** 9
SECONDS_PER_DAY = 24 * 60 * 60
SECONDS_PER_YEAR = 31556926  # roughly 365.2422 days

# How far back (in seconds) download data is taken into account: one year.
POPULARITY_WINDOW = 1 * SECONDS_PER_YEAR

# Reference "current time" in seconds since the epoch, captured once at
# start-up so every row is compared against the same instant.
NOW = time.time()

def get_supported_releases():
    """Return the releases that have not reached end of life.

    The result contains every entry of cve_lib.all_releases that is not
    listed in cve_lib.eol_releases, in the original order.

    A new list is returned; cve_lib.all_releases itself is left untouched,
    so other users of cve_lib still see the complete release list.
    """
    eol_releases = cve_lib.eol_releases
    return [
        release
        for release in cve_lib.all_releases
        if release not in eol_releases
    ]

def which_source(pkg):
    """Map a package name to its (source package, section) pair.

    The module-level `releases` are searched in order and the first release
    whose `srcmap` entry contains `pkg` decides the answer. If no release
    knows the package, ('unknown', 'unknown') is returned.
    """
    for release in releases:
        packages = srcmap[release]
        if pkg not in packages:
            continue

        entry = packages[pkg]
        try:
            source_name = entry['source']
        except KeyError:
            # The package is listed for this release but has no source
            # recorded, so most likely the source name is the same as the
            # package name.
            source_name = pkg
        return (source_name, entry['section'])

    # Not present in any source map, most likely because the package is
    # only packaged for an EOL'd release such as precise.
    return ('unknown', 'unknown')

def load_package_popularity(popfile):
    """Compute average daily downloads per universe source package.

    popfile is the path of a JSON file. Its download rows are read from
    results[0].series[0].values, and each row is indexed as
    [timestamp in nanoseconds, package name, download count].

    Every row is attributed to a source package through which_source().
    Only rows whose section is "universe" and which are not older than
    POPULARITY_WINDOW seconds (measured from NOW) are counted.

    Returns a dict mapping source package name to its summed downloads
    divided by the number of days in POPULARITY_WINDOW.
    """
    with open(popfile) as json_data:
        document = json.load(json_data)

    rows = document["results"][0]["series"][0]["values"]
    totals = dict()
    for row in rows:
        timestamp = row[0] / NANOSECONDS_PER_SECOND
        (source, section) = which_source(row[1])
        downloads = row[2]

        # Keep only universe packages with data inside the popularity window.
        if section == "universe" and not ((NOW - timestamp) > POPULARITY_WINDOW):
            if source not in totals:
                totals[source] = downloads
            else:
                totals[source] += downloads

    # Turn the per-source totals into average downloads per day.
    days_in_window = POPULARITY_WINDOW / SECONDS_PER_DAY
    for source in list(totals):
        totals[source] = totals[source] / days_in_window

    return totals

def print_top_100(popularity):
    """Print up to 100 of the most popular packages, most popular first.

    popularity maps a package name to its popularity score. Each entry is
    printed on its own line as a (name, score) tuple. When the mapping holds
    fewer than 100 entries, all of them are printed.
    """
    ranked = sorted(popularity.items(), key=operator.itemgetter(1), reverse=True)
    # Slice instead of indexing 0..99 so a short list does not raise IndexError.
    for entry in ranked[:100]:
        print(entry)

# Module-level state: which_source() reads `srcmap` and `releases` as
# globals, so both must be assigned before load_package_popularity() runs.
srcmap = source_map.load('packages')
releases = get_supported_releases()

# The popularity data is read from this file name, which is resolved
# relative to the current working directory.
popularity_file = "package-popularity.json"
popularity = load_package_popularity(popularity_file)

# Report the highest-ranked packages on standard output.
print_top_100(popularity)
