#! /usr/bin/python
# Massage Canonical mirror list into the Mirrors.masterlist format.

import sys
import re

# Maps a regular expression for the country name used in the upstream mirror
# list to the "<ISO code> <name>" form written to the output.  reformat()
# tries the patterns in sorted order with re.match(), i.e. anchored at the
# start of the name, and the first hit wins.
# Keep this up to date! Yes, it's an ugly hack.
country_map = dict([
    ('Australia', 'AU Australia'),
    ('Austria', 'AT Austria'),
    ('Belgium', 'BE Belgium'),
    ('Brazil', 'BR Brazil'),
    ('Canada', 'CA Canada'),
    ('China', 'CN China'),
    ('.*Germany', 'DE Germany'),
    ('Denmark', 'DK Denmark'),
    ('Spain', 'ES Spain'),
    ('France', 'FR France'),
    ('UK', 'GB Great Britain'),
    ('Greece', 'GR Greece'),
    ('Croatia', 'HR Croatia'),
    ('Hungary', 'HU Hungary'),
    ('Ireland', 'IE Ireland'),
    ('Iceland', 'IS Iceland'),
    ('Italy', 'IT Italy'),
    ('Korea', 'KR Korea'),
    ('Lithuania', 'LT Lithuania'),
    ('Malaysia', 'MY Malaysia'),
    ('Namibia', 'NA Namibia'),
    ('Holland|Netherlands', 'NL Netherlands'),
    ('New Zealand', 'NZ New Zealand'),
    ('Norway', 'NO Norway'),
    ('Paraguay', 'PY Paraguay'),
    ('Romania', 'RO Romania'),
    ('Russia', 'RU Russia'),
    ('Sweden', 'SE Sweden'),
    ('Taiwan', 'TW Taiwan'),
    ('Thailand', 'TH Thailand'),
    ('.*US.*|United States', 'US United States'),
])

# Remainder of a section header of the form "(Country name) maintainer".
# The maintainer part is optional.
_PAREN_HEADER_RE = re.compile(r'\(([^)]*)\)\s*(.*)')

# (protocol, URL pattern) pairs.  Each pattern captures the host name and the
# path; the patterns are mutually exclusive because each is anchored on its
# own URL scheme.
_MIRROR_URL_RES = (
    ('http', re.compile(r'^http://([^/]+)(.+)$')),
    ('ftp', re.compile(r'^ftp://([^/]+)(.+)$')),
    ('rsync', re.compile(r'^rsync://([^/]+)(?:/|::)(.+)$')),
)

# (output label, per-site dictionary key) pairs, in the order the fields are
# written for every site.
_OUTPUT_FIELDS = (
    ('Archive-http', 'archive-http'),
    ('Archive-ftp', 'archive-ftp'),
    ('Archive-rsync', 'archive-rsync'),
    ('CDImage-http', 'cdimage-http'),
    ('CDImage-ftp', 'cdimage-ftp'),
    ('CDImage-rsync', 'cdimage-rsync'),
    ('Country', 'country'),
    ('Maintainer', 'maintainer'),
)


def _parse_header(line):
    """Return (country, maintainer) for a section header line.

    The first whitespace-separated token (the bracketed part) is dropped.
    What remains is either "(Country name) maintainer" or
    "Country maintainer".  The country is translated through country_map
    when one of its patterns matches.  Anything that is missing, empty or
    unparseable comes back as None, so a malformed header can neither crash
    the run nor leak the previous section's values into this one.
    """
    rest = ' '.join(line.split()[1:])
    country = None
    maintainer = None
    if rest.startswith('('):
        match = _PAREN_HEADER_RE.match(rest)
        if match is not None:
            country, maintainer = match.groups()
    elif rest:
        fields = rest.split()
        country = fields[0]
        maintainer = ' '.join(fields[1:])

    if country:
        # Patterns are tried in sorted order so the result is deterministic.
        for pattern in sorted(country_map):
            if re.match(pattern, country):
                country = country_map[pattern]
                break
    return (country or None, maintainer or None)


def _split_mirror_url(url):
    """Return (site, protocol, path) for a mirror URL, or None.

    The path always ends with a slash.  None is returned when the URL is not
    an http, ftp or rsync URL of the expected shape.
    """
    for protocol, pattern in _MIRROR_URL_RES:
        match = pattern.match(url)
        if match is not None:
            site, path = match.group(1, 2)
            if not path.endswith('/'):
                path += '/'
            return (site, protocol, path)
    return None


def _parse_mirrorlist(mirrorlist):
    """Collect the mirrors in mirrorlist into a dictionary keyed by site."""
    country = None
    maintainer = None
    sites = {}

    for line in mirrorlist:
        line = line.strip()
        if not line or line.startswith('#'):
            continue

        if line.startswith('['):
            # New section: its country/maintainer apply to the mirror lines
            # that follow, until the next header.
            country, maintainer = _parse_header(line)
            continue

        if '[a]' in line:
            linetype = 'archive'
        elif '[r]' in line:
            linetype = 'cdimage'
        else:
            continue

        # The URL is the last token on a mirror line.
        mirror = _split_mirror_url(line.split()[-1])
        if mirror is None:
            continue
        site, protocol, path = mirror

        info = sites.setdefault(site, {})
        info['%s-%s' % (linetype, protocol)] = path
        # The most recent section mentioning a site wins.
        info['country'] = country
        info['maintainer'] = maintainer

    return sites


def _print_sites(sites):
    """Write one stanza per site, sorted by site name, to standard output."""
    for site in sorted(sites):
        info = sites[site]
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print("Site: %s" % site)
        for label, key in _OUTPUT_FIELDS:
            value = info.get(key)
            if value is not None:
                print("%s: %s" % (label, value))
        print("")


def reformat(mirrorlist):
    """Convert a mirror list to Mirrors.masterlist stanzas on stdout.

    mirrorlist is any iterable of text lines (an open file, a list, ...).
    Blank lines and lines starting with '#' are ignored.  A line starting
    with '[' opens a section and supplies the country and maintainer for the
    mirrors below it.  A line containing '[a]' (archive) or '[r]' (CD image)
    describes a mirror whose URL is the last token on the line.  Only
    http://, ftp:// and rsync:// URLs are recognised; other lines are
    skipped.

    For every site, in sorted order, a stanza is printed with the known
    "Archive-*" / "CDImage-*" paths, followed by "Country" and "Maintainer"
    when available, and terminated by an empty line.  Nothing is returned.
    """
    _print_sites(_parse_mirrorlist(mirrorlist))

if __name__ == '__main__':
    # Usage: <script> MIRRORLIST
    # The converted list is written to standard output.  Without an argument
    # the script does nothing.
    if len(sys.argv) > 1:
        # Use a context manager so the input file is closed even if
        # reformat() raises.
        with open(sys.argv[1]) as mirrorlist:
            reformat(mirrorlist)
