#!/bin/bash
# Copyright (C) 2008-2018 Canonical, Ltd.
# Authors: Kees Cook <kees@ubuntu.com>
#          Jamie Strandboge <jamie@ubuntu.com>
# License: GPLv3
#
# This script pulls down all the supported architectures (those that would
# appear in a sis-generate-usn run) Releases and Packages files.
#
# TODO: perform GPG tests (we're only using this info for component
# matching)
# Abort the script as soon as a command fails outside of a conditional.
set -e

# Be a polite background job: lower the CPU priority of this shell (the
# confirmation line renice prints is discarded) ...
renice 10 -p $$ >/dev/null
# ... and put it in the best-effort I/O class (2) at its lowest level (7).
ionice -c 2 -n 7 -p $$

# Print the command-line usage summary to stdout.
# The here-document delimiter is quoted so the text is emitted literally.
help() {
    cat <<'EOM'
Usage: packages_mirror [OPTIONS]

  -h               show this help and exit
  -t               use timestamps (ie, don't update files if they have been
                   updated within the last day)
  -f               when using timestamps, force updating the files
  -v               verbose output
  -V               even more verbose output
  -r RELEASE       only update Ubuntu release
  -A               pull only release pocket for the devel release
  -p               pull only partner
  -u               pull only ubuntu archive
EOM
}

# Print the codename of the development release as recorded in the
# cve_lib.py that sits next to this script.
#
# Outputs: the text between the first pair of single quotes on each line that
#          starts with "devel_release ="; nothing if there is no such line or
#          the file cannot be read (grep's own error still goes to stderr)
# Returns: always 0
find_devel_release() {
    local cvelib
    # Quote $0 so a script path containing spaces still resolves.
    cvelib="$(dirname "$0")/cve_lib.py"
    grep '^devel_release =' "$cvelib" | cut -d "'" -f 2 || true
}

# Defaults for the command-line switches (see help for their meaning).
use_timestamp="no"
force_timestamp="no"
verbosity_args="-q"      # handed to rsync/wget decisions; empty means verbose
only_release=
devel_quick_pockets=
very_verbose=""
only_partner="no"
only_ubuntu="no"

while getopts "AhftvVpur:" opt
do
    case "$opt" in
        f) force_timestamp="yes";;
        t) use_timestamp="yes";;
        v) verbosity_args="";;
        V) verbosity_args=""
           very_verbose="yes";;
        A) devel_quick_pockets="yes";;
        r) only_release="$OPTARG";;
        p) only_partner="yes";;
        u) only_ubuntu="yes";;
        h) help ; exit 0;;
        # Unknown option or missing argument (getopts reports it as "?"):
        # show usage on stderr and stop instead of running with the rest.
        *) help >&2 ; exit 1;;
    esac
done
shift $((OPTIND - 1))

# rsync hosts. Alternative mirrors for the main archive:
#   se.archive.ubuntu.com, us.archive.ubuntu.com
# The ports host can be overridden through the environment.
server="archive.ubuntu.com"
ports="${ports:-ports.ubuntu.com}"

# Per-user settings; must define the three *_mirror variables checked below.
source "$HOME/.ubuntu-cve-tracker.conf"

for var in packages_mirror debian_mirror partner_mirror; do
  if [ -n "${!var}" ]; then
    continue
  fi
  echo "'$var' not defined in ~/.ubuntu-cve-tracker.conf" >&2
  exit 1
done

# Local mirror roots; create them if they do not exist yet.
outPath="$packages_mirror"
debianPath="$debian_mirror"
partnerPath="$partner_mirror"
mkdir -p "$outPath"
mkdir -p "$debianPath"
mkdir -p "$partnerPath"

# Comma-separated directory list for wget -X when mirroring the partner
# archive: one "dists/<release>*/" entry per release in cve_lib.eol_releases,
# minus the releases named in the Python snippet. cve_lib is imported from the
# directory this script lives in.
# NOTE: the snippet is inside single quotes, so it must not contain any.
partner_dir_excludes=$(PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$(dirname "$0")" python3 -c '#
import cve_lib, sys
# Never listed: releases that http://archive.canonical.com/dists/ does not
# have any more, and the ESM releases (those must not be excluded).
not_listed = ["dapper", "breezy", "warty", "hoary", "trusty", "xenial"]
sys.stdout.write(",".join("dists/%s*/" % r for r in cve_lib.eol_releases if r not in not_listed))
')

# Print the compression suffix to request for a release's index files.
#
# Arguments: $1 - release or pocket name, e.g. "xenial" or "trusty-updates";
#                 only the part before the first "-" is considered
# Outputs:   "bz2" for trusty and vivid, "gz" for everything else
function compressed_ext()
{
    local rel="$1"

    case "${rel%%-*}" in
        # Prior releases without any .xz Packages files
        trusty|vivid)
            echo "bz2"
            ;;
        # Use .gz in newer releases for now because Xenial and Yakkety only
        # have .xz enabled for some pockets and the list of pockets seems to
        # be changing
        *)
            echo "gz"
            ;;
    esac
}

# Print the archive-relative paths of the binary index files for one
# pocket/component/architecture, one per line.
#
# Arguments: $1 - release or pocket (e.g. "jammy-updates")
#            $2 - component (e.g. "main")
#            $3 - architecture (e.g. "amd64")
# Outputs:   the Release file, then Packages for the archive and for
#            debian-installer in the suffix chosen by compressed_ext, then
#            both again as .gz (so .gz appears twice when that is the suffix)
function spew_bin_lines()
{
    # Locals keep the caller's loop variables of the same names untouched.
    local rel="$1"
    local repo="$2"
    local arch="$3"
    local ext
    ext=$(compressed_ext "$rel")
    local base="dists/$rel/$repo"

    printf '%s\n' \
        "$base/binary-$arch/Release" \
        "$base/binary-$arch/Packages.$ext" \
        "$base/debian-installer/binary-$arch/Packages.$ext" \
        "$base/binary-$arch/Packages.gz" \
        "$base/debian-installer/binary-$arch/Packages.gz"
}

# Print the archive-relative paths of the source index files for one
# pocket/component, one per line.
#
# Arguments: $1 - release or pocket (e.g. "jammy-updates")
#            $2 - component (e.g. "main")
# Outputs:   the source Release file, Sources in the suffix chosen by
#            compressed_ext, and Sources.gz
#
# This function takes no architecture, so it lists no binary-<arch> path;
# those are produced by spew_bin_lines.
function spew_src_lines()
{
    local rel="$1"
    local repo="$2"
    local ext
    ext=$(compressed_ext "$rel")
    local base="dists/$rel/$repo/source"

    printf '%s\n' \
        "$base/Release" \
        "$base/Sources.$ext" \
        "$base/Sources.gz"
}

# Print the manifest of index files to fetch for one Ubuntu release: for each
# pocket and component, the binary indices of every architecture followed by
# the source indices.
#
# Globals:   only_release (read; when set, every other release is skipped),
#            devel_quick_pockets (read; "yes" limits the devel release to its
#            release pocket)
# Arguments: $1 - release codename
#            $2 - space-separated list of architectures
# Outputs:   manifest lines on stdout; a "Skipping" note on stderr when the
#            release is filtered out by -r
# Returns:   0
function gen_packages()
{
    local release="$1"
    local arches="$2"
    local release_list devel_release rel repo arch

    if [ -n "$only_release" ] && [ "$release" != "$only_release" ]; then
        echo "Skipping '$release' (specified '-r $only_release')" >&2
        return 0
    fi

    release_list="$release $release-updates $release-security $release-proposed $release-backports"
    # Only look the devel release up when -A actually asks for it.
    if [ "$devel_quick_pockets" = "yes" ]; then
        devel_release=$(find_devel_release)
        if [ -n "$devel_release" ] && [ "$devel_release" = "$release" ]; then
            release_list="$release"
        fi
    fi

    # $release_list and $arches are space-separated lists; splitting them on
    # whitespace is intended.
    for rel in $release_list
    do
        for repo in main restricted universe multiverse
        do
            for arch in $arches
            do
                spew_bin_lines "$rel" "$repo" "$arch"
            done
            spew_src_lines "$rel" "$repo"
        done
    done
}

# Fetch every file named in a manifest from an rsync source into $outPath,
# then unpack each .bz2/.xz file next to its compressed copy.
#
# Globals:   verbosity_args (read; anything other than "-q" means verbose),
#            outPath (read; destination directory)
# Arguments: $1 - manifest: one path per line, relative to the rsync source
#            $2 - rsync source URL
# Outputs:   verbose mode prints the manifest and rsync's progress; every mode
#            prints the rsync error lines of the last attempt, except those
#            mentioning debian-installer and the "code 23" summary line
# Returns:   1 when rsync still fails after three attempts or reports missing
#            files (exit 23) that are not debian-installer ones; otherwise the
#            status of the unpack loop
# Note:      errexit (set -e) is switched on again before returning.
function pull_packages()
{
    local filelist="$1"
    local url="$2"
    local num_tries=3
    local count rc log errors filename src dest unpack

    if [ "$verbosity_args" != "-q" ]; then
        echo "Fetching:"
        cat "$filelist"
    fi

    # A single log serves all attempts; each attempt truncates it, so only the
    # output of the last one is inspected afterwards.
    log=$(mktemp -t rsync-XXXXXX) || return 1

    rc=1
    # rsync failures are evaluated by hand, so errexit must not fire here.
    set +e
    for ((count = 1; count <= num_tries; count++)); do
        if [ "$verbosity_args" != "-q" ]; then
            if [ "$count" -gt 1 ]; then
                echo "Try: $count" >&2
            fi
            # Progress goes straight to stdout (via fd 3); only rsync's stderr
            # is piped through tee into the log, so that the error filter
            # below sees the same kind of content as in quiet mode.
            # PIPESTATUS[0] is rsync's own status; plain $? would be tee's.
            {
                rsync -rlptv --progress --files-from="$filelist" "$url" "$outPath/" 2>&1 >&3 \
                    | tee "$log"
                rc=${PIPESTATUS[0]}
            } 3>&1
        else
            rsync -rlptq --files-from="$filelist" "$url" "$outPath/" >"$log" 2>&1
            rc=$?
        fi

        # keep trying if not successful (23 is missing files)
        if [ "$rc" -eq 0 ] || [ "$rc" -eq 23 ]; then
            break
        fi
    done
    set -e

    errors=$(grep -Ev '(debian-installer|^rsync.*code 23)' "$log" || true)
    rm -f "$log" || true

    # Report errors, if any
    if [ -n "$errors" ]; then
        echo "$errors"
    fi

    # Some unknown error -- die
    if [ "$rc" -ne 0 ] && [ "$rc" -ne 23 ]; then
        return 1
    fi
    # Missing files (rc 23), die only if it's not a debian-installer path
    if [ "$rc" -eq 23 ] && [ -n "$errors" ]; then
        return 1
    fi

    # Create uncompressed versions too
    while read -r filename
    do
        case "$filename" in
            *.bz2) unpack=bzcat ;;
            *.xz)  unpack=xzcat ;;
            *)     continue ;;
        esac
        src="$outPath/$filename"
        dest="${src%.*}"

        # Skip missing files (debian-installer)
        if [ ! -r "$src" ]; then
            continue
        fi
        # Unpack only when the plain copy is absent or older than the download.
        if [ ! -f "$dest" ] || [ "$src" -nt "$dest" ]; then
            "$unpack" "$src" > "$dest"
        fi
    done < "$filelist"
}

# do_use_timestamp() decides whether a mirror has to be refreshed.
#
# Globals:   use_timestamp, force_timestamp (read)
# Arguments: $1 - timestamp file
#            $2 - optional age threshold handed to "find -mtime +N"
#                 (default 0)
# Returns:   0 (refresh) if timestamps are not in use (no -t) or are
#            overridden (-f), if the file doesn't exist, or if find reports it
#            as older than the threshold; otherwise 1, with a "Skipping" note
#            on stderr.
function do_use_timestamp() {
    local stamp="$1"
    local mtime="${2:-0}"
    local days

    if [ "$use_timestamp" = "no" ] || [ "$force_timestamp" = "yes" ]; then
        return 0
    fi
    if [ ! -e "$stamp" ]; then
        return 0
    fi
    # find prints the path only when the file is old enough.
    if [ -n "$(find "$stamp" -mtime +"$mtime")" ]; then
        return 0
    fi

    days=$((mtime + 1))
    echo "'$stamp' exists and was modified within the last $days day(s). Skipping." >&2
    return 1
}

# Mirror the Ubuntu archive and ports indices over rsync (skipped with -p).
if [ "$only_partner" = "no" ]; then
    timestamp="${outPath}.timestamp"
    if [ ! -e "${outPath}/dists" ] || do_use_timestamp "$timestamp" ; then
        pull=$(mktemp -t packages-XXXXXX)
        # Remove the manifest on every exit path. The path is expanded when
        # the trap runs, inside quotes. Signals must exit explicitly: a
        # handler that only cleans up would let the script carry on.
        trap 'rm -f "$pull"' EXIT
        trap 'exit 1' HUP INT QUIT TERM

        # Sync Packages for non-ports supported arches
        if [ "$verbosity_args" != "-q" ]; then
            echo "Generating arch lists"
        fi
        gen_packages trusty   "amd64 i386"               >> "$pull"
        gen_packages xenial   "amd64 i386"               >> "$pull"
        gen_packages bionic   "amd64 i386"               >> "$pull"
        gen_packages focal    "amd64 i386"               >> "$pull"
        gen_packages impish   "amd64 i386"               >> "$pull"
        gen_packages jammy    "amd64 i386"               >> "$pull"
        gen_packages kinetic  "amd64 i386"               >> "$pull"

        # NOTE(review): a failed pull is only reported; the run continues and
        # the timestamp below is still refreshed.
        pull_packages "$pull" "rsync://$server/ubuntu" || echo "FAIL: supported architectures" >&2
        # Empty the manifest before reusing it for ports.
        : > "$pull"

        # Sync Packages for ports arches
        if [ "$verbosity_args" != "-q" ]; then
            echo "Generating ports arch lists"
        fi
        gen_packages trusty   "powerpc ppc64el armhf arm64"       >> "$pull"
        gen_packages xenial   "powerpc ppc64el armhf arm64 s390x" >> "$pull"
        gen_packages bionic   "ppc64el armhf arm64 s390x"         >> "$pull"
        gen_packages focal    "ppc64el armhf arm64 s390x riscv64" >> "$pull"
        gen_packages impish   "ppc64el armhf arm64 s390x riscv64" >> "$pull"
        gen_packages jammy    "ppc64el armhf arm64 s390x riscv64" >> "$pull"
        gen_packages kinetic  "ppc64el armhf arm64 s390x riscv64" >> "$pull"

        pull_packages "$pull" "rsync://$ports/ubuntu-ports" || echo "FAIL: ports architectures" >&2
        : > "$pull"

        # Record the sync time only when -t is in effect; otherwise make sure
        # no stale timestamp is left behind.
        if [ "$use_timestamp" = "yes" ]; then
            touch "$timestamp"
        else
            rm -f "$timestamp"
        fi
    fi
fi

#
# The wget trees: Debian testing sources and the Canonical partner archive
# (skipped with -u)
#
if [ "$only_ubuntu" = "no" ]; then
    # wget is kept quiet unless -V was given, even with plain -v.
    if [ -z "$very_verbose" ]; then
        verbosity_args="-q"
    fi

    # TODO: add Debian support for -r
    if [ -z "$only_release" ] && [ "$only_partner" = "no" ]; then
        # Sync Sources from Debian testing
        timestamp="${debianPath}.timestamp"
        if [ ! -e "${debianPath}/dists" ] || do_use_timestamp "$timestamp" 6 ; then
            cd "$debianPath"
            for i in main contrib non-free
            do
                # $verbosity_args stays unquoted on purpose: when empty it
                # must vanish instead of becoming an empty argument.
                # shellcheck disable=SC2086
                wget $verbosity_args -N -R '*=*' -R 'Contents*' -X '/debian/dists/testing/*/source/Sources.diff,/debian/dists/testing/*/source/by-hash' -np -r "http://ftp.debian.org/debian/dists/testing/$i/source/"
            done
            # wget -r stores files under a directory named after the host;
            # expose them as ./dists
            ln -sf ftp.debian.org/debian/dists dists
            # remove dangling symlink
            rm -f ftp.debian.org/debian/dists/dists || true

            if [ "$use_timestamp" = "yes" ]; then
                touch "$timestamp"
            else
                rm -f "$timestamp"
            fi
        fi
    fi

    if [ -z "$only_release" ] || [ "$only_partner" = "yes" ]; then
        # Sync Canonical Partner Repo
        timestamp="${partnerPath}.timestamp"
        if [ ! -e "${partnerPath}/dists" ] || do_use_timestamp "$timestamp" 6 ; then
            cd "$partnerPath"
            misc_partner_excludes="robots.txt,misc/,project/,icons/,ls-lR.gz"
            # shellcheck disable=SC2086 -- see the note on $verbosity_args above
            wget $verbosity_args -N -R '*=*' -R 'Contents*' -X "${misc_partner_excludes},${partner_dir_excludes}" -np -r http://archive.canonical.com/dists/

            ln -sf archive.canonical.com/dists dists
            # remove dangling symlink
            rm -f archive.canonical.com/dists/dists || true

            # remove dists.[0-9] files
            rm -f archive.canonical.com/dists.[0-9]*

            if [ "$use_timestamp" = "yes" ]; then
                touch "$timestamp"
            else
                rm -f "$timestamp"
            fi
            # find -name takes a glob, not a regex: match the exact file name
            # "Sources" (an uncompressed index left over from earlier runs).
            has_sources=$(find archive.canonical.com/dists -name 'Sources')
            if [ -n "$has_sources" ]; then
                echo ""
                echo "WARNING: $partnerPath has Sources files! These should be removed now that -partner uses Sources.gz"
            fi
        fi
    fi
fi

# generate source package lists for umt grep: for each mirror, collect the
# "Package: " lines of every Sources.gz, drop that 9-character prefix, and
# store the sorted unique names in <mirror>/sources
for path in "$outPath" "$debianPath" "$partnerPath"; do
  find "$path" -name 'Sources.gz' -exec zgrep '^Package: .*' {} \; | cut -c10- | sort -u > "$path/sources"
done
