#!/usr/bin/env python2

# actev-get-video.py
# Author(s): Jon Fiscus

# This software was developed by employees of the National Institute of
# Standards and Technology (NIST), an agency of the Federal
# Government. Pursuant to title 17 United States Code Section 105, works
# of NIST employees are not subject to copyright protection in the
# United States and are considered to be in the public
# domain. Permission to freely use, copy, modify, and distribute this
# software and its documentation without fee is hereby granted, provided
# that this notice and disclaimer of warranty appears in all copies.

# THE SOFTWARE IS PROVIDED 'AS IS' WITHOUT ANY WARRANTY OF ANY KIND,
# EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT LIMITED
# TO, ANY WARRANTY THAT THE SOFTWARE WILL CONFORM TO SPECIFICATIONS, ANY
# IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE, AND FREEDOM FROM INFRINGEMENT, AND ANY WARRANTY THAT THE
# DOCUMENTATION WILL CONFORM TO THE SOFTWARE, OR ANY WARRANTY THAT THE
# SOFTWARE WILL BE ERROR FREE. IN NO EVENT SHALL NIST BE LIABLE FOR ANY
# DAMAGES, INCLUDING, BUT NOT LIMITED TO, DIRECT, INDIRECT, SPECIAL OR
# CONSEQUENTIAL DAMAGES, ARISING OUT OF, RESULTING FROM, OR IN ANY WAY
# CONNECTED WITH THIS SOFTWARE, WHETHER OR NOT BASED UPON WARRANTY,
# CONTRACT, TORT, OR OTHERWISE, WHETHER OR NOT INJURY WAS SUSTAINED BY
# PERSONS OR PROPERTY OR OTHERWISE, AND WHETHER OR NOT LOSS WAS
# SUSTAINED FROM, OR AROSE OUT OF THE RESULTS OF, OR USE OF, THE
# SOFTWARE OR SERVICES PROVIDED HEREUNDER.

# Distributions of NIST software should also include copyright and
# licensing statements of any third-party software that are legally
# bundled with the code in compliance with the conditions of those
# licenses.

import sys
import os
import errno
import argparse
import json
import jsonschema
import pandas as pd 
from operator import add
import subprocess

def err_quit(msg, exit_status=1):
    """Print an error message and terminate the program.

    Args:
        msg: Description of the error; rendered with ``"{}".format``.
        exit_status: Process exit code to terminate with (default 1).

    Raises:
        SystemExit: Always, carrying ``exit_status``.
    """
    print("[Error] {}".format(msg))
    # sys.exit is used rather than the bare ``exit`` helper: ``exit`` is
    # injected by the ``site`` module for interactive sessions and is not
    # guaranteed to exist (for example under ``python -S``).
    sys.exit(exit_status)

def warn(msg):
    """Print ``msg`` on standard output, prefixed with ``[Warning]``."""
    text = "[Warning] {}".format(msg)
    print(text)

def load_json(json_fn):
    """Parse the JSON file ``json_fn`` and return the decoded object.

    An ``IOError`` raised while opening or reading the file is reported
    through ``err_quit``, which terminates the program.
    """
    try:
        with open(json_fn, 'r') as handle:
            parsed = json.load(handle)
    except IOError as ioerr:
        err_quit("{}. Aborting!".format(ioerr))
    else:
        return parsed

def mkdir_p(path):
    """Create directory ``path`` and any missing parents (like ``mkdir -p``).

    An already existing directory is accepted silently; every other
    ``OSError`` is reported through ``err_quit``.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        # Only "it already exists, and it is a directory" is tolerated.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            err_quit("{}. Aborting!".format(exc))

def dir_exists(path):
    """Return True when ``path`` names an existing directory."""
    is_directory = os.path.isdir(path)
    return is_directory

def file_exists(path):
    """Return True when ``path`` names an existing regular file."""
    is_regular_file = os.path.isfile(path)
    return is_regular_file

def file_size(path):
    """Return the size of ``path`` in bytes, as reported by ``os.path.getsize``."""
    size_in_bytes = os.path.getsize(path)
    return size_in_bytes

def touch_file(path):
    """Create ``path`` (or refresh its timestamps) with the external ``touch`` command.

    Args:
        path: File to touch.

    If ``touch`` exits with a non-zero status the failure is reported through
    ``err_quit``, which terminates the program.
    """
    try:
        # touch produces no output worth keeping, so nothing is captured.
        subprocess.check_call(['touch', path])
    except subprocess.CalledProcessError:
        # Previously this called the undefined name ``error_quit`` and died
        # with a NameError instead of the intended error message.
        err_quit("Touch of File " + path + " failed")

def delete_file(path):
    """Remove ``path`` if it exists; otherwise emit a warning.

    Args:
        path: File to remove.  ``os.remove`` is used, so errors it raises for
            an existing path (for example when ``path`` is a directory)
            propagate to the caller.
    """
    if os.path.exists(path):
        os.remove(path)
    else:
        warn("Delete of " + path + " requested but it does not exist")


class DataSet(object):
    """One video set described by ``<ads_root>/video/<name>/MANIFEST``.

    The manifest is read as CSV.  The columns this class reads are
    ``ClipID``, ``RelativeClipPath``, ``ClipByteSize``, ``URL_Type``, ``URL``,
    ``user`` and ``password``.  For a valid set, ``review_manifest`` adds the
    bookkeeping columns ``file_present``, ``local_file`` and
    ``local_file_byte_size``.

    A set is "valid" only when its MANIFEST file exists; for an invalid set
    ``manifest`` stays ``None`` and only ``ads_root``, ``name`` and
    ``transfer_tool`` are meaningful.
    """

    # Class-level defaults; a valid instance overrides them in __init__.
    ads_root = ""
    name = ""
    vid_dir = ""
    vid_set_dir = ""
    manifest_file = ""
    manifest = None
    valid = False
    transfer_tool = None

    # Divisor for the GB figures in dump_info.  A float on purpose: with an
    # integer divisor Python 2 floors the result, so the value always ended
    # in ".00".
    _BYTES_PER_GB = 1e9

    def __init__(self, ads_root, name, transfer_tool):
        """Load and review the manifest of video set ``name``, if there is one.

        Args:
            ads_root: Root directory; video sets live in ``<ads_root>/video``.
            name: Name of the video set (a sub-directory of the video dir).
            transfer_tool: ``'wget'`` or ``'curl'``; used by ``load_video``.
        """
        self.ads_root = ads_root
        self.name = name
        self.transfer_tool = transfer_tool
        self.valid = False

        manifest_file = DataSet.get_video_manifest(ads_root, name)
        if not os.path.isfile(manifest_file):
            return

        self.vid_dir = DataSet.get_video_dir(ads_root)
        self.vid_set_dir = self.vid_dir + "/" + name
        self.manifest_file = manifest_file
        # Everything is read as text; only the byte size is made numeric.
        self.manifest = pd.read_csv(self.manifest_file, dtype=object)
        # int64 explicitly: plain ``int`` is platform-dependent in numpy.
        self.manifest['ClipByteSize'] = self.manifest.ClipByteSize.astype('int64')
        self.valid = True
        self.review_manifest()

    def review_manifest(self):
        """Compare every manifest entry with the local disk and record the result.

        A local file whose size differs from ``ClipByteSize`` is deleted,
        together with its ``.failed`` marker if one exists, and is then
        reported as not present.  Three columns are (re)written on
        ``self.manifest``:

        * ``file_present`` - True when the file exists with the expected size.
        * ``local_file`` - the local path of the clip.
        * ``local_file_byte_size`` - the on-disk size, or None when absent.
        """
        file_present = []
        local_file = []
        local_file_byte_size = []
        for _, row in self.manifest.iterrows():
            lfile = "{}/{}/{}".format(self.vid_set_dir, str(row['RelativeClipPath']), str(row['ClipID']))
            expected = row['ClipByteSize']

            size = file_size(lfile) if file_exists(lfile) else None
            if size is not None and size != expected:
                # A wrongly sized file is useless: remove it so it is fetched again.
                warn("Deleting file {}.  It is present but the size is {} but should be {}".format(lfile, size, expected))
                delete_file(lfile)
                if file_exists(lfile + ".failed"):
                    delete_file(lfile + ".failed")
                size = None

            file_present.append(size is not None)
            local_file.append(lfile)
            local_file_byte_size.append(size)
        self.manifest['file_present'] = file_present
        self.manifest['local_file'] = local_file
        self.manifest['local_file_byte_size'] = local_file_byte_size

    def _transfer_command(self, row):
        """Return the argument list that downloads the clip of manifest ``row``.

        Calls ``err_quit`` when ``self.transfer_tool`` is neither ``'wget'``
        nor ``'curl'``.
        """
        user = "{}".format(row['user'])
        password = "{}".format(row['password'])
        url = "{}".format(row['URL'])
        clip_dir = "{}/{}".format(self.vid_set_dir, row['RelativeClipPath'])
        if self.transfer_tool == 'wget':
            return ['wget', '--user', user, '--password', password, '-P', clip_dir, url]
        if self.transfer_tool == 'curl':
            clip_path = "{}/{}".format(clip_dir, row['ClipID'])
            return ['curl', '--user', user + ":" + password, url, '--output', clip_path]
        err_quit("Internal Error")

    def load_video(self, dryrun):
        """Download every missing clip whose ``URL_Type`` is ``'file'``.

        Args:
            dryrun: When true, only print the commands; nothing is created
                or downloaded.

        A clip with a ``<local_file>.failed`` marker is skipped with a
        warning.  When the transfer command fails (or cannot be started) a
        warning is printed and the ``.failed`` marker is created.

        The command is executed as an argument list without a shell, so
        URLs, paths and credentials containing shell metacharacters
        (``&``, spaces, quotes) are passed through verbatim.  The echoed
        command line is the space-joined argument list.
        """
        for _, row in self.manifest.iterrows():
            if row['URL_Type'] != 'file' or row['file_present']:
                continue

            local_file = row['local_file']
            if file_exists(local_file + ".failed"):
                warn("Delete " + local_file + ".failed to attempt re-download")
                continue

            command = self._transfer_command(row)
            print(" ".join(command))
            if dryrun:
                continue

            mkdir_p("{}/{}".format(self.vid_set_dir, row['RelativeClipPath']))
            try:
                subprocess.check_output(command)
            except (subprocess.CalledProcessError, OSError):
                # OSError covers a transfer tool that is not installed; with
                # a shell that case surfaced as a non-zero exit status.
                warn("File " + local_file + " exception")
                touch_file(local_file + ".failed")
            else:
                # The tool reported success; make sure the file really arrived.
                if not file_exists(local_file):
                    warn("File " + local_file + " not downloaded")

    @staticmethod
    def get_video_dir(root):
        """Return the directory below ``root`` that holds all video sets."""
        return root + "/video"

    @staticmethod
    def get_video_manifest(root, video_set):
        """Return the path of the MANIFEST file of ``video_set`` below ``root``."""
        return DataSet.get_video_dir(root) + "/" + video_set + "/" + "MANIFEST"

    def is_valid(self):
        """Return True when a manifest file was found for this set."""
        return self.valid

    def dump_info(self):
        """Print a summary of the set: paths, clip counts and download status."""
        print("DataSet Info:")
        print("   Name: {}".format(self.name))
        print("   isValid: {}".format(self.valid))
        if not self.valid:
            return

        manifest = self.manifest
        url_type = manifest.URL_Type
        print("   VidDir: {}".format(self.vid_dir))
        print("   Manifest_file: {}".format(self.manifest_file))
        print("      Num Videos: {}".format(len(manifest.ClipID)))
        print("          Num Videos with file Download: {}".
              format(len(url_type[url_type == 'file'])))
        print("          Num Videos with archive Download: {}".
              format(len(url_type[url_type == 'archive'])))
        print("      Download Status:")

        present = manifest.file_present.astype(bool)
        for label, mask in (("Videos downloaded", present),
                            ("Videos NOT downloaded", ~present)):
            num_bytes = manifest.ClipByteSize[mask].sum()
            print("          {}: {} files, {} bytes, {:.2f} GB".
                  format(label, len(url_type[mask]), num_bytes,
                         num_bytes / self._BYTES_PER_GB))
            



def survey_datasets(root, select_sets, select_partitions, transfer_tool):
    """Build a ``DataSet`` for each entry of the video directory below ``root``.

    Args:
        root: Root directory; its video directory must exist, otherwise the
            program is terminated through ``err_quit``.
        select_sets: Names to restrict the survey to; an empty sequence
            selects every entry.
        select_partitions: Accepted but not used by this function.
        transfer_tool: Passed through to every ``DataSet``.

    Returns:
        The list of valid data sets.  A warning is printed for each selected
        entry that is not valid.
    """
    video_dir = DataSet.get_video_dir(root)
    if not dir_exists(video_dir):
        err_quit("video dir {} does not exist".format(video_dir))

    found = []
    for entry in os.listdir(video_dir):
        # Skip entries excluded by a non-empty selection list.
        if len(select_sets) != 0 and entry not in select_sets:
            continue
        candidate = DataSet(root, entry, transfer_tool)
        if not candidate.is_valid():
            warn("Directory {} in {} is not valid".format(entry, video_dir))
            continue
        found.append(candidate)
    return found

if __name__ == '__main__':
    # Command-line entry point: survey the video sets below the root
    # directory, then apply the requested operation to every valid set.
    parser = argparse.ArgumentParser(description="Video Data Downloader for ActEV Evaluations")

    # ``choices`` makes a mistyped operation an error instead of a silent no-op.
    parser.add_argument("--operation", type=str, nargs='?', help='The operation to perform',
                        choices=['summary', 'download', 'download-dryrun', 'manifest'])
    parser.add_argument("--transfer_tool", type=str, nargs='?', help='The transfer utility to use', choices=['curl', 'wget'], default="curl")
    # No ``nargs`` here: ``nargs=1`` stored a one-element list, which broke
    # the string concatenation that builds the video directory path whenever
    # --root_dir was given explicitly.  The default is the parent of the
    # directory that contains this script.
    parser.add_argument("--root_dir", type=str,
                        help='The root directory; video sets are expected in <root_dir>/video',
                        default=os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    parser.add_argument("--sets", type=str, nargs="*", help='Limit the datasets to the list',
                        default=[])
    parser.add_argument("--partitions", type=str, nargs="*", help='Limit the downloaded datasets to files in a partition',
                        default=[])

    args = parser.parse_args()

    sets = survey_datasets(args.root_dir, args.sets, args.partitions, args.transfer_tool)
    # ``dataset`` rather than ``set`` so the builtin is not shadowed.
    for dataset in sets:
        if args.operation == "summary":
            dataset.dump_info()
        elif args.operation == "download":
            dataset.load_video(False)
        elif args.operation == "download-dryrun":
            dataset.load_video(True)
        elif args.operation == "manifest":
            print(dataset.manifest.head())
