import sys
import dateutil
from dateutil import parser
import datetime
import argparse
import json
import re
from pyzotero import zotero
from difflib import SequenceMatcher

# Module-wide verbosity flag. Parsing helpers in this file (e.g. parseDate and
# AuthorParser) print WARNING lines only when it is true; the command-line
# entry point at the bottom of the file sets it from the -v/--verbose option.
global_verbose = False

# Matches month ranges such as 'Sept.-Oct./2005'; group 2 is the year.
_MONTH_RANGE_DATE_RE = re.compile(r'(\w+\.-\w+\.)/(\d+)')


def parseDate(s):
    """Parse a free-form date string into a datetime plus display strings.

    The string is parsed twice with two different ``default`` datetimes.
    dateutil takes any field missing from the input from ``default``, so a
    field (year or month) is considered present only when both parses agree.

    Args:
        s: date string, e.g. '2005', 'March 2005' or 'Sept.-Oct./2005'. For
            the month-range form only the year part is parsed.

    Returns:
        A tuple ``(d, yearString, monthString)``:
        ``d`` is a ``datetime.datetime`` on day 1 of the detected month and
        year (1 is used for a field that was not detected), ``yearString``
        is the detected year as text or '', and ``monthString`` is the
        detected month formatted with ``strftime('%b.')`` or ''.

    Raises:
        Whatever ``dateutil.parser.parse`` raises for input it cannot parse.
    """
    testString = s
    # deal with dates like this Sept.-Oct./2005
    m = _MONTH_RANGE_DATE_RE.search(s)
    if m:
        if global_verbose:
            print('WARNING: Working around unsupported date format: {}'.format(s))
        testString = m.group(2)

    # Two parses with disjoint defaults: fields that differ came from the
    # default, not from the input.
    d1 = dateutil.parser.parse(testString, default=datetime.datetime(1, 1, 1, 1, 1))
    d2 = dateutil.parser.parse(testString, default=datetime.datetime(2, 2, 2, 2, 2))

    year = month = 1
    yearString = monthString = ''

    if d1.year == d2.year:
        year = d1.year
        yearString = str(year)

    if d1.month == d2.month:
        month = d1.month
        monthString = d1.strftime('%b.')

    d = datetime.datetime(year, month, 1, 1, 1)

    return d, yearString, monthString

class EntryValueParser:
    """Base class for pulling one displayable value out of a Zotero item.

    Subclasses set ``keyName`` to the key of ``item['data']`` they read and
    ``parserName`` to the label shown when that value is reported as missing.
    """
    parserName = None
    keyName = None

    def getKeyValue(self, item):
        """Return ``item['data'][keyName]``, or '' when that key is absent."""
        data = item['data']
        return data.get(self.keyName, '')

    def parse(self, item):
        """Return the display text for ``item``; here, the raw field value."""
        return self.getKeyValue(item)

    def parseToFormat(self, item, format_string):
        """Return ``format_string.format(value)``, or '' for a falsy value."""
        value = self.parse(item)
        if not value:
            return ''
        return format_string.format(value)

class AuthorParser(EntryValueParser):
    """Formats an item's creators of type 'author' as one author string."""
    parserName = keyName = 'author'

    def parse(self, item, kind='text'):
        """Return the formatted author list for ``item``.

        Creators with both first and last name are rendered as 'F. Last'.
        Otherwise the first non-empty of ``name``, ``lastName`` and
        ``firstName`` is used, so an author entered with a single name part
        is not dropped from the citation.

        Args:
            item: Zotero item dict; creators are read from
                ``item['data']['creators']`` (a missing key means none).
            kind: 'html' italicises 'et al'; any other value gives plain text.

        Returns:
            '' for no authors, 'A' for one, 'A and B' for two,
            'A, B, and C' for three to six, and 'A et al.' for more than six.
        """
        authorList = []
        for creator in item['data'].get('creators', []):
            if creator.get('creatorType') != 'author':
                continue

            firstName = creator.get('firstName', '')
            lastName = creator.get('lastName', '')
            if firstName and lastName:
                authorList.append('{}. {}'.format(firstName[0], lastName))
                continue

            # Single-field name, or only one of first/last was entered.
            fallbackName = creator.get('name') or lastName or firstName
            if global_verbose:
                print('WARNING: did not find author first and last: {}, {}'.format(
                    fallbackName or 'NO_NAME', item['key']))
            if fallbackName:
                authorList.append(fallbackName)

        count = len(authorList)
        if count == 0:
            return ''
        if count == 1:
            return '{}'.format(authorList[0])
        if count == 2:
            return '{} and {}'.format(authorList[0], authorList[1])
        if count > 6:
            if kind == 'html':
                return '{} <i>et al</i>.'.format(authorList[0])
            return '{} et al.'.format(authorList[0])
        return '{}, and {}'.format(', '.join(authorList[:-1]), authorList[-1])

class TitleParser(EntryValueParser):
    """Reads the item's 'title' field unchanged."""
    keyName = 'title'
    parserName = keyName

class VolumeParser(EntryValueParser):
    """Renders the item's 'volume' field with a 'vol. ' prefix."""
    keyName = 'volume'
    parserName = keyName

    def parse(self, item):
        """Return 'vol. <volume>', or '' when the item has no volume."""
        volume = self.getKeyValue(item)
        if not volume:
            return ''
        return 'vol. {}'.format(volume)

class NumberParser(EntryValueParser):
    """Renders the item's 'issue' field with a 'no. ' prefix."""
    parserName = 'number/issue'
    keyName = 'issue'

    def parse(self, item):
        """Return 'no. <issue>', or '' when the item has no issue."""
        issue = self.getKeyValue(item)
        if not issue:
            return ''
        return 'no. {}'.format(issue)

class PagesParser(EntryValueParser):
    """Renders the item's 'pages' field with a 'pp. ' prefix."""
    keyName = 'pages'
    parserName = keyName

    def parse(self, item):
        """Return 'pp. <pages>', or '' when the item has no pages."""
        pages = self.getKeyValue(item)
        if not pages:
            return ''
        return 'pp. {}'.format(pages)

class PublicationTitleParser(EntryValueParser):
    """Reads the item's 'publicationTitle' field unchanged."""
    keyName = 'publicationTitle'
    parserName = keyName

class PlaceParser(EntryValueParser):
    """Reads the item's 'place' field unchanged."""
    keyName = 'place'
    parserName = keyName

class SchoolParser(EntryValueParser):
    """Reads the item's 'university' field unchanged."""
    keyName = 'university'
    parserName = keyName

class InstitutionParser(EntryValueParser):
    """Reads the item's 'institution' field unchanged."""
    keyName = 'institution'
    parserName = keyName

class ReportNumberParser(EntryValueParser):
    """Reads the item's 'reportNumber' field unchanged."""
    keyName = 'reportNumber'
    parserName = keyName

class ThesisTypeParser(EntryValueParser):
    """Reads the item's 'thesisType' field unchanged."""
    keyName = 'thesisType'
    parserName = keyName

class PublisherParser(EntryValueParser):
    """Reads the item's 'publisher' field unchanged."""
    keyName = 'publisher'
    parserName = keyName

class BookTitleParser(EntryValueParser):
    """Reads the item's 'bookTitle' field unchanged."""
    keyName = 'bookTitle'
    parserName = keyName

class EditionParser(EntryValueParser):
    """Reads the item's 'edition' field unchanged."""
    keyName = 'edition'
    parserName = keyName

class ConferenceParser(EntryValueParser):
    """Describes where a conference paper appeared.

    Reads two fields ('conferenceName' and 'proceedingsTitle'), so it does
    not set ``keyName``.
    """
    parserName = 'conference/proceedings'

    def parse(self, item, kind='text'):
        """Return the venue phrase for ``item``.

        Args:
            item: Zotero item dict; fields are read from ``item['data']``.
                Absent fields are treated like empty ones.
            kind: 'html' wraps a proceedings title in a styled span; any
                other value gives plain text.

        Returns:
            'presented at the <conferenceName>' when a conference name is
            set, else 'in <proceedingsTitle>' when a proceedings title is
            set, else ''.
        """
        data = item['data']
        # .get keeps this consistent with EntryValueParser.getKeyValue, which
        # also maps a missing key to ''.
        conferenceName = data.get('conferenceName', '')
        proceedingsTitle = data.get('proceedingsTitle', '')

        if conferenceName:
            return 'presented at the {}'.format(conferenceName)
        if not proceedingsTitle:
            return ''
        if kind == 'html':
            return 'in <span class="kwbib-conferencePaper-proceedingsTitle">{}</span>'.format(proceedingsTitle)
        return 'in {}'.format(proceedingsTitle)

class DateParser(EntryValueParser):
    """Renders the item's 'date' field as a year, optionally with month."""
    parserName = keyName = 'date'

    def parse(self, item, include_month=False):
        """Return the display date for ``item``.

        Args:
            item: Zotero item dict; the date is read from
                ``item['data']['date']`` (a missing key counts as empty).
            include_month: when true and both month and year were detected,
                return '<Mon.> <year>' instead of just the year.

        Returns:
            The formatted date, or '' when the item has no date or the date
            cannot be parsed (so callers treat it as missing data instead of
            the whole run failing on one malformed entry).
        """
        s = self.getKeyValue(item)
        if not s:
            return ''

        try:
            _, yearString, monthString = parseDate(s)
        except (ValueError, OverflowError):
            # dateutil signals unparseable input with ValueError subclasses
            # (or OverflowError for out-of-range numbers).
            if global_verbose:
                print('WARNING: could not parse date: {}, {}'.format(s, item.get('key')))
            return ''

        if include_month and yearString and monthString:
            return '{} {}'.format(monthString, yearString)

        return yearString

class ItemTypeGenerator:
    """Base class for objects that format one Zotero item type.

    Subclasses set ``typeString`` to the item type they handle and
    ``requiredData`` to the values an item must provide. Each requirement is
    either a parser class (anything with a ``parserName`` attribute, which is
    instantiated and asked to ``parse`` the item) or a plain callable taking
    the item and returning a string.
    """
    typeString = None
    requiredData = []

    def canGenerate(self, requiredTypeString):
        """Return True when this generator handles ``requiredTypeString``."""
        return self.typeString == requiredTypeString

    def getMissingData(self, item):
        """Return labels of required values that are empty for ``item``.

        A value counts as missing when it is empty after stripping
        whitespace. Parser classes are labelled by ``parserName``, plain
        callables by ``str(callable)``. Nothing is printed: stdout carries
        the generated bibliography.
        """
        missingData = []

        for r in self.requiredData:
            if hasattr(r, 'parserName'):
                value, label = r().parse(item), r.parserName
            else:
                value, label = r(item), str(r)

            if not value.strip():
                missingData.append(label)

        return missingData
                
class ConferencePaperGenerator(ItemTypeGenerator):
    """Formats Zotero items of type 'conferencePaper'."""
    typeString = 'conferencePaper'
    requiredData = [AuthorParser, TitleParser, ConferenceParser, DateParser]

    def generate(self, item, kind='text'):
        """Return ``(entry, '')`` for ``item``.

        The entry has the form 'author, "title," venue, [place,] year.';
        ``kind`` ('text' or 'html') is passed on to the author and venue
        parsers. The second tuple element is an always-empty error string.
        """
        fields = {
            'author': AuthorParser().parse(item, kind=kind),
            'title': TitleParser().parse(item),
            'conference': ConferenceParser().parse(item, kind),
            'place': PlaceParser().parseToFormat(item, ' {},'),
            'date': DateParser().parse(item, include_month=False),
        }
        template = '{author}, "{title}," {conference},{place} {date}.'
        return template.format(**fields), ''

class JournalArticleGenerator(ItemTypeGenerator):
    """Formats Zotero items of type 'journalArticle'."""
    typeString = 'journalArticle'
    requiredData = [AuthorParser, TitleParser, PublicationTitleParser, DateParser]

    def generate(self, item, kind='text'):
        """Return ``(entry, '')`` for ``item``.

        The entry lists author, quoted title, journal, the optional volume,
        issue and pages, and the month/year date. With ``kind='html'`` the
        journal name is wrapped in a styled span. The second tuple element
        is an always-empty error string.
        """
        if kind == 'html':
            template = '{author}, "{title}," <span class="kwbib-journal-journal">{journal}</span>,{volume}{number}{pages} {date}.'
        else:
            template = '{author}, "{title}," {journal},{volume}{number}{pages} {date}.'

        fields = {
            'author': AuthorParser().parse(item, kind=kind),
            'title': TitleParser().parse(item),
            'journal': PublicationTitleParser().parse(item),
            'volume': VolumeParser().parseToFormat(item, ' {},'),
            'number': NumberParser().parseToFormat(item, ' {},'),
            'pages': PagesParser().parseToFormat(item, ' {},'),
            'date': DateParser().parse(item, include_month=True),
        }
        return template.format(**fields), ''

class ThesisGenerator(ItemTypeGenerator):
    """Formats Zotero items of type 'thesis'."""
    typeString = 'thesis'
    requiredData = [AuthorParser, TitleParser, DateParser, SchoolParser]

    def generate(self, item, kind='text'):
        """Return ``(entry, '')`` for ``item``.

        The entry lists author, quoted title, the optional thesis type, the
        school and the year. ``kind`` only affects author formatting. The
        second tuple element is an always-empty error string.
        """
        fields = {
            'author': AuthorParser().parse(item, kind=kind),
            'title': TitleParser().parse(item),
            'thesis_type': ThesisTypeParser().parseToFormat(item, ' {},'),
            'school': SchoolParser().parse(item),
            'date': DateParser().parse(item, include_month=False),
        }
        template = '{author}, "{title},"{thesis_type} {school}, {date}.'
        return template.format(**fields), ''

class ReportGenerator(ItemTypeGenerator):
    """Formats Zotero items of type 'report'."""
    typeString = 'report'
    requiredData = [AuthorParser, TitleParser, InstitutionParser, DateParser]

    def generate(self, item, kind='text'):
        """Return ``(entry, '')`` for ``item``.

        The entry lists author, quoted title, institution, the optional
        report number and the month/year date. ``kind`` only affects author
        formatting. The second tuple element is an always-empty error string.
        """
        fields = {
            'author': AuthorParser().parse(item, kind=kind),
            'title': TitleParser().parse(item),
            'institution': InstitutionParser().parse(item),
            'report_number': ReportNumberParser().parseToFormat(item, ' {},'),
            'date': DateParser().parse(item, include_month=True),
        }
        template = '{author}, "{title}," {institution},{report_number} {date}.'
        return template.format(**fields), ''

class BookSectionGenerator(ItemTypeGenerator):
    """Formats Zotero items of type 'bookSection'."""
    typeString = 'bookSection'
    requiredData = [AuthorParser, TitleParser, DateParser, PublisherParser, BookTitleParser, PagesParser]

    def generate(self, item, kind='text'):
        """Return ``(entry, '')`` for ``item``.

        The entry lists author, quoted section title, book title with the
        optional edition, publisher, year and pages. With ``kind='html'``
        the book title is wrapped in a styled span. The second tuple element
        is an always-empty error string.
        """
        if kind == 'html':
            template = '{author}, "{title}," in <span class="kwbib-bookSection-book-title">{book_title}</span>{edition}. {publisher}, {date}, {pages}.'
        else:
            template = '{author}, "{title}," in {book_title}{edition}. {publisher}, {date}, {pages}.'

        fields = {
            'author': AuthorParser().parse(item, kind=kind),
            'title': TitleParser().parse(item),
            'book_title': BookTitleParser().parse(item),
            'publisher': PublisherParser().parse(item),
            'edition': EditionParser().parseToFormat(item, ', {}'),
            # NOTE(review): 'place' is computed but neither template has a
            # {place} field, so it never appears in the output -- confirm
            # whether it was meant to be rendered.
            'place': PlaceParser().parseToFormat(item, ' {},'),
            'pages': PagesParser().parse(item),
            'date': DateParser().parse(item, include_month=False),
        }
        return template.format(**fields), ''

class BookGenerator(ItemTypeGenerator):
    """Formats Zotero items of type 'book'."""
    typeString = 'book'
    requiredData = [AuthorParser, DateParser, PublisherParser, TitleParser]

    def generate(self, item, kind='text'):
        """Return ``(entry, '')`` for ``item``.

        The entry lists author, title with the optional edition, publisher
        and year. With ``kind='html'`` the title is wrapped in a styled
        span. The second tuple element is an always-empty error string.
        """
        if kind == 'html':
            template = '{author}, <span class="kwbib-bookSection-book-title">{title}</span>{edition}. {publisher}, {date}.'
        else:
            template = '{author}, {title}{edition}. {publisher}, {date}.'

        fields = {
            'author': AuthorParser().parse(item, kind=kind),
            'title': TitleParser().parse(item),
            'publisher': PublisherParser().parse(item),
            'edition': EditionParser().parseToFormat(item, ', {}'),
            'date': DateParser().parse(item, include_month=False),
        }
        return template.format(**fields), ''

# One generator instance per supported Zotero item type; generateItemString
# picks the first one whose canGenerate() accepts the item's type.
itemTypeGenerators = [
    generatorClass()
    for generatorClass in (
        ConferencePaperGenerator,
        JournalArticleGenerator,
        ThesisGenerator,
        ReportGenerator,
        BookSectionGenerator,
        BookGenerator,
    )
]

def generateItemString(item, kind='text'):
    """Format one Zotero item as a bibliography entry.

    Args:
        item: Zotero item dict with ``item['data']['itemType']`` and
            ``item['key']``.
        kind: output flavour passed to the generator ('text' or 'html').

    Returns:
        A tuple ``(contentString, errorString)``. On success this is the
        generator's result. When no generator handles the item type, or
        required data is missing, ``contentString`` is None and
        ``errorString`` describes the problem; every missing field is listed,
        comma-separated.
    """
    entryType = item['data']['itemType']

    generator = next(
        (g for g in itemTypeGenerators if g.canGenerate(entryType)), None)
    if generator is None:
        return (None, 'ERROR: No generator found for type: {} : {}'.format(entryType, item['key']))

    missingData = generator.getMissingData(item)
    if missingData:
        # Report all missing fields at once so one pass over the output is
        # enough to repair the entry.
        return (None, 'ERROR: Entry missing required data {} : {} : {}'.format(
            entryType, ', '.join(missingData), item['key']))

    return generator.generate(item, kind=kind)


def _dateSortKey(item):
    """Return the parseDate() tuple used to order ``item`` by date.

    Items with no 'date' value, or with a date that cannot be parsed, are
    ordered as if dated '1900' so one bad entry cannot abort the whole run.
    """
    fallback = '1900'
    rawDate = item['data'].get('date') or fallback
    try:
        return parseDate(rawDate)
    except (ValueError, OverflowError):
        return parseDate(fallback)


def generateCollectionString(items, kind='text', verbose=False, sort=False):
    """Format a list of Zotero items as one bibliography string.

    Args:
        items: list of Zotero item dicts.
        kind: 'text' gives numbered lines '[n] entry'; 'html' gives an
            ``<ol class="kwbib">`` list. Entries are only emitted for these
            two kinds.
        verbose: when true, per-item error messages are embedded in the
            output between rows of asterisks.
        sort: when true, items are ordered newest first by parsed date.

    Returns:
        The complete bibliography text.
    """
    sortedItems = items

    if sort:
        keyed = [(_dateSortKey(i), i) for i in items]
        # Sort on the date key only; item dicts themselves are not orderable.
        keyed.sort(key=lambda pair: pair[0], reverse=True)
        sortedItems = [i for _, i in keyed]

    parts = []

    # used for whitespace padding
    maxCountCharacters = len(str(len(items)))
    nextCount = 1

    if kind == 'html':
        parts.append('<ol class="kwbib">\n')

    for item in sortedItems:
        contentString, errorString = generateItemString(item, kind=kind)

        if contentString:
            if kind == 'text':
                numberPadding = maxCountCharacters - len(str(nextCount))
                parts.append('{padding}[{number}] {item_content}\n'.format(
                    padding=' ' * numberPadding,
                    number=nextCount,
                    item_content=contentString))
                nextCount += 1
            elif kind == 'html':
                parts.append('<li class="kbib-entry">{item_content}</li>\n'.format(
                    item_content=contentString))

        if errorString and verbose:
            parts.append('\n{0}\n{1}\n{0}\n\n'.format('*' * 20, errorString))

    if kind == 'html':
        parts.append('</ol>\n')

    return ''.join(parts)


#
# manage caching
def updateCache(credentialsFile, cacheFile):
    """Download the Zotero library and store it in ``cacheFile`` as JSON.

    Args:
        credentialsFile: path of a JSON file providing the keys
            'library_id', 'library_type' and 'api_key'.
        cacheFile: path to write; it receives a JSON object with the keys
            'items' (everything from ``zot.top()``) and 'collections'.

    Progress messages are printed to stdout.
    """
    print('Updating cache...')

    # load credentials
    print('Loading credentials from file: {}'.format(credentialsFile))
    with open(credentialsFile, 'r') as credentialsHandle:
        libCredentials = json.load(credentialsHandle)

    # load items
    print('Loading data from Zotero server...')
    zot = zotero.Zotero(libCredentials['library_id'],
                        libCredentials['library_type'],
                        libCredentials['api_key'])
    allItems = zot.everything(zot.top())
    allCollections = zot.everything(zot.collections())
    print('Downloaded {} items and {} collections from Zotero'.format(len(allItems), len(allCollections)))

    payload = {'items': allItems, 'collections': allCollections}
    with open(cacheFile, 'w') as cacheHandle:
        json.dump(payload, cacheHandle)

    print('Cache updated.')
    
def loadDataFromCache(cacheFile, verbose):
    """Read the cached library written by updateCache.

    Args:
        cacheFile: path of the JSON cache file.
        verbose: when true, print what is being loaded and how many items
            and collections the cache holds.

    Returns:
        The decoded JSON object (a dict with 'items' and 'collections').
    """
    if verbose:
        print('Loading credentials from cache: {}'.format(cacheFile))

    with open(cacheFile, 'r') as cacheHandle:
        cacheData = json.load(cacheHandle)

    if verbose:
        itemCount = len(cacheData['items'])
        collectionCount = len(cacheData['collections'])
        print('Loaded {} items and {} collections from cache'.format(itemCount, collectionCount))

    return cacheData

def getCollectionKey(allCollections, collectionName):
    """Return the key of the first collection named ``collectionName``.

    ``allCollections`` is a list of Zotero collection dicts. Returns None
    when no collection has that name.

    NOTE: this module defines ``getCollectionKey`` a second time further
    down (taking the whole cache dict); that later definition replaces this
    one once the module has loaded.
    """
    matchingKeys = (
        collection['key']
        for collection in allCollections
        if collection['data']['name'] == collectionName
    )
    return next(matchingKeys, None)

def similar(a, b, threshold=0.7):
    """Return True when ``a`` and ``b`` are near-duplicates.

    Args:
        a, b: sequences (typically strings) to compare.
        threshold: similarity ratio in [0, 1] that must be exceeded;
            defaults to 0.7.

    Returns:
        True when ``SequenceMatcher(None, a, b).ratio()`` is strictly
        greater than ``threshold``.
    """
    return SequenceMatcher(None, a, b).ratio() > threshold

def cleanName(s):
    """Normalise a name for fuzzy comparison.

    Lower-cases ``s``, removes every '.', ',' and '-' character, and strips
    leading and trailing whitespace.
    """
    withoutPunctuation = s.lower().translate(str.maketrans('', '', '.,-'))
    return withoutPunctuation.strip()

def reportNames(collectionItems, allItems, authorLastNames, authorFirstNames):
    """Print creators outside the collection that may be the given author.

    For every item of ``allItems`` that is not in ``collectionItems``, each
    creator with both 'firstName' and 'lastName' is compared (after
    cleanName normalisation, using similar()) against the supplied name
    variants. A line is printed when the last name resembles one of
    ``authorLastNames`` and the first name resembles one of
    ``authorFirstNames``.

    Items without a 'creators' list are skipped.
    """
    # Normalise the reference names once instead of once per creator.
    cleanLastNames = [cleanName(x) for x in authorLastNames]
    cleanFirstNames = [cleanName(x) for x in authorFirstNames]

    for item in allItems:
        if item in collectionItems:
            continue

        for creator in item['data'].get('creators', []):
            if 'lastName' not in creator or 'firstName' not in creator:
                continue

            lastName = cleanName(creator['lastName'])
            firstName = cleanName(creator['firstName'])

            matchedLastName = any(similar(lastName, n) for n in cleanLastNames)
            matchedFirstName = matchedLastName and any(
                similar(firstName, n) for n in cleanFirstNames)

            if matchedFirstName:
                print('Does this creator of item {} match the author?\n{}'.format(item['key'], creator))
                
def reportSimilar(items, key):
    """Print pairs of items whose ``key`` values are near-duplicates.

    Every ordered pair of distinct items with a truthy ``item['data'][key]``
    is compared with similar(). A pair that has been reported is not
    reported again in the opposite order.

    Args:
        items: list of Zotero item dicts.
        key: name of the ``item['data']`` field to compare, e.g. 'title'.
    """
    # (i1, i2) index pairs to skip because the mirrored pair was already
    # reported; a set keeps the membership test constant-time.
    reportedMirrors = set()

    for i1, item1 in enumerate(items):
        value1 = item1['data'].get(key)
        if not value1:
            continue

        for i2, item2 in enumerate(items):
            if i1 == i2 or (i1, i2) in reportedMirrors:
                continue

            value2 = item2['data'].get(key)
            if not value2:
                continue

            if similar(value1, value2):
                print('Found similar values of {}:\n{}: {}\n{}: {}'.format(key, item1['key'], value1, item2['key'], value2))
                reportedMirrors.add((i2, i1))

def getCollectionKey(cacheData, collectionName):
    """Return the key of the first cached collection named ``collectionName``.

    Args:
        cacheData: cache dict whose 'collections' entry is a list of Zotero
            collection dicts.
        collectionName: collection name to look up (exact match).

    Returns:
        The collection's 'key', or None when no collection has that name.
    """
    for collection in cacheData['collections']:
        if collection['data']['name'] == collectionName:
            return collection['key']
    return None
    
def getItemsForCollection(cacheData, collectionName):
    """Return the cached items that belong to the named collection.

    An item belongs to the collection when the collection's key appears in
    ``item['data']['collections']``. When no collection has that name an
    error line is printed and None is returned.
    """
    collectionKey = getCollectionKey(cacheData, collectionName)
    if collectionKey:
        return [
            item for item in cacheData['items']
            if collectionKey in item['data']['collections']
        ]

    print('Error: Could not find collection with name: {}'.format(collectionName))
    return None

def getTimestampForCollection(cacheData, collectionName):
    """Return the newest 'dateModified' among the named collection's items.

    Each member item's ``item['data']['dateModified']`` is parsed with
    dateutil and the latest value is returned. Returns None when the
    collection has no items; when no collection has that name an error line
    is printed and None is returned.
    """
    collectionKey = getCollectionKey(cacheData, collectionName)
    if not collectionKey:
        print('Error: Could not find collection with name: {}'.format(collectionName))
        return None

    modifiedTimes = (
        dateutil.parser.parse(item['data']['dateModified'])
        for item in cacheData['items']
        if collectionKey in item['data']['collections']
    )
    return max(modifiedTimes, default=None)

def getSubCollections(cacheData, parentCollectionName):
    """Return the names of the direct children of the named collection.

    A cached collection is a child when its
    ``collection['data']['parentCollection']`` equals the key looked up for
    ``parentCollectionName``.
    """
    parentKey = getCollectionKey(cacheData, parentCollectionName)
    return [
        collection['data']['name']
        for collection in cacheData['collections']
        if collection['data']['parentCollection'] == parentKey
    ]

if __name__=='__main__':
    # Command-line entry point. Modes, checked in this order:
    #   --list_collections  print sub-collections of 'Live' with timestamps
    #   -u/--update_cache   download the library into the cache file
    #   -g/--generate_bib   print a bibliography (--check and --html are
    #                       shortcuts that select this mode)
    #
    # Named argParser so the module-level name `parser` (imported from
    # dateutil) is not rebound when the script runs.
    argParser = argparse.ArgumentParser()

    # modes
    argParser.add_argument('-u', '--update_cache', action='store_true', default=False)
    argParser.add_argument('-g', '--generate_bib', action='store_true', default=False)

    argParser.add_argument('--list_collections', action='store_true', default=False, help='lists all live collections and their timestamps')
    argParser.add_argument('--check', type=str, help='name of collection to check')
    argParser.add_argument('--html', type=str, help='name of collection to generate')

    argParser.add_argument('-n', '--collection_name', type=str, help='name of collection to be generated')
    argParser.add_argument('-i', '--credentials_file', type=str, help='file with Zotero credentials; used when updating cache')
    argParser.add_argument('-c', '--cache_file', type=str, help='file to store Zotero cache',
                           default='../zotero_bib_cache.json')

    argParser.add_argument('-v', '--verbose', action='store_true', default=False)
    argParser.add_argument('-f', '--output_format', type=str, default='text', help='output format: text, html')
    argParser.add_argument('--no_sort', action='store_true', default=False, help='use to not sort by date')

    args = argParser.parse_args()

    # Shortcut modes override several options, including verbosity.
    if args.check:
        args.generate_bib = True
        args.collection_name = args.check
        args.verbose = True
        args.output_format = 'text'
    elif args.html:
        args.generate_bib = True
        args.collection_name = args.html
        args.verbose = False
        args.output_format = 'html'

    # Set only after the overrides above so parser warnings follow the
    # effective verbosity (on for --check, never mixed into --html output).
    global_verbose = args.verbose

    # Mode: list live collections along with timestamps
    if args.list_collections:
        cacheData = loadDataFromCache(args.cache_file, args.verbose)
        liveCollections = getSubCollections(cacheData, 'Live')
        print('[')
        for collectionName in liveCollections:
            timestamp = getTimestampForCollection(cacheData, collectionName)
            print('{{ "name" : "{}", "timestamp" : "{}"}}'.format(collectionName, timestamp))
        print(']')
        sys.exit()

    # Mode: generate/update cache
    if args.update_cache:
        if not (args.credentials_file and args.cache_file):
            print('Error: credentials file and cache file required when updating cache')
            sys.exit()

        updateCache(args.credentials_file, args.cache_file)
        sys.exit()

    # Mode: generate output for items
    if args.generate_bib:
        if not args.cache_file:
            print('Error: cache file required when generating bibliographies')
            sys.exit()
        cacheData = loadDataFromCache(args.cache_file, args.verbose)

        items = getItemsForCollection(cacheData, args.collection_name) if args.collection_name else cacheData['items']
        if not items:
            print('Error: Did not find any matching bibliography entries.')
            sys.exit()

        # Without a collection name the whole library is used and there is
        # no collection to look a timestamp up for.
        timestamp = None
        if args.collection_name:
            timestamp = getTimestampForCollection(cacheData, args.collection_name)

        if args.verbose:
            print('{} has {} {} and was last modified {}'.format(args.collection_name,
                                                                 len(items),
                                                                 'entry' if len(items) < 2 else 'entries',
                                                                 timestamp))
            print('Checking for near duplicate titles...')
            reportSimilar(items, 'title')
            reportNames(items, cacheData['items'], ['Davis'], ['Brad', 'Brad C', 'B', 'B C'])

        s = generateCollectionString(items, verbose=args.verbose, kind=args.output_format, sort=not args.no_sort)

        if args.output_format=='html':
            s = '<input type="hidden" name="ztimestamp" value="{}"/>\n'.format(timestamp) + s

        print(s)
        sys.exit()
    
    
