import re
from datetime import datetime
from collections import namedtuple
import pandas as pd

# Month names and three-letter abbreviations accepted by get_date, mapped to
# the two-digit month number used when assembling the strptime input.
_MONTH_NUMBERS = {
    'jan': '01', 'january': '01',
    'feb': '02', 'february': '02',
    'mar': '03', 'march': '03',
    'apr': '04', 'april': '04',
    'may': '05',
    'jun': '06', 'june': '06',
    'jul': '07', 'july': '07',
    'aug': '08', 'august': '08',
    'sep': '09', 'september': '09',
    'oct': '10', 'october': '10',
    'nov': '11', 'november': '11',
    'dec': '12', 'december': '12',
}

# The alternation is generated from the table above so the pattern and the
# lookup can never disagree on spelling.  Anchored at the start of the text
# by ``match``: "<month> <day>, <year>".
_DATE_PATTERN = re.compile(
    '(' + '|'.join(sorted(_MONTH_NUMBERS, key=len, reverse=True)) + ')'
    r'\s+(\d+),\s*(\d+)'
)


def get_date(str):
    """Parse a leading "Month D, YYYY" date from a line of text.

    Matching is case-insensitive and only looks at the start of the text.
    Both full month names and three-letter abbreviations are accepted.

    Only the last two digits of the year are used, and they are interpreted
    by ``strptime``'s ``%y`` directive (69-99 -> 1969-1999, 00-68 ->
    2000-2068).

    Args:
        str: The text to inspect (typically one line of the log file).
            The name shadows the builtin but is kept for compatibility.

    Returns:
        A ``datetime`` at midnight of the parsed day, or ``None`` when the
        text does not start with a recognisable date.

    Raises:
        ValueError: If ``strptime`` rejects the assembled date, e.g. a day
            number that does not exist.
    """
    match = _DATE_PATTERN.match(str.lower())
    if match is None:
        return None

    month_name, day, year = match.groups()
    month = _MONTH_NUMBERS[month_name]
    # strptime is fed a fixed-width MMDDYY string, so pad one-digit days.
    return datetime.strptime(f'{month}{day.zfill(2)}{year[-2:]}', '%m%d%y')


def get_trials(str):
    """Extract the trial results recorded on a "random/real words" line.

    The line must start with ``random words`` or ``real words`` followed by
    ``- <N> min(s) - <N> wpm - <results>``.  Every ``<digits>%`` token found
    in the results part becomes one trial.

    Args:
        str: The text to inspect (typically one line of the log file).

    Returns:
        A list of ``(wpm, mins, percent_correct)`` integer tuples, one per
        percentage found; an empty list when the line does not match.
    """
    header = re.match(r'(random|real)\s+words\s*-\s*(\d+)\s*mins?\s*-\s*(\d+)\s*wpm\s*-\s*(.+)', str)
    if not header:
        return []

    duration_mins = int(header.group(2))
    speed_wpm = int(header.group(3))
    scores = re.findall(r'\d+%', header.group(4))

    # Each score looks like "85%"; drop the trailing percent sign.
    return [(speed_wpm, duration_mins, int(score[:-1])) for score in scores]



# One parsed trial: ISO date string, 1-based day number relative to the first
# dated line in the log, speed in wpm, duration in minutes, percent correct.
EntryTuple = namedtuple('EntryTuple', ['date', 'day_num', 'wpm', 'mins', 'pcorrect'])


def get_entries(filename='Morse Code Progress.txt'):
    """Read a practice log and return every trial it contains.

    The file is scanned line by line.  A line that ``get_date`` recognises
    sets the current date; every trial that ``get_trials`` finds on that or
    a later line is attributed to the most recent date seen.

    ``day_num`` is 1 for the first dated line and increases by one per
    calendar day after it, so the first day and the following day get
    distinct numbers.

    Args:
        filename: Path of the log file to read.

    Returns:
        A list of ``EntryTuple`` in file order.  Trials that appear before
        any date line are reported on stdout and skipped, because there is
        no date to attach them to.
    """
    entries = []
    first_date = None
    entry_date = None
    day_num = None

    # The context manager closes the file even if parsing raises.
    with open(filename, 'r') as log_file:
        for line in log_file:
            parsed_date = get_date(line)
            if parsed_date is not None:
                if first_date is None:
                    first_date = parsed_date
                entry_date = parsed_date.strftime('%Y-%m-%d')
                # +1 keeps numbering 1-based for every day, not just the first.
                day_num = (parsed_date - first_date).days + 1

            trials = get_trials(line)
            if trials and entry_date is None:
                print(f'Skipping trials found before any date line: {line.rstrip()}')
                continue

            entries.extend(
                EntryTuple(entry_date, day_num, wpm, mins, pcorrect)
                for wpm, mins, pcorrect in trials
            )

    return entries


def get_max_pcorrect_group_by_day_num_and_wpm(entries):
    """Return the maximum of each column per ``(day_num, wpm)`` group.

    Args:
        entries: A DataFrame with at least the columns ``day_num``, ``wpm``
            and ``pcorrect``.

    Returns:
        A DataFrame with the columns ``day_num``, ``wpm`` and ``pcorrect``
        (in that order), one row per distinct ``(day_num, wpm)`` pair, and
        an integer index named ``index`` counting up from 0.
    """
    best = entries.groupby(['day_num', 'wpm']).max()

    # The group keys live in the (day_num, wpm) index after grouping; copy
    # them back into ordinary columns.
    group_keys = list(best.index)
    best['day_num'] = [day for day, _ in group_keys]
    best['wpm'] = [speed for _, speed in group_keys]

    # Swap the group-key index for a plain 0..n-1 counter.
    best['index'] = range(len(best))
    return best.set_index('index')[['day_num', 'wpm', 'pcorrect']]

def write_grouped_entries(entries, filename='word_trials.csv'):
    """Write grouped trial rows to a CSV file and echo them to stdout.

    The file starts with the header ``Day,WPM,Percent Correct`` followed by
    one ``day_num,wpm,pcorrect`` line per row of ``entries``.

    Args:
        entries: A DataFrame whose rows consist of exactly three columns,
            in the order day number, wpm, percent correct.
        filename: Path of the CSV file to create or overwrite.
    """
    # Iterate the argument itself rather than a module-level global, so the
    # function writes whatever frame the caller passes in.
    with open(filename, 'w') as word_trial_file:
        word_trial_file.write('Day,WPM,Percent Correct\n')
        for day_num, wpm, pcorrect in entries.itertuples(index=False):
            row = f'{day_num},{wpm},{pcorrect}'
            print(row)
            word_trial_file.write(row + '\n')


# Script body. There is no ``__main__`` guard, so these statements run
# whenever the module is executed or imported.

# Parse the log at get_entries' default path and keep only the three columns
# used for grouping.
entries = pd.DataFrame(get_entries())[['day_num','wpm','pcorrect']]
# Reduce to the maximum percent-correct per (day_num, wpm) pair.
grouped_entries = get_max_pcorrect_group_by_day_num_and_wpm(entries)
# Write the grouped rows out as CSV (also echoed to stdout).
write_grouped_entries(grouped_entries)

# Report how many raw trials were parsed and how many grouped rows remain.
entry_count = len(entries)
grouped_entry_count = len(grouped_entries)
print(f'Found {entry_count} entries')
print(f'Found Grouped {grouped_entry_count} entries')


