import datetime as dt
import numpy as np
import os
import pandas as pd
from matplotlib import pyplot as plt
from zz_NewsSocialSignaling import Config, Helper
from ResearchTools import ChartTools


def analyze_asian_barometer():
    """Chart when Asian Barometer interviews happen relative to elections.

    Steps:
      1. Load the election table (``Config.fn_elections``) and the survey
         extract (``Config.fn_asian_barometer``).
      2. Drop interviews without an interview date, and interview years with
         fewer than ``min_obs_year`` observations.
      3. For every interview whose country appears in the election table,
         store in ``closest_elect`` the ``Helper.month_diff(survey_month,
         election_month)`` value with the smallest absolute value. Interviews
         from countries without election data keep the placeholder -100000.
      4. Save a bar chart of interview counts per offset (restricted to
         +/- 24) under ``Config.dir_asian_barometer_figs`` and print, per
         country, the election months and the interview-month counts.

    Returns:
        None. Output is the saved figure plus text printed to stdout.
    """
    min_obs_year = 100
    # Placeholder for interviews that cannot be matched to any election; far
    # outside the plotted window so those rows are filtered out of the chart.
    no_match = -100000
    election_data = pd.read_stata(Config.fn_elections)
    df = pd.read_table(Config.fn_asian_barometer, sep=',', low_memory=False)

    # filter obs w/o good interview date
    df = df.loc[df.interviewmonth.notna()].copy()
    df['interviewmonth'] = df.interviewmonth.apply(lambda d: dt.datetime.strptime(d, '%Y-%m-%d').date())
    df['interviewyear'] = df.interviewmonth.apply(lambda d: d.year)
    year_counts = df.interviewyear.value_counts()
    sparse_years = year_counts[year_counts < min_obs_year].index
    df = df.loc[~df.interviewyear.isin(sparse_years)].copy()

    # check countries w/ matching election data
    # .str.upper() leaves a missing country as NaN (reported below as
    # unmatched) instead of raising on it.
    df['countryraw'] = df.countryraw.str.upper()
    election_countries = set(election_data.country.unique())
    survey_countries = df.countryraw.unique()
    for country in survey_countries:
        if country not in election_countries:
            print('{0} not in election data'.format(country))

    # election months per surveyed country (only countries with election data)
    country2elec = {
        country: [month.date() for month in election_data.loc[election_data.country == country, 'electmonth']]
        for country in survey_countries
        if country in election_countries
    }

    def _closest_election_offset(country, survey_month):
        """Return the month_diff value of smallest magnitude for one interview."""
        elections = country2elec.get(country)
        if elections is None:
            return no_match
        offsets = [Helper.month_diff(survey_month, election) for election in elections]
        # min() returns the first minimal element, i.e. ties go to the election
        # listed first; the default only applies to an empty election list.
        return min(offsets, key=abs, default=1000000)

    # add months to/from the closest election
    df['closest_elect'] = [
        _closest_election_offset(country, survey_month)
        for country, survey_month in zip(df.countryraw, df.interviewmonth)
    ]

    # plot survey by election counts
    num_months = 24
    counts = df.loc[np.abs(df.closest_elect) <= num_months, 'closest_elect'].value_counts()
    # make sure every offset in the window gets a bar, even if it has no surveys
    for offset in range(-num_months, num_months + 1):
        if offset not in counts.index:
            counts[offset] = 0
    counts = counts.sort_index()
    fig, ax = plt.subplots()
    counts.plot.bar(ax=ax)
    plt.title('Asian Barometer Number of Surveys')
    plt.xlabel('Months to Nearest Election')
    plt.ylabel('Number of Surveys')
    ChartTools.save_show_plot(fig=fig, fn=os.path.join(Config.dir_asian_barometer_figs, 'survey_relative_to_elections.png'), show_graph=False, pickle_fig=False)

    # per-country listing of election months and interview-month counts
    for country in survey_countries:
        if country not in country2elec:
            continue
        print(country)
        print([el.strftime('%Y-%m') for el in sorted(country2elec[country])])
        print(df.loc[df.countryraw == country].interviewmonth.value_counts())


def analyze_anes():
    """Summarise ANES interview timing and news consumption around elections.

    Steps:
      1. Load the ANES extract (``Config.fn_anes``) and drop rows where both
         ``days_pre_election`` and ``days_post_election`` are ``'missing'``.
      2. Build ``days2election`` (negative of the pre-election day count when
         it is present, otherwise the post-election day count),
         ``pre_election`` (``days2election < 0``) and ``weeks2election``.
      3. Keep only rows where both ``has_post`` and ``has_pre`` are set.
      4. Save a bar chart of interview counts per week (restricted to
         +/- 12 weeks) under ``Config.dir_anes_figs``.
      5. Print, separately for pre- and post-election interviews, the share of
         respondents per value of ``newspaper_days_last_week`` and
         ``tvnews_days_last_week``.

    Returns:
        None. Output is the saved figure plus text printed to stdout.
    """
    df = pd.read_table(Config.fn_anes, sep=',')

    # drop missing vals
    old_len = len(df)
    has_pre_days = df.days_pre_election != 'missing'
    has_post_days = df.days_post_election != 'missing'
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the frame that was read from disk.
    df = df.loc[has_pre_days | has_post_days].copy()
    print('Dropping {0}/{1} obs b/c missing. {2} Remain'.format(old_len - len(df), old_len, len(df)))

    # After the filter every row has at least one usable day count. The pre
    # count wins when present (stored negated); otherwise the post count is
    # used. Values are masked before conversion so the 'missing' marker in the
    # unused column never reaches pd.to_numeric, which raises on bad input.
    pre_missing = df.days_pre_election == 'missing'
    pre_days = pd.to_numeric(df.days_pre_election.where(~pre_missing))
    post_days = pd.to_numeric(df.days_post_election.where(pre_missing))
    df['days2election'] = post_days.where(pre_missing, -pre_days)
    df['pre_election'] = df['days2election'] < 0
    # Truncation toward zero (same as int(d / 7)).
    # NOTE(review): days -6..6 therefore all land in week 0, so that bin covers
    # 13 day values while every other bin covers 7 - confirm this is intended.
    df['weeks2election'] = np.trunc(df.days2election / 7).astype('int64')
    df = df.drop(columns=['days_post_election', 'days_pre_election'])

    # restrict to surveys w/ both pre and post
    old_len = len(df)
    df = df.loc[df.has_post & df.has_pre]
    print('Dropping {0}/{1} obs b/c missing pre or post. {2} Remain'.format(old_len - len(df), old_len, len(df)))

    # plot survey by election counts
    num_weeks = 12
    counts = df.loc[np.abs(df.weeks2election) <= num_weeks, 'weeks2election'].value_counts()
    # make sure every week in the window gets a bar, even if it has no surveys
    for week in range(-num_weeks, num_weeks + 1):
        if week not in counts.index:
            counts[week] = 0
    counts = counts.sort_index()
    fig, ax = plt.subplots()
    counts.plot.bar(ax=ax)
    plt.title('ANES Number of Surveys')
    plt.xlabel('Weeks to Nearest Election')
    plt.ylabel('Number of Surveys')
    ChartTools.save_show_plot(fig=fig, fn=os.path.join(Config.dir_anes_figs, 'survey_relative_to_elections.png'), show_graph=False,
                              pickle_fig=False)

    # Turn the 'missing' marker into NaN in every remaining column.
    # NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in favour
    # of DataFrame.map; switch once the minimum supported pandas allows it.
    df = df.applymap(lambda x: np.nan if x == 'missing' else x)

    # plot news consumption relative to election
    tmp = df.loc[df.newspaper_days_last_week.notna() & df.tvnews_days_last_week.notna()]
    # Index the share table by the union of both columns' values, so a value
    # that occurs only for TV news is not silently left out of its shares.
    categories = sorted(set(tmp.newspaper_days_last_week.unique()) | set(tmp.tvnews_days_last_week.unique()))
    tmp_dfs = {
        'pre': tmp.loc[tmp.pre_election].copy(),
        'post': tmp.loc[~tmp.pre_election].copy()
    }
    for key, subset in tmp_dfs.items():
        n = len(subset)
        print('Number of {0} election observations: {1}'.format(key, n))
        assert subset.newspaper_days_last_week.value_counts().sum() == n, str(
            'Expected {0}, got {1}'.format(n, subset.newspaper_days_last_week.value_counts().sum()))
        assert subset.tvnews_days_last_week.value_counts().sum() == n
        shares = pd.DataFrame(index=categories, columns=['newspaper', 'tv news'])
        for ix in shares.index:
            shares.loc[ix, 'newspaper'] = (subset.newspaper_days_last_week == ix).sum() / n
            shares.loc[ix, 'tv news'] = (subset.tvnews_days_last_week == ix).sum() / n
        print(shares)
        # each column is a distribution over the categories and must sum to ~1
        assert np.all(shares.sum() > 0.99) and np.all(shares.sum() < 1.01)


# Script entry point: only the ANES analysis runs when this file is executed
# directly; analyze_asian_barometer() has to be called explicitly.
if __name__ == '__main__':
    analyze_anes()