import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from ResearchTools import ChartTools
from matplotlib import pyplot as plt
import os


def simulate_estimator(n, public_share, share_news_only, read_te, slant_te):
    df = pd.DataFrame(index=range(n))
    df['public'] = [int(el) for el in np.random.rand(n) < public_share]
    df['news_only'] = [int(el) for el in np.random.rand(n) < share_news_only]
    df['read_news'] = [int(el) for el in np.random.rand(n) < (0.5 + read_te * df.public)]
    df.loc[df.news_only == 1, 'read_news'] = np.nan  # for read analysis, need to have option to read either news or sports
    df['news_slant'] = np.random.randn(n) + slant_te * df.public
    df.loc[df.news_only == 0, 'news_slant'] = np.nan  # for slant analysis, need to have only news options. Could code sports as 0 slant?

    # hypothesis 1
    mod1 = smf.ols('read_news ~ 1 + public', data=df).fit(cov_type='HC2')
    hyp_1_p_val = mod1.pvalues['public']

    # hypothesis 2
    mod2 = smf.ols('news_slant ~ 1 + public', data=df).fit(cov_type='HC2')
    hyp_2_p_val = mod2.pvalues['public']

    return {
        'h1': hyp_1_p_val,
        'h2': hyp_2_p_val
    }


def mde(n, required_power, share_news_only, sig_p, public_share):
    num_sims = 1000
    effect_sizes = np.linspace(0, 0.2, 20)
    ix = 0
    mdes = {}
    while True:
        effect_size = effect_sizes[ix]
        print(effect_size)
        p_vals = {'h1': [], 'h2': []}
        for b in range(num_sims):
            tmp_ps = simulate_estimator(n=n, public_share=public_share, share_news_only=share_news_only, read_te=effect_size, slant_te=effect_size)
            p_vals['h1'].append(tmp_ps['h1'])
            p_vals['h2'].append(tmp_ps['h1'])

        # now check if we can reject 80% of time for either hypothesis
        for h in p_vals:
            if h in mdes:
                continue
            if np.mean([int(el < sig_p) for el in p_vals[h]]) >= required_power:
                mdes[h] = effect_size

        if len(mdes) == len(p_vals):
            break
        ix += 1
    return mdes


def main():
    required_power = 0.8
    public_share = 0.5
    share_news_only = 0.5
    sig_p = 0.05
    # simulate_estimator(n=1000, public_share=public_share, share_news_only=share_news_only, read_te=0.1, slant_te=0.1)

    mdes = pd.DataFrame(index=[1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000])
    for n in mdes.index:
        tmp_mde = mde(n=n, required_power=required_power, share_news_only=share_news_only, sig_p=sig_p, public_share=public_share)
        mdes.loc[n, 'h1'] = tmp_mde['h1']
        mdes.loc[n, 'h2'] = tmp_mde['h2']
    fig, ax = plt.subplots()
    mdes.plot(ax=ax)
    plt.xlabel('')
    ChartTools.save_show_plot(fig=fig, fn=os.path.join(os.path.dirname(__file__), 'mde.pdf'), pickle_fig=False, show_graph=False)


if __name__ == '__main__':
    main()