# Load required modules
import numpy as np
import pandas as pd
import doubleml as dml

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LassoCV, LogisticRegressionCV
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error
from sklearn.metrics import log_loss
from xgboost import XGBClassifier, XGBRegressor

import matplotlib.pyplot as plt
import scipy.stats as stats


import pandas as pd

# Load the data set from a URL (an internet connection is required);
# pd.read_csv fetches the remote CSV file and parses it into a DataFrame.
url = 'https://raw.githubusercontent.com/DoubleML/doubleml-docs/master/doc/examples/data/high42.CSV'
df = pd.read_csv(url)


# Print the dimensions of the data as (number of rows, number of columns)
print(df.shape)


# Preview the first rows of the data.
# NOTE(review): as a bare expression this only renders output in an
# interactive / notebook session; in a plain script the result is discarded.
df.head()


# Specify explanatory variables for data-backend: every column name except
# the first two.
# NOTE(review): presumably the first two columns hold the outcome and the
# treatment variable -- verify against the data set.
features_base = list(df.columns.values)[2:]

# TODO: Initialize DoubleMLData (data-backend of DoubleML)


# TODO: print data backend


# TODO: Calculate unconditional average treatment effect


# TODO: Initialize Linear and Logistic Regression learners


# TODO: Initialize one ML learner of your choice


# TODO: Initialize a second ML learner of your choice
#      (proceed as long as you like)


# TODO: Initialize benchmark DoubleMLIRM model


# TODO: Initialize a DoubleMLIRM model using the ML learners of your choice


# TODO: Fit benchmark DoubleMLIRM model using the fit() method

# HINT: set parameter 'store_predictions = True' for later model diagnostics


# TODO: Summarize your results


def pred_acc_irm(DoubleML, prop):
    """
    Calculate a prediction accuracy value for every repetition of a
    Double Machine Learning model using IRM, DoubleMLIRM.

    The measures are computed directly with NumPy. Earlier versions of this
    helper relied on ``mean_squared_error(..., squared=False)`` and
    ``log_loss(..., eps=...)``; both keyword arguments are no longer accepted
    by recent scikit-learn releases.

    Parameters
    ----------
    DoubleML : doubleml.double_ml_irm.DoubleMLIRM
        The IRM Double Machine Learning model. The function reads the outcome
        and treatment from ``_dml_data.y`` / ``_dml_data.d``, the number of
        repetitions from ``n_rep`` and the nuisance predictions from
        ``predictions`` (keys ``'ml_g0'``, ``'ml_g1'`` and ``'ml_m'``).
    prop : bool
        Indication if RMSE values have to be computed for the main regression
        (``False``) or log loss values for the propensity score (``True``).

    Returns
    -------
    numpy.ndarray
        One-dimensional array of length ``n_rep`` holding the RMSE
        (``prop=False``) or the log loss (``prop=True``) of each repetition.

    Raises
    ------
    ValueError
        If the treatment takes values other than 0 and 1, or if a required
        prediction array is not available (e.g. the model was not fitted
        with ``store_predictions=True``).
    """

    # predicted probabilities are clipped to [eps, 1 - eps] before taking
    # logarithms, so that predictions of exactly 0 or 1 stay finite
    eps = 0.025

    # export data of the DoubleML model
    # NOTE(review): assumes a single outcome and a single treatment column
    y = np.asarray(DoubleML._dml_data.y).ravel()
    d = np.asarray(DoubleML._dml_data.d).ravel()
    n_rep = DoubleML.n_rep

    # check whether treatment is binary
    if not np.isin(d, [0, 1]).all():
        raise ValueError("Treatment must be a binary variable.")

    def _stored_prediction(key):
        # Return the predictions stored under `key` as an (n_obs, n_rep) array.
        pred = DoubleML.predictions.get(key)
        if pred is None:
            raise ValueError(
                "No predictions stored for '" + key + "'. Fit the model "
                "with store_predictions=True first."
            )
        pred = np.asarray(pred)
        # accept (n_obs, n_rep) as well as (n_obs, n_rep, 1) shaped arrays
        return pred.reshape(pred.shape[0], pred.shape[1], -1)[:, :n_rep, 0]

    if not prop:
        # evaluate main regression accuracy: for every observation pick the
        # prediction that belongs to its observed treatment status
        g0 = _stored_prediction('ml_g0')
        g1 = _stored_prediction('ml_g1')
        picked = np.where((d == 0)[:, np.newaxis], g0, g1)

        # rmse of each repetition (column-wise)
        return np.sqrt(np.mean((y[:, np.newaxis] - picked) ** 2, axis=0))

    # evaluate propensity score accuracy: binary log loss of each repetition
    m = np.clip(_stored_prediction('ml_m'), eps, 1 - eps)
    treated = (d == 1)[:, np.newaxis]
    return -np.mean(np.where(treated, np.log(m), np.log(1 - m)), axis=0)


# TODO: Evaluate the predictive performance for `ml_g` and `ml_m` using the
#       helper function `pred_acc_irm()`.


def rep_propscore_plot(DoubleML):
    """
    Create histograms as subplots for every repetition's propensity score
    estimates of a Double Machine Learning model.

    One histogram (25 bins on the range [0, 1], showing counts) is drawn per
    repetition, side by side in a single row, and the figure is displayed
    with ``plt.show()``.

    Parameters
    ----------
    DoubleML : doubleml
        The Double Machine Learning model. The function reads the propensity
        score predictions from ``predictions['ml_m']`` and the number of
        repetitions from ``n_rep``.

    Returns
    -------
    None
    """

    # export nuisance part from the DoubleML model
    m = DoubleML.predictions.get('ml_m')

    # dimensions of nuisance array
    h = m.shape[0]
    rep = DoubleML.n_rep

    # squeeze=False always yields a 2-D array of axes, so a single
    # repetition and several repetitions share the same plotting code
    fig, axes = plt.subplots(1, rep, figsize=[20, 4.8], squeeze=False)

    # create histograms as subplots covering the propensity scores of all
    # repetitions; each histogram is drawn exactly once
    for i, ax in enumerate(axes[0]):
        ax.hist(np.reshape(m[:, i], h), range=[0, 1], bins=25, density=False)
        ax.set_title('repetition ' + str(i + 1))
        ax.set_xlabel("prop_score")
        ax.set_ylabel("count")

    plt.show()


# (TODO): Summarize the propensity score estimates


# TODO: Fit the ML DoubleMLIRM model using the fit() method


# TODO: Summarize your results


# TODO: Evaluate the predictive performance for `ml_g` and `ml_m` using the
#       helper function `pred_acc_irm()`.


# (TODO): Summarize the propensity score estimates


# TODO: Summarize the results on the nuisance estimation in a table or figure


## TODO: After calling fit(), access the coefficient parameter,
##      the standard error and confidence interval via the fields
##      `coef` and `summary`.


## TODO: After calling fit(), access the coefficient parameter,
##      the standard error and confidence interval via the fields
##      `coef` and `summary`.

Python: A/B Testing with DoubleML¶

0. Problem Formulation: A/B Testing¶

The A/B Testing Scenario¶

Why control for individual characteristics?¶

Why use machine learning to analyze A/B tests?¶

1. Data-Backend¶

The data set¶

2. Causal Model¶

2.1. Interactive regression model (IRM)¶

2.2. Naive Approach: Unconditional estimate of ATE¶

3. ML Methods¶

3.1. Benchmark using linear and logistic regression¶

3.2. Instantiate one or several ML learners of your choice¶

4. DML Specifications¶

4.1. Linear and logistic benchmark model¶

4.2. ML Model of your choice¶

4.3. - 4.X. ML Model of your choice¶

5. Estimation¶

5.1. Estimation for the Benchmark IRM¶

5.2. Estimation Diagnostics for the Benchmark IRM¶

5.2.1. Assess the Predictive Performance in the benchmark IRM¶

Optional: 5.2.2. Evaluation of Propensity Score Estimates in the Benchmark IRM¶

5.3. Estimation for ML Model¶

5.3. Estimation Diagnostics for the IRM using ML Methods¶

5.3.1. Assess the Predictive Performance in the IRM using ML methods¶

Optional: 5.3.2. Evaluation of Propensity Score Estimates in the IRM using ML methods¶

5.4. - 5.X. ML Model of your choice¶

5.X+1 Summarize your Results on the Quality of Estimation¶

6. Inference¶

6.1. Inference for the benchmark IRM¶

6.2. Inference for the IRM using ML methods¶

6.3. - 6.X. ML Model of your choice¶

References¶