import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import sys
import MIDASpy as md

# Load the survey extract; the path is relative to the working directory.
data_in = pd.read_csv("Data/cces_jss_format.csv")

# Continuous variables are listed by hand. Every other column is classified
# by its number of distinct non-missing values (nunique() ignores NaN).
cont_vars = ["citylength_1", "numchildren", "birthyr"]
vals = data_in.nunique()

# More than two distinct values and not declared continuous -> categorical.
cat_vars = [
    col for col, n_unique in vals.items()
    if n_unique > 2 and col not in cont_vars
]

# Exactly two distinct values -> binary.
bin_vars = [col for col, n_unique in vals.items() if n_unique == 2]

# Recode each binary column with MIDASpy's converter
# (presumably to 0/1 -- confirm against the MIDASpy docs).
data_bin = data_in[bin_vars].apply(md.binary_conv)

# Convert the categorical columns; cat_conv returns the converted frame
# together with a second value that is later passed to build_model as
# `softmax_columns`.
data_cat = data_in[cat_vars]
data_oh, cat_col_list = md.cat_conv(data_cat)

# Assemble the model input column-wise: continuous, binary, categorical.
constructor_list = [data_in[cont_vars], data_bin, data_oh]
data_0 = pd.concat(constructor_list, axis=1)

# Fit a min-max scaler on the assembled data and rebuild a DataFrame that
# carries the original column labels (fit_transform returns a bare array).
scaler = MinMaxScaler()
data_scaled = pd.DataFrame(scaler.fit_transform(data_0), columns=data_0.columns)

# Explicitly (re)mark the missing cells as NaN.
# NOTE(review): these cells are already null by construction of the mask,
# so this assignment looks like a no-op safeguard -- confirm before removing.
na_loc = data_scaled.isna()
data_scaled[na_loc] = np.nan

# Configure the MIDAS imputer: two layers of 256 units, no VAE layer, and a
# fixed seed.
imputer = md.Midas(
    layer_structure=[256, 256],
    vae_layer=False,
    seed=89,
    input_drop=0.75,
)

# Tell the model which columns are binary and which belong to the
# categorical (softmax) groups produced during preprocessing.
imputer.build_model(
    data_scaled,
    binary_columns=bin_vars,
    softmax_columns=cat_col_list,
)

imputer.train_model(training_epochs=10)

# Request m = 10 imputed datasets
# (presumably produced lazily, one at a time -- confirm in the MIDASpy docs).
imputations = imputer.yield_samples(m=10)

# Post-process each imputed dataset into a two-column analysis frame.
analysis_dfs = []

for df in imputations:
    # Undo the min-max scaling first, before any columns are added to `df`,
    # so the frame still matches the columns the scaler was fitted on.
    df_unscaled = pd.DataFrame(
        scaler.inverse_transform(df),
        columns=data_scaled.columns,
    )

    # Derive age from birth year (2018 is presumably the survey year).
    df["age"] = 2018 - df_unscaled["birthyr"]

    # Threshold the imputed outcome at 0.5 to obtain a hard 0/1 value.
    df["CC18_415a"] = np.where(df_unscaled["CC18_415a"] >= 0.5, 1, 0)

    analysis_dfs.append(df[["age", "CC18_415a"]])

# Fit the outcome on age in every imputed dataset and combine the results
# (presumably via Rubin's rules -- confirm in the MIDASpy docs).
model = md.combine(
    y_var="CC18_415a",
    X_vars=["age"],
    df_list=analysis_dfs,
)
# NOTE: a bare expression only displays its value in an interactive session
# or notebook; when run as a plain script nothing is printed.
model

