import gradio as gr
import pandas as pd
import numpy as np
from lime import lime_tabular
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import fetch_california_housing

# Column order used for the model's feature matrix. It matches the order of
# trainme's arguments and of _CLEAN_FEATURES below.
_FEATURE_COLUMNS = ['MedInc', 'AveOccup', 'HouseAge', 'Latitude', 'Longitude']

# Human-readable names shown in the explanations, aligned with _FEATURE_COLUMNS.
_CLEAN_FEATURES = ['Median income', 'Average number of household members', 'Median house age', 'Latitude', 'Longitude']

# Text shown in a "why" slot when LIME produced fewer one-sided rules than slots.
_NO_REASON = '🤷 No other clear-cut factor stands out for this prediction.'


def _one_sided_rules(listing):
  """Keep only the one-sided LIME rules and parse them into dicts.

  `listing` is the list of (rule text, weight) pairs returned by
  `explanation.as_list()`. Rules that start with a digit or a minus sign
  (two-sided rules such as "1.00 < Feature <= 2.00") are skipped, because the
  sentence template can only express "above X" / "below X".

  Each returned dict has the keys 'name', 'value', 'score' and 'is_greater'.
  """
  import re

  # Signed number with an optional decimal part, e.g. "-121.80" or "4.41".
  number = re.compile(r'-?\d+(?:[.,]\d+)?')
  numeric_prefixes = tuple('0123456789-')

  rules = []
  for scenario, score in listing:
    if scenario.startswith(numeric_prefixes):
      continue
    # Everything before the comparison operator; the trailing space is kept
    # on purpose because the sentence template relies on it.
    name = scenario.partition('>')[0].partition('<')[0]
    # Look for the threshold only after the feature name.
    found = number.search(scenario[len(name):])
    rules.append({
      'name': name,
      'value': found.group(0) if found else 'n/a',
      'score': float(score),
      'is_greater': '>' in scenario,
    })
  return rules


def _explain_rule(rule):
  """Render one parsed LIME rule as a plain-English sentence."""
  is_greater = rule['is_greater']
  is_negative = rule['score'] < 0

  value = ('above {}' if is_greater else 'below {}').format(rule['value'])

  if is_negative:
    status, status_txt = '📉👎', 'penalizes'
  else:
    status, status_txt = '📈👍', 'helps'

  if is_greater:
    result = 'too high' if is_negative else 'rightfully high'
  else:
    result = 'too low' if is_negative else 'low, and that\'s a great thing'

  return '{} {}is {}.\r\nBeing {} {} the prediciton by {}%'.format(
    status, rule['name'], result, value, status_txt, round(rule['score'], 2))


def trainme(MedInc, AveOccup, HouseAge, Latitude, Longitude, Price):
  """Train a random forest on California housing and explain one prediction.

  The given values overwrite the first row of the dataset, the model is
  trained on the result, and LIME explains the prediction for that row.

  Args:
    MedInc: median income of the block.
    AveOccup: average number of household members.
    HouseAge: median house age.
    Latitude: latitude of the block.
    Longitude: longitude of the block.
    Price: actual price, in the dataset's unit (scaled by 100000 for display).

  Returns:
    A list of four strings: the prediction sentence followed by three
    "why" sentences. A "why" slot falls back to a generic message when LIME
    yields fewer than three one-sided rules.

  Raises:
    ValueError: if Price is zero, since the error rate is relative to it.
  """
  #Call base dataset
  housing = fetch_california_housing()

  #As a dataframe, with targets
  housing_df = pd.DataFrame(data=housing.data, columns=housing.feature_names)
  housing_df["Price"] = housing.target

  #Select AND order the columns explicitly, so that the positional row
  #assignment below lines up with the argument order (the raw dataset lists
  #HouseAge before AveOccup).
  housing_df = housing_df[_FEATURE_COLUMNS + ['Price']].copy()

  #Put the new line at index 0
  housing_df.iloc[0] = [MedInc, AveOccup, HouseAge, Latitude, Longitude, Price]

  #Split
  y = housing_df['Price'].to_numpy()
  X = housing_df[_FEATURE_COLUMNS].to_numpy()

  #Create a machine learning model and train it
  regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
  regressor.fit(X, y)

  #Create a lime object, regression mode
  explainer = lime_tabular.LimeTabularExplainer(X, mode="regression", feature_names=_CLEAN_FEATURES)

  #Create the explanation for the user's row
  explanation = explainer.explain_instance(X[0], regressor.predict, num_features=5)
  rules = _one_sided_rules(explanation.as_list())

  #Get pred and actual scores as plain Python floats (not 1-element arrays)
  pred_value = float(regressor.predict(X[0].reshape(1, -1))[0]) * 100000
  actual_value = float(y[0]) * 100000
  if actual_value == 0:
    raise ValueError('Price must be non-zero to compute an error rate')
  error_rate = ((pred_value - actual_value) / actual_value) * 100

  first_phrase = "🔮 I predict it should be worth {} $ ({}% error rate)".format(int(pred_value), int(error_rate))

  #Up to three reasons, padded so the UI always receives four strings
  reasons = [_explain_rule(rule) for rule in rules[:3]]
  reasons += [_NO_REASON] * (3 - len(reasons))

  return [first_phrase] + reasons

#Define gradio UI

#Page heading shown above the form
title = '🔮 Explain me like I\'m 5'

#Intro text rendered under the title
description = """
<center>
Gradio Demo for model interpretation powered by LIME.\n
To use it, simply choose your values, and hit 'Submit'. It will train a vanilla Random Forest Regressor, interpret it using Lime and generating an explanation in plain English using a fill-in-the-blank template.
</center>
"""

#Footer text with credits and links
article = "Originally made by [Ulysse Bottello](https://github.com/ulyssebottello), using [California Housing Dataset](https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset) and [Lime](https://github.com/marcotcr/lime)"


#One slider per trainme argument, in the same order as its signature.
#Named input_components (not `input`) so the builtin input() is not shadowed.
input_components = [
    gr.inputs.Slider(0.5, 15, default=4.406300, label='Median income'),
    gr.inputs.Slider(1, 30, default=2.446602, label='Average number of household members'),
    gr.inputs.Slider(1, 52, default=13, label='Median house age'),
    gr.inputs.Slider(32, 42, default=38.260000, label='Latitude of the block'),
    gr.inputs.Slider(-124, -111, default=-120.330000, label='Longitude of the block'),
    gr.inputs.Slider(0.14, 5, default=1.558000, label='Price'),
]

#One textbox per string in the list returned by trainme
output_components = [
    gr.outputs.Textbox(label='Prediction'),
    gr.outputs.Textbox(label='Why?'),
    gr.outputs.Textbox(label='And why?'),
    gr.outputs.Textbox(label='Then, why?'),
]

iface = gr.Interface(
    fn=trainme,
    inputs=input_components,
    outputs=output_components,
    title=title,
    description=description,
    article=article,
)
iface.launch()