Spaces:

ulysse
/

lyme

Build error

File size: 7,476 Bytes

e6bc495
 
 
c050cb5
e6bc495
5689d12
e6bc495
 
8dc09a3
e6bc495
55eb523
 
e6bc495
55eb523
 
 
8dc09a3
e6bc495
55eb523
8dc09a3
e6bc495
55eb523
 
8dc09a3
c050cb5
6fbbd53
085af1d
8dc09a3
5689d12
e6bc495
8dc09a3
e6bc495
 
0efefa4
e6bc495
 
 
8dc09a3
e6bc495
 
 
5689d12
55eb523
e6bc495
 
 
 
 
 
 
c050cb5
 
 
e6bc495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c050cb5
 
 
 
 
e6bc495
 
 
c050cb5
e6bc495
 
c050cb5
e6bc495
 
 
 
 
 
 
 
55eb523
e6bc495
 
 
 
c050cb5
 
 
 
 
e6bc495
 
 
c050cb5
e6bc495
 
c050cb5
e6bc495
 
 
 
 
 
 
 
55eb523
e6bc495
 
 
 
 
c050cb5
 
 
 
e6bc495
 
 
c050cb5
e6bc495
 
 
c050cb5
e6bc495
 
 
 
 
 
 
 
 
55eb523
e6bc495
 
 
c050cb5
 
 
 
e6bc495
 
 
 
0efefa4
50f6e8c
 
 
 
 
 
 
 
 
 
 
 
 
 
55eb523
66884d3
8dc09a3
 
 
 
 
55eb523
ee3915f
 
 
 
 
e6bc495
55eb523
 
 
50f6e8c
 
 
8dc09a3

import gradio as gr
import pandas as pd
import numpy as np
from lime import lime_tabular
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import fetch_california_housing

#Wording for every (rule is "greater than", LIME weight is negative) combination
_RULE_VERDICTS = {
  (True, True): 'too high',
  (True, False): 'rightfully high',
  (False, True): 'too low',
  (False, False): 'low, and that\'s a great thing',
}

#Shown in a "why" box when fewer than three one-sided rules are available
_NO_MORE_REASONS = 'No other clear-cut reason stands out.'


def _is_range_rule(rule):
  """Return True when a LIME rule text starts with a digit or a minus sign.

  Those are presumably the two-sided "between" rules (lower bound first);
  the sentence template only has wording for one-sided rules.
  """
  return rule.startswith(tuple('0123456789-'))


def _phrase_rule(rule, weight):
  """Turn one one-sided LIME rule and its weight into a plain English sentence.

  Args:
    rule: rule text, expected to look like 'Median income > 3.50' or
      'Median income <= 3.50'.
    weight: LIME weight of the rule; a negative weight lowers the prediction.

  Returns:
    The two-line "why" sentence built from the fill-in-the-blank template.
  """
  is_greater = '>' in rule
  is_negative = weight < 0

  #Split on the comparison sign: feature name on the left, threshold on the right.
  #The name keeps its trailing space, which the sentence template relies on.
  feature_name, _, threshold = rule.partition('>' if is_greater else '<')
  #Drop the '=' of '<=' / '>=' so the full signed decimal threshold remains
  threshold = threshold.lstrip('=').strip()

  bound = ('above {}' if is_greater else 'below {}').format(threshold)
  if is_negative:
    status, status_txt = '📉👎', 'penalizes'
  else:
    status, status_txt = '📈👍', 'helps'
  verdict = _RULE_VERDICTS[(is_greater, is_negative)]

  return '{} {}is {}.\r\nBeing {} {} the prediciton by {}%'.format(
    status, feature_name, verdict, bound, status_txt, round(float(weight), 2))


def trainme(MedInc, AveOccup, HouseAge, Latitude, Longitude, Price):
  """Fit a random forest on California housing plus one custom row and explain it.

  The submitted values overwrite row 0 of the dataset, a random forest is
  fitted on the whole table (custom row included), and LIME explains the
  prediction made for that row.

  Args:
    MedInc: value written to the 'MedInc' column of the custom row.
    AveOccup: value written to the 'AveOccup' column of the custom row.
    HouseAge: value written to the 'HouseAge' column of the custom row.
    Latitude: value written to the 'Latitude' column of the custom row.
    Longitude: value written to the 'Longitude' column of the custom row.
    Price: target value of the custom row; like the prediction it is
      multiplied by 100000 before being displayed.

  Returns:
    A list of four strings: the prediction sentence followed by three "why"
    sentences. When fewer than three one-sided LIME rules are available the
    remaining slots hold a short fallback message instead of raising.
  """
  #Call base dataset
  housing = fetch_california_housing()

  #As a dataframe, with targets
  housing_df = pd.DataFrame(data=housing.data, columns=housing.feature_names)
  housing_df["Price"] = housing.target
  housing_df = housing_df.drop(['AveBedrms', 'Population', 'AveRooms'], axis=1)

  #Put the new line at index 0, addressing columns by name: the frame's own
  #column order differs from the argument order, so a positional write would
  #swap AveOccup and HouseAge
  feature_columns = ['MedInc', 'AveOccup', 'HouseAge', 'Latitude', 'Longitude']
  housing_df.loc[0, feature_columns + ['Price']] = [MedInc, AveOccup, HouseAge, Latitude, Longitude, Price]

  #Split
  X = housing_df[feature_columns].to_numpy()
  y = housing_df['Price'].to_numpy()

  #Create a machine learning model and train it
  regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
  regressor.fit(X, y)

  #Readable feature names, in the same order as the columns of X
  clean_features = ['Median income','Average number of household members','Median house age','Latitude', 'Longitude']

  #Create a lime object, regression mode
  explainer = lime_tabular.LimeTabularExplainer(X, mode="regression", feature_names=clean_features)

  #Explain the custom row
  explanation = explainer.explain_instance(X[0], regressor.predict, num_features=5)

  #Get pred and actual scores as plain floats (predict returns a 1-element array)
  predicted = float(regressor.predict(X[0].reshape(1, -1))[0]) * 100000
  actual = float(y[0]) * 100000
  error_rate = (predicted - actual) / actual * 100

  #Keep the first three one-sided rules, in the order LIME returned them
  reasons = [_phrase_rule(rule, weight)
             for rule, weight in explanation.as_list()
             if not _is_range_rule(rule)][:3]
  #Always hand back three "why" strings so every output box gets a value
  reasons += [_NO_MORE_REASONS] * (3 - len(reasons))

  prediction = "🔮 I predict it should be worth {} $ ({}% error rate)".format(int(predicted), int(error_rate))

  return [prediction] + reasons

#Static texts handed to the Gradio interface: heading, intro and footer credits

title = "🔮 Explain me like I'm 5"

#Intro is wrapped in <center> tags; the empty entry after the first sentence
#reproduces the blank line that separates the two paragraphs
description = "\n".join([
  "",
  "<center>",
  "Gradio Demo for model interpretation powered by LIME.",
  "",
  "To use it, simply choose your values, and hit 'Submit'. It will train a vanilla Random Forest Regressor, interpret it using Lime and generating an explanation in plain English using a fill-in-the-blank template.",
  "</center>",
  "",
])

#Markdown credits line with links to the author, the dataset and the library
article = (
  "Originally made by [Ulysse Bottello](https://github.com/ulyssebottello), "
  "using [California Housing Dataset](https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset) "
  "and [Lime](https://github.com/marcotcr/lime)"
)


def _slider(minimum, maximum, start, label):
  """Build a slider with whichever Gradio component API is installed.

  Args:
    minimum: lowest selectable value.
    maximum: highest selectable value.
    start: value the slider shows initially.
    label: caption displayed with the slider.
  """
  #The gr.inputs namespace is deprecated in Gradio 3 and absent in Gradio 4;
  #the top-level component takes the initial value as `value`, not `default`
  if hasattr(gr, 'Slider'):
    return gr.Slider(minimum=minimum, maximum=maximum, value=start, label=label)
  return gr.inputs.Slider(minimum=minimum, maximum=maximum, default=start, label=label)


def _textbox(label):
  """Build an output textbox with whichever Gradio component API is installed."""
  if hasattr(gr, 'Textbox'):
    return gr.Textbox(label=label)
  return gr.outputs.Textbox(label=label)


#One slider per trainme argument, in the same order as its signature
input_components = [
  _slider(0.5, 15, 4.406300, 'Median income'),
  _slider(1, 30, 2.446602, 'Average number of household members'),
  _slider(1, 52, 13, 'Median house age'),
  _slider(32, 42, 38.260000, 'Latitude of the block'),
  _slider(-124, -111, -120.330000, 'Longitude of the block'),
  _slider(0.14, 5, 1.558000, 'Price'),
]

#One textbox per string returned by trainme
output_components = [
  _textbox('Prediction'),
  _textbox('Why?'),
  _textbox('And why?'),
  _textbox('Then, why?'),
]

iface = gr.Interface(fn=trainme,
                     inputs=input_components,
                     outputs=output_components,
                     title=title,
                     description=description,
                     article=article)
iface.launch()