# lyme / app.py
# ulysse's picture
# New description with article
# 50f6e8c
import gradio as gr
import pandas as pd
import numpy as np
from lime import lime_tabular
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import fetch_california_housing
# Emoji are spelled as escapes so the literals survive any re-encoding of the file.
_HELPS_ICON = '\U0001F4C8\U0001F44D'      # chart increasing + thumbs up
_PENALIZES_ICON = '\U0001F4C9\U0001F44E'  # chart decreasing + thumbs down
_PREDICTION_ICON = '\U0001F52E'           # crystal ball

# Shown in a "why" box when LIME produced fewer one-sided rules than boxes.
_NO_REASON_TEXT = 'No other clear-cut reason stands out for this prediction.'

# A range rule ("2.56 < Median income <= 3.53") starts with its lower bound,
# i.e. with a digit or a minus sign; one-sided rules start with the feature name.
_RANGE_RULE_PREFIXES = ('1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-')

# Number of "why" sentences returned after the prediction sentence.
_REASON_COUNT = 3


def _parse_one_sided_rule(scenario, score):
    """Split one LIME rule into its parts, or return None for a range rule.

    ``scenario`` is the rule text (e.g. ``"Median income > 4.74"``) and
    ``score`` the weight LIME attached to it.
    """
    if scenario.startswith(_RANGE_RULE_PREFIXES):
        return None
    feature_name, operator, threshold = scenario.partition('>')
    is_greater = bool(operator)
    if not is_greater:
        feature_name, _, threshold = scenario.partition('<')
    return {
        # The trailing space of the feature name is kept on purpose: the
        # sentence template below has no space before "is".
        'feature_name': feature_name,
        # Everything after the operator, so the sign and all digits are kept.
        'value': threshold.lstrip('=').strip(),
        'score': score,
        'is_greater': is_greater,
        'is_negative': score < 0,
    }


def _reason_phrase(rule):
    """Render one parsed rule as the two-line sentence shown in a "why" box."""
    if rule is None:
        return _NO_REASON_TEXT
    if rule['is_negative']:
        status, status_txt = _PENALIZES_ICON, 'penalizes'
    else:
        status, status_txt = _HELPS_ICON, 'helps'
    if rule['is_greater']:
        value = 'above {}'.format(rule['value'])
        result = 'too high' if rule['is_negative'] else 'rightfully high'
    else:
        value = 'below {}'.format(rule['value'])
        result = 'too low' if rule['is_negative'] else 'low, and that\'s a great thing'
    return '{} {}is {}.\r\nBeing {} {} the prediciton by {}%'.format(
        status, rule['feature_name'], result, value, status_txt,
        round(rule['score'], 2))


def trainme(MedInc, AveOccup, HouseAge, Latitude, Longitude, Price):
    """Train a model on California housing and explain one prediction in plain English.

    The user-supplied block replaces sample 0 of the dataset, a random forest
    is fitted on the whole set, and LIME explains the prediction for that
    sample.

    Args:
        MedInc: Median income of the block.
        AveOccup: Average number of household members.
        HouseAge: Median house age.
        Latitude: Latitude of the block.
        Longitude: Longitude of the block.
        Price: Actual price of the block, in the dataset's target unit
            (multiplied by 100000 for display).

    Returns:
        A list of four strings: the prediction sentence followed by three
        "why" sentences. A "why" slot holds a fallback sentence when LIME
        produced fewer than three one-sided rules.
    """
    feature_columns = ['MedInc', 'AveOccup', 'HouseAge', 'Latitude', 'Longitude']
    # Readable names, in the same order as ``feature_columns``.
    clean_features = ['Median income', 'Average number of household members',
                      'Median house age', 'Latitude', 'Longitude']

    # Load the base dataset and keep only the five features used by the UI.
    housing = fetch_california_housing()
    housing_df = pd.DataFrame(data=housing.data, columns=housing.feature_names)
    X = housing_df[feature_columns].to_numpy(dtype=float, copy=True)
    y = np.array(housing.target, dtype=float)

    # Put the user's block at index 0. Values are written in ``feature_columns``
    # order, so each one lands in its own column regardless of the dataset's
    # native column order.
    X[0] = [MedInc, AveOccup, HouseAge, Latitude, Longitude]
    y[0] = Price

    # Create the model and train it.
    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
    regressor.fit(X, y)

    # Explain the prediction for the user's block with LIME, regression mode.
    explainer = lime_tabular.LimeTabularExplainer(
        X, mode="regression", feature_names=clean_features)
    explanation = explainer.explain_instance(X[0], regressor.predict, num_features=5)

    # Keep one-sided rules only, in LIME's order, and pad so that every
    # "why" box always receives a sentence.
    parsed = (_parse_one_sided_rule(scenario, score)
              for scenario, score in explanation.as_list())
    rules = [rule for rule in parsed if rule is not None][:_REASON_COUNT]
    rules += [None] * (_REASON_COUNT - len(rules))

    # Predicted and actual prices as plain floats, scaled for display.
    predicted = float(regressor.predict(X[0].reshape(1, -1))[0]) * 100000
    actual = float(y[0]) * 100000
    error_rate = ((predicted - actual) / actual) * 100

    first_phrase = _PREDICTION_ICON + " I predict it should be worth {} $ ({}% error rate)".format(
        int(predicted), int(error_rate))
    return [first_phrase] + [_reason_phrase(rule) for rule in rules]
# Define the Gradio UI: six sliders feed trainme(), four text boxes show its output.
# The crystal-ball emoji is written as an escape so it survives re-encoding of the file.
title = '\U0001F52E Explain me like I\'m 5'
description = """
<center>
Gradio Demo for model interpretation powered by LIME.\n
To use it, simply choose your values, and hit 'Submit'. It will train a vanilla Random Forest Regressor, interpret it using Lime and generating an explanation in plain English using a fill-in-the-blank template.
</center>
"""
article = "Originally made by [Ulysse Bottello](https://github.com/ulyssebottello), using [California Housing Dataset](https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset) and [Lime](https://github.com/marcotcr/lime)"

# Slider order must match the parameter order of trainme().
# NOTE(review): gr.inputs / gr.outputs look like the legacy Gradio namespaces;
# confirm the pinned Gradio version before migrating to gr.Slider / gr.Textbox.
input_components = [
    gr.inputs.Slider(0.5, 15, default=4.406300, label='Median income'),
    gr.inputs.Slider(1, 30, default=2.446602, label='Average number of household members'),
    gr.inputs.Slider(1, 52, default=13, label='Median house age'),
    gr.inputs.Slider(32, 42, default=38.260000, label='Latitude of the block'),
    gr.inputs.Slider(-124, -111, default=-120.330000, label='Longitude of the block'),
    gr.inputs.Slider(0.14, 5, default=1.558000, label='Price'),
]

# One text box per string returned by trainme().
output_components = [
    gr.outputs.Textbox(label='Prediction'),
    gr.outputs.Textbox(label='Why?'),
    gr.outputs.Textbox(label='And why?'),
    gr.outputs.Textbox(label='Then, why?'),
]

iface = gr.Interface(
    fn=trainme,
    inputs=input_components,
    outputs=output_components,
    title=title,
    description=description,
    article=article,
)
iface.launch()