Spaces:

ulysse
/

lyme

Build error

App Files Files Community

lyme / app.py

ulysse

New description with article

50f6e8c over 3 years ago

raw

history blame contribute delete

7.48 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	from lime import lime_tabular
	from sklearn.linear_model import LinearRegression
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.datasets import fetch_california_housing

	#Leading characters that mark a two-sided LIME condition such as
	#"2.56 < Median income <= 3.53" (it starts with a number, possibly negative)
	_BETWEEN_PREFIXES = ('1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-')

	#Model inputs, in the same order as trainme's arguments
	_FEATURE_COLUMNS = ['MedInc', 'AveOccup', 'HouseAge', 'Latitude', 'Longitude']

	#Readable labels, aligned one-to-one with _FEATURE_COLUMNS
	_CLEAN_FEATURES = ['Median income', 'Average number of household members', 'Median house age', 'Latitude', 'Longitude']


	def _reason_phrase(scenario, score):
	    """Render one one-sided LIME (condition, weight) pair as a sentence.

	    scenario: condition text, e.g. 'Median income > 4.74' or 'Latitude <= 33.93'.
	    score: LIME weight of that condition; a negative weight lowers the prediction.

	    Returns the fill-in-the-blank sentence shown in one of the "why" boxes.
	    """
	    #Both '>' and '>=' contain '>', so a plain substring test is enough
	    is_greater = '>' in scenario
	    is_negative = score < 0

	    #Text before the first comparison operator. Its trailing space is kept on
	    #purpose: the sentence template below relies on it ("{}is").
	    feature_name = scenario.partition('>')[0].partition('<')[0]

	    #Assumes the threshold is the last space-separated token, i.e. conditions
	    #look like "<name> <op> <value>"; this keeps the sign and every digit
	    threshold = scenario.rpartition(' ')[2]
	    value = ('above {}' if is_greater else 'below {}').format(threshold)

	    if is_negative:
	        status, status_txt = '📉👎', 'penalizes'
	    else:
	        status, status_txt = '📈👍', 'helps'

	    if is_greater:
	        result = 'too high' if is_negative else 'rightfully high'
	    else:
	        result = 'too low' if is_negative else 'low, and that\'s a great thing'

	    return '{} {}is {}.\r\nBeing {} {} the prediciton by {}%'.format(status, feature_name, result, value, status_txt, round(score, 2))


	def trainme(MedInc, AveOccup, HouseAge, Latitude, Longitude, Price):
	    """Train a random forest on California housing and explain one prediction.

	    The six arguments describe a single block; they overwrite row 0 of the
	    dataset, so the model is trained with that row included and the
	    explanation is computed for it.

	    Returns a list of four strings: the prediction sentence followed by up to
	    three explanation sentences. When LIME yields fewer than three one-sided
	    conditions, the remaining entries are empty strings.
	    """
	    #Call base dataset
	    housing = fetch_california_housing()

	    #As a dataframe, with targets
	    housing_df = pd.DataFrame(data=housing.data, columns=housing.feature_names)
	    housing_df["Price"] = housing.target

	    #Select the columns by name, in argument order, so the positional
	    #assignment below puts every value in its own column
	    housing_df = housing_df[_FEATURE_COLUMNS + ['Price']].copy()

	    #Put the new line at index 0
	    housing_df.iloc[0] = [MedInc, AveOccup, HouseAge, Latitude, Longitude, Price]

	    #Split
	    X = housing_df[_FEATURE_COLUMNS].to_numpy()
	    y = housing_df['Price'].to_numpy()

	    #Create a machine learning model and train it
	    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
	    regressor.fit(X, y)

	    #Create a lime object, regression mode, and explain row 0
	    explainer = lime_tabular.LimeTabularExplainer(X, mode="regression", feature_names=_CLEAN_FEATURES)
	    explanation = explainer.explain_instance(X[0], regressor.predict, num_features=5)

	    #Get pred and actual scores as plain floats (scaled by 100000 for display)
	    pred_value = float(regressor.predict(X[0].reshape(1, -1))[0]) * 100000
	    actual_value = float(y[0]) * 100000
	    error_rate = ((pred_value - actual_value) / actual_value) * 100

	    first_phrase = "🔮 I predict it should be worth {} $ ({}% error rate)".format(int(pred_value), int(error_rate))

	    #Drop between rows: only one-sided conditions fit the sentence template
	    one_sided = [(scenario, score) for scenario, score in explanation.as_list() if not scenario.startswith(_BETWEEN_PREFIXES)]

	    #Keep the first three reasons in the order LIME returned them
	    reasons = [_reason_phrase(scenario, score) for scenario, score in one_sided[:3]]

	    #The UI always expects four values, so pad missing reasons with blanks
	    reasons += [''] * (3 - len(reasons))

	    return [first_phrase] + reasons

	#Gradio front-end: page texts first, then the widgets, then the app itself

	description = """
	<center>
	Gradio Demo for model interpretation powered by LIME.\n
	To use it, simply choose your values, and hit 'Submit'. It will train a vanilla Random Forest Regressor, interpret it using Lime and generating an explanation in plain English using a fill-in-the-blank template.
	</center>
	"""

	title = "🔮 Explain me like I'm 5"

	article = (
	    "Originally made by [Ulysse Bottello](https://github.com/ulyssebottello), "
	    "using [California Housing Dataset](https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset) "
	    "and [Lime](https://github.com/marcotcr/lime)"
	)

	#One slider per trainme argument, in argument order: (min, max, start value, label)
	input = [
	    gr.inputs.Slider(low, high, default=start, label=caption)
	    for low, high, start, caption in (
	        (0.5, 15, 4.406300, 'Median income'),
	        (1, 30, 2.446602, 'Average number of household members'),
	        (1, 52, 13, 'Median house age'),
	        (32, 42, 38.260000, 'Latitude of the block'),
	        (-124, -111, -120.330000, 'Longitude of the block'),
	        (0.14, 5, 1.558000, 'Price'),
	    )
	]

	#One text box per string returned by trainme
	output = [
	    gr.outputs.Textbox(label=caption)
	    for caption in ('Prediction', 'Why?', 'And why?', 'Then, why?')
	]

	#Wire everything together and start serving
	iface = gr.Interface(
	    fn=trainme,
	    inputs=input,
	    outputs=output,
	    title=title,
	    description=description,
	    article=article,
	)
	iface.launch()