import gradio as gr import pandas as pd import numpy as np from lime import lime_tabular from sklearn.linear_model import LinearRegression from sklearn.ensemble import RandomForestRegressor from sklearn.datasets import fetch_california_housing def trainme(MedInc, AveOccup, HouseAge, Latitude, Longitude, Price): #Call base dataset housing = fetch_california_housing() #As a dataframe, with tragets housing_df = pd.DataFrame(data=housing.data, columns=housing.feature_names) housing_df["Price"] = housing.target housing_df = housing_df.drop(['AveBedrms', 'Population', 'AveRooms'], axis=1) #Put the new line at index 0 housing_df.iloc[0] = [MedInc, AveOccup, HouseAge, Latitude, Longitude, Price] #Split y = pd.DataFrame(data=housing_df['Price']).to_numpy() X = housing_df[['MedInc','AveOccup','HouseAge', 'Latitude', 'Longitude']].to_numpy() #create a machine learning model and train it regressor = RandomForestRegressor(n_estimators = 10, random_state = 0) regressor.fit(X,np.ravel(y)) #Create a redable/clean feature list clean_features = ['Median income','Average number of household members','Median house age','Latitude', 'Longitude'] #Create a lime object, regression mode from lime import lime_tabular explainer = lime_tabular.LimeTabularExplainer(X, mode="regression", feature_names=clean_features) #Create the expl explanation = explainer.explain_instance(X[0], regressor.predict, num_features=5) listing = explanation.as_list() #Get pred and actual scores Pred_value = regressor.predict(X[0].reshape(1,-1))*100000 Actual_value = y[0]*100000 Error_rate = ((Pred_value - Actual_value)/Actual_value) *100 #Exp as pd df df_result = pd.DataFrame(data=listing, columns=['Scenario', 'Score']) #Create a clean feature name column df_result['Feature name'] = df_result['Scenario'].str.partition('>')[0].str.partition('<')[0] #Extract the value df_result['Value'] = df_result['Scenario'].str.extract('([0-9][,.]*[0-9]*)') #drop between rows prefixes = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-'] df_result['between'] = df_result['Scenario'].str.startswith(tuple(prefixes)) df_result.drop(df_result[df_result.between == True].index, inplace=True) df_result = df_result.drop(columns='between', axis=1).reset_index(drop=True) #Create a is negative column df_result['Is negative'] = df_result['Score']<0 #Separate positive and negative df_result['Is greater'] = df_result['Scenario'].str.contains('>|>=', regex=True) #First why first_Feature_name = df_result['Feature name'][0] first_score = df_result['Score'][0] if df_result['Is greater'][0] == True: first_value = 'above {}'.format(df_result['Value'][0]) else: first_value = 'below {}'.format(df_result['Value'][0]) if df_result['Is negative'][0] == False: first_status = '📈👍' first_status_txt = 'helps' else: first_status = '📉👎' first_status_txt = 'penalizes' if (df_result['Is greater'][0] == True) and (df_result['Is negative'][0] == True): first_result = 'too high' elif (df_result['Is greater'][0] == True) and (df_result['Is negative'][0] == False): first_result = 'rightfully high' elif (df_result['Is greater'][0] == False) and (df_result['Is negative'][0] == True): first_result = 'too low' else: first_result = 'low, and that\'s a great thing' #Second why second_Feature_name = df_result['Feature name'][1] second_score = df_result['Score'][1] if df_result['Is greater'][0] == True: second_value = 'above {}'.format(df_result['Value'][1]) else: second_value = 'below {}'.format(df_result['Value'][1]) if df_result['Is negative'][1] == False: second_status = '📈👍' second_status_txt = 'helps' else: second_status = '📉👎' second_status_txt = 'penalizes' if (df_result['Is greater'][1] == True) and (df_result['Is negative'][1] == True): second_result = 'too high' elif (df_result['Is greater'][1] == True) and (df_result['Is negative'][1] == False): second_result = 'rightfully high' elif (df_result['Is greater'][1] == False) and (df_result['Is negative'][1] == True): second_result = 'too low' else: second_result = 'low, and that\'s a great thing' #Third why third_Feature_name = df_result['Feature name'][2] third_score = df_result['Score'][2] if df_result['Is greater'][0] == True: third_value = 'above {}'.format(df_result['Value'][2]) else: third_value = 'below {}'.format(df_result['Value'][2]) if df_result['Is negative'][2] == False: third_status = '📈👍' third_status_txt = 'helps' else: third_status = '📉👎' third_status_txt = 'penalizes' if (df_result['Is greater'][2] == True) and (df_result['Is negative'][2] == True): third_result = 'too high' elif (df_result['Is greater'][2] == True) and (df_result['Is negative'][2] == False): third_result = 'rightfully high' elif (df_result['Is greater'][2] == False) and (df_result['Is negative'][2] == True): third_result = 'too low' else: third_result = 'low, and that\'s a great thing' first_phrase = "🔮 I predict it should be worth {} $ ({}% error rate)".format(int(Pred_value), int(Error_rate)) second_phrase = '{} {}is {}.\r\nBeing {} {} the prediciton by {}%'.format(first_status, first_Feature_name, first_result, first_value, first_status_txt, round(first_score, 2)) third_phrase = '{} {}is {}.\r\nBeing {} {} the prediciton by {}%'.format(second_status, second_Feature_name, second_result, second_value, second_status_txt, round(second_score, 2)) forth_phrase = '{} {}is {}.\r\nBeing {} {} the prediciton by {}%'.format(third_status, third_Feature_name, third_result, third_value, third_status_txt, round(third_score, 2)) final_list = [first_phrase, second_phrase, third_phrase, forth_phrase] return final_list #Define gradio UI title = '🔮 Explain me like I\'m 5' description = """