File size: 7,476 Bytes
e6bc495
 
 
c050cb5
e6bc495
5689d12
e6bc495
 
8dc09a3
e6bc495
55eb523
 
e6bc495
55eb523
 
 
8dc09a3
e6bc495
55eb523
8dc09a3
e6bc495
55eb523
 
8dc09a3
c050cb5
6fbbd53
085af1d
8dc09a3
5689d12
e6bc495
8dc09a3
e6bc495
 
0efefa4
e6bc495
 
 
8dc09a3
e6bc495
 
 
5689d12
55eb523
e6bc495
 
 
 
 
 
 
c050cb5
 
 
e6bc495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c050cb5
 
 
 
 
e6bc495
 
 
c050cb5
e6bc495
 
c050cb5
e6bc495
 
 
 
 
 
 
 
55eb523
e6bc495
 
 
 
c050cb5
 
 
 
 
e6bc495
 
 
c050cb5
e6bc495
 
c050cb5
e6bc495
 
 
 
 
 
 
 
55eb523
e6bc495
 
 
 
 
c050cb5
 
 
 
e6bc495
 
 
c050cb5
e6bc495
 
 
c050cb5
e6bc495
 
 
 
 
 
 
 
 
55eb523
e6bc495
 
 
c050cb5
 
 
 
e6bc495
 
 
 
0efefa4
50f6e8c
 
 
 
 
 
 
 
 
 
 
 
 
 
55eb523
66884d3
8dc09a3
 
 
 
 
55eb523
ee3915f
 
 
 
 
e6bc495
55eb523
 
 
50f6e8c
 
 
8dc09a3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import gradio as gr
import pandas as pd
import numpy as np
from lime import lime_tabular
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import fetch_california_housing

def _explain_factor(scenario, score):
  """Render one one-sided LIME rule as a plain-English sentence.

  Args:
    scenario: Rule text that starts with the feature name, followed by a
      comparison operator and a threshold, e.g. ``"Median income > 4.74"``.
    score: Weight LIME assigned to the rule; a negative weight counts
      against the prediction.

  Returns:
    A two-line string: a verdict on the feature, then the direction and
    size of its effect on the prediction.
  """
  # Everything before the first comparison operator is the feature name.
  feature_name = scenario.partition('>')[0].partition('<')[0]
  # What follows the operator is the threshold. Taking the whole remainder
  # keeps the sign and the decimals (needed for e.g. longitudes).
  threshold = scenario[len(feature_name):].lstrip('<>= ').strip()
  feature_name = feature_name.strip()

  is_greater = '>' in scenario
  is_negative = score < 0

  position = 'above' if is_greater else 'below'

  if is_negative:
    status = '\U0001F4C9\U0001F44E'  # chart decreasing + thumbs down
    status_txt = 'penalizes'
  else:
    status = '\U0001F4C8\U0001F44D'  # chart increasing + thumbs up
    status_txt = 'helps'

  if is_greater and is_negative:
    verdict = 'too high'
  elif is_greater:
    verdict = 'rightfully high'
  elif is_negative:
    verdict = 'too low'
  else:
    verdict = 'low, and that\'s a great thing'

  return '{} {} is {}.\r\nBeing {} {} {} the prediction by {}%'.format(
      status, feature_name, verdict, position, threshold, status_txt,
      round(score, 2))


def trainme(MedInc, AveOccup, HouseAge, Latitude, Longitude, Price):
  """Train a random forest on California housing and explain one prediction.

  The values passed in replace the first sample of the dataset (features and
  target). A RandomForestRegressor is fitted on the result, the prediction
  for that first sample is explained with LIME, and the three strongest
  one-sided rules are turned into plain-English sentences.

  Args:
    MedInc: Median income of the block.
    AveOccup: Average number of household members.
    HouseAge: Median house age.
    Latitude: Latitude of the block.
    Longitude: Longitude of the block.
    Price: Actual price, in the unit of the dataset target (it is multiplied
      by 100000 for display). Must be non-zero, since the error rate divides
      by it.

  Returns:
    A list of four strings: the prediction with its error rate, followed by
    three explanation sentences. When LIME yields fewer than three one-sided
    rules, the remaining slots hold a short fallback message.
  """
  feature_columns = ['MedInc', 'AveOccup', 'HouseAge', 'Latitude', 'Longitude']
  # Readable names, aligned one-to-one with feature_columns.
  clean_features = ['Median income', 'Average number of household members',
                    'Median house age', 'Latitude', 'Longitude']

  # Load the base dataset and select the model features BY NAME: the dataset
  # lists HouseAge before AveOccup, so a positional write would swap them.
  housing = fetch_california_housing()
  housing_df = pd.DataFrame(data=housing.data, columns=housing.feature_names)
  X = housing_df[feature_columns].to_numpy(dtype=float, copy=True)
  y = np.array(housing.target, dtype=float)

  # Put the user's sample at index 0 (X columns follow feature_columns).
  X[0] = [MedInc, AveOccup, HouseAge, Latitude, Longitude]
  y[0] = Price

  # Create a machine learning model and train it.
  regressor = RandomForestRegressor(n_estimators=10, random_state=0)
  regressor.fit(X, y)

  # Create a LIME explainer in regression mode and explain the user's sample.
  explainer = lime_tabular.LimeTabularExplainer(
      X, mode="regression", feature_names=clean_features)
  explanation = explainer.explain_instance(
      X[0], regressor.predict, num_features=5)

  # Predicted vs. actual value. Pull out plain scalars so that int() below
  # is not applied to a 1-element array.
  pred_value = float(regressor.predict(X[0].reshape(1, -1))[0]) * 100000
  actual_value = float(y[0]) * 100000
  error_rate = ((pred_value - actual_value) / actual_value) * 100

  # Drop "between" rules: their text starts with the lower bound (a digit or
  # a minus sign) rather than with the feature name.
  between_prefixes = tuple('0123456789-')
  one_sided = [(scenario, score)
               for scenario, score in explanation.as_list()
               if not scenario.startswith(between_prefixes)]

  # Keep the three first remaining rules, each described from its OWN row.
  reasons = [_explain_factor(scenario, score)
             for scenario, score in one_sided[:3]]
  # Pad so the four output textboxes always receive a value.
  reasons += ['No other clear-cut factor found.'] * (3 - len(reasons))

  first_phrase = "\U0001F52E I predict it should be worth {} $ ({}% error rate)".format(
      int(pred_value), int(error_rate))

  return [first_phrase] + reasons

# Define the Gradio UI: page texts, six input sliders, four output
# textboxes, then the interface that wires them to trainme.

# Page title; the leading character is the crystal-ball emoji, written as an
# escape so it survives any re-encoding of this file.
title = '\U0001F52E Explain me like I\'m 5'

# Shown under the title.
description = """
<center>
Gradio Demo for model interpretation powered by LIME.\n
To use it, simply choose your values, and hit 'Submit'. It will train a vanilla Random Forest Regressor, interpret it using Lime and generating an explanation in plain English using a fill-in-the-blank template.
</center>
"""

# Credits shown at the bottom of the page.
article = "Originally made by [Ulysse Bottello](https://github.com/ulyssebottello), using [California Housing Dataset](https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset) and [Lime](https://github.com/marcotcr/lime)"


# One slider per trainme parameter, in the same order as its signature.
# NOTE(review): this name shadows the builtin input(); kept as-is so the
# module's global names do not change.
input = [gr.inputs.Slider(0.5, 15, default=4.406300, label='Median income'),
        gr.inputs.Slider(1, 30, default=2.446602, label='Average number of household members'),
        gr.inputs.Slider(1, 52, default=13, label='Median house age'),
        gr.inputs.Slider(32, 42, default=38.260000, label='Latitude of the block'),
        gr.inputs.Slider(-124, -111, default=-120.330000, label='Longitude of the block'),
        gr.inputs.Slider(0.14, 5, default=1.558000, label='Price'),
         ]
# One textbox per string in the list returned by trainme.
output = [gr.outputs.Textbox(label='Prediction'),
          gr.outputs.Textbox(label='Why?'),
          gr.outputs.Textbox(label='And why?'),
          gr.outputs.Textbox(label='Then, why?')
          ]

iface = gr.Interface(fn=trainme, 
                     inputs=input, 
                     outputs=output,
                     title = title,
                     description= description,
                     article = article)
iface.launch()