Spaces:
Runtime error
Runtime error
Isabel Gwara
committed on
Commit
·
5c2fff1
1
Parent(s):
1b929ef
Update app.py
Browse files
app.py
CHANGED
|
@@ -70,14 +70,14 @@ for (colname, colval) in uncleaned_data.iteritems():
|
|
| 70 |
|
| 71 |
cat_value_dicts[colname] = new_dict
|
| 72 |
data[colname] = transformed_col_vals
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
def train_model():
|
| 80 |
-
# select features and
|
| 81 |
cols = len(data.columns)
|
| 82 |
num_features = cols - 1
|
| 83 |
x = data.iloc[: , :num_features]
|
|
@@ -91,10 +91,11 @@ def train_model():
|
|
| 91 |
model.fit(x_train, y_train.values.ravel())
|
| 92 |
y_pred = model.predict(x_test)
|
| 93 |
|
| 94 |
-
# save the model to file
|
| 95 |
with open('model.pkl', 'wb') as f:
|
| 96 |
pkl.dump(model, f)
|
| 97 |
|
|
|
|
| 98 |
with open('acc.txt', 'w+') as f:
|
| 99 |
acc = metrics.accuracy_score(y_test, y_pred)
|
| 100 |
f.write(str(round(acc * 100, 1)) + '%')
|
|
@@ -105,24 +106,28 @@ def train_model():
|
|
| 105 |
### rerun logic ###
|
| 106 |
### -------------------------------- ###
|
| 107 |
|
|
|
|
|
|
|
| 108 |
try:
|
| 109 |
with open('model.pkl', 'rb') as f:
|
| 110 |
model = pkl.load(f)
|
|
|
|
|
|
|
|
|
|
| 111 |
except FileNotFoundError as e:
|
| 112 |
model = train_model()
|
| 113 |
|
|
|
|
| 114 |
with open('acc.txt', 'r') as f:
|
| 115 |
acc = f.read()
|
| 116 |
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
### ------------------------------- ###
|
| 121 |
### interface creation ###
|
| 122 |
### ------------------------------- ###
|
| 123 |
|
| 124 |
-
|
| 125 |
-
#
|
| 126 |
def general_predictor(input_list):
|
| 127 |
features = []
|
| 128 |
|
|
@@ -140,9 +145,9 @@ def general_predictor(input_list):
|
|
| 140 |
|
| 141 |
def get_feat():
|
| 142 |
feats = [abs(x) for x in model.coef_[0]]
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
return
|
| 146 |
|
| 147 |
form = st.form('ml-inputs')
|
| 148 |
|
|
@@ -171,7 +176,7 @@ if form.form_submit_button("Submit to get your recommendation!"):
|
|
| 171 |
col1, col2 = st.columns(2)
|
| 172 |
col1.metric("Number of Different Possible Results", len(cat_value_dicts[final_colname]))
|
| 173 |
col2.metric("Model Accuracy", acc)
|
| 174 |
-
|
| 175 |
|
| 176 |
|
| 177 |
with open('info.md') as f:
|
|
|
|
| 70 |
|
| 71 |
cat_value_dicts[colname] = new_dict
|
| 72 |
data[colname] = transformed_col_vals
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
### -------------------------------- ###
|
| 76 |
+
### model training ###
|
| 77 |
+
### -------------------------------- ###
|
| 78 |
+
|
| 79 |
def train_model():
|
| 80 |
+
# select features and prediction; automatically selects last column as prediction
|
| 81 |
cols = len(data.columns)
|
| 82 |
num_features = cols - 1
|
| 83 |
x = data.iloc[: , :num_features]
|
|
|
|
| 91 |
model.fit(x_train, y_train.values.ravel())
|
| 92 |
y_pred = model.predict(x_test)
|
| 93 |
|
| 94 |
+
# save the model to file using the pickle package
|
| 95 |
with open('model.pkl', 'wb') as f:
|
| 96 |
pkl.dump(model, f)
|
| 97 |
|
| 98 |
+
# save model accuracy (as a percentage string) to the plain-text file acc.txt
|
| 99 |
with open('acc.txt', 'w+') as f:
|
| 100 |
acc = metrics.accuracy_score(y_test, y_pred)
|
| 101 |
f.write(str(round(acc * 100, 1)) + '%')
|
|
|
|
| 106 |
### rerun logic ###
|
| 107 |
### -------------------------------- ###
|
| 108 |
|
| 109 |
+
# check to see if this is the first time running the script,
|
| 110 |
+
# if the model has already been trained and saved, load it
|
| 111 |
try:
|
| 112 |
with open('model.pkl', 'rb') as f:
|
| 113 |
model = pkl.load(f)
|
| 114 |
+
|
| 115 |
+
# if this is the first time running the script, train the model
|
| 116 |
+
# and save it to the file model.pkl
|
| 117 |
except FileNotFoundError as e:
|
| 118 |
model = train_model()
|
| 119 |
|
| 120 |
+
# read the model accuracy from file
|
| 121 |
with open('acc.txt', 'r') as f:
|
| 122 |
acc = f.read()
|
| 123 |
|
| 124 |
|
|
|
|
|
|
|
| 125 |
### ------------------------------- ###
|
| 126 |
### interface creation ###
|
| 127 |
### ------------------------------- ###
|
| 128 |
|
| 129 |
+
# uses the logistic regression to predict for a generic number
|
| 130 |
+
# of features
|
| 131 |
def general_predictor(input_list):
|
| 132 |
features = []
|
| 133 |
|
|
|
|
| 145 |
|
| 146 |
def get_feat():
|
| 147 |
feats = [abs(x) for x in model.coef_[0]]
|
| 148 |
+
max_val = max(feats)
|
| 149 |
+
idx = feats.index(max_val)
|
| 150 |
+
return data.columns[idx]
|
| 151 |
|
| 152 |
form = st.form('ml-inputs')
|
| 153 |
|
|
|
|
| 176 |
col1, col2 = st.columns(2)
|
| 177 |
col1.metric("Number of Different Possible Results", len(cat_value_dicts[final_colname]))
|
| 178 |
col2.metric("Model Accuracy", acc)
|
| 179 |
+
st.metric("Most Important Question", get_feat())
|
| 180 |
|
| 181 |
|
| 182 |
with open('info.md') as f:
|