Update my_pages/multiverse.py
Browse files- my_pages/multiverse.py +35 -19
my_pages/multiverse.py
CHANGED
|
@@ -8,27 +8,31 @@ import pandas as pd
|
|
| 8 |
import numpy as np
|
| 9 |
|
| 10 |
from sklearn.model_selection import train_test_split
|
| 11 |
-
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
| 12 |
from sklearn.neural_network import MLPClassifier
|
| 13 |
from sklearn.linear_model import SGDClassifier
|
| 14 |
from sklearn.ensemble import RandomForestClassifier
|
|
|
|
| 15 |
from sklearn.preprocessing import LabelEncoder
|
|
|
|
|
|
|
| 16 |
|
| 17 |
choices_list = [
|
| 18 |
{"label": "Data Scaling", "options": [
|
| 19 |
"MinMax Scaler",
|
| 20 |
-
"Standard Scaler"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
]},
|
| 22 |
{"label": "Model Architecture", "options": [
|
| 23 |
"Logistic Regression",
|
| 24 |
-
"
|
| 25 |
"Neural Network (Small)"
|
| 26 |
]},
|
| 27 |
-
{"label": "Training Iterations", "options": [
|
| 28 |
-
"200",
|
| 29 |
-
"500",
|
| 30 |
-
"1000"
|
| 31 |
-
]},
|
| 32 |
{"label": "Random Seed", "options": [
|
| 33 |
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10"
|
| 34 |
]}
|
|
@@ -217,17 +221,22 @@ def render():
|
|
| 217 |
##########################
|
| 218 |
|
| 219 |
|
| 220 |
-
def split_and_scale(features, label, test_split=0.2, preprocess_scale=
|
| 221 |
X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=test_split, random_state=0)
|
| 222 |
|
| 223 |
-
if preprocess_scale:
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
scaler.fit(X_train)
|
| 226 |
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
|
| 227 |
|
| 228 |
return X_train, X_test, y_train, y_test
|
| 229 |
|
| 230 |
-
def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=
|
| 231 |
data = pd.read_csv('loan_approval_dataset.csv')
|
| 232 |
|
| 233 |
features = data.drop(columns=["loan_id", " loan_status"])
|
|
@@ -242,15 +251,22 @@ def render():
|
|
| 242 |
|
| 243 |
|
| 244 |
### Main Code Starts Here
|
| 245 |
-
scaler,
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
placeholder = st.empty()
|
| 251 |
-
modelclass_dict = {'Neural Network (Small)': MLPClassifier([10], random_state=
|
| 252 |
-
'Logistic Regression': SGDClassifier(random_state=
|
| 253 |
-
'
|
| 254 |
model = modelclass_dict[arch]
|
| 255 |
placeholder.write("Training your model.")
|
| 256 |
model.fit(X_train, y_train)
|
|
|
|
| 8 |
import numpy as np
|
| 9 |
|
| 10 |
from sklearn.model_selection import train_test_split
|
| 11 |
+
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
|
| 12 |
from sklearn.neural_network import MLPClassifier
|
| 13 |
from sklearn.linear_model import SGDClassifier
|
| 14 |
from sklearn.ensemble import RandomForestClassifier
|
| 15 |
+
from sklearn.tree import DecisionTreeClassifier
|
| 16 |
from sklearn.preprocessing import LabelEncoder
|
| 17 |
+
from sklearn.feature_selection import SelectKBest, f_classif
|
| 18 |
+
from sklearn.decomposition import PCA
|
| 19 |
|
| 20 |
# Decision points of the multiverse analysis. Each entry is one choice
# node (a "label") together with the options a user may pick; one option
# from every node defines a single universe/path.
choices_list = [
    {"label": "Data Scaling", "options": [
        "MinMax Scaler",
        "Standard Scaler",
        "Robust Scaler",
    ]},
    {"label": "Feature Selection", "options": [
        "Select K Best (k=5)",
        "PCA (n=5)",
        "All Features",
    ]},
    {"label": "Model Architecture", "options": [
        "Logistic Regression",
        "Decision Tree",
        "Neural Network (Small)",
    ]},
    {"label": "Random Seed", "options": [
        "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
    ]},
]
|
|
|
| 221 |
##########################
|
| 222 |
|
| 223 |
|
| 224 |
+
def split_and_scale(features, label, test_split=0.2, preprocess_scale=None):
    """Split features/label into train and test sets, optionally scaling them.

    Parameters
    ----------
    features : array-like
        Feature matrix.
    label : array-like
        Target vector aligned with ``features``.
    test_split : float
        Fraction of the data held out for the test set.
    preprocess_scale : str or None
        One of "MinMax Scaler", "Standard Scaler", "Robust Scaler";
        ``None`` skips scaling entirely.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``preprocess_scale`` is not ``None`` and not a recognized
        scaler name (previously this fell through and crashed with an
        unbound-variable ``NameError`` at ``scaler.fit``).
    """
    # Fixed random_state=0 so every universe sees the same train/test split.
    X_train, X_test, y_train, y_test = train_test_split(
        features, label, test_size=test_split, random_state=0)

    if preprocess_scale is not None:
        # Dispatch table instead of an if/elif chain: an unknown name now
        # fails loudly instead of leaving `scaler` undefined.
        scaler_classes = {
            "MinMax Scaler": MinMaxScaler,
            "Standard Scaler": StandardScaler,
            "Robust Scaler": RobustScaler,
        }
        try:
            scaler = scaler_classes[preprocess_scale]()
        except KeyError:
            raise ValueError(
                f"Unknown scaling option: {preprocess_scale!r}") from None
        # Fit on the training split only, to avoid test-set leakage.
        scaler.fit(X_train)
        X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

    return X_train, X_test, y_train, y_test
|
| 238 |
|
| 239 |
+
def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=None):
|
| 240 |
data = pd.read_csv('loan_approval_dataset.csv')
|
| 241 |
|
| 242 |
features = data.drop(columns=["loan_id", " loan_status"])
|
|
|
|
| 251 |
|
| 252 |
|
| 253 |
### Main Code Starts Here
# Unpack the chosen universe: (scaler name, feature-selection strategy,
# model architecture, random seed) — one option per node of choices_list.
scaler, feature_sel, arch, seed = (selected_path[0], selected_path[1],
                                   selected_path[2], int(selected_path[3]))
X_train, X_test, y_train, y_test = get_stackoverflow_dataset(preprocess_scale=scaler)

# Optional feature selection / dimensionality reduction; fit on the
# training split only to avoid test-set leakage. "All Features" falls
# through untouched.
if feature_sel == "Select K Best (k=5)":
    selector = SelectKBest(score_func=f_classif, k=5)
    X_train = selector.fit_transform(X_train, y_train)
    X_test = selector.transform(X_test)
elif feature_sel == "PCA (n=5)":
    # BUG FIX: the option label promises 5 components but the code used
    # n_components=2, silently training on a different feature space.
    pca = PCA(n_components=5)
    # PCA is unsupervised — the previous y_train argument was ignored.
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)

placeholder = st.empty()
# NOTE(review): `iterations` is not set in this fragment — presumably
# assigned earlier in render(); confirm it is always defined here.
modelclass_dict = {
    'Neural Network (Small)': MLPClassifier([10], random_state=seed, max_iter=iterations),
    'Logistic Regression': SGDClassifier(random_state=seed, max_iter=iterations),
    'Decision Tree': DecisionTreeClassifier(random_state=seed),
}
model = modelclass_dict[arch]
placeholder.write("Training your model.")
model.fit(X_train, y_train)
|