prakharg24 commited on
Commit
ea5b2dd
·
verified ·
1 Parent(s): cf1521b

Update my_pages/multiverse.py

Browse files
Files changed (1) hide show
  1. my_pages/multiverse.py +35 -19
my_pages/multiverse.py CHANGED
@@ -8,27 +8,31 @@ import pandas as pd
8
  import numpy as np
9
 
10
  from sklearn.model_selection import train_test_split
11
- from sklearn.preprocessing import MinMaxScaler, StandardScaler
12
  from sklearn.neural_network import MLPClassifier
13
  from sklearn.linear_model import SGDClassifier
14
  from sklearn.ensemble import RandomForestClassifier
 
15
  from sklearn.preprocessing import LabelEncoder
 
 
16
 
17
  choices_list = [
18
  {"label": "Data Scaling", "options": [
19
  "MinMax Scaler",
20
- "Standard Scaler"
 
 
 
 
 
 
21
  ]},
22
  {"label": "Model Architecture", "options": [
23
  "Logistic Regression",
24
- "Random Forest",
25
  "Neural Network (Small)"
26
  ]},
27
- {"label": "Training Iterations", "options": [
28
- "200",
29
- "500",
30
- "1000"
31
- ]},
32
  {"label": "Random Seed", "options": [
33
  "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"
34
  ]}
@@ -217,17 +221,22 @@ def render():
217
  ##########################
218
 
219
 
220
- def split_and_scale(features, label, test_split=0.2, preprocess_scale=True):
221
  X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=test_split, random_state=0)
222
 
223
- if preprocess_scale:
224
- scaler = MinMaxScaler()
 
 
 
 
 
225
  scaler.fit(X_train)
226
  X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
227
 
228
  return X_train, X_test, y_train, y_test
229
 
230
- def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=True):
231
  data = pd.read_csv('loan_approval_dataset.csv')
232
 
233
  features = data.drop(columns=["loan_id", " loan_status"])
@@ -242,15 +251,22 @@ def render():
242
 
243
 
244
  ### Main Code Starts Here
245
- scaler, arch, iterations, seed = selected_path[0], selected_path[1], int(selected_path[2]), int(selected_path[3])
246
- random_seed = seed
247
-
248
- X_train, X_test, y_train, y_test = get_stackoverflow_dataset()
 
 
 
 
 
 
 
249
 
250
  placeholder = st.empty()
251
- modelclass_dict = {'Neural Network (Small)': MLPClassifier([10], random_state=random_seed, max_iter=iterations),
252
- 'Logistic Regression': SGDClassifier(random_state=random_seed, max_iter=iterations),
253
- 'Random Forest': RandomForestClassifier(random_state=random_seed)}
254
  model = modelclass_dict[arch]
255
  placeholder.write("Training your model.")
256
  model.fit(X_train, y_train)
 
8
  import numpy as np
9
 
10
  from sklearn.model_selection import train_test_split
11
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
12
  from sklearn.neural_network import MLPClassifier
13
  from sklearn.linear_model import SGDClassifier
14
  from sklearn.ensemble import RandomForestClassifier
15
+ from sklearn.tree import DecisionTreeClassifier
16
  from sklearn.preprocessing import LabelEncoder
17
+ from sklearn.feature_selection import SelectKBest, f_classif
18
+ from sklearn.decomposition import PCA
19
 
20
  choices_list = [
21
  {"label": "Data Scaling", "options": [
22
  "MinMax Scaler",
23
+ "Standard Scaler",
24
+ "Robust Scaler"
25
+ ]},
26
+ {"label": "Feature Selection", "options": [
27
+ "Select K Best (k=5)",
28
+ "PCA (n=5)",
29
+ "All Features"
30
  ]},
31
  {"label": "Model Architecture", "options": [
32
  "Logistic Regression",
33
+ "Decision Tree",
34
  "Neural Network (Small)"
35
  ]},
 
 
 
 
 
36
  {"label": "Random Seed", "options": [
37
  "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"
38
  ]}
 
221
  ##########################
222
 
223
 
224
+ def split_and_scale(features, label, test_split=0.2, preprocess_scale=None):
225
  X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=test_split, random_state=0)
226
 
227
+ if preprocess_scale is not None:
228
+ if preprocess_scale=="MinMax Scaler":
229
+ scaler = MinMaxScaler()
230
+ elif preprocess_scale=="Standard Scaler":
231
+ scaler = StandardScaler()
232
+ elif preprocess_scale=="Robust Scaler":
233
+ scaler = RobustScaler()
234
  scaler.fit(X_train)
235
  X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
236
 
237
  return X_train, X_test, y_train, y_test
238
 
239
+ def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=None):
240
  data = pd.read_csv('loan_approval_dataset.csv')
241
 
242
  features = data.drop(columns=["loan_id", " loan_status"])
 
251
 
252
 
253
  ### Main Code Starts Here
254
+ scaler, feature_sel, arch, seed = selected_path[0], selected_path[1], selected_path[2], int(selected_path[3])
255
+ X_train, X_test, y_train, y_test = get_stackoverflow_dataset(preprocess_scale=scaler)
256
+
257
+ if feature_sel=="Select K Best (k=5)":
258
+ selector = SelectKBest(score_func=f_classif, k=5)
259
+ X_train = selector.fit_transform(X_train, y_train)
260
+ X_test = selector.transform(X_test)
261
+ elif feature_sel=="PCA (n=5)":
262
+ pca = PCA(n_components=5)
263
+ X_train = pca.fit_transform(X_train)
264
+ X_test = pca.transform(X_test)
265
 
266
  placeholder = st.empty()
267
+ modelclass_dict = {'Neural Network (Small)': MLPClassifier([10], random_state=seed, max_iter=1000),
268
+ 'Logistic Regression': SGDClassifier(random_state=seed, max_iter=1000),
269
+ 'Decision Tree': DecisionTreeClassifier(random_state=seed)}
270
  model = modelclass_dict[arch]
271
  placeholder.write("Training your model.")
272
  model.fit(X_train, y_train)