prakharg24 committed on
Commit
cf1521b
·
verified ·
1 Parent(s): e93f78d

Update my_pages/multiverse.py

Browse files
Files changed (1) hide show
  1. my_pages/multiverse.py +13 -18
my_pages/multiverse.py CHANGED
@@ -12,6 +12,7 @@ from sklearn.preprocessing import MinMaxScaler, StandardScaler
12
  from sklearn.neural_network import MLPClassifier
13
  from sklearn.linear_model import SGDClassifier
14
  from sklearn.ensemble import RandomForestClassifier
 
15
 
16
  choices_list = [
17
  {"label": "Data Scaling", "options": [
@@ -216,41 +217,35 @@ def render():
216
  ##########################
217
 
218
 
219
- def split_and_scale(features_raw, features, label, group, test_split=0.2, preprocess_scale=True):
220
- X_raw_train, X_raw_test, X_train, X_test, y_train, y_test, group_train, group_test = train_test_split(
221
- features_raw, features, label, group, test_size=test_split, random_state=0)
222
 
223
  if preprocess_scale:
224
  scaler = MinMaxScaler()
225
  scaler.fit(X_train)
226
  X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
227
 
228
- return X_raw_train, X_raw_test, X_train, X_test, y_train, y_test, group_train, group_test
229
 
230
  def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=True):
231
- raw_data = pd.read_csv('stackoverflow_full.csv')
232
- raw_data = raw_data[raw_data['Country']=='Canada']
233
-
234
- features = raw_data[['Age', 'EdLevel', 'Employment', 'Gender', 'MainBranch', 'YearsCode', 'YearsCodePro', 'PreviousSalary', 'ComputerSkills']]
235
- features_raw = features.copy()
236
-
237
- categorical_cols = ['Age', 'EdLevel', 'Employment', 'Gender', 'MainBranch']
238
- if len(categorical_cols) > 0:
239
- features = pd.get_dummies(features, columns=categorical_cols)
240
 
241
- label = np.array(raw_data['Employed'].astype(int))
242
- group = features['Gender_Man'].astype('category').cat.codes
243
 
244
- features, label, group = np.array(features), np.array(label), np.array(group)
245
 
246
- return split_and_scale(features_raw, features, label, group, test_split, preprocess_scale)
247
 
248
 
249
  ### Main Code Starts Here
250
  scaler, arch, iterations, seed = selected_path[0], selected_path[1], int(selected_path[2]), int(selected_path[3])
251
  random_seed = seed
252
 
253
- X_raw_train, X_raw_test, X_train, X_test, y_train, y_test, group_train, group_test = get_stackoverflow_dataset()
254
 
255
  placeholder = st.empty()
256
  modelclass_dict = {'Neural Network (Small)': MLPClassifier([10], random_state=random_seed, max_iter=iterations),
 
12
  from sklearn.neural_network import MLPClassifier
13
  from sklearn.linear_model import SGDClassifier
14
  from sklearn.ensemble import RandomForestClassifier
15
+ from sklearn.preprocessing import LabelEncoder
16
 
17
  choices_list = [
18
  {"label": "Data Scaling", "options": [
 
217
  ##########################
218
 
219
 
220
+ def split_and_scale(features, label, test_split=0.2, preprocess_scale=True):
221
+ X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=test_split, random_state=0)
 
222
 
223
  if preprocess_scale:
224
  scaler = MinMaxScaler()
225
  scaler.fit(X_train)
226
  X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
227
 
228
+ return X_train, X_test, y_train, y_test
229
 
230
  def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=True):
231
+ data = pd.read_csv('loan_approval_dataset.csv')
232
+
233
+ features = data.drop(columns=["loan_id", " loan_status"])
234
+ features = pd.get_dummies(features, columns=[" education", " self_employed"], drop_first=True).values
 
 
 
 
 
235
 
236
+ le = LabelEncoder()
237
+ label = le.fit_transform(data[" loan_status"])
238
 
239
+ features, label = np.array(features), np.array(label)
240
 
241
+ return split_and_scale(features, label, test_split, preprocess_scale)
242
 
243
 
244
  ### Main Code Starts Here
245
  scaler, arch, iterations, seed = selected_path[0], selected_path[1], int(selected_path[2]), int(selected_path[3])
246
  random_seed = seed
247
 
248
+ X_train, X_test, y_train, y_test = get_stackoverflow_dataset()
249
 
250
  placeholder = st.empty()
251
  modelclass_dict = {'Neural Network (Small)': MLPClassifier([10], random_state=random_seed, max_iter=iterations),