prakharg24 committed on
Commit
754f471
·
verified ·
1 Parent(s): 0d36b45

Update my_pages/multiverse.py

Browse files
Files changed (1) hide show
  1. my_pages/multiverse.py +79 -0
my_pages/multiverse.py CHANGED
@@ -3,6 +3,16 @@ import streamlit as st
3
  import plotly.graph_objects as go
4
  from utils import add_navigation
5
 
 
 
 
 
 
 
 
 
 
 
6
  choices_list = [
7
  {"label": "Data Scaling", "options": [
8
  "MinMax Scaler",
@@ -203,3 +213,72 @@ def render():
203
  )
204
 
205
  st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import plotly.graph_objects as go
4
  from utils import add_navigation
5
 
6
+ import random
7
+ import pandas as pd
8
+ import numpy as np
9
+
10
+ from sklearn.model_selection import train_test_split
11
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler
12
+ from sklearn.neural_network import MLPClassifier
13
+ from sklearn.linear_model import SGDClassifier
14
+ from sklearn.ensemble import RandomForestClassifier
15
+
16
  choices_list = [
17
  {"label": "Data Scaling", "options": [
18
  "MinMax Scaler",
 
213
  )
214
 
215
  st.plotly_chart(fig, use_container_width=True)
216
+
217
+ ##########################
218
+ ##########################
219
+ ##########################
220
+
221
+
222
def split_and_scale(features_raw, features, label, group, test_split=0.2, preprocess_scale=True):
    """Split the dataset into train/test partitions and optionally scale features.

    All four aligned arrays (raw features, encoded features, labels, group
    membership) are split together with a fixed random_state so the partitions
    stay row-aligned and reproducible.

    Args:
        features_raw: Un-encoded feature rows (kept for display purposes).
        features: Model-ready (encoded) feature matrix.
        label: Target labels, aligned with the feature rows.
        group: Group-membership codes, aligned with the feature rows.
        test_split: Fraction of rows held out for the test partition.
        preprocess_scale: If True, MinMax-scale features (scaler fit on the
            training partition only, then applied to both).

    Returns:
        Tuple of (X_raw_train, X_raw_test, X_train, X_test, y_train, y_test,
        group_train, group_test).
    """
    splits = train_test_split(features_raw, features, label, group,
                              test_size=test_split, random_state=0)
    raw_tr, raw_te, x_tr, x_te, y_tr, y_te, g_tr, g_te = splits

    if preprocess_scale:
        # Fit on train only to avoid leaking test statistics into the scaler.
        scaler = MinMaxScaler()
        x_tr = scaler.fit_transform(x_tr)
        x_te = scaler.transform(x_te)

    return raw_tr, raw_te, x_tr, x_te, y_tr, y_te, g_tr, g_te
232
+
233
def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=True,
                              csv_path='stackoverflow_full.csv', country='Canada'):
    """Load and prepare the Stack Overflow employment dataset.

    Reads the raw CSV, restricts rows to one country, one-hot encodes the
    categorical feature columns, and hands the arrays to split_and_scale.

    Args:
        test_split: Fraction of rows held out for the test split.
        preprocess_scale: If True, MinMax-scale the encoded features.
        csv_path: Path to the raw dataset CSV (new parameter; default
            preserves the previously hard-coded path).
        country: Country filter applied to the raw rows (new parameter;
            default preserves the previously hard-coded 'Canada').

    Returns:
        Tuple of (X_raw_train, X_raw_test, X_train, X_test, y_train, y_test,
        group_train, group_test) as produced by split_and_scale.
    """
    raw_data = pd.read_csv(csv_path)
    raw_data = raw_data[raw_data['Country'] == country]

    features = raw_data[['Age', 'EdLevel', 'Employment', 'Gender', 'MainBranch',
                         'YearsCode', 'YearsCodePro', 'PreviousSalary', 'ComputerSkills']]
    # Keep an un-encoded copy so callers can show human-readable rows.
    features_raw = features.copy()

    categorical_cols = ['Age', 'EdLevel', 'Employment', 'Gender', 'MainBranch']
    features = pd.get_dummies(features, columns=categorical_cols)

    label = np.array(raw_data['Employed'].astype(int))
    # Group membership is derived from the one-hot 'Gender_Man' column
    # (1 -> man, 0 -> not). NOTE(review): assumes the filtered data contains
    # at least one 'Man' row so pd.get_dummies creates that column — TODO confirm.
    group = features['Gender_Man'].astype('category').cat.codes

    features, label, group = np.array(features), np.array(label), np.array(group)

    return split_and_scale(features_raw, features, label, group, test_split, preprocess_scale)
250
+
251
+
252
### Main Code Starts Here

# Decode the user's selected multiverse path into concrete experiment settings:
# [0] scaler name, [1] model architecture, [2] training iterations, [3] seed.
scaler, arch, iterations, seed = selected_path[0], selected_path[1], int(selected_path[2]), int(selected_path[3])
random_seed = seed
# NOTE(review): `scaler` is decoded here but never used below — presumably it
# should control `preprocess_scale` (or scaler choice) in
# get_stackoverflow_dataset(); TODO confirm against the choices_list options.
# (Removed: unused `modellist` and a broken, dead `modeltype` assignment that
# left two bare string expressions as no-op statements.)

X_raw_train, X_raw_test, X_train, X_test, y_train, y_test, group_train, group_test = get_stackoverflow_dataset()

placeholder = st.empty()
# Construct only the selected model (lazily, via factories). BUG FIX: the
# original eagerly built all three estimators and passed `max_iter` to
# RandomForestClassifier, which does not accept that keyword — the dict
# literal raised TypeError before any model could train, for every path.
modelclass_dict = {
    'Neural Network (Small)': lambda: MLPClassifier([10], random_state=random_seed, max_iter=iterations),
    'Logistic Regression': lambda: SGDClassifier(random_state=random_seed, max_iter=iterations),
    'Random Forest': lambda: RandomForestClassifier(random_state=random_seed),
}
model = modelclass_dict[arch]()
placeholder.write("Training your model.")
model.fit(X_train, y_train)
placeholder.empty()

# Overall held-out accuracy.
acc = model.score(X_test, y_test)

# Per-group accuracy; group code 1 -> men, 0 -> women (codes come from the
# one-hot 'Gender_Man' column in get_stackoverflow_dataset).
acc_men = model.score(X_test[group_test == 1], y_test[group_test == 1])
acc_women = model.score(X_test[group_test == 0], y_test[group_test == 0])

# Fairness metric: absolute accuracy gap between the two groups.
disp = abs(acc_men - acc_women)

st.subheader("📊 Model Performance Metrics")
st.markdown("""
Your model was tested on a separate test dataset, and you achieved the following overall model accuracy as well as disparity in accuracy across men and women in the dataset.
""")
st.metric(label="Model Accuracy", value=f"{acc * 100:.1f}%")
st.metric(label="Gender Disparity in Accuracy", value=f"{disp * 100:.1f}%")