Update my_pages/multiverse.py
Browse files- my_pages/multiverse.py +79 -0
my_pages/multiverse.py
CHANGED
|
@@ -3,6 +3,16 @@ import streamlit as st
|
|
| 3 |
import plotly.graph_objects as go
|
| 4 |
from utils import add_navigation
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
choices_list = [
|
| 7 |
{"label": "Data Scaling", "options": [
|
| 8 |
"MinMax Scaler",
|
|
@@ -203,3 +213,72 @@ def render():
|
|
| 203 |
)
|
| 204 |
|
| 205 |
st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import plotly.graph_objects as go
|
| 4 |
from utils import add_navigation
|
| 5 |
|
| 6 |
+
import random
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
from sklearn.model_selection import train_test_split
|
| 11 |
+
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
| 12 |
+
from sklearn.neural_network import MLPClassifier
|
| 13 |
+
from sklearn.linear_model import SGDClassifier
|
| 14 |
+
from sklearn.ensemble import RandomForestClassifier
|
| 15 |
+
|
| 16 |
choices_list = [
|
| 17 |
{"label": "Data Scaling", "options": [
|
| 18 |
"MinMax Scaler",
|
|
|
|
| 213 |
)
|
| 214 |
|
| 215 |
st.plotly_chart(fig, use_container_width=True)
|
| 216 |
+
|
| 217 |
+
##########################
|
| 218 |
+
##########################
|
| 219 |
+
##########################
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def split_and_scale(features_raw, features, label, group, test_split=0.2, preprocess_scale=True):
    """Partition all parallel arrays into train/test splits, optionally min-max scaling features.

    Args:
        features_raw: un-encoded feature table (kept for display purposes).
        features: numeric/encoded feature matrix used for model training.
        label: target vector, aligned row-for-row with `features`.
        group: protected-group vector, aligned row-for-row with `features`.
        test_split: fraction of rows held out for the test partition.
        preprocess_scale: when True, fit a MinMaxScaler on the training
            features only and apply it to both partitions.

    Returns:
        8-tuple: (X_raw_train, X_raw_test, X_train, X_test,
                  y_train, y_test, group_train, group_test).
    """
    # One call splits all four aligned arrays with the same row permutation;
    # random_state=0 keeps the partition reproducible across reruns.
    split = train_test_split(
        features_raw, features, label, group,
        test_size=test_split, random_state=0,
    )
    X_raw_train, X_raw_test, X_train, X_test, y_train, y_test, group_train, group_test = split

    if preprocess_scale:
        # Fit scaling parameters on the training partition only, so no
        # information from the test set leaks into preprocessing.
        scaler = MinMaxScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

    return X_raw_train, X_raw_test, X_train, X_test, y_train, y_test, group_train, group_test
|
| 232 |
+
|
| 233 |
+
def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=True):
    """Load the Stack Overflow survey CSV (Canada rows only) and return train/test arrays.

    Args:
        test_split: fraction of rows held out for testing (forwarded to split_and_scale).
        preprocess_scale: whether to min-max scale features (forwarded to split_and_scale).

    Returns:
        The 8-tuple produced by split_and_scale: (X_raw_train, X_raw_test,
        X_train, X_test, y_train, y_test, group_train, group_test).
    """
    # NOTE(review): assumes 'stackoverflow_full.csv' sits in the working
    # directory of the Streamlit process — confirm deployment layout.
    raw_data = pd.read_csv('stackoverflow_full.csv')
    raw_data = raw_data[raw_data['Country'] == 'Canada']

    feature_cols = [
        'Age', 'EdLevel', 'Employment', 'Gender', 'MainBranch',
        'YearsCode', 'YearsCodePro', 'PreviousSalary', 'ComputerSkills',
    ]
    features = raw_data[feature_cols]
    # Keep an un-encoded copy of the features before one-hot encoding.
    features_raw = features.copy()

    # One-hot encode the non-numeric columns.
    categorical_cols = ['Age', 'EdLevel', 'Employment', 'Gender', 'MainBranch']
    if categorical_cols:
        features = pd.get_dummies(features, columns=categorical_cols)

    label = np.array(raw_data['Employed'].astype(int))
    # Protected-group indicator derived from the one-hot 'Gender_Man' dummy
    # (cat.codes: 1 for man, 0 otherwise).
    group = features['Gender_Man'].astype('category').cat.codes

    features = np.array(features)
    label = np.array(label)
    group = np.array(group)

    return split_and_scale(features_raw, features, label, group, test_split, preprocess_scale)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
### Main Code Starts Here
# selected_path encodes the user's "multiverse" choices:
# [scaler name, model architecture label, training iterations, random seed].
# NOTE(review): `scaler` is unpacked but the dataset below is always loaded
# with the default MinMax scaling — confirm whether the choice should be applied.
scaler, arch, iterations, seed = selected_path[0], selected_path[1], int(selected_path[2]), int(selected_path[3])
random_seed = seed

# (Removed dead code: an unused `modellist` and a broken `modeltype` assignment
# that produced a 1-tuple followed by two no-op expression statements.)

X_raw_train, X_raw_test, X_train, X_test, y_train, y_test, group_train, group_test = get_stackoverflow_dataset()

placeholder = st.empty()
# Map the user-facing architecture label to a configured (untrained) estimator.
# BUG FIX: RandomForestClassifier has no `max_iter` parameter — passing it raised
# TypeError for every path, since this dict is built eagerly. The iteration count
# maps to the number of trees (`n_estimators`) for the forest instead.
modelclass_dict = {
    'Neural Network (Small)': MLPClassifier([10], random_state=random_seed, max_iter=iterations),
    'Logistic Regression': SGDClassifier(random_state=random_seed, max_iter=iterations),
    'Random Forest': RandomForestClassifier(random_state=random_seed, n_estimators=iterations),
}
model = modelclass_dict[arch]

placeholder.write("Training your model.")
model.fit(X_train, y_train)
placeholder.empty()

# Overall held-out accuracy, plus per-group accuracy
# (group_test == 1 is the 'Gender_Man' dummy; 0 otherwise).
acc = model.score(X_test, y_test)
acc_men = model.score(X_test[group_test == 1], y_test[group_test == 1])
acc_women = model.score(X_test[group_test == 0], y_test[group_test == 0])
# Disparity = absolute accuracy gap between the two groups.
disp = abs(acc_men - acc_women)

st.subheader("📊 Model Performance Metrics")
st.markdown(f"""
Your model was tested on a separate test dataset, and you achieved the following overall model accuracy as well as disparity in accuracy across men and women in the dataset.
""")
st.metric(label="Model Accuracy", value=f"{acc * 100:.1f}%")
st.metric(label="Gender Disparity in Accuracy", value=f"{disp * 100:.1f}%")
|