Update my_pages/multiverse.py
Browse files- my_pages/multiverse.py +13 -18
my_pages/multiverse.py
CHANGED
|
@@ -12,6 +12,7 @@ from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
|
| 12 |
from sklearn.neural_network import MLPClassifier
|
| 13 |
from sklearn.linear_model import SGDClassifier
|
| 14 |
from sklearn.ensemble import RandomForestClassifier
|
|
|
|
| 15 |
|
| 16 |
choices_list = [
|
| 17 |
{"label": "Data Scaling", "options": [
|
|
@@ -216,41 +217,35 @@ def render():
|
|
| 216 |
##########################
|
| 217 |
|
| 218 |
|
| 219 |
-
def split_and_scale(
|
| 220 |
-
|
| 221 |
-
features_raw, features, label, group, test_size=test_split, random_state=0)
|
| 222 |
|
| 223 |
if preprocess_scale:
|
| 224 |
scaler = MinMaxScaler()
|
| 225 |
scaler.fit(X_train)
|
| 226 |
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
|
| 227 |
|
| 228 |
-
return
|
| 229 |
|
| 230 |
def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=True):
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
features =
|
| 235 |
-
features_raw = features.copy()
|
| 236 |
-
|
| 237 |
-
categorical_cols = ['Age', 'EdLevel', 'Employment', 'Gender', 'MainBranch']
|
| 238 |
-
if len(categorical_cols) > 0:
|
| 239 |
-
features = pd.get_dummies(features, columns=categorical_cols)
|
| 240 |
|
| 241 |
-
|
| 242 |
-
|
| 243 |
|
| 244 |
-
features, label
|
| 245 |
|
| 246 |
-
return split_and_scale(
|
| 247 |
|
| 248 |
|
| 249 |
### Main Code Starts Here
|
| 250 |
scaler, arch, iterations, seed = selected_path[0], selected_path[1], int(selected_path[2]), int(selected_path[3])
|
| 251 |
random_seed = seed
|
| 252 |
|
| 253 |
-
|
| 254 |
|
| 255 |
placeholder = st.empty()
|
| 256 |
modelclass_dict = {'Neural Network (Small)': MLPClassifier([10], random_state=random_seed, max_iter=iterations),
|
|
|
|
| 12 |
from sklearn.neural_network import MLPClassifier
|
| 13 |
from sklearn.linear_model import SGDClassifier
|
| 14 |
from sklearn.ensemble import RandomForestClassifier
|
| 15 |
+
from sklearn.preprocessing import LabelEncoder
|
| 16 |
|
| 17 |
choices_list = [
|
| 18 |
{"label": "Data Scaling", "options": [
|
|
|
|
| 217 |
##########################
|
| 218 |
|
| 219 |
|
| 220 |
+
def split_and_scale(features, label, test_split=0.2, preprocess_scale=True, random_state=0):
    """Split features/label into train and test sets, optionally min-max scaling.

    The scaler is fit on the training split only, so no statistics from the
    test split leak into preprocessing.

    Parameters
    ----------
    features : array-like of shape (n_samples, n_features)
        Feature matrix to split.
    label : array-like of shape (n_samples,)
        Target labels aligned with ``features``.
    test_split : float, default 0.2
        Fraction of samples held out for the test set.
    preprocess_scale : bool, default True
        When True, rescale both splits to [0, 1] with a MinMaxScaler
        fit on the training split.
    random_state : int, default 0
        Seed for the train/test shuffle. Default 0 preserves the
        previously hard-coded behavior, so existing callers are unaffected.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        features, label, test_size=test_split, random_state=random_state)

    if preprocess_scale:
        # Fit on the training data only to avoid test-set leakage.
        scaler = MinMaxScaler()
        scaler.fit(X_train)
        X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

    return X_train, X_test, y_train, y_test
|
| 229 |
|
| 230 |
def get_stackoverflow_dataset(test_split=0.2, preprocess_scale=True):
    """Load the loan-approval CSV, encode it, and return train/test splits.

    NOTE(review): despite the name, this reads 'loan_approval_dataset.csv',
    not a Stack Overflow dataset — the name is kept so existing callers
    keep working.

    Returns ``(X_train, X_test, y_train, y_test)`` via ``split_and_scale``.
    """
    data = pd.read_csv('loan_approval_dataset.csv')

    # Drop the row identifier and the target column, then one-hot encode
    # the two categorical columns. The leading spaces in the column names
    # match the CSV's actual headers — do not "fix" them.
    feature_frame = data.drop(columns=["loan_id", " loan_status"])
    feature_frame = pd.get_dummies(
        feature_frame, columns=[" education", " self_employed"], drop_first=True)
    features = np.array(feature_frame.values)

    # Turn the string target into integer class labels.
    encoder = LabelEncoder()
    label = np.array(encoder.fit_transform(data[" loan_status"]))

    return split_and_scale(features, label, test_split, preprocess_scale)
|
| 242 |
|
| 243 |
|
| 244 |
### Main Code Starts Here
|
| 245 |
scaler, arch, iterations, seed = selected_path[0], selected_path[1], int(selected_path[2]), int(selected_path[3])
|
| 246 |
random_seed = seed
|
| 247 |
|
| 248 |
+
X_train, X_test, y_train, y_test = get_stackoverflow_dataset()
|
| 249 |
|
| 250 |
placeholder = st.empty()
|
| 251 |
modelclass_dict = {'Neural Network (Small)': MLPClassifier([10], random_state=random_seed, max_iter=iterations),
|