Mpavan45 committed on
Commit
5bf15d7
·
verified ·
1 Parent(s): a6e190e

Update pages/4_Model Creation and Evaluation.py

Browse files
pages/4_Model Creation and Evaluation.py CHANGED
@@ -71,96 +71,171 @@ st.write("Model training and selection is a crucial phase in machine learning. A
71
st.subheader("Data Splitting")
st.write("The dataset is divided into training and testing sets. The training set is used to train the model, while the testing set is used to evaluate its performance on unseen data.")

# Requires 'X_res' / 'y_res' (resampled features/labels) to have been stored
# in st.session_state by an earlier page — presumably the EDA/feature page;
# TODO confirm against that page.
if 'X_res' in st.session_state and 'y_res' in st.session_state:
    X_res = st.session_state['X_res']
    y_res = st.session_state['y_res']

    # 80/20 hold-out split; fixed seed so reruns are reproducible.
    x_train, x_test, y_train, y_test = train_test_split(
        X_res, y_res, test_size=0.2, random_state=42
    )

    # Cache the split so later pages can reuse it without re-splitting.
    st.session_state['X_train'] = x_train
    st.session_state['X_test'] = x_test
    st.session_state['y_train'] = y_train
    st.session_state['y_test'] = y_test

    st.write("Train-test split completed!")
    st.write(f"x_train shape: {x_train.shape}, x_test shape: {x_test.shape}")
    st.write(f"y_train shape: {y_train.shape}, y_test shape: {y_test.shape}")

    # Data Preprocessing
    st.subheader("Data Preprocessing")
    st.write("""
    Before training the model, the data may need to be preprocessed. This includes scaling features using techniques like:
    - **StandardScaler**: Standardizes features by removing the mean and scaling to unit variance.
    - **MinMaxScaler**: Scales features to a specific range, typically between 0 and 1.
    """)

    # Fit the scaler on the training set only to avoid test-set leakage.
    scaler = StandardScaler()
    x_train_std = scaler.fit_transform(x_train)
    x_test_std = scaler.transform(x_test)

    # Hyperparameter Tuning with Optuna
    st.subheader("Hyperparameter Tuning with Optuna")
    st.write("""
    Optuna is an automatic hyperparameter optimization framework that allows us to efficiently search for the best hyperparameters for our models. It uses a technique called Bayesian Optimization to find the optimal set of hyperparameters that maximize the model's performance.
    """)

    def objective(trial):
        """Optuna objective: mean 5-fold CV neg-log-loss (maximized)."""
        algo = trial.suggest_categorical("choice", ["KNN", "Logistic"])
        if algo == "KNN":
            n = trial.suggest_int("n_neighbors", 1, 50)
            p = trial.suggest_int("distance", 1, 2)
            model = KNeighborsClassifier(n_neighbors=n, p=p)
        else:
            # BUG FIX: suggest_categorical only accepts None/bool/int/float/str
            # choices; the original passed (solver, penalty) tuples, which
            # Optuna's CategoricalDistribution does not support. Encode each
            # pair as a single "solver:penalty" string instead.
            pair = trial.suggest_categorical(
                "choices",
                ["lbfgs:l2", "newton-cg:l2", "sag:l2",
                 "saga:l1", "saga:l2", "saga:elasticnet"],
            )
            solver, penalty = pair.split(":")
            # suggest_float replaces the deprecated suggest_uniform.
            C = trial.suggest_float("lambda", 0.01, 1000)
            # NOTE(review): multi_class="multinomial" is deprecated in recent
            # scikit-learn; kept for behavioral parity — confirm version.
            if penalty == "elasticnet":
                model = LogisticRegression(C=C, solver=solver, penalty=penalty,
                                           multi_class="multinomial", l1_ratio=0.3)
            else:
                model = LogisticRegression(C=C, solver=solver, penalty=penalty,
                                           multi_class="multinomial")

        return cross_val_score(model, x_train_std, y_train, cv=5,
                               scoring="neg_log_loss").mean()

    # Optuna Study — NOTE(review): 200 trials re-run on every Streamlit
    # rerun; consider caching the study if this page is slow.
    study = optuna.create_study(direction="maximize",
                                sampler=optuna.samplers.TPESampler())
    study.optimize(objective, n_trials=200)

    # Display Results
    st.write("### Optuna Results")
    st.write("Best Parameters:", study.best_params)
    st.write(study.trials_dataframe())

    # BUG FIX: the study records the (solver, penalty) pair under the single
    # "choices" key, so the original .get('solver') / .get('penalty') lookups
    # ALWAYS fell back to their defaults. Decode the encoded pair instead.
    # NOTE(review): if the best trial chose KNN, "choices"/"lambda" are absent
    # and the defaults below apply (same fallback as the original code).
    pair = study.best_params.get('choices', 'lbfgs:l2')
    solver, penalty = pair.split(':')
    C = study.best_params.get('lambda', 1.0)

    # Refit a logistic model with the best parameters on the full train set.
    model = LogisticRegression(C=C, solver=solver, penalty=penalty,
                               multi_class="multinomial", max_iter=500)
    model.fit(x_train_std, y_train)

    # Evaluation Metrics on the held-out test set.
    y_pred_probs = model.predict_proba(x_test_std)
    loss = log_loss(y_test, y_pred_probs)
    # argmax over class-probability columns recovers the predicted label index.
    y_pred = np.argmax(y_pred_probs, axis=1)

    cm = confusion_matrix(y_test, y_pred)

    st.write("### Model Evaluation Results")
    st.write(f"Log-Loss Score: {loss}")
    st.write("Confusion Matrix:")
    st.write(cm)

else:
    st.error("Training and testing data are not available. Please run the previous steps first.")
 
71
st.subheader("Data Splitting")
st.write("The dataset is divided into training and testing sets. The training set is used to train the model, while the testing set is used to evaluate its performance on unseen data.")

# Page Title
st.markdown("<h1 style='text-align:center; color:purple;'>Model Creation and Evaluation</h1>", unsafe_allow_html=True)

# ----------------------------------------------------------------------
# Step 1: Data Splitting — show the code to the reader, then execute it.
# ----------------------------------------------------------------------
st.subheader("Step 1: Data Splitting")

code_1 = """
# Data Splitting
x_train, x_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)
"""
st.code(code_1, language='python')

# 'X_res' / 'y_res' must have been stored in session_state by an earlier
# page — presumably the EDA/feature-engineering page; TODO confirm.
if 'X_res' in st.session_state and 'y_res' in st.session_state:
    X_res = st.session_state['X_res']
    y_res = st.session_state['y_res']

    # 80/20 hold-out split; fixed seed so reruns show the same shapes.
    x_train, x_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=42)

    st.write(f"x_train shape: {x_train.shape}")
    st.write(f"x_test shape: {x_test.shape}")
    st.write(f"y_train shape: {y_train.shape}")
    st.write(f"y_test shape: {y_test.shape}")
else:
    st.error("Training and testing data are not available. Please run the previous steps first.")
    # BUG FIX: the original fell through after the error, so Step 2 crashed
    # with a NameError on x_train. Halt the script cleanly instead.
    st.stop()

# ----------------------------------------------------------------------
# Step 2: Data Scaling
# ----------------------------------------------------------------------
st.subheader("Step 2: Data Scaling")

code_2 = """
# Data Scaling
scaler = StandardScaler()
x_train_std = scaler.fit_transform(x_train)
x_test_std = scaler.transform(x_test)
"""
st.code(code_2, language='python')

# Fit on the training set only to avoid test-set leakage.
scaler = StandardScaler()
x_train_std = scaler.fit_transform(x_train)
x_test_std = scaler.transform(x_test)

st.write(f"Scaled x_train_std shape: {x_train_std.shape}")
st.write(f"Scaled x_test_std shape: {x_test_std.shape}")

# ----------------------------------------------------------------------
# Step 3: Hyperparameter Optimization with Optuna
# ----------------------------------------------------------------------
st.subheader("Step 3: Hyperparameter Optimization with Optuna")

# Display string kept in sync with the executable definition below.
code_3 = """
# Optuna Objective Function
def objective(trial):
    algo = trial.suggest_categorical("choice", ["KNN", "Logistic"])
    if algo == "KNN":
        n = trial.suggest_int("n_neighbors", 1, 50)
        p = trial.suggest_int("distance", 1, 2)
        model = KNeighborsClassifier(n_neighbors=n, p=p)
    else:
        pair = trial.suggest_categorical("choices", ["lbfgs:l2", "newton-cg:l2"])
        solver, penalty = pair.split(":")
        C = trial.suggest_float("lambda", 0.01, 1000)
        model = LogisticRegression(C=C, solver=solver, penalty=penalty)
    return cross_val_score(model, x_train_std, y_train, cv=5, scoring="neg_log_loss").mean()

study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=200)
"""
st.code(code_3, language='python')

# BUG FIX: in the original, `objective` existed only inside the displayed
# string above, so study.optimize(objective, ...) raised a NameError.
# Define it for real here, mirroring the displayed code.
def objective(trial):
    """Optuna objective: mean 5-fold CV neg-log-loss (maximized)."""
    algo = trial.suggest_categorical("choice", ["KNN", "Logistic"])
    if algo == "KNN":
        n = trial.suggest_int("n_neighbors", 1, 50)
        p = trial.suggest_int("distance", 1, 2)
        model = KNeighborsClassifier(n_neighbors=n, p=p)
    else:
        # BUG FIX: suggest_categorical only accepts None/bool/int/float/str
        # choices — the original passed (solver, penalty) tuples. Encode
        # each pair as a single "solver:penalty" string instead.
        pair = trial.suggest_categorical("choices", ["lbfgs:l2", "newton-cg:l2"])
        solver, penalty = pair.split(":")
        # suggest_float replaces the deprecated suggest_uniform.
        C = trial.suggest_float("lambda", 0.01, 1000)
        model = LogisticRegression(C=C, solver=solver, penalty=penalty)
    return cross_val_score(model, x_train_std, y_train, cv=5, scoring="neg_log_loss").mean()

# NOTE(review): 200 trials re-run on every Streamlit rerun; consider
# caching the finished study if this page is slow.
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=200)

st.write("Best Parameters found by Optuna:", study.best_params)
st.write("All Trials Dataframe:")
st.write(study.trials_dataframe())

# ----------------------------------------------------------------------
# Step 4: Model Training with Best Parameters
# ----------------------------------------------------------------------
st.subheader("Step 4: Model Training with Best Parameters")

code_4 = """
# Model Training with Best Parameters
pair = study.best_params.get('choices', 'lbfgs:l2')
solver, penalty = pair.split(':')
C = study.best_params.get('lambda', 1.0)
model = LogisticRegression(C=C, solver=solver, penalty=penalty, max_iter=500)
model.fit(x_train_std, y_train)
"""
st.code(code_4, language='python')

# BUG FIX: the study records the (solver, penalty) pair under the single
# "choices" key, so the original .get('solver') / .get('penalty') lookups
# ALWAYS fell back to their defaults. Decode the encoded pair instead.
# NOTE(review): if the best trial chose KNN, "choices"/"lambda" are absent
# and the defaults below apply (same fallback as the original code).
pair = study.best_params.get('choices', 'lbfgs:l2')
solver, penalty = pair.split(':')
C = study.best_params.get('lambda', 1.0)
model = LogisticRegression(C=C, solver=solver, penalty=penalty, max_iter=500)
model.fit(x_train_std, y_train)

st.write("Model has been trained successfully!")

# ----------------------------------------------------------------------
# Step 5: Model Evaluation
# ----------------------------------------------------------------------
st.subheader("Step 5: Model Evaluation")

code_5 = """
# Model Evaluation
y_pred_probs = model.predict_proba(x_test_std)
loss = log_loss(y_test, y_pred_probs)
y_pred = np.argmax(y_pred_probs, axis=1)
cm = confusion_matrix(y_test, y_pred)
"""
st.code(code_5, language='python')

# Evaluate on the held-out test set.
y_pred_probs = model.predict_proba(x_test_std)
loss = log_loss(y_test, y_pred_probs)
# argmax over class-probability columns recovers the predicted label index.
y_pred = np.argmax(y_pred_probs, axis=1)
cm = confusion_matrix(y_test, y_pred)

st.write(f"Log-Loss Score: {loss}")
st.write("Confusion Matrix:")
st.write(cm)

# NOTE: the original re-imported streamlit here ("import streamlit as st");
# removed — st is already in scope (used throughout this page) and imports
# belong at the top of the file.

# Custom CSS for buttons
st.markdown(
    """
    <style>
    .custom-button {
        display: inline-block;
        padding: 5px 10px;
        font-size: 14px;
        color: #ffffff;
        background-color: #4CAF50;
        border: none;
        border-radius: 5px;
        text-align: center;
        text-decoration: none;
        transition: background-color 0.3s ease, transform 0.2s ease;
        cursor: pointer;
    }
    .custom-button:hover {
        background-color: #45a049;
        transform: scale(1.05);
    }
    .button-container {
        display: flex;
        justify-content: space-between;
        margin-top: 20px;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# Navigation Buttons
st.markdown(
    """
    <div class="button-container">
        <a href="pages/3_EDA_and_Feature_Engineering" target="_self" class="custom-button">Previous ⏮️</a>
        <a href="pages/5_Conclusion" target="_self" class="custom-button">Next ⏭️</a>
    </div>
    """,
    unsafe_allow_html=True,
)