trohith89 committed on
Commit
7b29dae
·
verified ·
1 Parent(s): 74238f0

Update pages/4_Model_Creation_and_Evaluation.py

Browse files
pages/4_Model_Creation_and_Evaluation.py CHANGED
@@ -104,15 +104,33 @@ if df is not None:
104
 
105
  # Define the objective function for Optuna
106
  st.code("""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  def objective(trial):
108
- # Choose algorithm
109
- algo = trial.suggest_categorical("algo", ["lor", "svc"])
110
 
 
 
111
  if algo == "svc":
112
- # SVC hyperparameters
113
  c = trial.suggest_float("C", 0.001, 1000, log=True)
114
  kernel = trial.suggest_categorical("kernel", ['linear', 'poly', 'rbf', 'sigmoid'])
115
-
116
  if kernel == 'poly':
117
  degree = trial.suggest_int("degree", 1, 3)
118
  model = SVC(C=c, kernel=kernel, degree=degree, random_state=42)
@@ -122,7 +140,7 @@ if df is not None:
122
  else:
123
  model = SVC(C=c, kernel=kernel, random_state=42)
124
  else:
125
- # Logistic Regression hyperparameters
126
  solver, penalty = trial.suggest_categorical(
127
  "choices", [
128
  ("lbfgs", "l2"), ("newton-cg", "l2"),
@@ -132,22 +150,62 @@ if df is not None:
132
  )
133
  reg_strength = trial.suggest_float("C", 0.001, 1000, log=True)
134
  l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
135
-
136
  if penalty == "elasticnet":
137
- model = LogisticRegression(
138
- solver=solver, penalty=penalty, C=reg_strength,
139
- l1_ratio=l1_ratio, random_state=42
140
- )
141
  else:
142
- model = LogisticRegression(
143
- solver=solver, penalty=penalty, C=reg_strength, random_state=42
144
- )
145
-
146
- # Perform cross-validation and return the mean score
147
- score = cross_val_score(model, x_train_std, y_train, cv=5, scoring="accuracy").mean()
148
- return score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  """, language="python")
150
 
 
 
 
 
 
 
 
 
 
151
  # Create and optimize the study
152
  st.code("""
153
  study = optuna.create_study(direction="maximize")
@@ -158,7 +216,7 @@ if df is not None:
158
 
159
  # Create the best model
160
  st.markdown("## Create the Best Model")
161
- model = SVC(kernel='rbf', gamma='scale', C=53.123097332514455)
162
  st.write(model)
163
 
164
  # Train the model
@@ -168,9 +226,57 @@ if df is not None:
168
  # Model Evaluation
169
  st.markdown("# Model Evaluation")
170
  y_pred = model.predict(x_test_std)
171
- st.write("Accuracy:", accuracy_score(y_test, y_pred))
172
- st.write("Classification Report:\n", classification_report(y_test, y_pred))
173
- st.write("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  else:
176
  st.warning("No Dataset Found")
 
104
 
105
  # Define the objective function for Optuna
106
  st.code("""
107
+ import numpy as np
108
+ import optuna
109
+ from sklearn.svm import SVC
110
+ from sklearn.linear_model import LogisticRegression
111
+ from sklearn.model_selection import cross_validate
112
+ from sklearn.preprocessing import StandardScaler
113
+
114
+ # Check for NaN or infinite values in the data
115
+ assert not np.any(np.isnan(x_train_std)), "Input data contains NaN values"
116
+ assert not np.any(np.isnan(y_train)), "Target data contains NaN values"
117
+ assert not np.any(np.isinf(x_train_std)), "Input data contains infinite values"
118
+
119
+ # Global lists to store training and validation scores for each trial
120
+ training_scores = []
121
+ validation_scores = []
122
+
123
  def objective(trial):
124
+ # Log trial parameters for debugging
125
+ print(f"Trial params: {trial.params}")
126
 
127
+ algo = trial.suggest_categorical("algo", ["lor", "svc"])
128
+
129
  if algo == "svc":
130
+ # Hyperparameters for SVC
131
  c = trial.suggest_float("C", 0.001, 1000, log=True)
132
  kernel = trial.suggest_categorical("kernel", ['linear', 'poly', 'rbf', 'sigmoid'])
133
+
134
  if kernel == 'poly':
135
  degree = trial.suggest_int("degree", 1, 3)
136
  model = SVC(C=c, kernel=kernel, degree=degree, random_state=42)
 
140
  else:
141
  model = SVC(C=c, kernel=kernel, random_state=42)
142
  else:
143
+ # Hyperparameters for Logistic Regression
144
  solver, penalty = trial.suggest_categorical(
145
  "choices", [
146
  ("lbfgs", "l2"), ("newton-cg", "l2"),
 
150
  )
151
  reg_strength = trial.suggest_float("C", 0.001, 1000, log=True)
152
  l1_ratio = trial.suggest_float("l1_ratio", 0, 1) if penalty == "elasticnet" else None
153
+
154
  if penalty == "elasticnet":
155
+ model = LogisticRegression(solver=solver, penalty=penalty, C=reg_strength, l1_ratio=l1_ratio, random_state=42)
 
 
 
156
  else:
157
+ model = LogisticRegression(solver=solver, penalty=penalty, C=reg_strength, random_state=42)
158
+
159
+ # Cross-validation scoring with training and validation
160
+ try:
161
+ scores = cross_validate(
162
+ model, x_train_std, y_train, cv=5,
163
+ scoring="accuracy", return_train_score=True
164
+ )
165
+ train_score = scores["train_score"].mean()
166
+ val_score = scores["test_score"].mean()
167
+
168
+ # Append scores to global lists
169
+ training_scores.append(train_score)
170
+ validation_scores.append(val_score)
171
+ except ValueError as e:
172
+ print(f"Error during cross-validation: {e}")
173
+ train_score, val_score = float("-inf"), float("-inf")
174
+
175
+ return val_score
176
+
177
+ # Running the optimization
178
+ study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
179
+ study.optimize(objective, n_trials=100)
180
+
181
+ # Plotting training vs. validation scores
182
+ import matplotlib.pyplot as plt
183
+
184
+ plt.figure(figsize=(10, 6))
185
+ plt.plot(training_scores, label="Training Score", marker="o")
186
+ plt.plot(validation_scores, label="Validation Score", marker="x")
187
+ plt.xlabel("Trial")
188
+ plt.ylabel("Accuracy")
189
+ plt.title("Training vs. Validation Scores Across Trials")
190
+ plt.legend()
191
+ plt.grid()
192
+ plt.show()
193
+
194
+ # Display best trial
195
+ print("Best Parameters:")
196
+ print(study.best_params)
197
+
198
  """, language="python")
199
 
200
+ st.markdown(
201
+ """
202
+ <div style="text-align: center;">
203
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/FqUoV8hSyCWU3WocaqqGc.png" width="70%" />
204
+ </div>
205
+ """,
206
+ unsafe_allow_html=True
207
+ )
208
+
209
  # Create and optimize the study
210
  st.code("""
211
  study = optuna.create_study(direction="maximize")
 
216
 
217
  # Create the best model
218
  st.markdown("## Create the Best Model")
219
+ model = SVC(kernel='poly', gamma = 'scale', C = 974.1963187644974, degree = 2)
220
  st.write(model)
221
 
222
  # Train the model
 
226
  # Model Evaluation
227
  st.markdown("# Model Evaluation")
228
  y_pred = model.predict(x_test_std)
229
+
230
+ # Evaluation metrics
231
+ print("Accuracy:", accuracy_score(y_test, y_pred))
232
+ print("Classification Report:\n", classification_report(y_test, y_pred))
233
+ print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
234
+
235
+ import streamlit as st
236
+ import pandas as pd
237
+ import seaborn as sns
238
+ import matplotlib.pyplot as plt
239
+ from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
240
+
241
+ # Example: Replace this with your actual test data and predictions
242
+ y_pred = model.predict(x_test_std)
243
+
244
+ # Calculate evaluation metrics
245
+ conf_matrix = confusion_matrix(y_test, y_pred)
246
+ class_report = classification_report(y_test, y_pred, output_dict=True) # Output as a dictionary
247
+
248
+ # Convert the classification report to a DataFrame
249
+ class_report_df = pd.DataFrame(class_report).iloc[:-1, :-1] # Exclude support and accuracy rows
250
+
251
+ # Streamlit app
252
+ st.title("Model Evaluation: Confusion Matrix and Classification Report")
253
+
254
+ # Plotting with Matplotlib and Seaborn
255
+ fig, axs = plt.subplots(1, 2, figsize=(16, 6))
256
+
257
+ # Confusion Matrix Heatmap
258
+ sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False, ax=axs[0], annot_kws={"size": 14})
259
+ axs[0].set_title("Confusion Matrix", fontsize=16)
260
+ axs[0].set_xlabel("Predicted Labels", fontsize=14)
261
+ axs[0].set_ylabel("True Labels", fontsize=14)
262
+
263
+ # Classification Report Heatmap
264
+ sns.heatmap(class_report_df, annot=True, fmt=".2f", cmap="YlGnBu", cbar=False, ax=axs[1], annot_kws={"size": 12})
265
+ axs[1].set_title("Classification Report", fontsize=16)
266
+ axs[1].set_xlabel("Metrics", fontsize=14)
267
+ axs[1].set_ylabel("Classes", fontsize=14)
268
+
269
+ # Adjust layout
270
+ plt.tight_layout()
271
+
272
+ # Display the plots in Streamlit
273
+ st.pyplot(fig)
274
+
275
+ # Display additional metrics (optional)
276
+ accuracy = accuracy_score(y_test, y_pred)
277
+ st.write(f"**Accuracy:** {accuracy:.2f}")
278
+
279
+
280
 
281
  else:
282
  st.warning("No Dataset Found")