pavanmutha committed on
Commit
e7399a2
·
verified ·
1 Parent(s): abe6fa2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py CHANGED
@@ -17,6 +17,7 @@ from huggingface_hub import login
17
  from sklearn.ensemble import RandomForestClassifier
18
  from sklearn.model_selection import train_test_split, cross_val_score
19
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 
20
  from sklearn.preprocessing import LabelEncoder
21
  from PIL import Image
22
 
@@ -151,6 +152,35 @@ def analyze_data(csv_file, additional_notes=""):
151
  run.finish()
152
  return format_analysis_report(analysis_result, visuals)
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
 
156
 
@@ -171,6 +201,61 @@ def train_model(_):
171
 
172
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  def objective(trial):
175
  params = {
176
  "n_estimators": trial.suggest_int("n_estimators", 50, 200),
 
17
  from sklearn.ensemble import RandomForestClassifier
18
  from sklearn.model_selection import train_test_split, cross_val_score
19
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
20
+ from sklearn.metrics import ConfusionMatrixDisplay
21
  from sklearn.preprocessing import LabelEncoder
22
  from PIL import Image
23
 
 
152
  run.finish()
153
  return format_analysis_report(analysis_result, visuals)
154
 
155
def compare_models():
    """Cross-validate a few baseline classifiers on the loaded dataset.

    Reads the module-level ``df_global`` DataFrame, uses its last column as
    the target and all remaining columns as features, and scores each model
    with 5-fold cross-validation.

    Returns:
        A ``pandas.DataFrame`` with one row per model and the columns
        ``Model``, ``CV Mean Accuracy`` and ``CV Std Dev``; or a plain
        message string when ``df_global`` is ``None``.
    """
    if df_global is None:
        return "Please upload and preprocess a dataset first."

    # Imported locally so this function does not depend on the module header
    # providing these two estimators (the visible import group only brings in
    # RandomForestClassifier).
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC

    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    # String labels are mapped to integer class ids before scoring.
    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    models = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "SVC": SVC(),
    }

    results = []
    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=5)
        # Compute each statistic once and reuse it for the table and the log.
        mean_score = np.mean(scores)
        std_score = np.std(scores)
        results.append({
            "Model": name,
            "CV Mean Accuracy": mean_score,
            "CV Std Dev": std_score,
        })
        # Only log when a W&B run is active; analyze_data() finishes its run
        # before returning, so there may be none at this point.
        if wandb.run is not None:
            wandb.log({f"{name}_cv_mean": mean_score, f"{name}_cv_std": std_score})

    return pd.DataFrame(results)
184
 
185
 
186
 
 
201
 
202
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
203
 
204
+ # Error analysis
205
+ error_df = X_test.copy()
206
+ error_df["actual"] = y_test
207
+ error_df["predicted"] = y_pred
208
+ error_df["error"] = error_df["actual"] != error_df["predicted"]
209
+ common_errors = error_df[error_df["error"]].groupby(["actual", "predicted"]).size().reset_index(name='count')
210
+
211
def generate_report(metrics_df, trials_df, common_errors_df):
    """Write a Markdown training report to ``model_report.md``.

    Args:
        metrics_df: DataFrame rendered under the "Metrics" heading.
        trials_df: DataFrame rendered under the "Top Trials" heading.
        common_errors_df: DataFrame rendered under the "Common Errors"
            heading.

    Returns:
        The status string ``"Report saved to model_report.md"``.
    """
    # Local import keeps the function self-contained if the module header
    # does not import ``time``.
    import time

    def _as_markdown(frame):
        # DataFrame.to_markdown needs the optional ``tabulate`` package; fall
        # back to a fenced plain-text table rather than failing the report.
        try:
            return frame.to_markdown(index=False)
        except ImportError:
            return "```\n" + frame.to_string(index=False) + "\n```"

    # Build the document line by line so no source-code indentation leaks
    # into the Markdown (indented text would render as a code block).
    report = "\n".join([
        "# Model Training Report",
        "",
        "## Metrics",
        _as_markdown(metrics_df),
        "",
        "## Top Trials",
        _as_markdown(trials_df),
        "",
        "## Common Errors",
        _as_markdown(common_errors_df),
        "",
        f"_Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}_",
        "",
    ])

    # Explicit encoding so the output does not depend on the platform default.
    with open("model_report.md", "w", encoding="utf-8") as f:
        f.write(report)
    return "Report saved to model_report.md"
229
+
230
+
231
+
232
+
233
+ fig, ax = plt.subplots(figsize=(6, 4))
234
+ ConfusionMatrixDisplay.from_estimator(best_model, X_test, y_test, ax=ax)
235
+ plt.savefig("confusion_matrix.png")
236
+ wandb.log({"confusion_matrix": wandb.Image("confusion_matrix.png")})
237
+
238
+
239
+
240
+ # Inside your layout:
241
+ compare_button = gr.Button("Compare Models")
242
+ compare_output = gr.Dataframe()
243
+
244
+ compare_button.click(fn=compare_models, outputs=compare_output)
245
+
246
+ report_button = gr.Button("Generate Report")
247
+ report_status = gr.Textbox()
248
+
249
+ report_button.click(
250
+ fn=lambda: generate_report(metrics_df, trials_df, common_errors),
251
+ outputs=report_status
252
+ )
253
+
254
+
255
+ # Log common misclassifications to wandb
256
+ wandb.log({"common_errors": wandb.Table(dataframe=common_errors)})
257
+
258
+
259
  def objective(trial):
260
  params = {
261
  "n_estimators": trial.suggest_int("n_estimators", 50, 200),