pavanmutha committed
Commit 25b354e · verified · 1 Parent(s): 99f4caf

Update app.py

Files changed (1)
  1. app.py +40 -220

app.py CHANGED
@@ -12,12 +12,10 @@ import time
 import psutil
 import shutil
 import ast
-import seaborn as sns
-from sklearn.svm import SVC
 from smolagents import HfApiModel, CodeAgent
 from huggingface_hub import login
 from sklearn.model_selection import train_test_split, cross_val_score
-from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 from sklearn.metrics import ConfusionMatrixDisplay
 from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
 from sklearn.linear_model import LogisticRegression
@@ -254,196 +252,51 @@ def train_model(_):
         print(f"Training Error: {e}")
         return {}, pd.DataFrame()
 
-# Added a/b functions to existing code
-def create_model_comparison_plots(results_df):
-    """Create visualizations for model comparison results"""
-    os.makedirs('./comparison_plots', exist_ok=True)
-    plot_paths = []
-
-    # Model performance comparison
-    plt.figure(figsize=(12, 6))
-    sns.barplot(data=results_df, x='Model', y='Test Accuracy')
-    plt.title('Model Accuracy Comparison')
-    plt.xticks(rotation=45)
-    accuracy_path = './comparison_plots/accuracy_comparison.png'
-    plt.savefig(accuracy_path, bbox_inches='tight')
-    plot_paths.append(accuracy_path)
-    plt.close()
-
-    # Metric radar chart
-    metrics = ['Test Accuracy', 'Precision', 'Recall', 'F1 Score']
-    if not results_df['ROC AUC'].isna().all():
-        metrics.append('ROC AUC')
-
-    plt.figure(figsize=(10, 10))
-    ax = plt.subplot(111, polar=True)
-
-    angles = np.linspace(0, 2*np.pi, len(metrics), endpoint=False)
-    angles = np.concatenate((angles, [angles[0]]))
-
-    for idx, row in results_df.iterrows():
-        values = row[metrics].values.flatten().tolist()
-        values += values[:1]
-        ax.plot(angles, values, 'o-', label=row['Model'])
-
-    ax.set_thetagrids(angles[:-1] * 180/np.pi, metrics)
-    ax.set_title('Model Performance Radar Chart')
-    ax.legend(bbox_to_anchor=(1.1, 1.1))
-    radar_path = './comparison_plots/radar_chart.png'
-    plt.savefig(radar_path, bbox_inches='tight')
-    plot_paths.append(radar_path)
-    plt.close()
-
-    return plot_paths
-
-def compare_models_enhanced():
-    """Enhanced model comparison with more metrics and visualizations"""
+def ab_test_models():
     if df_global is None:
-        return "Please upload and preprocess a dataset first.", [], []
-
+        return "Please upload and preprocess a dataset first.", pd.DataFrame()
+
     target = df_global.columns[-1]
-    X = df_global.drop(target, axis=1)
+    X = df_global.drop(columns=[target])
     y = df_global[target]
 
     if y.dtype == 'object':
         y = LabelEncoder().fit_transform(y)
-
+
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
-
-    # Define models to compare
+
     models = {
-        "RandomForest": RandomForestClassifier(),
-        "LogisticRegression": LogisticRegression(max_iter=1000),
-        "GradientBoosting": GradientBoostingClassifier(),
-        "SVC": SVC(probability=True)
+        "Random Forest": RandomForestClassifier(n_estimators=100),
+        "Logistic Regression": LogisticRegression(max_iter=1000),
+        "Gradient Boosting": GradientBoostingClassifier()
     }
-
-    results = []
-
-    for name, model in models.items():
-        start_time = time.time()
-
-        # Cross validation
-        cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
-
-        # Full training and test evaluation
-        model.fit(X_train, y_train)
-        y_pred = model.predict(X_test)
-        y_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else None
-
-        # Calculate metrics
-        metrics = {
-            'Model': name,
-            'CV Mean Accuracy': np.mean(cv_scores),
-            'CV Std Dev': np.std(cv_scores),
-            'Test Accuracy': accuracy_score(y_test, y_pred),
-            'Precision': precision_score(y_test, y_pred, average='weighted'),
-            'Recall': recall_score(y_test, y_pred, average='weighted'),
-            'F1 Score': f1_score(y_test, y_pred, average='weighted'),
-            'ROC AUC': roc_auc_score(y_test, y_proba) if y_proba is not None and len(np.unique(y_test)) == 2 else np.nan,
-            'Training Time (s)': time.time() - start_time
-        }
-
-        results.append(metrics)
-
-        # Log to wandb
-        if wandb.run:
-            wandb.log({f"{name}_{k}": v for k, v in metrics.items() if k != 'Model'})
-
-    # Create visualizations
-    results_df = pd.DataFrame(results)
-    plot_paths = create_model_comparison_plots(results_df)
-
-    return results_df, plot_paths
 
-def perform_ab_test(model_a_name, model_b_name):
-    """Perform A/B test between two specific models"""
-    if df_global is None:
-        return {"error": "Please upload and preprocess a dataset first."}, []
-
-    target = df_global.columns[-1]
-    X = df_global.drop(target, axis=1)
-    y = df_global[target]
+    results = []
 
-    if y.dtype == 'object':
-        y = LabelEncoder().fit_transform(y)
-
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
-
-    # Define all available models
-    model_library = {
-        "RandomForest": RandomForestClassifier(),
-        "LogisticRegression": LogisticRegression(max_iter=1000),
-        "GradientBoosting": GradientBoostingClassifier(),
-        "SVC": SVC(probability=True)
-    }
-
-    # Get the selected models
-    model_a = model_library.get(model_a_name)
-    model_b = model_library.get(model_b_name)
-
-    if not model_a or not model_b:
-        return {"error": "Invalid model selection"}, []
-
-    # Train both models
-    model_a.fit(X_train, y_train)
-    model_b.fit(X_train, y_train)
-
-    # Get predictions
-    y_pred_a = model_a.predict(X_test)
-    y_pred_b = model_b.predict(X_test)
-
-    # Calculate metrics
-    metrics_a = {
-        'accuracy': accuracy_score(y_test, y_pred_a),
-        'precision': precision_score(y_test, y_pred_a, average='weighted'),
-        'recall': recall_score(y_test, y_pred_a, average='weighted'),
-        'f1': f1_score(y_test, y_pred_a, average='weighted')
-    }
-
-    metrics_b = {
-        'accuracy': accuracy_score(y_test, y_pred_b),
-        'precision': precision_score(y_test, y_pred_b, average='weighted'),
-        'recall': recall_score(y_test, y_pred_b, average='weighted'),
-        'f1': f1_score(y_test, y_pred_b, average='weighted')
-    }
-
-    # Calculate relative improvements
-    improvements = {
-        'accuracy_improvement': metrics_b['accuracy'] - metrics_a['accuracy'],
-        'f1_improvement': metrics_b['f1'] - metrics_a['f1'],
-        'relative_improvement': (metrics_b['accuracy'] - metrics_a['accuracy']) / metrics_a['accuracy'] if metrics_a['accuracy'] != 0 else 0
-    }
-
-    # Create comparison DataFrame
-    comparison_df = pd.DataFrame({
-        'Metric': list(metrics_a.keys()),
-        model_a_name: list(metrics_a.values()),
-        model_b_name: list(metrics_b.values())
-    })
-
-    # Log to wandb
-    if wandb.run:
-        wandb.log({
-            f"A_B_Test/{model_a_name}_metrics": metrics_a,
-            f"A_B_Test/{model_b_name}_metrics": metrics_b,
-            f"A_B_Test/Improvements": improvements
+    for name, clf in models.items():
+        clf.fit(X_train, y_train)
+        y_pred = clf.predict(X_test)
+        results.append({
+            "Model": name,
+            "Accuracy": accuracy_score(y_test, y_pred),
+            "Precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
+            "Recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
+            "F1 Score": f1_score(y_test, y_pred, average="weighted", zero_division=0)
         })
-
-    # Create visualization
-    plt.figure(figsize=(10, 6))
-    comparison_df.set_index('Metric').plot(kind='bar', rot=0)
-    plt.title(f'A/B Test: {model_a_name} vs {model_b_name}')
-    plt.ylabel('Score')
-    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
-    plot_path = './comparison_plots/ab_test_comparison.png'
-    plt.savefig(plot_path, bbox_inches='tight')
-    plt.close()
-
-    return {
-        'metrics_comparison': comparison_df.to_dict(),
-        'improvements': improvements
-    }, [plot_path]
+        wandb.log({f"{name}_metrics": results[-1]})
+
+    result_df = pd.DataFrame(results)
+    best_model = result_df.sort_values("F1 Score", ascending=False).iloc[0]
+
+    summary = f"""
+    🔍 <b>Best Model:</b> {best_model['Model']}<br>
+    <b>F1 Score:</b> {best_model['F1 Score']:.4f}<br>
+    📊 <b>Accuracy:</b> {best_model['Accuracy']:.4f}<br>
+    🧠 <b>Precision:</b> {best_model['Precision']:.4f}<br>
+    🔁 <b>Recall:</b> {best_model['Recall']:.4f}
+    """
+
+    return summary, result_df
 
 def explainability(_):
     import warnings
@@ -543,49 +396,16 @@ with gr.Blocks() as demo:
     explain_btn = gr.Button("SHAP + LIME Explainability")
     shap_img = gr.Image(label="SHAP Summary Plot")
     lime_img = gr.Image(label="LIME Explanation")
-
-    # Add new A/B testing components
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### 🆚 Model A/B Testing")
-            with gr.Row():
-                model_a_select = gr.Dropdown(
-                    choices=["RandomForest", "LogisticRegression", "GradientBoosting", "SVC"],
-                    label="Select Model A",
-                    value="RandomForest"
-                )
-                model_b_select = gr.Dropdown(
-                    choices=["RandomForest", "LogisticRegression", "GradientBoosting", "SVC"],
-                    label="Select Model B",
-                    value="LogisticRegression"
-                )
-            ab_test_btn = gr.Button("Run A/B Test")
-
-        with gr.Column():
-            ab_test_results = gr.JSON(label="A/B Test Results")
-            ab_test_plots = gr.Gallery(label="A/B Test Visualizations")
+
+    with gr.Blocks():
+        ab_test_button = gr.Button("Run A/B Testing")
+        ab_summary = gr.HTML()
+        ab_results = gr.Dataframe()
 
-    # Add model comparison components
-    with gr.Row():
-        compare_btn = gr.Button("Compare All Models")
-        comparison_results = gr.DataFrame(label="Model Comparison Results")
-        comparison_plots = gr.Gallery(label="Comparison Visualizations")
+    ab_test_button.click(fn=ab_test_models, inputs=[], outputs=[ab_summary, ab_results])
 
     agent_btn.click(fn=analyze_data, inputs=[file_input], outputs=[insights_output, visual_output])
     train_btn.click(fn=train_model, inputs=[file_input], outputs=[metrics_output, trials_output])
     explain_btn.click(fn=explainability, inputs=[], outputs=[shap_img, lime_img])
 
-    # New handlers for A/B testing and comparison
-    ab_test_btn.click(
-        fn=perform_ab_test,
-        inputs=[model_a_select, model_b_select],
-        outputs=[ab_test_results, ab_test_plots]
-    )
-
-    compare_btn.click(
-        fn=compare_models_enhanced,
-        inputs=[],
-        outputs=[comparison_results, comparison_plots]
-    )
-
 demo.launch(debug=True)
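
For context, here is a minimal standalone sketch of the comparison loop that the new ab_test_models() performs, run on scikit-learn's bundled breast-cancer data rather than the app's uploaded df_global; the dataset, variable names, and printed output below are illustrative only and are not part of this commit.

import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

# Toy stand-in for the app's preprocessed df_global (features + target).
X, y = load_breast_cancer(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Same candidate models as the new ab_test_models().
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Gradient Boosting": GradientBoostingClassifier(),
}

results = []
for name, clf in models.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
        "Recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
        "F1 Score": f1_score(y_test, y_pred, average="weighted", zero_division=0),
    })

# Rank by weighted F1, as the commit's HTML summary does for the best model.
result_df = pd.DataFrame(results).sort_values("F1 Score", ascending=False)
print(result_df.to_string(index=False))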