pavanmutha committed on
Commit
73ac46a
·
verified ·
1 Parent(s): b2cb237

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +428 -95
app.py CHANGED
@@ -12,16 +12,21 @@ import time
12
  import psutil
13
  import shutil
14
  import ast
 
 
15
  from smolagents import HfApiModel, CodeAgent
16
  from huggingface_hub import login
17
  from sklearn.model_selection import train_test_split, cross_val_score
18
- from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
 
19
  from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
20
  from sklearn.linear_model import LogisticRegression
21
  from sklearn.preprocessing import LabelEncoder
22
  from datetime import datetime
23
  from PIL import Image
24
- from sklearn.svm import SVC
 
 
25
 
26
  # Authenticate with Hugging Face
27
  hf_token = os.getenv("HF_TOKEN")
@@ -43,12 +48,12 @@ def clean_data(df):
43
  def upload_file(file):
44
  global df_global
45
  if file is None:
46
- return pd.DataFrame({"Error": ["No file uploaded."]}), None
47
  ext = os.path.splitext(file.name)[-1]
48
  df = pd.read_csv(file.name) if ext == ".csv" else pd.read_excel(file.name)
49
  df = clean_data(df)
50
  df_global = df
51
- return df.head(), df
52
 
53
  def format_analysis_report(raw_output, visuals):
54
  try:
@@ -154,117 +159,445 @@ def analyze_data(csv_file, additional_notes=""):
154
  run.finish()
155
  return format_analysis_report(analysis_result, visuals)
156
 
157
- def compare_models(selected_models, df):
158
- if df is None or len(selected_models) == 0:
159
- return pd.DataFrame(), []
160
- target = df.columns[-1]
161
- X = df.drop(target, axis=1)
162
- y = df[target]
 
 
163
  if y.dtype == 'object':
164
  y = LabelEncoder().fit_transform(y)
165
- model_dict = {
 
166
  "RandomForest": RandomForestClassifier(),
167
  "LogisticRegression": LogisticRegression(max_iter=1000),
168
- "SVC": SVC(probability=True)
169
  }
 
170
  results = []
171
- confusion_imgs = []
172
- for name in selected_models:
173
- model = model_dict[name]
174
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  model.fit(X_train, y_train)
176
  y_pred = model.predict(X_test)
177
- y_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") and len(np.unique(y)) == 2 else None
 
178
  metrics = {
179
- "Model": name,
180
- "Accuracy": accuracy_score(y_test, y_pred),
181
- "Precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
182
- "Recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
183
- "F1": f1_score(y_test, y_pred, average="weighted", zero_division=0),
184
- "ROC-AUC": roc_auc_score(y_test, y_proba) if y_proba is not None else "N/A"
185
  }
186
- results.append(metrics)
187
- # Confusion matrix plot
188
- fig, ax = plt.subplots()
189
- ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
190
- img_path = f"conf_matrix_{name}.png"
191
- plt.savefig(img_path)
192
- confusion_imgs.append(img_path)
193
- plt.close(fig)
194
- results_df = pd.DataFrame(results)
195
- return results_df, confusion_imgs
196
-
197
- def ab_test_models(model_a, model_b, df):
198
- if df is None or model_a == model_b:
199
- return pd.DataFrame()
200
- target = df.columns[-1]
201
- X = df.drop(target, axis=1)
202
- y = df[target]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  if y.dtype == 'object':
204
  y = LabelEncoder().fit_transform(y)
 
205
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
206
- mid = len(X_test) // 2
207
- X_a, X_b = X_test[:mid], X_test[mid:]
208
- y_a, y_b = y_test[:mid], y_test[mid:]
209
- model_dict = {
210
  "RandomForest": RandomForestClassifier(),
211
  "LogisticRegression": LogisticRegression(max_iter=1000),
 
 
212
  "SVC": SVC(probability=True)
213
  }
 
214
  results = []
215
- for name, X_grp, y_grp in zip([model_a, model_b], [X_a, X_b], [y_a, y_b]):
216
- model = model_dict[name]
 
 
 
 
 
 
217
  model.fit(X_train, y_train)
218
- y_pred = model.predict(X_grp)
 
 
 
219
  metrics = {
220
- "Model": name,
221
- "Accuracy": accuracy_score(y_grp, y_pred),
222
- "Precision": precision_score(y_grp, y_pred, average="weighted", zero_division=0),
223
- "Recall": recall_score(y_grp, y_pred, average="weighted", zero_division=0),
224
- "F1": f1_score(y_grp, y_pred, average="weighted", zero_division=0),
 
 
 
 
225
  }
 
226
  results.append(metrics)
227
- return pd.DataFrame(results)
228
-
229
- def get_model_choices():
230
- return ["RandomForest", "LogisticRegression", "SVC"]
231
-
232
- def clear_confusion_imgs():
233
- for name in get_model_choices():
234
- img_path = f"conf_matrix_{name}.png"
235
- if os.path.exists(img_path):
236
- os.remove(img_path)
237
-
238
- def main():
239
- with gr.Blocks() as demo:
240
- gr.Markdown("# 🤖 Model Comparison & A/B Testing (Hugging Face + Gradio)")
241
- with gr.Row():
242
- with gr.Column():
243
- file_input = gr.File(label="Upload CSV or Excel", type="filepath")
244
- df_output = gr.DataFrame(label="Cleaned Data Preview")
245
- state = gr.State()
246
- file_input.change(fn=upload_file, inputs=file_input, outputs=[df_output, state])
247
- with gr.Column():
248
- model_choices = gr.CheckboxGroup(
249
- choices=get_model_choices(),
250
- value=["RandomForest", "LogisticRegression"],
251
- label="Select Models to Compare"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  )
253
- compare_btn = gr.Button("Compare Models")
254
- metrics_output = gr.DataFrame(label="Model Performance Metrics")
255
- confusion_gallery = gr.Gallery(label="Confusion Matrices", columns=3)
256
- compare_btn.click(fn=compare_models, inputs=[model_choices, state], outputs=[metrics_output, confusion_gallery])
257
- gr.Markdown("## A/B Test: Compare Two Models on Test Set")
258
- with gr.Row():
259
- ab_model_a = gr.Dropdown(get_model_choices(), value="RandomForest", label="Model A")
260
- ab_model_b = gr.Dropdown(get_model_choices(), value="LogisticRegression", label="Model B")
261
- ab_btn = gr.Button("Run A/B Test")
262
- ab_output = gr.DataFrame(label="A/B Test Results")
263
- ab_btn.click(fn=ab_test_models, inputs=[ab_model_a, ab_model_b, state], outputs=ab_output)
264
- gr.Markdown("---\nBuilt for Hugging Face Spaces with Gradio. Upload your data, select models, and compare!")
265
- return demo
266
-
267
- if __name__ == "__main__":
268
- clear_confusion_imgs()
269
- demo = main()
270
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  import psutil
13
  import shutil
14
  import ast
15
+ import seaborn as sns
16
+ from sklearn.svm import SVC
17
  from smolagents import HfApiModel, CodeAgent
18
  from huggingface_hub import login
19
  from sklearn.model_selection import train_test_split, cross_val_score
20
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
21
+ from sklearn.metrics import ConfusionMatrixDisplay
22
  from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
23
  from sklearn.linear_model import LogisticRegression
24
  from sklearn.preprocessing import LabelEncoder
25
  from datetime import datetime
26
  from PIL import Image
27
+ from xgboost import XGBClassifier
28
+
29
+
30
 
31
  # Authenticate with Hugging Face
32
  hf_token = os.getenv("HF_TOKEN")
 
48
def upload_file(file):
    """Load an uploaded CSV/Excel file, clean it, and cache it globally.

    Side effect: stores the cleaned frame in the module-global ``df_global``
    so later handlers (training, comparison, explainability) can reuse it.

    Args:
        file: Gradio file object with a ``.name`` path, or None.

    Returns:
        Preview DataFrame (``df.head()``) on success, or a one-row error
        DataFrame when no file was uploaded / the type is unsupported.
    """
    global df_global
    if file is None:
        return pd.DataFrame({"Error": ["No file uploaded."]})
    ext = os.path.splitext(file.name)[-1].lower()
    if ext == ".csv":
        df = pd.read_csv(file.name)
    elif ext in (".xls", ".xlsx"):
        df = pd.read_excel(file.name)
    else:
        # BUG FIX: previously any non-.csv file (e.g. .txt) was passed to
        # read_excel and crashed with a cryptic engine error.
        return pd.DataFrame({"Error": [f"Unsupported file type: {ext}"]})
    df = clean_data(df)
    df_global = df
    return df.head()
57
 
58
  def format_analysis_report(raw_output, visuals):
59
  try:
 
159
  run.finish()
160
  return format_analysis_report(analysis_result, visuals)
161
 
162
def compare_models():
    """Cross-validate a fixed set of classifiers on the uploaded dataset.

    Treats the last column of ``df_global`` as the target, label-encodes it
    when categorical, and reports 5-fold CV accuracy for each model.

    Returns:
        DataFrame with Model / CV Mean Accuracy / CV Std Dev columns, or an
        error string when no dataset has been uploaded yet.
    """
    if df_global is None:
        return "Please upload and preprocess a dataset first."

    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    models = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "SVC": SVC()
    }

    results = []
    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=5)
        results.append({
            "Model": name,
            "CV Mean Accuracy": np.mean(scores),
            "CV Std Dev": np.std(scores)
        })
        # BUG FIX: wandb.log raises when no run is active; guard with
        # `if wandb.run:` as every other function in this file does.
        if wandb.run:
            wandb.log({f"{name}_cv_mean": np.mean(scores), f"{name}_cv_std": np.std(scores)})

    results_df = pd.DataFrame(results)
    return results_df
191
+
192
# 1. prepare_data should come first
def prepare_data(df, target_column=None):
    """Split *df* into train/test features and labels.

    When *target_column* is omitted, the first object-dtype column is
    chosen as the target, falling back to the last column if the frame
    has no object columns.
    """
    from sklearn.model_selection import train_test_split

    if target_column is None:
        object_cols = df.select_dtypes(include=['object']).columns
        target_column = object_cols[0] if len(object_cols) > 0 else df.columns[-1]

    features = df.drop(columns=[target_column])
    labels = df[target_column]

    return train_test_split(features, labels, test_size=0.3, random_state=42)
204
+
205
+
206
def train_model(_):
    """Tune a RandomForest with Optuna, log to W&B, and report test metrics.

    Operates on the globally cached dataset (``df_global``); the single
    argument is the unused Gradio file-input value.

    Returns:
        (metrics dict, DataFrame of the top 7 hyperparameter trials), or
        ({}, empty DataFrame) on any failure.
    """
    try:
        wandb.login(key=os.environ.get("WANDB_API_KEY"))
        wandb_run = wandb.init(
            project="huggingface-data-analysis",
            name=f"Optuna_Run_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            reinit=True
        )

        X_train, X_test, y_train, y_test = prepare_data(df_global)

        def objective(trial):
            params = {
                "n_estimators": trial.suggest_int("n_estimators", 50, 200),
                "max_depth": trial.suggest_int("max_depth", 3, 10),
            }
            # BUG FIX: the sampled params must actually be applied; previously
            # every trial built a default RandomForestClassifier(), so the
            # study evaluated 15 identical models and optimized nothing.
            model = RandomForestClassifier(**params)
            score = cross_val_score(model, X_train, y_train, cv=3).mean()
            wandb.log({**params, "cv_score": score})
            return score

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=15)

        best_params = study.best_params
        # BUG FIX: retrain the final model with the best parameters found,
        # not with library defaults.
        model = RandomForestClassifier(**best_params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        metrics = {
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
            "recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
            "f1_score": f1_score(y_test, y_pred, average="weighted", zero_division=0),
        }
        wandb.log(metrics)
        wandb_run.finish()

        # Top 7 trials; failed trials have value None, rank those last
        # instead of crashing the sort.
        top_trials = sorted(
            study.trials,
            key=lambda t: t.value if t.value is not None else float("-inf"),
            reverse=True,
        )[:7]
        trial_rows = []
        for t in top_trials:
            row = t.params.copy()
            row["score"] = t.value
            trial_rows.append(row)
        trials_df = pd.DataFrame(trial_rows)

        return metrics, trials_df

    except Exception as e:
        print(f"Training Error: {e}")
        return {}, pd.DataFrame()
259
+
260
# Added a/b functions to existing code
def create_model_comparison_plots(results_df):
    """Create visualizations for model comparison results.

    Expects *results_df* with columns 'Model', 'Test Accuracy', 'Precision',
    'Recall', 'F1 Score' and 'ROC AUC' (as produced by
    compare_models_enhanced). Saves PNGs under ./comparison_plots and
    returns their paths.
    """
    os.makedirs('./comparison_plots', exist_ok=True)
    plot_paths = []

    # Model performance comparison (bar chart of held-out accuracy)
    plt.figure(figsize=(12, 6))
    sns.barplot(data=results_df, x='Model', y='Test Accuracy')
    plt.title('Model Accuracy Comparison')
    plt.xticks(rotation=45)
    accuracy_path = './comparison_plots/accuracy_comparison.png'
    plt.savefig(accuracy_path, bbox_inches='tight')
    plot_paths.append(accuracy_path)
    plt.close()

    # Metric radar chart; ROC AUC axis included only when at least one
    # model produced a value (it is NaN for non-binary targets).
    metrics = ['Test Accuracy', 'Precision', 'Recall', 'F1 Score']
    if not results_df['ROC AUC'].isna().all():
        metrics.append('ROC AUC')

    plt.figure(figsize=(10, 10))
    ax = plt.subplot(111, polar=True)

    # One angle per metric; repeat the first angle so each polygon closes.
    angles = np.linspace(0, 2*np.pi, len(metrics), endpoint=False)
    angles = np.concatenate((angles, [angles[0]]))

    for idx, row in results_df.iterrows():
        values = row[metrics].values.flatten().tolist()
        values += values[:1]  # close the polygon (matches the repeated angle)
        ax.plot(angles, values, 'o-', label=row['Model'])

    # Label the spokes in degrees (set_thetagrids expects degrees).
    ax.set_thetagrids(angles[:-1] * 180/np.pi, metrics)
    ax.set_title('Model Performance Radar Chart')
    ax.legend(bbox_to_anchor=(1.1, 1.1))
    radar_path = './comparison_plots/radar_chart.png'
    plt.savefig(radar_path, bbox_inches='tight')
    plot_paths.append(radar_path)
    plt.close()

    return plot_paths
301
+
302
def compare_models_enhanced():
    """Enhanced model comparison with more metrics and visualizations.

    Trains five classifiers on the globally cached dataset (last column is
    the target), reporting 5-fold CV accuracy, held-out test metrics and
    wall-clock training time, plus comparison plots.

    Returns:
        (results DataFrame, list of plot paths); on missing data, an error
        string plus two empty lists.
    """
    if df_global is None:
        return "Please upload and preprocess a dataset first.", [], []

    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Define models to compare
    models = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "GradientBoosting": GradientBoostingClassifier(),
        "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
        "SVC": SVC(probability=True)
    }

    results = []

    for name, model in models.items():
        start_time = time.time()

        # Cross validation on the full dataset
        cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')

        # Full training and held-out test evaluation
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else None

        # CONSISTENCY FIX: zero_division=0 matches the rest of this file and
        # avoids undefined-metric warnings when a class gets no predictions.
        metrics = {
            'Model': name,
            'CV Mean Accuracy': np.mean(cv_scores),
            'CV Std Dev': np.std(cv_scores),
            'Test Accuracy': accuracy_score(y_test, y_pred),
            'Precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),
            'Recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),
            'F1 Score': f1_score(y_test, y_pred, average='weighted', zero_division=0),
            # ROC AUC is only computed for binary targets with probabilities.
            'ROC AUC': roc_auc_score(y_test, y_proba) if y_proba is not None and len(np.unique(y_test)) == 2 else np.nan,
            'Training Time (s)': time.time() - start_time
        }

        results.append(metrics)

        # Log to wandb only when a run is active
        if wandb.run:
            wandb.log({f"{name}_{k}": v for k, v in metrics.items() if k != 'Model'})

    # Create visualizations
    results_df = pd.DataFrame(results)
    plot_paths = create_model_comparison_plots(results_df)

    return results_df, plot_paths
362
+
363
def perform_ab_test(model_a_name, model_b_name):
    """Perform A/B test between two specific models.

    Both models are trained on the same split of the globally cached
    dataset and evaluated on the same test set; B's metrics are compared
    against A's.

    Args:
        model_a_name / model_b_name: keys into the model library
            (RandomForest, LogisticRegression, GradientBoosting, XGBoost, SVC).

    Returns:
        (dict with 'metrics_comparison' and 'improvements', [plot path]);
        ({'error': ...}, []) on missing data or bad model names.
    """
    if df_global is None:
        return {"error": "Please upload and preprocess a dataset first."}, []

    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    if y.dtype == 'object':
        y = LabelEncoder().fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Define all available models
    model_library = {
        "RandomForest": RandomForestClassifier(),
        "LogisticRegression": LogisticRegression(max_iter=1000),
        "GradientBoosting": GradientBoostingClassifier(),
        "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
        "SVC": SVC(probability=True)
    }

    # Get the selected models
    model_a = model_library.get(model_a_name)
    model_b = model_library.get(model_b_name)

    if not model_a or not model_b:
        return {"error": "Invalid model selection"}, []

    # Train both models on the identical training split
    model_a.fit(X_train, y_train)
    model_b.fit(X_train, y_train)

    # Get predictions
    y_pred_a = model_a.predict(X_test)
    y_pred_b = model_b.predict(X_test)

    # Calculate metrics (zero_division=0 for consistency with the rest of
    # the file; avoids warnings when a class gets no predictions)
    metrics_a = {
        'accuracy': accuracy_score(y_test, y_pred_a),
        'precision': precision_score(y_test, y_pred_a, average='weighted', zero_division=0),
        'recall': recall_score(y_test, y_pred_a, average='weighted', zero_division=0),
        'f1': f1_score(y_test, y_pred_a, average='weighted', zero_division=0)
    }

    metrics_b = {
        'accuracy': accuracy_score(y_test, y_pred_b),
        'precision': precision_score(y_test, y_pred_b, average='weighted', zero_division=0),
        'recall': recall_score(y_test, y_pred_b, average='weighted', zero_division=0),
        'f1': f1_score(y_test, y_pred_b, average='weighted', zero_division=0)
    }

    # Calculate relative improvements (B relative to A)
    improvements = {
        'accuracy_improvement': metrics_b['accuracy'] - metrics_a['accuracy'],
        'f1_improvement': metrics_b['f1'] - metrics_a['f1'],
        'relative_improvement': (metrics_b['accuracy'] - metrics_a['accuracy']) / metrics_a['accuracy'] if metrics_a['accuracy'] != 0 else 0
    }

    # Create comparison DataFrame
    comparison_df = pd.DataFrame({
        'Metric': list(metrics_a.keys()),
        model_a_name: list(metrics_a.values()),
        model_b_name: list(metrics_b.values())
    })

    # Log to wandb only when a run is active
    if wandb.run:
        wandb.log({
            f"A_B_Test/{model_a_name}_metrics": metrics_a,
            f"A_B_Test/{model_b_name}_metrics": metrics_b,
            f"A_B_Test/Improvements": improvements
        })

    # Create visualization.
    # BUG FIX: ensure the output directory exists — previously only
    # create_model_comparison_plots created it, so running the A/B test
    # first crashed on savefig.
    os.makedirs('./comparison_plots', exist_ok=True)
    plt.figure(figsize=(10, 6))
    comparison_df.set_index('Metric').plot(kind='bar', rot=0)
    plt.title(f'A/B Test: {model_a_name} vs {model_b_name}')
    plt.ylabel('Score')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plot_path = './comparison_plots/ab_test_comparison.png'
    plt.savefig(plot_path, bbox_inches='tight')
    plt.close()

    return {
        'metrics_comparison': comparison_df.to_dict(),
        'improvements': improvements
    }, [plot_path]
452
+
453
+
454
+
455
def explainability(_=None):
    """Generate SHAP and LIME explanation plots for a RandomForest model.

    Trains a fresh RandomForest on the globally cached dataset (last column
    is the target) and renders a SHAP summary plot and a LIME explanation
    for the first test row.

    BUG FIX: `_` now has a default value — the UI wires this handler with
    `inputs=[]`, i.e. it is invoked with zero arguments, so a required
    positional parameter raised TypeError.

    Returns:
        (path to SHAP plot PNG, path to LIME plot PNG); the SHAP path points
        to an error image when SHAP plotting fails.
    """
    import warnings
    warnings.filterwarnings("ignore")

    target = df_global.columns[-1]
    X = df_global.drop(target, axis=1)
    y = df_global[target]

    if y.dtype == "object":
        y = LabelEncoder().fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    try:
        # Multiclass TreeExplainer returns one array per class; plot class 0.
        if isinstance(shap_values, list):
            class_idx = 0
            sv = shap_values[class_idx]
        else:
            sv = shap_values

        # Ensure 2D input shape for SHAP plot
        if len(sv.shape) > 2:
            sv = sv.reshape(sv.shape[0], -1)  # Flatten any extra dimensions

        # Use safe feature names if mismatch, fallback to dummy
        num_features = sv.shape[1]
        if num_features <= X_test.shape[1]:
            feature_names = X_test.columns[:num_features]
        else:
            feature_names = [f"Feature_{i}" for i in range(num_features)]

        # Placeholder frame only carries column labels for the plot axes.
        X_shap_safe = pd.DataFrame(np.zeros_like(sv), columns=feature_names)

        shap.summary_plot(sv, X_shap_safe, show=False)
        shap_path = "./shap_plot.png"
        plt.title("SHAP Summary")
        plt.savefig(shap_path)
        if wandb.run:
            wandb.log({"shap_summary": wandb.Image(shap_path)})
        plt.clf()

    except Exception as e:
        # Best-effort: render the error text as an image so the UI still
        # has something to display.
        shap_path = "./shap_error.png"
        print("SHAP plotting failed:", e)
        plt.figure(figsize=(6, 3))
        plt.text(0.5, 0.5, f"SHAP Error:\n{str(e)}", ha='center', va='center')
        plt.axis('off')
        plt.savefig(shap_path)
        if wandb.run:
            wandb.log({"shap_error": wandb.Image(shap_path)})
        plt.clf()

    # LIME explanation for the first test instance
    lime_explainer = lime.lime_tabular.LimeTabularExplainer(
        X_train.values,
        feature_names=X_train.columns.tolist(),
        class_names=[str(c) for c in np.unique(y_train)],
        mode='classification'
    )
    lime_exp = lime_explainer.explain_instance(X_test.iloc[0].values, model.predict_proba)
    lime_fig = lime_exp.as_pyplot_figure()
    lime_path = "./lime_plot.png"
    lime_fig.savefig(lime_path)
    if wandb.run:
        wandb.log({"lime_explanation": wandb.Image(lime_path)})
    plt.clf()

    return shap_path, lime_path
529
+
530
# Gradio dashboard: wires the upload, agent-analysis, Optuna training,
# explainability, A/B-testing and model-comparison handlers defined above.
with gr.Blocks() as demo:
    gr.Markdown("## 📊 AI-Powered Data Analysis with Hyperparameter Optimization")

    with gr.Row():
        with gr.Column():
            # Upload + preview; upload_file also caches the cleaned frame
            # in the module-global df_global used by the other handlers.
            file_input = gr.File(label="Upload CSV or Excel", type="filepath")
            df_output = gr.DataFrame(label="Cleaned Data Preview")
            file_input.change(fn=upload_file, inputs=file_input, outputs=df_output)

        with gr.Column():
            insights_output = gr.HTML(label="Insights from SmolAgent")
            visual_output = gr.Gallery(label="Visualizations (Auto-generated by Agent)", columns=2)
            agent_btn = gr.Button("Run AI Agent (5 Insights + 5 Visualizations)")

    with gr.Row():
        train_btn = gr.Button("Train Model with Optuna + WandB")
        metrics_output = gr.JSON(label="Performance Metrics")
        trials_output = gr.DataFrame(label="Top 7 Hyperparameter Trials")

    with gr.Row():
        explain_btn = gr.Button("SHAP + LIME Explainability")
        shap_img = gr.Image(label="SHAP Summary Plot")
        lime_img = gr.Image(label="LIME Explanation")

    # Add new A/B testing components
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🆚 Model A/B Testing")
            with gr.Row():
                model_a_select = gr.Dropdown(
                    choices=["RandomForest", "LogisticRegression", "GradientBoosting", "XGBoost", "SVC"],
                    label="Select Model A",
                    value="RandomForest"
                )
                model_b_select = gr.Dropdown(
                    choices=["RandomForest", "LogisticRegression", "GradientBoosting", "XGBoost", "SVC"],
                    label="Select Model B",
                    value="LogisticRegression"
                )
            ab_test_btn = gr.Button("Run A/B Test")

        with gr.Column():
            ab_test_results = gr.JSON(label="A/B Test Results")
            ab_test_plots = gr.Gallery(label="A/B Test Visualizations")

    # Add model comparison components
    with gr.Row():
        compare_btn = gr.Button("Compare All Models")
        comparison_results = gr.DataFrame(label="Model Comparison Results")
        comparison_plots = gr.Gallery(label="Comparison Visualizations")

    agent_btn.click(fn=analyze_data, inputs=[file_input], outputs=[insights_output, visual_output])
    train_btn.click(fn=train_model, inputs=[file_input], outputs=[metrics_output, trials_output])
    # NOTE(review): inputs=[] means explainability is invoked with ZERO
    # arguments — confirm its signature accepts a no-arg call.
    explain_btn.click(fn=explainability, inputs=[], outputs=[shap_img, lime_img])

    # New handlers for A/B testing and comparison
    ab_test_btn.click(
        fn=perform_ab_test,
        inputs=[model_a_select, model_b_select],
        outputs=[ab_test_results, ab_test_plots]
    )

    compare_btn.click(
        fn=compare_models_enhanced,
        inputs=[],
        outputs=[comparison_results, comparison_plots]
    )


demo.launch(debug=True)