Update app.py
app.py CHANGED
@@ -12,12 +12,10 @@ import time
 import psutil
 import shutil
 import ast
-import seaborn as sns
-from sklearn.svm import SVC
 from smolagents import HfApiModel, CodeAgent
 from huggingface_hub import login
 from sklearn.model_selection import train_test_split, cross_val_score
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 from sklearn.metrics import ConfusionMatrixDisplay
 from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
 from sklearn.linear_model import LogisticRegression
@@ -254,196 +252,51 @@ def train_model(_):
         print(f"Training Error: {e}")
         return {}, pd.DataFrame()
 
-
-def create_model_comparison_plots(results_df):
-    """Create visualizations for model comparison results"""
-    os.makedirs('./comparison_plots', exist_ok=True)
-    plot_paths = []
-
-    # Model performance comparison
-    plt.figure(figsize=(12, 6))
-    sns.barplot(data=results_df, x='Model', y='Test Accuracy')
-    plt.title('Model Accuracy Comparison')
-    plt.xticks(rotation=45)
-    accuracy_path = './comparison_plots/accuracy_comparison.png'
-    plt.savefig(accuracy_path, bbox_inches='tight')
-    plot_paths.append(accuracy_path)
-    plt.close()
-
-    # Metric radar chart
-    metrics = ['Test Accuracy', 'Precision', 'Recall', 'F1 Score']
-    if not results_df['ROC AUC'].isna().all():
-        metrics.append('ROC AUC')
-
-    plt.figure(figsize=(10, 10))
-    ax = plt.subplot(111, polar=True)
-
-    angles = np.linspace(0, 2*np.pi, len(metrics), endpoint=False)
-    angles = np.concatenate((angles, [angles[0]]))
-
-    for idx, row in results_df.iterrows():
-        values = row[metrics].values.flatten().tolist()
-        values += values[:1]
-        ax.plot(angles, values, 'o-', label=row['Model'])
-
-    ax.set_thetagrids(angles[:-1] * 180/np.pi, metrics)
-    ax.set_title('Model Performance Radar Chart')
-    ax.legend(bbox_to_anchor=(1.1, 1.1))
-    radar_path = './comparison_plots/radar_chart.png'
-    plt.savefig(radar_path, bbox_inches='tight')
-    plot_paths.append(radar_path)
-    plt.close()
-
-    return plot_paths
-
-def compare_models_enhanced():
-    """Enhanced model comparison with more metrics and visualizations"""
+def ab_test_models():
     if df_global is None:
-        return "Please upload and preprocess a dataset first.",
-
+        return "Please upload and preprocess a dataset first.", pd.DataFrame()
+
     target = df_global.columns[-1]
-    X = df_global.drop(target, axis=1)
+    X = df_global.drop(columns=[target])
     y = df_global[target]
 
     if y.dtype == 'object':
         y = LabelEncoder().fit_transform(y)
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
 
-    # Define models to compare
     models = {
-        "RandomForest": RandomForestClassifier(),
-        "LogisticRegression": LogisticRegression(),
-        "GradientBoosting": GradientBoostingClassifier(),
-        "SVC": SVC(probability=True)
+        "Random Forest": RandomForestClassifier(n_estimators=100),
+        "Logistic Regression": LogisticRegression(max_iter=1000),
+        "Gradient Boosting": GradientBoostingClassifier()
     }
 
     results = []
 
-    for name, model in models.items():
-        start_time = time.time()
-
-        # Cross validation
-        cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
-
-        # Full training and test evaluation
-        model.fit(X_train, y_train)
-        y_pred = model.predict(X_test)
-        y_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else None
-
-        # Calculate metrics
-        metrics = {
-            'Model': name,
-            'CV Mean Accuracy': np.mean(cv_scores),
-            'CV Std Dev': np.std(cv_scores),
-            'Test Accuracy': accuracy_score(y_test, y_pred),
-            'Precision': precision_score(y_test, y_pred, average='weighted'),
-            'Recall': recall_score(y_test, y_pred, average='weighted'),
-            'F1 Score': f1_score(y_test, y_pred, average='weighted'),
-            'ROC AUC': roc_auc_score(y_test, y_proba) if y_proba is not None and len(np.unique(y_test)) == 2 else np.nan,
-            'Training Time (s)': time.time() - start_time
-        }
-
-        results.append(metrics)
-
-        # Log to wandb
-        if wandb.run:
-            wandb.log({f"{name}_{k}": v for k, v in metrics.items() if k != 'Model'})
-
-    # Create visualizations
-    results_df = pd.DataFrame(results)
-    plot_paths = create_model_comparison_plots(results_df)
-
-    return results_df, plot_paths
+    for name, clf in models.items():
+        clf.fit(X_train, y_train)
+        y_pred = clf.predict(X_test)
+        results.append({
+            "Model": name,
+            "Accuracy": accuracy_score(y_test, y_pred),
+            "Precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
+            "Recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
+            "F1 Score": f1_score(y_test, y_pred, average="weighted", zero_division=0)
+        })
+        wandb.log({f"{name}_metrics": results[-1]})
+
+    result_df = pd.DataFrame(results)
+    best_model = result_df.sort_values("F1 Score", ascending=False).iloc[0]
+
+    summary = f"""
+    🔍 <b>Best Model:</b> {best_model['Model']}<br>
+    ✅ <b>F1 Score:</b> {best_model['F1 Score']:.4f}<br>
+    📊 <b>Accuracy:</b> {best_model['Accuracy']:.4f}<br>
+    🧠 <b>Precision:</b> {best_model['Precision']:.4f}<br>
+    🔁 <b>Recall:</b> {best_model['Recall']:.4f}
+    """
+
+    return summary, result_df
 
-def perform_ab_test(model_a_name, model_b_name):
-    """Perform A/B test between two specific models"""
-    if df_global is None:
-        return {"error": "Please upload and preprocess a dataset first."}, []
-
-    target = df_global.columns[-1]
-    X = df_global.drop(target, axis=1)
-    y = df_global[target]
-
-    if y.dtype == 'object':
-        y = LabelEncoder().fit_transform(y)
-
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
-
-    model_library = {
-        "RandomForest": RandomForestClassifier(),
-        "LogisticRegression": LogisticRegression(),
-        "GradientBoosting": GradientBoostingClassifier(),
-        "SVC": SVC(probability=True)
-    }
-
-    # Get the selected models
-    model_a = model_library.get(model_a_name)
-    model_b = model_library.get(model_b_name)
-
-    if not model_a or not model_b:
-        return {"error": "Invalid model selection"}, []
-
-    # Train both models
-    model_a.fit(X_train, y_train)
-    model_b.fit(X_train, y_train)
-
-    # Get predictions
-    y_pred_a = model_a.predict(X_test)
-    y_pred_b = model_b.predict(X_test)
-
-    # Calculate metrics
-    metrics_a = {
-        'accuracy': accuracy_score(y_test, y_pred_a),
-        'precision': precision_score(y_test, y_pred_a, average='weighted'),
-        'recall': recall_score(y_test, y_pred_a, average='weighted'),
-        'f1': f1_score(y_test, y_pred_a, average='weighted')
-    }
-
-    metrics_b = {
-        'accuracy': accuracy_score(y_test, y_pred_b),
-        'precision': precision_score(y_test, y_pred_b, average='weighted'),
-        'recall': recall_score(y_test, y_pred_b, average='weighted'),
-        'f1': f1_score(y_test, y_pred_b, average='weighted')
-    }
-
-    # Calculate relative improvements
-    improvements = {
-        'accuracy_improvement': metrics_b['accuracy'] - metrics_a['accuracy'],
-        'f1_improvement': metrics_b['f1'] - metrics_a['f1'],
-        'relative_improvement': (metrics_b['accuracy'] - metrics_a['accuracy']) / metrics_a['accuracy'] if metrics_a['accuracy'] != 0 else 0
-    }
-
-    # Create comparison DataFrame
-    comparison_df = pd.DataFrame({
-        'Metric': list(metrics_a.keys()),
-        model_a_name: list(metrics_a.values()),
-        model_b_name: list(metrics_b.values())
-    })
-
-    # Log to wandb
-    if wandb.run:
-        wandb.log({
-            f"A_B_Test/{model_a_name}_metrics": metrics_a,
-            f"A_B_Test/{model_b_name}_metrics": metrics_b,
-            f"A_B_Test/Improvements": improvements
-        })
-
-    }, [plot_path]
 
 def explainability(_):
     import warnings
@@ -543,49 +396,16 @@ with gr.Blocks() as demo:
         explain_btn = gr.Button("SHAP + LIME Explainability")
         shap_img = gr.Image(label="SHAP Summary Plot")
         lime_img = gr.Image(label="LIME Explanation")
-
-        with gr.Row():
-            model_a_select = gr.Dropdown(
-                choices=["RandomForest", "LogisticRegression", "GradientBoosting", "SVC"],
-                label="Select Model A",
-                value="RandomForest"
-            )
-            model_b_select = gr.Dropdown(
-                choices=["RandomForest", "LogisticRegression", "GradientBoosting", "SVC"],
-                label="Select Model B",
-                value="LogisticRegression"
-            )
-            ab_test_btn = gr.Button("Run A/B Test")
-
-        with gr.Column():
-            ab_test_results = gr.JSON(label="A/B Test Results")
-            ab_test_plots = gr.Gallery(label="A/B Test Visualizations")
+
+        with gr.Blocks():
+            ab_test_button = gr.Button("Run A/B Testing")
+            ab_summary = gr.HTML()
+            ab_results = gr.Dataframe()
 
-        with gr.Row():
-            compare_btn = gr.Button("Compare All Models")
-            comparison_results = gr.DataFrame(label="Model Comparison Results")
-            comparison_plots = gr.Gallery(label="Comparison Visualizations")
+        ab_test_button.click(fn=ab_test_models, inputs=[], outputs=[ab_summary, ab_results])
 
     agent_btn.click(fn=analyze_data, inputs=[file_input], outputs=[insights_output, visual_output])
     train_btn.click(fn=train_model, inputs=[file_input], outputs=[metrics_output, trials_output])
    explain_btn.click(fn=explainability, inputs=[], outputs=[shap_img, lime_img])
 
-    # New handlers for A/B testing and comparison
-    ab_test_btn.click(
-        fn=perform_ab_test,
-        inputs=[model_a_select, model_b_select],
-        outputs=[ab_test_results, ab_test_plots]
-    )
-
-    compare_btn.click(
-        fn=compare_models_enhanced,
-        inputs=[],
-        outputs=[comparison_results, comparison_plots]
-    )
-
 demo.launch(debug=True)
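
For reference, a minimal standalone sketch of the comparison flow this commit introduces in ab_test_models, run on synthetic data instead of the app's df_global; the Gradio and wandb wiring are omitted, and run_comparison is an illustrative name, not part of app.py:

# Standalone sketch of the new train-and-compare loop, assuming synthetic data.
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def run_comparison():
    # Synthetic stand-in for the uploaded dataset (df_global in the app).
    X, y = make_classification(n_samples=500, n_features=10, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Same three candidate models as the committed ab_test_models.
    models = {
        "Random Forest": RandomForestClassifier(n_estimators=100),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Gradient Boosting": GradientBoostingClassifier(),
    }

    results = []
    for name, clf in models.items():
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        results.append({
            "Model": name,
            "Accuracy": accuracy_score(y_test, y_pred),
            "Precision": precision_score(y_test, y_pred, average="weighted", zero_division=0),
            "Recall": recall_score(y_test, y_pred, average="weighted", zero_division=0),
            "F1 Score": f1_score(y_test, y_pred, average="weighted", zero_division=0),
        })

    # Rank by weighted F1, as the commit does to pick the "best" model.
    result_df = pd.DataFrame(results)
    best = result_df.sort_values("F1 Score", ascending=False).iloc[0]
    return best["Model"], result_df

if __name__ == "__main__":
    best_name, table = run_comparison()
    print(f"Best model by F1: {best_name}")
    print(table.to_string(index=False))

In the app itself, the string summary feeds the gr.HTML component and the DataFrame feeds gr.Dataframe, which is why ab_test_models returns the pair (summary, result_df).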