Spaces:
Sleeping
Sleeping
| """ | |
| SmartCertify ML — Fraud Detection Evaluation | |
| Generate comprehensive metrics, reports, confusion matrices, and ROC curves. | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| import json | |
| import logging | |
| from pathlib import Path | |
| from typing import Dict, Any, List | |
| from sklearn.metrics import ( | |
| accuracy_score, precision_score, recall_score, f1_score, | |
| roc_auc_score, classification_report, confusion_matrix, | |
| ) | |
| import sys | |
| sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent)) | |
| from app.config.settings import MODEL_DIR, PLOTS_DIR | |
| from app.utils.model_io import load_sklearn_model | |
| from app.utils.visualization import ( | |
| plot_confusion_matrix, plot_roc_curve, | |
| plot_precision_recall_curve, plot_multi_roc, | |
| ) | |
| logger = logging.getLogger(__name__) | |
def evaluate_model(
    model,
    X_test: np.ndarray,
    y_test: np.ndarray,
    model_name: str,
) -> Dict[str, Any]:
    """Score one fitted classifier on the test split and render its plots.

    Args:
        model: Fitted estimator exposing ``predict`` and, optionally,
            ``predict_proba``.
        X_test: Test feature matrix passed straight to the model.
        y_test: True labels for ``X_test``.
        model_name: Display name forwarded to the plotting helpers and
            stored in the result.

    Returns:
        A dict with the keys ``model_name``, ``metrics`` (accuracy,
        precision, recall, f1 and roc_auc, each rounded to 4 decimals),
        ``classification_report`` (the scikit-learn report as a dict) and
        ``plots`` (the return values of the three plotting helpers,
        presumably saved file paths; confirm against app.utils.visualization).
    """
    predictions = model.predict(X_test)

    # Use the second predict_proba column as the ranking score when the
    # model offers it; otherwise fall back to the hard predictions as floats.
    if hasattr(model, "predict_proba"):
        scores = model.predict_proba(X_test)[:, 1]
    else:
        scores = predictions.astype(float)

    # Insertion order (accuracy, precision, recall, f1, roc_auc) is kept
    # because callers spread this dict into benchmark rows.
    metrics = {"accuracy": round(accuracy_score(y_test, predictions), 4)}
    thresholded_scorers = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    for metric_name, scorer in thresholded_scorers:
        metrics[metric_name] = round(
            scorer(y_test, predictions, zero_division=0), 4
        )
    metrics["roc_auc"] = round(roc_auc_score(y_test, scores), 4)

    # Per-class breakdown as a nested dict.
    class_names = ["Authentic", "Fraudulent"]
    report = classification_report(
        y_test,
        predictions,
        target_names=class_names,
        output_dict=True,
    )

    # Render the plots in the same order as before: confusion matrix, ROC, PR.
    confusion_plot = plot_confusion_matrix(y_test, predictions, model_name)
    roc_plot = plot_roc_curve(y_test, scores, model_name)
    pr_plot = plot_precision_recall_curve(y_test, scores, model_name)

    return {
        "model_name": model_name,
        "metrics": metrics,
        "classification_report": report,
        "plots": {
            "confusion_matrix": confusion_plot,
            "roc_curve": roc_plot,
            "pr_curve": pr_plot,
        },
    }
def evaluate_all_models(
    X_test: np.ndarray,
    y_test: np.ndarray,
) -> pd.DataFrame:
    """Evaluate all saved fraud detection models and create a comparison.

    Each known model file is loaded with ``load_sklearn_model``; models that
    come back as ``None`` are skipped with a warning. Every loaded model is
    scored with ``evaluate_model``. When more than one model was evaluated, a
    combined ROC plot is produced with ``plot_multi_roc``.

    Side effects (only when at least one model was evaluated):
        * writes ``evaluation_benchmark.csv`` into ``PLOTS_DIR``
        * writes ``evaluation_reports.json`` into ``PLOTS_DIR``

    Args:
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.

    Returns:
        A DataFrame with a ``Model`` column plus one column per metric,
        sorted by ``f1`` descending. If no model could be loaded, an empty
        DataFrame with those columns is returned and nothing is written.
    """
    model_files = {
        "Logistic Regression": "fraud_lr.joblib",
        "k-NN": "fraud_knn.joblib",
        "SVM": "fraud_svm.joblib",
        "Random Forest": "fraud_rf.joblib",
        "XGBoost": "fraud_xgb.joblib",
        "LightGBM": "fraud_lgbm.joblib",
        "Voting Ensemble": "fraud_ensemble.joblib",
    }

    all_results = []
    roc_data = {}
    for name, filename in model_files.items():
        model = load_sklearn_model(filename)
        if model is None:
            logger.warning("Model %s not found, skipping", filename)
            continue

        logger.info("Evaluating %s...", name)
        all_results.append(evaluate_model(model, X_test, y_test, name))

        # Collect ROC data for the multi-model comparison. evaluate_model does
        # not return its scores, so they are recomputed here the same way.
        y_proba = (
            model.predict_proba(X_test)[:, 1]
            if hasattr(model, "predict_proba")
            else model.predict(X_test).astype(float)
        )
        roc_data[name] = {"y_true": y_test, "y_proba": y_proba}

    if not all_results:
        # Without this guard the empty DataFrame has no "f1" column and
        # sort_values("f1") raises KeyError. Returning early also avoids
        # overwriting earlier benchmark files with empty ones.
        logger.warning("No fraud detection models could be loaded; nothing to evaluate")
        # Column names mirror the metric keys produced by evaluate_model.
        return pd.DataFrame(
            columns=["Model", "accuracy", "precision", "recall", "f1", "roc_auc"]
        )

    # Multi-model ROC comparison plot
    if len(roc_data) > 1:
        plot_multi_roc(roc_data)

    # Create benchmark DataFrame, best F1 first
    benchmark_df = pd.DataFrame(
        [{"Model": r["model_name"], **r["metrics"]} for r in all_results]
    ).sort_values("f1", ascending=False)

    # Make sure the output directory exists before writing into it.
    output_dir = Path(PLOTS_DIR)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save benchmark
    benchmark_df.to_csv(output_dir / "evaluation_benchmark.csv", index=False)

    # Save detailed reports as JSON. The "plots" entry is left out, as before.
    serializable_results = [
        {
            "model_name": r["model_name"],
            "metrics": r["metrics"],
            "classification_report": r["classification_report"],
        }
        for r in all_results
    ]
    with open(output_dir / "evaluation_reports.json", "w", encoding="utf-8") as f:
        json.dump(serializable_results, f, indent=2)

    logger.info("\nBenchmark:\n%s", benchmark_df.to_string(index=False))
    return benchmark_df
def main():
    """Run evaluation on all models and print the benchmark table.

    Loads the data via ``prepare_data(apply_smote=False)``, evaluates every
    saved model on the test split with ``evaluate_all_models``, and prints
    the resulting benchmark and the plots directory to stdout.
    """
    # Local import: the preprocessing module is only loaded when main() runs.
    from app.data.preprocess import prepare_data

    banner = "=" * 60
    print(banner)
    # The dash was mis-encoded in the original string; restored as an em dash.
    print(" SmartCertify ML — Model Evaluation")
    print(banner)

    # The status messages originally began with mis-encoded glyphs
    # (presumably emoji) that cannot be recovered, so they are now plain text.
    print("\nLoading data...")
    # Only the test split is needed; the other return values are discarded.
    _, X_test, _, y_test, _ = prepare_data(apply_smote=False)

    print("\nEvaluating all models...")
    benchmark = evaluate_all_models(X_test, y_test)

    print("\nEvaluation complete!")
    print(benchmark.to_string(index=False))
    print(f"\nPlots saved to: {PLOTS_DIR}")
# Script entry point: enable INFO-level logging so the evaluation progress
# and benchmark messages emitted via `logger` are shown, then run main().
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()