"""Data-drift detection: compare live DB rows against the training reference set."""

import os
import sys
from typing import Optional

import pandas as pd
from evidently.metric_preset import DataDriftPreset
from evidently.report import Report

# Make the project root importable so that `src.*` resolves when this file is
# executed directly; this must run before the project-local import below.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from src.monitoring.db import load_data_from_db  # noqa: E402


def detect_drift(reference_path, min_rows: int = 50) -> Optional[str]:
    """Build an Evidently data-drift HTML report and return its path.

    The reference data is read from ``reference_path`` (CSV) with its
    ``Class`` column removed. The current data comes from
    ``load_data_from_db()`` with the tracking columns (``id``,
    ``prediction``, ``probability``, ``Actual_Class``) removed.

    Args:
        reference_path: Path to the reference CSV file; it must contain a
            ``Class`` column.
        min_rows: Minimum number of live rows required to run the drift
            tests. Defaults to 50.

    Returns:
        The path of the saved ``drift_report.html`` file, or ``None`` when
        fewer than ``min_rows`` live rows are available.

    Raises:
        KeyError: If ``Class`` is missing from the reference data, or if a
            tracking column is missing from a live dataset that passed the
            row-count check.
    """
    # Load reference (original training data)
    reference_data = pd.read_csv(reference_path).drop("Class", axis=1)

    # Load current data from live DB
    current_data = load_data_from_db()

    # Check the row count BEFORE touching columns: dropping columns never
    # changes the number of rows, and an empty result may not carry the
    # tracking columns at all, in which case drop() would raise KeyError
    # instead of reaching this "not enough data" path.
    if len(current_data) < min_rows:
        print("⚠️ Not enough live data for drift detection")
        return None

    # Drop tracking columns so we only compare the raw features
    # (V1-V28, Time, Amount)
    current_data = current_data.drop(
        ["id", "prediction", "probability", "Actual_Class"], axis=1
    )

    # Run statistical tests via Evidently AI
    report = Report(metrics=[DataDriftPreset()])
    report.run(reference_data=reference_data, current_data=current_data)

    # Reports are written to <two directories above this file>/reports.
    base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
    report_dir = os.path.join(base_dir, "reports")
    os.makedirs(report_dir, exist_ok=True)

    path = os.path.join(report_dir, "drift_report.html")
    report.save_html(path)
    return path