Spaces:
Running
Running
File size: 1,633 Bytes
b53ee19 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | import pandas as pd
import numpy as np
from scipy import stats
from typing import Dict, Any
import json
from pathlib import Path
class DriftDetector:
    """Detect distribution drift between a reference dataset and new data.

    Every numeric column of the reference data is compared against the
    same-named column of the current data with a two-sample
    Kolmogorov-Smirnov test (``scipy.stats.ks_2samp``). A column is flagged
    as drifted when the test's p-value is below ``threshold``.
    """

    def __init__(self, reference_data: pd.DataFrame, threshold: float = 0.05):
        """Store the baseline data and the p-value cut-off.

        Args:
            reference_data: Baseline frame; its numeric columns determine
                which features are checked.
            threshold: p-value below which a feature counts as drifted.
        """
        self.reference_data = reference_data
        self.threshold = threshold

    def detect_drift(self, current_data: pd.DataFrame) -> Dict[str, Any]:
        """Detect drift using the Kolmogorov-Smirnov test.

        Args:
            current_data: Frame to compare against the reference data.
                Reference columns that are absent here are ignored.

        Returns:
            A JSON-serializable dict with the keys:

            * ``"drift_detected"`` - ``True`` if any feature drifted.
            * ``"drifted_features"`` - names of the drifted columns.
            * ``"drift_scores"`` - per-column ``"statistic"``, ``"p_value"``
              and ``"drift"`` flag.

            Columns with no non-null values in either frame are skipped and
            do not appear in ``"drift_scores"``.
        """
        drift_report: Dict[str, Any] = {
            "drift_detected": False,
            "drifted_features": [],
            "drift_scores": {},
        }

        numeric_columns = self.reference_data.select_dtypes(
            include=[np.number]
        ).columns

        for col in numeric_columns:
            if col not in current_data.columns:
                continue

            reference_sample = self.reference_data[col].dropna()
            current_sample = current_data[col].dropna()

            # The KS test is undefined for an empty sample (SciPy either
            # raises or yields NaN, depending on version), so a column with
            # no usable values must not abort or pollute the whole report.
            if reference_sample.empty or current_sample.empty:
                continue

            # KS test for numerical features
            statistic, p_value = stats.ks_2samp(reference_sample, current_sample)

            # ks_2samp returns NumPy scalars; comparing them produces a
            # numpy.bool_, which json.dump cannot serialize. Convert to the
            # builtin bool so the report can be written by save_report().
            drifted = bool(p_value < self.threshold)

            drift_report["drift_scores"][col] = {
                "statistic": float(statistic),
                "p_value": float(p_value),
                "drift": drifted,
            }
            if drifted:
                drift_report["drift_detected"] = True
                drift_report["drifted_features"].append(col)

        return drift_report

    def save_report(self, report: Dict[str, Any], output_path: Path):
        """Write ``report`` to ``output_path`` as indented JSON.

        Missing parent directories are created first.

        Args:
            report: Drift report, e.g. the value returned by
                :meth:`detect_drift`.
            output_path: Destination file. A plain string path is also
                accepted and converted to :class:`pathlib.Path`.
        """
        # Accept str as well as Path; Path(Path) is a no-op.
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)
|