# Abeshith's picture
# Added Monitoring Stages
# b53ee19
import pandas as pd
import numpy as np
from scipy import stats
from typing import Dict, Any
import json
from pathlib import Path
class DriftDetector:
    """Detect distribution drift between a reference dataset and new data.

    Each numeric column of the reference data is compared against the
    column of the same name in the current data with a two-sample
    Kolmogorov-Smirnov test. A column is flagged as drifted when the
    test's p-value is below ``threshold``.
    """

    def __init__(self, reference_data: pd.DataFrame, threshold: float = 0.05) -> None:
        """Store the reference data and the p-value cutoff.

        Args:
            reference_data: Baseline frame that current data is compared to.
            threshold: A feature is reported as drifted when its KS-test
                p-value is strictly below this value.
        """
        self.reference_data = reference_data
        self.threshold = threshold

    def detect_drift(self, current_data: pd.DataFrame) -> Dict[str, Any]:
        """Detect drift using the two-sample Kolmogorov-Smirnov test.

        Only numeric columns of the reference data that also exist in
        ``current_data`` are tested. Columns with no non-null values in
        either frame are skipped because the test cannot be computed on an
        empty sample.

        Args:
            current_data: New data to compare against the reference data.

        Returns:
            A JSON-serializable dict with the keys ``"drift_detected"``
            (bool), ``"drifted_features"`` (list of column names) and
            ``"drift_scores"`` (per-column ``statistic``, ``p_value`` and
            ``drift`` flag).
        """
        drift_report: Dict[str, Any] = {
            "drift_detected": False,
            "drifted_features": [],
            "drift_scores": {},
        }

        numeric_columns = self.reference_data.select_dtypes(
            include=[np.number]
        ).columns

        for col in numeric_columns:
            if col not in current_data.columns:
                continue

            reference_values = self.reference_data[col].dropna()
            current_values = current_data[col].dropna()
            # An empty sample has no distribution to compare against.
            if reference_values.empty or current_values.empty:
                continue

            # KS test for numerical features
            statistic, p_value = stats.ks_2samp(reference_values, current_values)

            # Comparing a NumPy float yields numpy.bool_, which json.dump
            # rejects; coerce to a built-in bool so the report serializes.
            drifted = bool(p_value < self.threshold)

            drift_report["drift_scores"][col] = {
                "statistic": float(statistic),
                "p_value": float(p_value),
                "drift": drifted,
            }

            if drifted:
                drift_report["drift_detected"] = True
                drift_report["drifted_features"].append(col)

        return drift_report

    def save_report(self, report: Dict[str, Any], output_path: Path) -> None:
        """Write ``report`` as indented JSON to ``output_path``.

        Missing parent directories are created first.

        Args:
            report: Report dict, e.g. the value returned by ``detect_drift``.
            output_path: Destination file; a ``str`` path is accepted as well
                as a ``Path``.
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2)