Spaces:
Sleeping
Sleeping
| # src/drift_detection.py | |
| import os | |
| import json | |
| import pandas as pd | |
| from datetime import timedelta | |
| from river.drift import ADWIN, PageHinkley | |
| DATA_PATH = "data/processed/merged_features.csv" | |
| OUT_DIR = "drift_reports" | |
| os.makedirs(OUT_DIR, exist_ok=True) | |
| FEATURES = ["return_lag1", "volume_lag1", "sentiment_lag1"] | |
| CURRENT_DAYS = 30 | |
| def main(): | |
| df = pd.read_csv(DATA_PATH) | |
| df["Date"] = pd.to_datetime(df["Date"]) | |
| cutoff = df["Date"].max() - timedelta(days=CURRENT_DAYS) | |
| recent = df[df["Date"] >= cutoff] | |
| if recent.empty: | |
| raise RuntimeError("No recent data for drift detection") | |
| results = {} | |
| for f in FEATURES: | |
| adwin = ADWIN() | |
| ph = PageHinkley() | |
| drift_points = [] | |
| for val in recent[f].dropna(): | |
| adwin.update(val) | |
| ph.update(val) | |
| if adwin.drift_detected or ph.drift_detected: | |
| drift_points.append(True) | |
| else: | |
| drift_points.append(False) | |
| results[f] = { | |
| "adwin_drift": adwin.drift_detected, | |
| "page_hinkley_drift": ph.drift_detected, | |
| "drift_flag": adwin.drift_detected or ph.drift_detected | |
| } | |
| out_path = os.path.join(OUT_DIR, "drift_summary.json") | |
| with open(out_path, "w") as f: | |
| json.dump(results, f, indent=4) | |
| print("River drift detection completed") | |
| print(json.dumps(results, indent=2)) | |
| if __name__ == "__main__": | |
| main() | |