Upload online_learning.py
Browse files- online_learning.py +93 -72
online_learning.py
CHANGED
|
@@ -1,88 +1,109 @@
|
|
| 1 |
-
"""Online Learning - Adaptive model
|
| 2 |
import numpy as np
|
| 3 |
import pandas as pd
|
| 4 |
-
|
|
|
|
| 5 |
from typing import Dict, Optional
|
| 6 |
-
import
|
| 7 |
-
warnings.filterwarnings('ignore')
|
| 8 |
|
| 9 |
|
| 10 |
-
class
|
| 11 |
-
"""
|
| 12 |
|
| 13 |
-
def __init__(self,
|
| 14 |
-
self.
|
| 15 |
-
self.
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
self.
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
|
| 44 |
-
return
|
| 45 |
-
|
| 46 |
-
def get_drift_score(self, recent_X: np.ndarray, recent_y: np.ndarray) -> float:
|
| 47 |
-
"""Detect concept drift."""
|
| 48 |
-
if not self.is_fitted: return 0.0
|
| 49 |
-
pred = self.predict(recent_X)
|
| 50 |
-
recent_mse = np.mean((pred - recent_y) ** 2)
|
| 51 |
-
all_pred = self.predict(np.vstack(self.feature_buffer))
|
| 52 |
-
all_y = np.concatenate(self.target_buffer)
|
| 53 |
-
historical_mse = np.mean((all_pred - all_y) ** 2)
|
| 54 |
-
drift = recent_mse / (historical_mse + 1e-8) - 1.0
|
| 55 |
-
self.drift_scores.append(drift)
|
| 56 |
-
return drift
|
| 57 |
|
| 58 |
|
| 59 |
-
class
|
| 60 |
-
"""
|
| 61 |
|
| 62 |
-
def __init__(self,
|
| 63 |
-
|
| 64 |
-
self.
|
| 65 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
return
|
| 73 |
|
| 74 |
-
def
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
ic, _ = spearmanr(pred, actual)
|
| 79 |
-
ics[name] = abs(ic) if not np.isnan(ic) else 0.0
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
-
|
| 88 |
-
self.weights = {k: v / total_w for k, v in self.weights.items()}
|
|
|
|
| 1 |
+
"""Online Learning - Adaptive model updates with drift detection"""
|
| 2 |
import numpy as np
|
| 3 |
import pandas as pd
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
from typing import Dict, Optional
|
| 7 |
+
from scipy.stats import ks_2samp
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
+
class DriftDetector:
|
| 11 |
+
"""Detect data drift using statistical tests"""
|
| 12 |
|
| 13 |
+
def __init__(self, significance=0.05, window=252):
|
| 14 |
+
self.significance = significance
|
| 15 |
+
self.window = window
|
| 16 |
+
self.reference_stats = {}
|
| 17 |
+
self.drift_history = []
|
| 18 |
+
|
| 19 |
+
def set_reference(self, data: np.ndarray, name: str = 'default'):
|
| 20 |
+
self.reference_stats[name] = {'mean': data.mean(axis=0), 'std': data.std(axis=0), 'data': data}
|
| 21 |
+
|
| 22 |
+
def detect_ks(self, new_data: np.ndarray, name: str = 'default') -> Dict:
|
| 23 |
+
ref = self.reference_stats.get(name)
|
| 24 |
+
if ref is None:
|
| 25 |
+
return {'drift': False, 'p_value': 1.0}
|
| 26 |
+
n_features = new_data.shape[1] if new_data.ndim > 1 else 1
|
| 27 |
+
drifts = []
|
| 28 |
+
p_values = []
|
| 29 |
+
for i in range(n_features):
|
| 30 |
+
col = i if new_data.ndim > 1 else 0
|
| 31 |
+
ref_feat = ref['data'][:, col] if ref['data'].ndim > 1 else ref['data']
|
| 32 |
+
new_feat = new_data[:, col] if new_data.ndim > 1 else new_data
|
| 33 |
+
stat, p = ks_2samp(ref_feat, new_feat)
|
| 34 |
+
drifts.append(p < self.significance)
|
| 35 |
+
p_values.append(p)
|
| 36 |
+
n_drift = sum(drifts)
|
| 37 |
+
overall_drift = n_drift > n_features * 0.3
|
| 38 |
+
return {'drift': overall_drift, 'p_values': p_values, 'n_features_drifted': n_drift, 'total_features': n_features}
|
| 39 |
+
|
| 40 |
+
def detect_cusum(self, residuals: np.ndarray, threshold: float = 5.0, drift: float = 1.0) -> Dict:
|
| 41 |
+
pos_cusum = np.zeros(len(residuals))
|
| 42 |
+
neg_cusum = np.zeros(len(residuals))
|
| 43 |
|
| 44 |
+
for t in range(1, len(residuals)):
|
| 45 |
+
pos_cusum[t] = max(0, pos_cusum[t-1] + residuals[t] - drift)
|
| 46 |
+
neg_cusum[t] = min(0, neg_cusum[t-1] + residuals[t] + drift)
|
| 47 |
|
| 48 |
+
alert = np.any(pos_cusum > threshold) or np.any(neg_cusum < -threshold)
|
| 49 |
+
return {'alert': alert, 'pos_cusum': pos_cusum, 'neg_cusum': neg_cusum}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
|
| 52 |
+
class OnlineLearner:
|
| 53 |
+
"""Online learning with periodic model adaptation"""
|
| 54 |
|
| 55 |
+
def __init__(self, model: nn.Module, lr: float = 1e-5,
|
| 56 |
+
adaptation_window: int = 21, drift_threshold: float = 0.3):
|
| 57 |
+
self.model = model
|
| 58 |
+
self.lr = lr
|
| 59 |
+
self.adaptation_window = adaptation_window
|
| 60 |
+
self.drift_threshold = drift_threshold
|
| 61 |
+
self.drift_detector = DriftDetector()
|
| 62 |
+
self.adaptation_count = 0
|
| 63 |
+
self.ic_history = []
|
| 64 |
+
self.performance_history = []
|
| 65 |
+
|
| 66 |
+
def check_and_adapt(self, X_new: np.ndarray, y_new: np.ndarray,
|
| 67 |
+
X_ref: Optional[np.ndarray] = None) -> Dict:
|
| 68 |
+
drift_result = self.drift_detector.detect_ks(X_new)
|
| 69 |
|
| 70 |
+
if drift_result['drift']:
|
| 71 |
+
print(f"⚠️ Drift detected: {drift_result['n_features_drifted']}/{drift_result['total_features']} features shifted")
|
| 72 |
+
self._adapt(X_new, y_new)
|
| 73 |
+
self.adaptation_count += 1
|
| 74 |
+
return {'adapted': True, 'drift': drift_result}
|
| 75 |
+
return {'adapted': False, 'drift': drift_result}
|
| 76 |
|
| 77 |
+
def _adapt(self, X: np.ndarray, y: np.ndarray, epochs: int = 5):
|
| 78 |
+
self.model.train()
|
| 79 |
+
optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
|
| 80 |
+
criterion = nn.MSELoss()
|
|
|
|
|
|
|
| 81 |
|
| 82 |
+
X_t = torch.FloatTensor(X)
|
| 83 |
+
y_t = torch.FloatTensor(y).unsqueeze(1)
|
| 84 |
+
|
| 85 |
+
for epoch in range(epochs):
|
| 86 |
+
optimizer.zero_grad()
|
| 87 |
+
pred = self.model(X_t)
|
| 88 |
+
loss = criterion(pred, y_t)
|
| 89 |
+
loss.backward()
|
| 90 |
+
optimizer.step()
|
| 91 |
+
|
| 92 |
+
print(f" Adapted model with {epochs} epochs, loss={loss.item():.6f}")
|
| 93 |
+
|
| 94 |
+
def track_performance(self, predictions: np.ndarray, actuals: np.ndarray):
|
| 95 |
+
from scipy.stats import spearmanr
|
| 96 |
+
ic, _ = spearmanr(predictions, actuals)
|
| 97 |
+
self.ic_history.append(ic)
|
| 98 |
|
| 99 |
+
# Check if IC is degrading
|
| 100 |
+
if len(self.ic_history) > 63:
|
| 101 |
+
recent_ic = np.mean(self.ic_history[-21:])
|
| 102 |
+
long_ic = np.mean(self.ic_history[-63:])
|
| 103 |
+
degradation = (long_ic - recent_ic) / (abs(long_ic) + 1e-8)
|
| 104 |
+
|
| 105 |
+
if degradation > 0.3:
|
| 106 |
+
print(f"⚠️ IC degradation: recent={recent_ic:.4f}, long={long_ic:.4f}, degradation={degradation:.2%}")
|
| 107 |
+
return 'degrading'
|
| 108 |
|
| 109 |
+
return 'stable'
|
|
|