Premchan369 commited on
Commit
f67e0eb
·
verified ·
1 Parent(s): fdb56ec

Upload online_learning.py

Browse files
Files changed (1) hide show
  1. online_learning.py +93 -72
online_learning.py CHANGED
@@ -1,88 +1,109 @@
1
- """Online Learning - Adaptive model retraining with concept drift detection."""
2
  import numpy as np
3
  import pandas as pd
4
- from sklearn.linear_model import SGDRegressor
 
5
  from typing import Dict, Optional
6
- import warnings
7
- warnings.filterwarnings('ignore')
8
 
9
 
10
- class OnlineLearner:
11
- """Incremental learning for adaptive alpha models."""
12
 
13
- def __init__(self, lookback_window: int = 252):
14
- self.lookback_window = lookback_window
15
- self.model = SGDRegressor(loss='squared_error', penalty='l2', alpha=0.0001,
16
- learning_rate='adaptive', eta0=0.01, max_iter=1,
17
- warm_start=True, random_state=42)
18
- self.feature_buffer = []
19
- self.target_buffer = []
20
- self.is_fitted = False
21
- self.update_count = 0
22
- self.drift_scores = []
23
-
24
- def partial_fit(self, X: np.ndarray, y: np.ndarray):
25
- """Incrementally update model."""
26
- self.feature_buffer.append(X)
27
- self.target_buffer.append(y)
28
-
29
- if len(self.feature_buffer) > self.lookback_window:
30
- self.feature_buffer = self.feature_buffer[-self.lookback_window:]
31
- self.target_buffer = self.target_buffer[-self.lookback_window:]
32
-
33
- X_batch = np.vstack(self.feature_buffer)
34
- y_batch = np.concatenate(self.target_buffer)
35
-
36
- if not self.is_fitted:
37
- self.model.fit(X_batch, y_batch); self.is_fitted = True
38
- else:
39
- self.model.partial_fit(X_batch, y_batch)
 
 
 
40
 
41
- self.update_count += 1
 
 
42
 
43
- def predict(self, X: np.ndarray) -> np.ndarray:
44
- return self.model.predict(X) if self.is_fitted else np.zeros(len(X))
45
-
46
- def get_drift_score(self, recent_X: np.ndarray, recent_y: np.ndarray) -> float:
47
- """Detect concept drift."""
48
- if not self.is_fitted: return 0.0
49
- pred = self.predict(recent_X)
50
- recent_mse = np.mean((pred - recent_y) ** 2)
51
- all_pred = self.predict(np.vstack(self.feature_buffer))
52
- all_y = np.concatenate(self.target_buffer)
53
- historical_mse = np.mean((all_pred - all_y) ** 2)
54
- drift = recent_mse / (historical_mse + 1e-8) - 1.0
55
- self.drift_scores.append(drift)
56
- return drift
57
 
58
 
59
- class AdaptiveEnsemble:
60
- """Ensemble adapting weights based on recent IC."""
61
 
62
- def __init__(self, models: Dict[str, object], adaptation_rate: float = 0.1):
63
- self.models = models
64
- self.weights = {name: 1.0 / len(models) for name in models}
65
- self.adaptation_rate = adaptation_rate
 
 
 
 
 
 
 
 
 
 
66
 
67
- def predict(self, X: np.ndarray) -> np.ndarray:
68
- final = np.zeros(len(X))
69
- for name, model in self.models.items():
70
- try: final += self.weights[name] * model.predict(X)
71
- except: pass
72
- return final
73
 
74
- def update_weights(self, predictions: Dict[str, np.ndarray], actual: np.ndarray):
75
- from scipy.stats import spearmanr
76
- ics = {}
77
- for name, pred in predictions.items():
78
- ic, _ = spearmanr(pred, actual)
79
- ics[name] = abs(ic) if not np.isnan(ic) else 0.0
80
 
81
- total = sum(ics.values()) + 1e-8
82
- target = {name: ic / total for name, ic in ics.items()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- for name in self.weights:
85
- self.weights[name] = (1 - self.adaptation_rate) * self.weights[name] + self.adaptation_rate * target[name]
 
 
 
 
 
 
 
86
 
87
- total_w = sum(self.weights.values())
88
- self.weights = {k: v / total_w for k, v in self.weights.items()}
 
1
+ """Online Learning - Adaptive model updates with drift detection"""
2
  import numpy as np
3
  import pandas as pd
4
+ import torch
5
+ import torch.nn as nn
6
  from typing import Dict, Optional
7
+ from scipy.stats import ks_2samp
 
8
 
9
 
10
+ class DriftDetector:
11
+ """Detect data drift using statistical tests"""
12
 
13
+ def __init__(self, significance=0.05, window=252):
14
+ self.significance = significance
15
+ self.window = window
16
+ self.reference_stats = {}
17
+ self.drift_history = []
18
+
19
+ def set_reference(self, data: np.ndarray, name: str = 'default'):
20
+ self.reference_stats[name] = {'mean': data.mean(axis=0), 'std': data.std(axis=0), 'data': data}
21
+
22
+ def detect_ks(self, new_data: np.ndarray, name: str = 'default') -> Dict:
23
+ ref = self.reference_stats.get(name)
24
+ if ref is None:
25
+ return {'drift': False, 'p_value': 1.0}
26
+ n_features = new_data.shape[1] if new_data.ndim > 1 else 1
27
+ drifts = []
28
+ p_values = []
29
+ for i in range(n_features):
30
+ col = i if new_data.ndim > 1 else 0
31
+ ref_feat = ref['data'][:, col] if ref['data'].ndim > 1 else ref['data']
32
+ new_feat = new_data[:, col] if new_data.ndim > 1 else new_data
33
+ stat, p = ks_2samp(ref_feat, new_feat)
34
+ drifts.append(p < self.significance)
35
+ p_values.append(p)
36
+ n_drift = sum(drifts)
37
+ overall_drift = n_drift > n_features * 0.3
38
+ return {'drift': overall_drift, 'p_values': p_values, 'n_features_drifted': n_drift, 'total_features': n_features}
39
+
40
+ def detect_cusum(self, residuals: np.ndarray, threshold: float = 5.0, drift: float = 1.0) -> Dict:
41
+ pos_cusum = np.zeros(len(residuals))
42
+ neg_cusum = np.zeros(len(residuals))
43
 
44
+ for t in range(1, len(residuals)):
45
+ pos_cusum[t] = max(0, pos_cusum[t-1] + residuals[t] - drift)
46
+ neg_cusum[t] = min(0, neg_cusum[t-1] + residuals[t] + drift)
47
 
48
+ alert = np.any(pos_cusum > threshold) or np.any(neg_cusum < -threshold)
49
+ return {'alert': alert, 'pos_cusum': pos_cusum, 'neg_cusum': neg_cusum}
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
 
52
+ class OnlineLearner:
53
+ """Online learning with periodic model adaptation"""
54
 
55
+ def __init__(self, model: nn.Module, lr: float = 1e-5,
56
+ adaptation_window: int = 21, drift_threshold: float = 0.3):
57
+ self.model = model
58
+ self.lr = lr
59
+ self.adaptation_window = adaptation_window
60
+ self.drift_threshold = drift_threshold
61
+ self.drift_detector = DriftDetector()
62
+ self.adaptation_count = 0
63
+ self.ic_history = []
64
+ self.performance_history = []
65
+
66
+ def check_and_adapt(self, X_new: np.ndarray, y_new: np.ndarray,
67
+ X_ref: Optional[np.ndarray] = None) -> Dict:
68
+ drift_result = self.drift_detector.detect_ks(X_new)
69
 
70
+ if drift_result['drift']:
71
+ print(f"⚠️ Drift detected: {drift_result['n_features_drifted']}/{drift_result['total_features']} features shifted")
72
+ self._adapt(X_new, y_new)
73
+ self.adaptation_count += 1
74
+ return {'adapted': True, 'drift': drift_result}
75
+ return {'adapted': False, 'drift': drift_result}
76
 
77
+ def _adapt(self, X: np.ndarray, y: np.ndarray, epochs: int = 5):
78
+ self.model.train()
79
+ optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
80
+ criterion = nn.MSELoss()
 
 
81
 
82
+ X_t = torch.FloatTensor(X)
83
+ y_t = torch.FloatTensor(y).unsqueeze(1)
84
+
85
+ for epoch in range(epochs):
86
+ optimizer.zero_grad()
87
+ pred = self.model(X_t)
88
+ loss = criterion(pred, y_t)
89
+ loss.backward()
90
+ optimizer.step()
91
+
92
+ print(f" Adapted model with {epochs} epochs, loss={loss.item():.6f}")
93
+
94
+ def track_performance(self, predictions: np.ndarray, actuals: np.ndarray):
95
+ from scipy.stats import spearmanr
96
+ ic, _ = spearmanr(predictions, actuals)
97
+ self.ic_history.append(ic)
98
 
99
+ # Check if IC is degrading
100
+ if len(self.ic_history) > 63:
101
+ recent_ic = np.mean(self.ic_history[-21:])
102
+ long_ic = np.mean(self.ic_history[-63:])
103
+ degradation = (long_ic - recent_ic) / (abs(long_ic) + 1e-8)
104
+
105
+ if degradation > 0.3:
106
+ print(f"⚠️ IC degradation: recent={recent_ic:.4f}, long={long_ic:.4f}, degradation={degradation:.2%}")
107
+ return 'degrading'
108
 
109
+ return 'stable'