Badumetsibb commited on
Commit
b12bad2
·
verified ·
1 Parent(s): 3a0e72b

Upload adaptive_meta_patch_v2.py

Browse files
Files changed (1) hide show
  1. adaptive_meta_patch_v2.py +322 -0
adaptive_meta_patch_v2.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # ===============================================
3
+ # ADAPTIVE META-CONTROLLER MAIN LOOP (V2 — Contextual LinUCB)
4
+ # Drop-in for app.py — replaces your main_worker.
5
+ # Upgrades Thompson sampling to a contextual LinUCB bandit using live features.
6
+ # ===============================================
7
+
8
+ import os
9
+ import csv
10
+ import time
11
+ import math
12
+ import random
13
+ from collections import deque, defaultdict
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ # ------------------ Contextual Bandit (LinUCB) ------------------
19
+
20
+ class LinUCBBandit:
21
+ \"\"\"A simple LinUCB contextual bandit implementation.
22
+ Each arm maintains A (dxd) and b (d) for ridge regression.
23
+ p_a = theta_a^T x + alpha * sqrt(x^T A^{-1} x)
24
+ \"\"\"
25
+ def __init__(self, strategies, d, alpha=1.0, regularization=1.0):
26
+ self.strategies = list(strategies)
27
+ self.d = d
28
+ self.alpha = alpha
29
+ self.reg = regularization
30
+ # initialize A as reg*I and b as zeros for each arm
31
+ self.A = {s: (self.reg * np.eye(self.d)) for s in self.strategies}
32
+ self.b = {s: np.zeros(self.d) for s in self.strategies}
33
+ def _get_ucb(self, s, x):
34
+ A_inv = np.linalg.inv(self.A[s])
35
+ theta = A_inv.dot(self.b[s])
36
+ mean = theta.dot(x)
37
+ var = x.dot(A_inv).dot(x)
38
+ bonus = self.alpha * math.sqrt(max(var, 0.0))
39
+ return mean + bonus, mean
40
+ def select(self, context_vector):
41
+ # context_vector: 1D numpy array shape (d,)
42
+ scores = {}
43
+ for s in self.strategies:
44
+ ucb, mean = self._get_ucb(s, context_vector)
45
+ scores[s] = ucb
46
+ chosen = max(scores, key=scores.get)
47
+ return chosen
48
+ def update(self, strategy, context_vector, reward):
49
+ # reward: float (can be pnl or binary 0/1). Using reward as-is.
50
+ x = context_vector.reshape(-1)
51
+ self.A[strategy] += np.outer(x, x)
52
+ self.b[strategy] += reward * x
53
+
54
+ # ------------------ Other meta components (from v1) ------------------
55
+
56
+ class PerformanceLogger:
57
+ \"\"\"Append signals and outcomes to a CSV for meta-learning and replay.\"\"\"
58
+ def __init__(self, path=\"/mnt/data/agent_signals_log.csv\"):
59
+ self.path = path
60
+ header = [\"timestamp\",\"strategy\",\"action\",\"entry\",\"stop_loss\",\"take_profit\",\"price_at_signal\",\"eval_time\",\"pnl\",\"reward\",\"context_hash\"]
61
+ if not os.path.exists(self.path):
62
+ with open(self.path, \"w\", newline='') as f:
63
+ writer = csv.writer(f)
64
+ writer.writerow(header)
65
+ def log_signal(self, ts, strategy, action, entry, sl, tp, price, eval_time, context_hash):
66
+ with open(self.path, \"a\", newline='') as f:
67
+ writer = csv.writer(f)
68
+ writer.writerow([ts, strategy, action, entry, sl, tp, price, eval_time, \"\", \"\", context_hash])
69
+ def update_outcome(self, ts, pnl, reward):
70
+ rows = []
71
+ filled = False
72
+ with open(self.path, \"r\", newline='') as f:
73
+ rows = list(csv.reader(f))
74
+ for i in range(len(rows)-1, 0, -1):
75
+ if rows[i][0] == ts and rows[i][8] == \"\":
76
+ rows[i][8] = f\"{pnl:.6f}\"
77
+ rows[i][9] = f\"{reward:.6f}\"
78
+ filled = True
79
+ break
80
+ if filled:
81
+ with open(self.path, \"w\", newline='') as f:
82
+ writer = csv.writer(f)
83
+ writer.writerows(rows)
84
+
85
+ class PageHinkley:
86
+ \"\"\"Page-Hinkley change detector for streaming losses/returns.\"\"\"
87
+ def __init__(self, delta=0.0001, lambda_=40, alpha=1-1e-3):
88
+ self.mean = 0.0
89
+ self.delta = delta
90
+ self.lambda_ = lambda_
91
+ self.alpha = alpha
92
+ self.cumulative = 0.0
93
+ def update(self, x):
94
+ # x: score (e.g., negative pnl or error)
95
+ self.mean = self.mean * self.alpha + x * (1 - self.alpha)
96
+ self.cumulative = min(self.cumulative + x - self.mean - self.delta, 0)
97
+ if -self.cumulative > self.lambda_:
98
+ self.cumulative = 0
99
+ return True
100
+ return False
101
+
102
+ class StrategyManager:
103
+ \"\"\"Wrap strategies with a uniform callable interface.\"\"\"
104
+ def __init__(self, situation_room, extra_strategies=None):
105
+ self.situation_room = situation_room
106
+ self.extra = extra_strategies or {}
107
+ def list_strategies(self):
108
+ # Provide your canonical rule-based strategy
109
+ def rule_based(seq):
110
+ return self.situation_room.generate_thesis({}, seq)
111
+ all_strat = {\"rule_based\": rule_based}
112
+ all_strat.update(self.extra)
113
+ return all_strat
114
+
115
+ # ------------------ Small helpers ------------------
116
+
117
+ def context_hash_from_df(df):
118
+ r = df.iloc[-1]
119
+ keys = [k for k in [\"close\",\"ATR\",\"EMA_20\",\"RSI\",\"session_london\"] if k in r.index]
120
+ vals = [f\"{r[k]:.6f}\" for k in keys]
121
+ return \"_\".join(vals) if vals else f\"{float(r.get('close', 0.0)):.6f}\"
122
+
123
+ def fetch_current_price_or_last(seq):
124
+ try:
125
+ return float(seq.iloc[-1]['close'])
126
+ except Exception:
127
+ return float(seq['close'].iloc[-1])
128
+
129
+ # ------------------ Context vector builder ------------------
130
+
131
+ def build_context_vector_from_features(df, feature_keys=None, d=16):
132
+ \"\"\"Create a fixed-size numeric context vector from the features DataFrame's last row.
133
+ - If feature_keys provided and exist, we use them.
134
+ - Otherwise create a compact vector using normalized primitives.
135
+ \"\"\"
136
+ last = df.iloc[-1]
137
+ if feature_keys is None:
138
+ feature_keys = [k for k in ['close','ATR','EMA_20','EMA_50','RSI','volume'] if k in last.index]
139
+ vec = []
140
+ for k in feature_keys:
141
+ val = float(last.get(k, 0.0))
142
+ if math.isfinite(val):
143
+ vec.append(val)
144
+ else:
145
+ vec.append(0.0)
146
+ # simple normalization: divide by close to keep scale small
147
+ close = float(last.get('close', 1.0) or 1.0)
148
+ vec = [v/close for v in vec]
149
+ # pad / truncate to length d
150
+ if len(vec) >= d:
151
+ vec = vec[:d]
152
+ else:
153
+ vec = vec + [0.0]*(d - len(vec))
154
+ return np.array(vec, dtype=float)
155
+
156
+ # ------------------ Evaluation pass (uses context) ------------------
157
+
158
+ def evaluate_pending_signals(perf_logger_path, bandit, change_detector, price_fetch_seq, context_builder):
159
+ now = pd.Timestamp.now(tz='UTC')
160
+ rows = []
161
+ updated = False
162
+ try:
163
+ with open(perf_logger_path, \"r\", newline='') as f:
164
+ rows = list(csv.reader(f))
165
+ except FileNotFoundError:
166
+ return
167
+ for i in range(1, len(rows)):
168
+ if rows[i][8] != \"\": # already evaluated
169
+ continue
170
+ eval_time_str = rows[i][7]
171
+ try:
172
+ eval_time = pd.to_datetime(eval_time_str)
173
+ except Exception:
174
+ continue
175
+ if eval_time <= now:
176
+ strategy = rows[i][1]; action = rows[i][2]
177
+ try:
178
+ entry = float(rows[i][3])
179
+ except Exception:
180
+ continue
181
+ price_now = fetch_current_price_or_last(price_fetch_seq())
182
+ pnl = (price_now - entry) if action == \"BUY\" else (entry - price_now)
183
+ reward = 1.0 if pnl > 0 else 0.0
184
+ rows[i][8] = f\"{pnl:.6f}\"
185
+ rows[i][9] = f\"{reward:.6f}\"
186
+ # extract context vector for update
187
+ ctx = context_builder(price_fetch_seq())
188
+ try:
189
+ bandit.update(strategy, ctx, reward)
190
+ except Exception:
191
+ # fallback: if bandit doesn't support context, ignore
192
+ pass
193
+ _ = change_detector.update(-pnl)
194
+ updated = True
195
+ if updated:
196
+ with open(perf_logger_path, \"w\", newline='') as f:
197
+ writer = csv.writer(f)
198
+ writer.writerows(rows)
199
+
200
+ # ------------------ Bootstrap dependencies ------------------
201
+
202
+ def bootstrap_components(symbol):
203
+ \"\"\"Create or load your core app components.
204
+ If your app constructs these elsewhere, replace this with imports/uses of your instances.
205
+ \"\"\"
206
+ # Prediction engine: assumes a class PredictionEngine() exists in your app
207
+ try:
208
+ pred_engine = PredictionEngine(symbol=symbol)
209
+ except Exception:
210
+ pred_engine = None # If you don't have it or construct elsewhere
211
+ # Situation room & regime filter
212
+ try:
213
+ sr = RuleBasedSituationRoom(BEST_PARAMS)
214
+ except Exception:
215
+ sr = RuleBasedSituationRoom({})
216
+ try:
217
+ rf = MarketRegimeFilter()
218
+ except Exception:
219
+ class _DummyRF:
220
+ def should_trade(self, regime, thesis): return True
221
+ rf = _DummyRF()
222
+ return pred_engine, sr, rf
223
+
224
+ # ------------------ NEW main_worker (Contextual LinUCB) ------------------
225
+
226
+ def main_worker(symbol: str, ntfy_topic: str, poll_interval_seconds: int = 60, lookback_minutes: int = 240, eval_horizon_minutes: int = 30, use_contextual: bool = True):
227
+ \"\"\"Adaptive, self-evaluating main loop with contextual bandit option.
228
+ Replaces your existing main_worker. Safe to run in paper mode.
229
+ \"\"\"
230
+ pred_engine, situation_room, regime_filter = bootstrap_components(symbol)
231
+ strategy_manager = StrategyManager(situation_room, extra_strategies={
232
+ # Example alt strategy: a tiny scalp variant built on top of your situation room.
233
+ \"scalp\": lambda seq: situation_room.generate_thesis({}, seq)
234
+ })
235
+ # Build initial context vector size (d)
236
+ d = 16
237
+ bandit = None
238
+ if use_contextual:
239
+ bandit = LinUCBBandit(strategy_manager.list_strategies().keys(), d=d, alpha=1.0, regularization=1.0)
240
+ else:
241
+ # fallback to a simple uniform random selector (if you prefer to keep thompson, add it back)
242
+ class _Rand:
243
+ def __init__(self, keys): self.keys = list(keys)
244
+ def select(self, ctx=None): return random.choice(self.keys)
245
+ def update(self, *a, **k): pass
246
+ bandit = _Rand(strategy_manager.list_strategies().keys())
247
+
248
+ perf_logger = PerformanceLogger()
249
+ change_detector = PageHinkley(delta=0.0001, lambda_=40)
250
+
251
+ def _price_seq_provider():
252
+ # Replace with your data fetcher to get the latest window
253
+ return fetch_latest_sequence(symbol, lookback_minutes)
254
+
255
+ print(\"[Adaptive v2] main_worker started (contextual=%s).\" % str(use_contextual))
256
+ while True:
257
+ try:
258
+ # 1) Fetch latest window + build features
259
+ input_sequence = _price_seq_provider()
260
+ if input_sequence is None or len(input_sequence) < 10:
261
+ time.sleep(poll_interval_seconds); continue
262
+
263
+ features = create_feature_set_for_inference(input_sequence)
264
+
265
+ # 2) Predict (optional): if you have a prediction_engine, use it to enrich features
266
+ if pred_engine is not None and hasattr(pred_engine, \"predict\"):
267
+ try:
268
+ _ = pred_engine.predict(features)
269
+ except Exception as _e:
270
+ pass
271
+
272
+ # 3) Build context vector
273
+ ctx_vec = build_context_vector_from_features(features, d=d)
274
+
275
+ # 4) Strategy selection and signal (context-aware if enabled)
276
+ available = strategy_manager.list_strategies()
277
+ chosen_name = bandit.select(ctx_vec)
278
+ trade_thesis = available[chosen_name](features)
279
+
280
+ is_tradeable = True
281
+ try:
282
+ is_tradeable = regime_filter.should_trade(\"normal\", trade_thesis)
283
+ except Exception:
284
+ pass
285
+
286
+ final_action = trade_thesis.get('action', 'NO ACTION')
287
+ if not is_tradeable:
288
+ final_action = \"NO TRADE (FILTERED)\"
289
+
290
+ # 5) Log signal for later evaluation
291
+ ts = str(pd.Timestamp.now(tz='UTC'))
292
+ context_hash = context_hash_from_df(features)
293
+ if final_action in [\"BUY\", \"SELL\"]:
294
+ perf_logger.log_signal(
295
+ ts, chosen_name, final_action,
296
+ trade_thesis.get('entry', features.iloc[-1]['close']),
297
+ trade_thesis.get('stop_loss', None),
298
+ trade_thesis.get('take_profit', None),
299
+ float(features.iloc[-1]['close']),
300
+ (pd.Timestamp.now(tz='UTC') + pd.Timedelta(minutes=eval_horizon_minutes)).isoformat(),
301
+ context_hash
302
+ )
303
+ # Notify
304
+ try:
305
+ send_ntfy_notification(ntfy_topic, trade_thesis | {\"strategy\": chosen_name})
306
+ except Exception:
307
+ pass
308
+
309
+ # 6) Evaluate pending signals (shadow P&L)
310
+ evaluate_pending_signals(perf_logger.path, bandit, change_detector, _price_seq_provider, lambda seq: build_context_vector_from_features(seq, d=d))
311
+
312
+ # 7) Optional: trigger fine-tune on drift
313
+
314
+ time.sleep(poll_interval_seconds)
315
+
316
+ except KeyboardInterrupt:
317
+ print(\"[Adaptive v2] Stopping main_worker.\")
318
+ break
319
+ except Exception as e:
320
+ # Keep the loop resilient
321
+ print(f\"[Adaptive v2] Loop error: {e}\")
322
+ time.sleep(poll_interval_seconds)