Spaces:

Badumetsibb
/

Aurora

Sleeping

App Files Files Community

Badumetsibb committed on Sep 2, 2025

Commit

b12bad2

verified ·

1 Parent(s): 3a0e72b

Upload adaptive_meta_patch_v2.py

Browse files

Files changed (1) hide show

adaptive_meta_patch_v2.py +322 -0

adaptive_meta_patch_v2.py ADDED Viewed

	@@ -0,0 +1,322 @@

+# ===============================================
+# ADAPTIVE META-CONTROLLER MAIN LOOP (V2 — Contextual LinUCB)
+# Drop-in for app.py — replaces your main_worker.
+# Upgrades Thompson sampling to a contextual LinUCB bandit using live features.
+# ===============================================
+import os
+import csv
+import time
+import math
+import random
+from collections import deque, defaultdict
+import numpy as np
+import pandas as pd
+# ------------------ Contextual Bandit (LinUCB) ------------------
class LinUCBBandit:
    """Disjoint LinUCB contextual bandit with one ridge model per arm.

    Every arm ``a`` keeps a matrix ``A_a`` (d x d, initialised to
    ``regularization * I``) and a vector ``b_a`` (d,). For a context ``x``
    the arm is scored as::

        p_a = theta_a^T x + alpha * sqrt(x^T A_a^{-1} x),  theta_a = A_a^{-1} b_a

    Args:
        strategies: Iterable of arm names. Iteration order is the tie-break
            order used by ``select``.
        d: Length of the context vector.
        alpha: Weight of the exploration bonus.
        regularization: Ridge coefficient used to initialise each ``A_a``.
    """

    def __init__(self, strategies, d, alpha=1.0, regularization=1.0):
        self.strategies = list(strategies)
        self.d = d
        self.alpha = alpha
        self.reg = regularization
        # initialize A as reg*I and b as zeros for each arm
        self.A = {s: (self.reg * np.eye(self.d)) for s in self.strategies}
        self.b = {s: np.zeros(self.d) for s in self.strategies}

    def _as_context(self, context_vector):
        """Return ``context_vector`` as a flat float array of length ``d``.

        Raises:
            ValueError: If the flattened vector does not have ``d`` entries.
                Without this check a length-1 vector would silently
                broadcast into every entry of ``A`` during ``update``.
        """
        x = np.asarray(context_vector, dtype=float).reshape(-1)
        if x.shape[0] != self.d:
            raise ValueError(
                f"context vector has {x.shape[0]} entries, expected {self.d}"
            )
        return x

    def _get_ucb(self, s, x):
        """Return ``(ucb, mean)`` for arm ``s`` under context ``x``."""
        x = self._as_context(x)
        # One linear solve yields both theta = A^{-1} b and A^{-1} x, which
        # avoids forming the explicit inverse.
        solved = np.linalg.solve(self.A[s], np.column_stack((self.b[s], x)))
        theta, a_inv_x = solved[:, 0], solved[:, 1]
        mean = float(theta.dot(x))
        var = float(x.dot(a_inv_x))
        # Clamp tiny negative values caused by floating-point round-off.
        bonus = self.alpha * math.sqrt(max(var, 0.0))
        return mean + bonus, mean

    def select(self, context_vector):
        """Return the arm name with the highest upper confidence bound.

        Ties are resolved in favour of the arm that appears first in
        ``self.strategies``.
        """
        x = self._as_context(context_vector)
        return max(self.strategies, key=lambda s: self._get_ucb(s, x)[0])

    def update(self, strategy, context_vector, reward):
        """Fold one observed ``reward`` for ``strategy`` into its model.

        Args:
            strategy: Arm name; must be one of the names given at construction.
            context_vector: Context of length ``d`` (any shape that flattens to it).
            reward: Numeric reward, used as-is (e.g. pnl or a 0/1 outcome).

        Raises:
            KeyError: If ``strategy`` is not a known arm.
            ValueError: If the context does not have ``d`` entries.
        """
        # Validate before mutating so A and b never get out of sync.
        x = self._as_context(context_vector)
        reward = float(reward)
        self.A[strategy] += np.outer(x, x)
        self.b[strategy] += reward * x
+# ------------------ Other meta components (from v1) ------------------
class PerformanceLogger:
    """Append signals and outcomes to a CSV for meta-learning and replay.

    The file has one header row followed by one row per signal. The ``pnl``
    and ``reward`` columns are written empty by ``log_signal`` and filled in
    later by ``update_outcome``.
    """

    HEADER = ["timestamp", "strategy", "action", "entry", "stop_loss", "take_profit",
              "price_at_signal", "eval_time", "pnl", "reward", "context_hash"]
    # Column positions of the two outcome fields within a row.
    _PNL_COL = 8
    _REWARD_COL = 9

    def __init__(self, path="/mnt/data/agent_signals_log.csv"):
        """Create the log file (and its parent directory) if needed.

        Args:
            path: Location of the CSV log.
        """
        self.path = path
        parent = os.path.dirname(self.path)
        if parent:
            # The default location may not exist on a fresh machine.
            os.makedirs(parent, exist_ok=True)
        # An existing but empty file still needs its header row.
        if not os.path.exists(self.path) or os.path.getsize(self.path) == 0:
            with open(self.path, "w", newline='') as f:
                csv.writer(f).writerow(self.HEADER)

    def log_signal(self, ts, strategy, action, entry, sl, tp, price, eval_time, context_hash):
        """Append one signal row with empty ``pnl`` and ``reward`` fields."""
        with open(self.path, "a", newline='') as f:
            writer = csv.writer(f)
            writer.writerow([ts, strategy, action, entry, sl, tp, price, eval_time, "", "", context_hash])

    def update_outcome(self, ts, pnl, reward):
        """Fill in ``pnl`` and ``reward`` for the newest unevaluated row at ``ts``.

        Rows are scanned from the end of the file; only the first match is
        updated. Nothing is written when no row matches or the file is missing.

        Args:
            ts: Timestamp string exactly as passed to ``log_signal``.
            pnl: Profit or loss, stored with six decimals.
            reward: Reward value, stored with six decimals.
        """
        try:
            with open(self.path, "r", newline='') as f:
                rows = list(csv.reader(f))
        except FileNotFoundError:
            return
        for row in reversed(rows[1:]):
            # Skip truncated or malformed rows instead of raising IndexError.
            if len(row) <= self._REWARD_COL:
                continue
            if row[0] == ts and row[self._PNL_COL] == "":
                row[self._PNL_COL] = f"{pnl:.6f}"
                row[self._REWARD_COL] = f"{reward:.6f}"
                break
        else:
            return
        # Write to a sibling file and swap it in, so an interrupted write
        # cannot leave a truncated log behind.
        tmp_path = self.path + ".tmp"
        with open(tmp_path, "w", newline='') as f:
            csv.writer(f).writerows(rows)
        os.replace(tmp_path, self.path)
class PageHinkley:
    """Page-Hinkley style change detector for a stream of scores.

    The detector tracks an exponentially weighted running mean and
    accumulates the deviations of each sample from it.

    Args:
        delta: Per-sample offset subtracted from every deviation.
        lambda_: Threshold the accumulated deviation must exceed to fire.
        alpha: Forgetting factor of the running mean (closer to 1 = slower).
        direction: Which shift to detect. ``"down"`` (the default, and the
            only behaviour before this parameter existed) fires when samples
            fall below the running mean; ``"up"`` fires when they rise above
            it; ``"both"`` fires on either. A caller that feeds losses
            (e.g. negative pnl) and wants an alarm on *rising* loss needs
            ``"up"`` or ``"both"``.

    Raises:
        ValueError: If ``direction`` is not one of the three accepted values.
    """

    _DIRECTIONS = ("down", "up", "both")

    def __init__(self, delta=0.0001, lambda_=40, alpha=1-1e-3, direction="down"):
        if direction not in self._DIRECTIONS:
            raise ValueError(
                f"direction must be one of {self._DIRECTIONS}, got {direction!r}"
            )
        self.mean = 0.0
        self.delta = delta
        self.lambda_ = lambda_
        self.alpha = alpha
        self.direction = direction
        # Downward statistic; always <= 0.
        self.cumulative = 0.0
        # Upward statistic; always >= 0.
        self.cumulative_up = 0.0

    def update(self, x):
        """Consume one sample and report whether a change was detected.

        Args:
            x: score (e.g., negative pnl or error).

        Returns:
            True when an enabled statistic crossed ``lambda_``; both
            statistics are reset to zero in that case. False otherwise.
        """
        self.mean = self.mean * self.alpha + x * (1 - self.alpha)
        deviation = x - self.mean
        fired = False
        if self.direction in ("down", "both"):
            # Kept exactly as the original formula so default behaviour is unchanged.
            self.cumulative = min(self.cumulative + deviation - self.delta, 0)
            fired = -self.cumulative > self.lambda_
        if self.direction in ("up", "both"):
            self.cumulative_up = max(self.cumulative_up + deviation - self.delta, 0)
            fired = fired or self.cumulative_up > self.lambda_
        if fired:
            self.cumulative = 0
            self.cumulative_up = 0
            return True
        return False
class StrategyManager:
    """Expose all strategies behind one ``name -> callable(seq)`` mapping."""

    def __init__(self, situation_room, extra_strategies=None):
        self.situation_room = situation_room
        self.extra = extra_strategies if extra_strategies else {}

    def list_strategies(self):
        """Return a fresh dict of strategy callables keyed by name.

        The built-in ``"rule_based"`` entry delegates to the situation room;
        entries from ``self.extra`` are merged afterwards and therefore win
        on a name clash.
        """
        def rule_based(seq):
            # Canonical rule-based strategy: empty first argument, then the sequence.
            return self.situation_room.generate_thesis({}, seq)

        strategies = dict(rule_based=rule_based)
        strategies.update(self.extra)
        return strategies
+# ------------------ Small helpers ------------------
def context_hash_from_df(df):
    """Build a short string fingerprint from the last row of ``df``.

    The values of whichever of ``close``, ``ATR``, ``EMA_20``, ``RSI`` and
    ``session_london`` exist are formatted with six decimals and joined by
    underscores. When none of them exist the formatted ``close`` lookup
    (defaulting to 0.0) is returned instead.
    """
    last_row = df.iloc[-1]
    parts = []
    for column in ("close", "ATR", "EMA_20", "RSI", "session_london"):
        if column in last_row.index:
            parts.append(f"{last_row[column]:.6f}")
    if not parts:
        return f"{float(last_row.get('close', 0.0)):.6f}"
    return "_".join(parts)
def fetch_current_price_or_last(seq):
    """Return the most recent ``close`` value of ``seq`` as a float.

    The row-first lookup is tried first; if it raises for any reason the
    column-first lookup is used instead.
    """
    try:
        price = float(seq.iloc[-1]['close'])
    except Exception:
        closes = seq['close']
        price = float(closes.iloc[-1])
    return price
+# ------------------ Context vector builder ------------------
def build_context_vector_from_features(df, feature_keys=None, d=16):
    """Create a fixed-size numeric context vector from the last row of ``df``.

    Args:
        df: Features DataFrame; only its last row is read.
        feature_keys: Column names to use, in order. When omitted, whichever of
            ``close``, ``ATR``, ``EMA_20``, ``EMA_50``, ``RSI``, ``volume``
            exist in the row are used. Keys missing from the row count as 0.0.
        d: Length of the returned vector; values are truncated or zero-padded.

    Returns:
        A float ``numpy`` array of shape ``(d,)`` whose entries are always
        finite inputs divided by ``close``.
    """
    last = df.iloc[-1]
    if feature_keys is None:
        feature_keys = [k for k in ['close', 'ATR', 'EMA_20', 'EMA_50', 'RSI', 'volume'] if k in last.index]

    def _finite_or_zero(value):
        # Missing, non-numeric, NaN and infinite cells all collapse to 0.0 so
        # one bad cell cannot poison the vector.
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.0
        return number if math.isfinite(number) else 0.0

    values = [_finite_or_zero(last.get(k, 0.0)) for k in feature_keys]
    # simple normalization: divide by close to keep scale small.
    # A zero, missing, NaN or infinite close falls back to 1.0; previously a
    # NaN close passed through and turned every entry into NaN.
    close = _finite_or_zero(last.get('close', 1.0))
    scale = close if close != 0.0 else 1.0
    # pad / truncate to length d
    vec = np.zeros(d, dtype=float)
    used = min(len(values), d)
    vec[:used] = np.asarray(values[:used], dtype=float) / scale
    return vec
+# ------------------ Evaluation pass (uses context) ------------------
def evaluate_pending_signals(perf_logger_path, bandit, change_detector, price_fetch_seq, context_builder):
    """Score every logged signal whose evaluation time has passed.

    For each due, not-yet-evaluated row the shadow pnl is computed against the
    latest close, a 0/1 reward is derived from its sign, the row is filled in,
    the bandit is updated and the change detector is fed ``-pnl``. The CSV is
    rewritten only when at least one row changed.

    Args:
        perf_logger_path: Path of the signal log CSV.
        bandit: Object exposing ``update(strategy, context, reward)``.
        change_detector: Object exposing ``update(score)``.
        price_fetch_seq: Zero-argument callable returning the latest price window.
        context_builder: Callable turning that window into a context vector.

    Returns:
        None. A missing log file is treated as "nothing to do".
    """
    try:
        with open(perf_logger_path, "r", newline='') as f:
            rows = list(csv.reader(f))
    except FileNotFoundError:
        return
    now = pd.Timestamp.now(tz='UTC')
    price_now = None
    ctx = None
    updated = False
    for row in rows[1:]:
        # Skip truncated rows and rows that already carry an outcome.
        if len(row) < 10 or row[8] != "":
            continue
        try:
            # utc=True also localises naive timestamps, so the comparison
            # with the tz-aware `now` cannot raise.
            eval_time = pd.to_datetime(row[7], utc=True)
        except Exception:
            continue
        if pd.isna(eval_time) or eval_time > now:
            continue
        try:
            entry = float(row[3])
        except Exception:
            continue
        if price_now is None:
            # Fetch the window once per pass, and only when a row is due.
            seq = price_fetch_seq()
            if seq is None or len(seq) == 0:
                break
            price_now = fetch_current_price_or_last(seq)
            # NOTE(review): this is the context at evaluation time, not the
            # one the arm was chosen under; the log only stores a hash of the
            # original context, so it cannot be recovered here.
            ctx = context_builder(seq)
        strategy = row[1]
        action = row[2]
        pnl = (price_now - entry) if action == "BUY" else (entry - price_now)
        reward = 1.0 if pnl > 0 else 0.0
        row[8] = f"{pnl:.6f}"
        row[9] = f"{reward:.6f}"
        try:
            bandit.update(strategy, ctx, reward)
        except Exception as exc:
            # Best effort: a selector without contextual updates, or a
            # strategy unknown to it, must not block outcome recording.
            print(f"[Adaptive v2] Bandit update skipped for {strategy!r}: {exc}")
        change_detector.update(-pnl)
        updated = True
    if updated:
        # Write to a sibling file and swap it in, so an interrupted write
        # cannot leave a truncated log behind.
        tmp_path = f"{perf_logger_path}.tmp"
        with open(tmp_path, "w", newline='') as f:
            csv.writer(f).writerows(rows)
        os.replace(tmp_path, perf_logger_path)
+# ------------------ Bootstrap dependencies ------------------
def bootstrap_components(symbol):
    """Build the prediction engine, situation room and regime filter.

    Each component is attempted independently. If your app constructs these
    elsewhere, replace this with imports/uses of your instances.

    Returns:
        A ``(prediction_engine, situation_room, regime_filter)`` tuple. The
        engine is None when it cannot be built; the regime filter falls back
        to a stub that always allows trading.
    """
    # Prediction engine: optional, so any failure simply disables it.
    engine = None
    try:
        engine = PredictionEngine(symbol=symbol)
    except Exception:
        engine = None

    # Situation room: retry with empty parameters when the tuned ones fail.
    try:
        room = RuleBasedSituationRoom(BEST_PARAMS)
    except Exception:
        room = RuleBasedSituationRoom({})

    # Regime filter: substitute a permissive stand-in on failure.
    try:
        regime_filter = MarketRegimeFilter()
    except Exception:
        class _DummyRF:
            def should_trade(self, regime, thesis):
                return True
        regime_filter = _DummyRF()

    return engine, room, regime_filter
+# ------------------ NEW main_worker (Contextual LinUCB) ------------------
def main_worker(symbol: str, ntfy_topic: str, poll_interval_seconds: int = 60, lookback_minutes: int = 240, eval_horizon_minutes: int = 30, use_contextual: bool = True):
    """Adaptive, self-evaluating main loop with contextual bandit option.

    Replaces your existing main_worker. Each cycle fetches the latest price
    window, builds features, lets the selector pick a strategy, logs BUY/SELL
    signals for later scoring, and then scores any signals that are due.

    Args:
        symbol: Instrument passed to the component bootstrap and data fetcher.
        ntfy_topic: Topic passed to the notification sender.
        poll_interval_seconds: Sleep between cycles (also after an error).
        lookback_minutes: Size of the window requested from the data fetcher.
        eval_horizon_minutes: Delay after which a logged signal is scored.
        use_contextual: Use the LinUCB selector when True, otherwise a
            uniform random selector.

    The loop runs until interrupted with Ctrl-C; any other exception inside a
    cycle is printed and the loop continues after the poll interval.
    """
    pred_engine, situation_room, regime_filter = bootstrap_components(symbol)
    strategy_manager = StrategyManager(situation_room, extra_strategies={
        # Example alt strategy: a tiny scalp variant built on top of your situation room.
        "scalp": lambda seq: situation_room.generate_thesis({}, seq)
    })
    # The strategy set is fixed for the lifetime of the worker, so resolve it once.
    available = strategy_manager.list_strategies()
    # Build initial context vector size (d)
    d = 16
    if use_contextual:
        bandit = LinUCBBandit(available.keys(), d=d, alpha=1.0, regularization=1.0)
    else:
        # fallback to a simple uniform random selector (if you prefer to keep thompson, add it back)
        class _Rand:
            def __init__(self, keys):
                self.keys = list(keys)

            def select(self, ctx=None):
                return random.choice(self.keys)

            def update(self, *a, **k):
                pass
        bandit = _Rand(available.keys())
    perf_logger = PerformanceLogger()
    change_detector = PageHinkley(delta=0.0001, lambda_=40)

    def _price_seq_provider():
        # Replace with your data fetcher to get the latest window
        return fetch_latest_sequence(symbol, lookback_minutes)

    def _context_from_sequence(seq):
        # Run the raw window through the same feature pipeline used at
        # selection time. Building the context from the raw window would give
        # a different column layout, so the bandit would be trained on
        # vectors it never scores.
        return build_context_vector_from_features(create_feature_set_for_inference(seq), d=d)

    print("[Adaptive v2] main_worker started (contextual=%s)." % str(use_contextual))
    while True:
        try:
            # 1) Fetch latest window + build features
            input_sequence = _price_seq_provider()
            if input_sequence is None or len(input_sequence) < 10:
                time.sleep(poll_interval_seconds)
                continue
            features = create_feature_set_for_inference(input_sequence)
            # 2) Predict (optional): if you have a prediction_engine, use it to enrich features
            if pred_engine is not None and hasattr(pred_engine, "predict"):
                try:
                    pred_engine.predict(features)
                except Exception as exc:
                    print(f"[Adaptive v2] Prediction failed: {exc}")
            # 3) Build context vector
            ctx_vec = build_context_vector_from_features(features, d=d)
            # 4) Strategy selection and signal (context-aware if enabled)
            chosen_name = bandit.select(ctx_vec)
            trade_thesis = available[chosen_name](features)
            is_tradeable = True
            try:
                is_tradeable = regime_filter.should_trade("normal", trade_thesis)
            except Exception as exc:
                # Best effort: a failing filter leaves the signal tradeable.
                print(f"[Adaptive v2] Regime filter failed: {exc}")
            final_action = trade_thesis.get('action', 'NO ACTION')
            if not is_tradeable:
                final_action = "NO TRADE (FILTERED)"
            # 5) Log signal for later evaluation
            if final_action in ("BUY", "SELL"):
                # One timestamp for both fields keeps eval_time exactly
                # eval_horizon_minutes after the logged signal time.
                signal_time = pd.Timestamp.now(tz='UTC')
                last_close = features.iloc[-1]['close']
                perf_logger.log_signal(
                    str(signal_time), chosen_name, final_action,
                    trade_thesis.get('entry', last_close),
                    trade_thesis.get('stop_loss', None),
                    trade_thesis.get('take_profit', None),
                    float(last_close),
                    (signal_time + pd.Timedelta(minutes=eval_horizon_minutes)).isoformat(),
                    context_hash_from_df(features)
                )
                # Notify
                try:
                    send_ntfy_notification(ntfy_topic, trade_thesis | {"strategy": chosen_name})
                except Exception as exc:
                    print(f"[Adaptive v2] Notification failed: {exc}")
            # 6) Evaluate pending signals (shadow P&L)
            evaluate_pending_signals(perf_logger.path, bandit, change_detector, _price_seq_provider, _context_from_sequence)
            # 7) Optional: trigger fine-tune on drift
            time.sleep(poll_interval_seconds)
        except KeyboardInterrupt:
            print("[Adaptive v2] Stopping main_worker.")
            break
        except Exception as e:
            # Keep the loop resilient
            print(f"[Adaptive v2] Loop error: {e}")
            time.sleep(poll_interval_seconds)