Spaces:

P2SAMAPA
/

P2-ETF-DQN-ENGINE

Running

App Files Files Community

P2-ETF-DQN-ENGINE / env.py

P2SAMAPA

[auto] Deploy to HF Space from GitHub

a47c2f1 verified 1 day ago

raw

history blame contribute delete

8.95 kB

	# env.py
	# Custom Gym-style trading environment for the Dueling DQN agent.
	# State : flattened lookback window of technical indicators + macro
	# Actions: 0=CASH, 1..7 = ETF indices (maps to config.ACTIONS)
	# Reward : excess return over T-bill - transaction cost on switches,
	# scaled by inverse realised volatility to penalise drawdowns

	import numpy as np
	import pandas as pd

	import config


	class ETFTradingEnv:
	    """
	    Single-asset-at-a-time ETF selection environment.

	    Each step the agent picks one action index; index 0 is CASH and the
	    other indices are looked up in ``config.ACTIONS`` to find the ETF
	    column in ``price_df``.

	    Parameters
	    ----------
	    feat_df : pd.DataFrame — full feature matrix (one row per trading day)
	    price_df : pd.DataFrame — ETF close prices aligned to feat_df index
	    macro_df : pd.DataFrame — macro data (for TBILL_3M fallback)
	    start_idx : int — first index to start an episode from
	    end_idx : int — last index (exclusive); clamped to the last data row
	    fee_pct : float — one-way transaction cost as fraction (e.g. 0.001 = 10bps)
	    lookback : int — window size fed as state
	    tsl_pct : float — trailing stop loss % (applied post-signal in backtest)
	    """

	    def __init__(self,
	                 feat_df: pd.DataFrame,
	                 price_df: pd.DataFrame,
	                 macro_df: pd.DataFrame,
	                 start_idx: int = 0,
	                 end_idx: int = None,
	                 fee_pct: float = config.DEFAULT_FEE_BPS / 10_000,
	                 lookback: int = config.LOOKBACK_WINDOW,
	                 tsl_pct: float = config.DEFAULT_TSL_PCT / 100):
	        # Align prices/macro to the feature index, then switch everything to
	        # positional 0..n-1 indexing so that iloc lookups line up row by row.
	        self.feat_df = feat_df.reset_index(drop=True)
	        self.price_df = price_df.reindex(feat_df.index).reset_index(drop=True)
	        self.macro_df = macro_df.reindex(feat_df.index).ffill().reset_index(drop=True)
	        self.fee_pct = fee_pct
	        self.lookback = lookback
	        self.tsl_pct = tsl_pct
	        self.n_actions = config.N_ACTIONS
	        self.actions = config.ACTIONS

	        # An episode cannot start before a full lookback window is available.
	        self.start_idx = max(start_idx, lookback - 1)
	        # step() reads the price at current_idx after incrementing it, so the
	        # end marker must never point past the last row of data.
	        last_row = len(self.feat_df) - 1
	        self.end_idx = last_row if end_idx is None else min(end_idx, last_row)

	        self.n_features = feat_df.shape[1] * lookback  # flattened state size

	        self.reset()

	    # ── Gym-style interface ───────────────────────────────────────────────────

	    def reset(self) -> np.ndarray:
	        """Start a new episode in CASH with equity 1.0 and return the first state.

	        The starting row is drawn uniformly from the first half of
	        [start_idx, end_idx) so that successive episodes see different
	        sequences.
	        """
	        half_way = self.start_idx + (self.end_idx - self.start_idx) // 2
	        # randint's upper bound is exclusive and must exceed the lower bound.
	        upper = max(self.start_idx + 1, half_way)
	        self.current_idx = int(np.random.randint(self.start_idx, upper))
	        self.held_action = 0  # start in CASH
	        self.peak_equity = 1.0
	        self.equity = 1.0
	        self.is_stopped_out = False
	        return self._get_state()

	    def step(self, action: int):
	        """
	        Execute one step.

	        Returns (next_state, reward, done, info) where ``info`` holds
	        ``day_ret``, ``equity``, ``action_name`` and ``tsl_triggered``.
	        ``done`` becomes True once ``current_idx`` reaches ``end_idx``.
	        """
	        assert 0 <= action < self.n_actions, f"invalid action: {action!r}"

	        prev_idx = self.current_idx
	        self.current_idx += 1
	        done = (self.current_idx >= self.end_idx)

	        # ── Transaction cost on switch ────────────────────────────────────────
	        switched = (action != self.held_action)
	        t_cost = self.fee_pct if switched else 0.0
	        if switched:
	            self.held_action = action
	            self.peak_equity = self.equity  # reset peak on new position
	            # The stop-out flag belongs to the position that was just closed;
	            # without this it would stay True for the rest of the episode.
	            self.is_stopped_out = False

	        # ── Daily return of chosen action ─────────────────────────────────────
	        # The T-bill rate is needed both as the CASH return and as the
	        # benchmark for the excess return, so look it up once.
	        tbill_daily = self._get_tbill(prev_idx) / 252
	        if action == 0:  # CASH — earn T-bill
	            day_ret = tbill_daily
	        else:
	            day_ret = self._price_return(self.actions[action],
	                                         prev_idx, self.current_idx)

	        day_ret -= t_cost
	        self.equity *= (1.0 + day_ret)

	        # ── Trailing stop-loss check ──────────────────────────────────────────
	        if action != 0:
	            if self.equity > self.peak_equity:
	                self.peak_equity = self.equity
	            if self.equity < self.peak_equity * (1 - self.tsl_pct):
	                self.is_stopped_out = True

	        # ── Risk-adjusted reward ──────────────────────────────────────────────
	        excess_ret = day_ret - tbill_daily
	        vol_21d = self._get_vol(action, prev_idx)
	        # Clamp the divisor so a tiny volatility (CASH reports 0.005) cannot
	        # amplify the reward far beyond what the ETF actions receive.
	        vol_scale = float(np.clip(vol_21d, config.REWARD_VOL_MIN, config.REWARD_VOL_MAX))
	        reward = excess_ret / vol_scale
	        # Small bonus when an ETF (not CASH) beats T-bill — discourages the
	        # policy from collapsing into always holding CASH.
	        if action != 0 and excess_ret > 0:
	            reward *= config.REWARD_ETF_BONUS

	        next_state = self._get_state()
	        info = dict(day_ret=day_ret, equity=self.equity,
	                    action_name=self.actions[action],
	                    tsl_triggered=self.is_stopped_out)

	        return next_state, reward, done, info

	    @property
	    def observation_size(self) -> int:
	        """Length of the state vector: flattened window plus one-hot position."""
	        return self.n_features + self.n_actions

	    # ── Internal helpers ──────────────────────────────────────────────────────

	    def _price_return(self, etf, idx0: int, idx1: int) -> float:
	        """Simple return of ``etf`` between rows idx0 and idx1 (0.0 if unknown)."""
	        if etf not in self.price_df.columns:
	            return 0.0
	        prices = self.price_df[etf]
	        p0 = prices.iloc[idx0]
	        p1 = prices.iloc[idx1]
	        # Reindexing onto the feature index can leave gaps. A NaN return would
	        # turn equity (and every later reward) into NaN, so treat a missing or
	        # non-positive price as a flat day, like the missing-column case.
	        if pd.isna(p0) or pd.isna(p1) or p0 <= 0:
	            return 0.0
	        return (p1 / (p0 + 1e-9)) - 1.0

	    def _get_state(self) -> np.ndarray:
	        """Return flattened lookback window + one-hot current position."""
	        # Clamp at 0: a negative start would make iloc slice from the END of
	        # the frame instead of yielding the short window that is padded below.
	        start = max(0, self.current_idx - self.lookback + 1)
	        end = self.current_idx + 1
	        window = self.feat_df.iloc[start:end].values.astype(np.float32)
	        if len(window) < self.lookback:
	            # Not enough history: left-pad with zero rows up to lookback.
	            pad = np.zeros((self.lookback - len(window), self.feat_df.shape[1]),
	                           dtype=np.float32)
	            window = np.vstack([pad, window])
	        # Append one-hot current position so the agent knows what it is holding.
	        position = np.zeros(self.n_actions, dtype=np.float32)
	        position[self.held_action] = 1.0
	        return np.concatenate([window.flatten(), position])

	    def _get_tbill(self, idx: int) -> float:
	        """Annual T-bill rate at given index (fraction)."""
	        if "macro_TBILL_3M" in self.macro_df.columns:
	            val = self.macro_df["macro_TBILL_3M"].iloc[idx]
	            # pd.isna also copes with None / object values, unlike np.isnan.
	            if not pd.isna(val):
	                return float(val) / 100.0  # column is divided by 100 to get a fraction
	        return 0.036  # fallback 3.6%

	    def _get_vol(self, action: int, idx: int) -> float:
	        """21d annualised vol for scaling reward."""
	        if action == 0:
	            return 0.005  # CASH ~ zero vol
	        etf = self.actions[action]
	        vol_col = f"{etf}_Vol21d"
	        if vol_col in self.feat_df.columns:
	            val = self.feat_df[vol_col].iloc[idx]
	            if not pd.isna(val) and val > 0:
	                return float(val)
	        return 0.15  # fallback 15% vol


	# ── Train / Val / Test splitter ───────────────────────────────────────────────

	def make_splits(feat_df: pd.DataFrame,
	                price_df: pd.DataFrame,
	                macro_df: pd.DataFrame,
	                start_year: int,
	                fee_pct: float = config.DEFAULT_FEE_BPS / 10_000,
	                lookback: int = config.LOOKBACK_WINDOW):
	    """
	    Build the train, validation and test environments.

	    Rows of ``feat_df`` whose index year is at least ``start_year`` are cut
	    chronologically into three consecutive slices: the first
	    ``config.TRAIN_SPLIT`` fraction, the next ``config.VAL_SPLIT`` fraction,
	    and whatever remains (described as 80/10/10 by the original author; the
	    config values are not visible here). ``feat_df.index`` must expose a
	    ``.year`` attribute.

	    Returns
	    -------
	    (train_env, val_env, test_env) : three ETFTradingEnv instances
	    """
	    # Keep only rows from start_year onwards (copy so the caller's frame
	    # is never touched).
	    recent = feat_df[feat_df.index.year >= start_year].copy()
	    total_rows = len(recent)

	    train_stop = int(total_rows * config.TRAIN_SPLIT)
	    val_stop = train_stop + int(total_rows * config.VAL_SPLIT)

	    def build_env(chunk: pd.DataFrame) -> ETFTradingEnv:
	        # The full price/macro frames are passed through; the environment
	        # reindexes them onto the chunk's own index.
	        return ETFTradingEnv(chunk, price_df, macro_df,
	                             fee_pct=fee_pct, lookback=lookback)

	    # Construct in train -> val -> test order.
	    train_env = build_env(recent.iloc[:train_stop])
	    val_env = build_env(recent.iloc[train_stop:val_stop])
	    test_env = build_env(recent.iloc[val_stop:])
	    return train_env, val_env, test_env