Upload config.py with huggingface_hub

f6105df verified 21 days ago

11.6 kB

	import os
	#%%writefile config.py
	# config.py

	# ─────────────────────────────────────────────────────────────────────────────
	# Data
	# ─────────────────────────────────────────────────────────────────────────────
	import glob

	# Directory scanned for input CSV files (relative to the working directory).
	DATA_DIR = "Data"
	# Every CSV directly under DATA_DIR. glob.glob gives no ordering guarantee,
	# so the matches are sorted to make the file order deterministic.
	DATA_FILE = sorted(glob.glob(os.path.join(DATA_DIR, "*.csv")))
	# Nothing matched: fall back to the single default file so DATA_FILE is never
	# an empty list. The fallback path is NOT checked for existence here.
	if not DATA_FILE:
	    DATA_FILE = [f"{DATA_DIR}/NIFTY 50_30minute.csv"]

	# Paper's optimal for LPatchTST (was 400).
	LOOKBACK_WINDOW: int = 512
	ORACLE_MAX_HOLD: int = 96
	FORECAST_HORIZON: int = 96
	# Rolling window for ATR (Oracle + backtest).
	ATR_PERIOD: int = 14

	# ─────────────────────────────────────────────────────────────────────────────
	# Model Architecture
	# ─────────────────────────────────────────────────────────────────────────────
	D_MODEL: int = 96
	N_HEADS: int = 4
	N_LAYERS: int = 5
	PATCH_LEN: int = 16
	STRIDE: int = 12
	# One of "mixing" | "cls" | "mean".
	AGGREGATION_MODE: str = "mixing"
	# Rolling window applied to raw predictions.
	INFERENCE_SMOOTHING: int = 3

	# num_patches = (seq_len - patch_len) // stride + 1

	# ── Input Mode ───────────────────────────────────────────────────────────────
	# MASTER SWITCH: "tokens_only" | "features_only" | "combined"
	# The entire pipeline (data_loader, model, train) responds to this flag.
	INPUT_MODE: str = "tokens_only"
	# If True, adds ~150 TA-Lib features when in features/combined mode.
	USE_TALIB: bool = False

	# ── LPatchTST Architecture ───────────────────────────────────────────────────
	# True = LPatchTST, False = vanilla PatchTST.
	USE_LPATCHTST: bool = True
	# 1 is sufficient; set 2 for deeper denoising.
	LSTM_LAYERS: int = 1

	# ─────────────────────────────────────────────────────────────────────────────
	# Oracle
	# ─────────────────────────────────────────────────────────────────────────────
	# NOTE(review): FEE_PER_SIDE and SLIPPAGE look like fractional costs
	# (0.1% and 0.05%) — confirm the units against the Oracle code.
	FEE_PER_SIDE: float = 0.001
	SLIPPAGE: float = 0.0005
	ATR_MULT: float = 3.8
	SATURATION_FACTOR: float = 2.5
	MAE_PENALTY: float = 0.2
	MIN_TRADES_TUNE: int = 30

	# ─────────────────────────────────────────────────────────────────────────────
	# Training
	# ─────────────────────────────────────────────────────────────────────────────
	BATCH_SIZE: int = 32
	LEARNING_RATE: float = 1e-5
	EPOCHS: int = 100
	WEIGHT_DECAY: float = 0.1
	DROPOUT: float = 0.3
	GRAD_CLIP: float = 2.0
	# Parallel data prefetch workers.
	NUM_WORKERS: int = 4
	# Batches prefetched per worker.
	PREFETCH_FACTOR: int = 2
	USE_AMP: bool = True

	# ─────────────────────────────────────────────────────────────────────────────
	# Split Ratios
	# ─────────────────────────────────────────────────────────────────────────────
	# Train / validation / test fractions; the three values add up to 1.0.
	TRAIN_RATIO: float = 0.7
	VAL_RATIO: float = 0.15
	TEST_RATIO: float = 0.15

	# ── Robust Clipping ──────────────────────────────────────────────────────────
	# Per-column clip bounds in IQR units (NOT std devs).
	# Calibrated via clip_audit.py on training data.
	#
	# vs_factor_span260 : p99.5 = 1.493 IQR-units → bound 2.0 clips nothing.
	# feat_vol_squeeze : p99.5 = 2.821 IQR-units → bound 2.5 clips ~0.9%.
	#
	# Clip bound per robust-scaled column, keyed by column name.
	ROBUST_CLIP_BOUNDS: dict[str, float] = {
	    # Prefix match.
	    "vs_factor_span": 2.0,
	    "feat_vol_squeeze": 2.5,
	}
	# Fallback for unknown robust columns.
	ROBUST_CLIP_BOUND_DEFAULT: float = 3.0

	# ─────────────────────────────────────────────────────────────────────────────
	# Feature Engineering ←→ features.py / FeatureConfig
	#
	# These are the ONLY config keys that feed into FeatureEngineer.
	# train.py._make_feature_config() maps every key here to a FeatureConfig field.
	# Changing any value here automatically changes what columns are produced,
	# what columns data_loader.py routes to each scaler bucket, and what
	# input_dim is passed to the model — no code edits required anywhere.
	# ─────────────────────────────────────────────────────────────────────────────

	# EWMA volatility span (bars). Controls σ_t used by ret_norm_* and vs_factor.
	# Larger span = slower regime adaptation. Maps to FeatureConfig.ewma_span.
	FE_VOL_LONG_PERIOD: int = 260

	# Multi-horizon normalised return lookback windows (trading bars).
	# Presumably one ret_norm_* column is produced per horizon listed here.
	# NOTE(review): the example names given earlier (ret_norm_1d, ret_norm_5d,
	# ret_norm_21d) do not correspond to the horizons below — confirm the actual
	# column naming in features.py.
	# All are vol-scaled (≈ z-score) → NO_SCALE bucket in data_loader.py.
	FE_RETURN_HORIZONS: list[int] = [1, 3, 6, 13, 26, 65, 130, 260]

	# Multi-scale MACD (short_span, long_span) pairs.
	# NOTE(review): the columns were documented as macd_8_24, macd_16_48,
	# macd_32_96, which only matches the first pair below; names presumably
	# follow macd_<short>_<long> — confirm in features.py.
	# All 3-step normalised (std ≈ 1.05) → NO_SCALE bucket in data_loader.py.
	FE_MACD_PAIRS: list[tuple[int, int]] = [
	    (8, 24),
	    (26, 78),
	    (52, 156),
	]

	# MACD Step-2: rolling price std window for per-instrument normalisation.
	# Paper default: 63 bars (this config uses 260).
	# Maps to FeatureConfig.macd_price_std_window.
	FE_MACD_PRICE_STD_WIN: int = 260

	# MACD Step-3: rolling regime std window for cross-sectional normalisation.
	# Paper default: 252 bars (this config uses 3276).
	# Maps to FeatureConfig.macd_signal_std_window.
	FE_MACD_SIGNAL_STD_WIN: int = 3276

	# Oracle target clip bound. Normalised return targets clipped to ±FE_TARGET_CLIP
	# before being used as training labels. Paper default: 20.0.
	FE_TARGET_CLIP: float = 20.0

	# ─────────────────────────────────────────────────────────────────────────────
	# OHLC Feature Engineering (Features 6–13)
	# Maps to new FeatureConfig fields in features.py
	# ─────────────────────────────────────────────────────────────────────────────

	# Kaufman Efficiency Ratio + RSI lookback (bars). Maps to momentum_period.
	FE_MOMENTUM_PERIOD: int = 26

	# Separate RSI period. Set to None to share FE_MOMENTUM_PERIOD.
	# Left unannotated because None is an accepted value.
	FE_RSI_PERIOD = 14

	# Rolling window for directional vol asymmetry. Maps to vol_asym_window.
	FE_VOL_ASYM_WINDOW: int = 65

	# Smoothing window for Internal Close Position. Maps to icp_period.
	FE_ICP_PERIOD: int = 13

	# Donchian channel lookback for local structure. ~5 days on 30-min NIFTY.
	FE_LOCAL_STRUCTURE_BARS: int = 65

	# Fast/slow ATR windows for vol squeeze ratio. Maps to vol_squeeze_fast/slow.
	FE_VOL_SQUEEZE_FAST: int = 5
	FE_VOL_SQUEEZE_SLOW: int = 26

	# Session time-of-day encoding (NIFTY 30-min).
	FE_SESSION_OPEN: str = "09:15"
	FE_SESSION_CLOSE: str = "15:30"
	FE_SESSION_TZ: str = "Asia/Kolkata"
	FE_ADD_SESSION: bool = True

	# ─────────────────────────────────────────────────────────────────────────────
	# Sampler
	# ─────────────────────────────────────────────────────────────────────────────
	# |score| below this threshold → Flat class in WeightedRandomSampler.
	SAMPLER_THRESHOLD: float = 0.1

	# ─────────────────────────────────────────────────────────────────────────────
	# Tokenizer (Kronos Hierarchical — Pre-trained Specs)
	# ─────────────────────────────────────────────────────────────────────────────
	TOKENIZER_D_IN: int = 6
	TOKENIZER_D_MODEL: int = 256
	TOKENIZER_N_HEADS: int = 4
	TOKENIZER_FF_DIM: int = 512
	TOKENIZER_N_ENC: int = 4
	TOKENIZER_N_DEC: int = 4
	TOKENIZER_S1_BITS: int = 10
	TOKENIZER_S2_BITS: int = 10
	TOKENIZER_GROUP_SIZE: int = 4
	# 2 raised to the combined S1 + S2 bit width (1_048_576 with 10 + 10 bits).
	VOCAB_SIZE: int = 1 << (TOKENIZER_S1_BITS + TOKENIZER_S2_BITS)

	# Tokenizer Hyperparameters (for training/loss consistency)
	TOKENIZER_BETA: float = 0.05
	TOKENIZER_GAMMA0: float = 1.0
	TOKENIZER_GAMMA: float = 1.1
	TOKENIZER_ZETA: float = 0.05
	TOKENIZER_ATTN_DROPOUT: float = 0.0
	TOKENIZER_FFN_DROPOUT: float = 0.0
	TOKENIZER_RESID_DROPOUT: float = 0.0

	# Reduced for larger d_model.
	TOKENIZER_CHUNK_SIZE: int = 2048
	TOKENIZER_PATH: str = "model.safetensors"

	# ─────────────────────────────────────────────────────────────────────────────
	# Walk-Forward Validation
	# ─────────────────────────────────────────────────────────────────────────────
	WFV_ENABLED: bool = True
	# Window sizes below are expressed in bars, per their *_BARS names.
	WFV_TRAIN_BARS: int = 15_000
	WFV_VAL_BARS: int = 3_000
	WFV_STEP_BARS: int = 3_000
	WFV_MIN_FOLDS: int = 3
	WFV_PATIENCE: int = 15

	# ─────────────────────────────────────────────────────────────────────────────
	# Runtime — set dynamically, do not edit
	# ─────────────────────────────────────────────────────────────────────────────
	# Populated by train.py / evaluate.py after feature columns are resolved.
	# Value = len(feature_cols) when USE_TOKENIZER=False, else 1.
	# NOTE(review): no USE_TOKENIZER flag is defined in this config — INPUT_MODE
	# is presumably the switch meant here; confirm against train.py.
	# Holds None as a placeholder until one of those scripts assigns it.
	NUM_FEATURES = None