import os
#%%writefile config.py
# config.py

# ─────────────────────────────────────────────────────────────────────────────
# Data
# ─────────────────────────────────────────────────────────────────────────────
import glob
DATA_DIR = "Data"
# NOTE: despite the singular name, DATA_FILE is a *list* of CSV paths —
# every "*.csv" directly inside DATA_DIR, in sorted order.
DATA_FILE = sorted(glob.glob(os.path.join(DATA_DIR, "*.csv")))
# No CSV found: fall back to the default NIFTY file so DATA_FILE is never
# empty. The path is built from DATA_DIR (rather than a hard-coded "Data/…"
# literal) so the fallback follows DATA_DIR if it is ever changed.
# This does not check that the fallback file actually exists.
if not DATA_FILE:
    DATA_FILE = [os.path.join(DATA_DIR, "NIFTY 50_30minute.csv")]

LOOKBACK_WINDOW: int = 512   # paper's optimal for LPatchTST (previously 400)
ORACLE_MAX_HOLD: int = 96
FORECAST_HORIZON: int = 96
ATR_PERIOD: int = 14         # ATR rolling window, shared by Oracle and backtest

# ─────────────────────────────────────────────────────────────────────────────
# Model Architecture
# ─────────────────────────────────────────────────────────────────────────────
D_MODEL: int = 96
N_HEADS: int = 4
N_LAYERS: int = 5
PATCH_LEN: int = 16
STRIDE: int = 12
AGGREGATION_MODE: str = "mixing"   # one of: "mixing", "cls", "mean"
INFERENCE_SMOOTHING: int = 3       # rolling window applied to raw predictions

# Patch-count formula for reference
# (presumably patch_len = PATCH_LEN and stride = STRIDE — confirm in the model):
#   num_patches = (seq_len - patch_len) // stride + 1

# ── Input Mode ───────────────────────────────────────────────────────────────
# Master switch read by the whole pipeline (data_loader, model, train).
# Allowed values: "tokens_only", "features_only", "combined".
INPUT_MODE: str = "tokens_only"
# When True, ~150 TA-Lib features are added in features/combined mode.
USE_TALIB: bool = False

# ── LPatchTST Architecture ───────────────────────────────────────────────────
# True selects LPatchTST; False selects vanilla PatchTST.
USE_LPATCHTST: bool = True
# 1 layer is sufficient; use 2 for deeper denoising.
LSTM_LAYERS: int = 1

# ─────────────────────────────────────────────────────────────────────────────
# Oracle
# ─────────────────────────────────────────────────────────────────────────────
# Cost and scoring parameters for the Oracle.
# NOTE(review): units are not stated in this file (FEE_PER_SIDE / SLIPPAGE look
# like fractions of price) — confirm where they are consumed.
FEE_PER_SIDE: float = 0.001
SLIPPAGE: float = 0.0005
ATR_MULT: float = 3.8
SATURATION_FACTOR: float = 2.5
MAE_PENALTY: float = 0.20
MIN_TRADES_TUNE: int = 30

# ─────────────────────────────────────────────────────────────────────────────
# Training
# ─────────────────────────────────────────────────────────────────────────────
BATCH_SIZE: int = 32
LEARNING_RATE: float = 1e-5
EPOCHS: int = 100
WEIGHT_DECAY: float = 0.1
DROPOUT: float = 0.3
GRAD_CLIP: float = 2.0
NUM_WORKERS: int = 4       # number of parallel data-prefetch workers
PREFETCH_FACTOR: int = 2   # batches prefetched by each worker
USE_AMP: bool = True

# ─────────────────────────────────────────────────────────────────────────────
# Split Ratios
# ─────────────────────────────────────────────────────────────────────────────
# Fractions of the data assigned to each split; the three values sum to 1.0.
TRAIN_RATIO: float = 0.70
VAL_RATIO: float = 0.15
TEST_RATIO: float = 0.15

# ── Robust Clipping ──────────────────────────────────────────────────────────
# Clip bounds per column, expressed in IQR units (not standard deviations).
# Values were calibrated with clip_audit.py on the training data:
#
#   vs_factor_span260 : p99.5 = 1.493 IQR-units → a bound of 2.0 clips nothing.
#   feat_vol_squeeze  : p99.5 = 2.821 IQR-units → a bound of 2.5 clips ~0.9%.
#
ROBUST_CLIP_BOUNDS: dict[str, float] = {
    # Matched as a prefix (e.g. covers vs_factor_span260).
    "vs_factor_span": 2.0,
    "feat_vol_squeeze": 2.5,
}
# Bound applied to robust columns that have no entry above.
ROBUST_CLIP_BOUND_DEFAULT: float = 3.0

# ─────────────────────────────────────────────────────────────────────────────
# Feature Engineering  ←→  features.py / FeatureConfig
#
# These are the ONLY config keys that feed into FeatureEngineer.
# train.py._make_feature_config() maps every key here to a FeatureConfig field.
# Changing any value here automatically changes what columns are produced,
# what columns data_loader.py routes to each scaler bucket, and what
# input_dim is passed to the model — no code edits required anywhere.
# ─────────────────────────────────────────────────────────────────────────────

# EWMA volatility span, in bars: sets the σ_t used by ret_norm_* and vs_factor.
# A larger span adapts to regime changes more slowly.
# Maps to FeatureConfig.ewma_span.
FE_VOL_LONG_PERIOD: int = 260

# Lookback windows (trading bars) for the multi-horizon normalised returns.
# One ret_norm_* column is produced per horizon (e.g. ret_norm_1d for
# horizon 1). NOTE(review): confirm the exact column suffix in features.py.
# All are vol-scaled (≈ z-score) → NO_SCALE bucket in data_loader.py.
FE_RETURN_HORIZONS: list[int] = [1, 3, 6, 13, 26, 65, 130, 260]

# (short_span, long_span) pairs for the multi-scale MACD.
# One column per pair; with the pairs below that is presumably
# macd_8_24, macd_26_78, macd_52_156 (naming per features.py — confirm).
# All 3-step normalised (std ≈ 1.05) → NO_SCALE bucket in data_loader.py.
FE_MACD_PAIRS: list[tuple[int, int]] = [(8, 24), (26, 78), (52, 156)]

# MACD step 2: rolling price-std window used for per-instrument normalisation.
# The paper's default is 63 bars. Maps to FeatureConfig.macd_price_std_window.
FE_MACD_PRICE_STD_WIN: int = 260

# MACD step 3: rolling regime-std window used for cross-sectional normalisation.
# The paper's default is 252 bars. Maps to FeatureConfig.macd_signal_std_window.
FE_MACD_SIGNAL_STD_WIN: int = 3_276

# Clip bound for Oracle targets: normalised return targets are clipped to
# ±FE_TARGET_CLIP before they are used as training labels.
# The paper's default is 20.0.
FE_TARGET_CLIP: float = 20.0

# ─────────────────────────────────────────────────────────────────────────────
# OHLC Feature Engineering  (Features 6–13)
# Maps to new FeatureConfig fields in features.py
# ─────────────────────────────────────────────────────────────────────────────

# Lookback (bars) for the Kaufman Efficiency Ratio and RSI.
# Maps to momentum_period.
FE_MOMENTUM_PERIOD: int = 26

# Dedicated RSI period; set it to None to reuse FE_MOMENTUM_PERIOD instead.
FE_RSI_PERIOD = 14

# Rolling window for the directional volatility asymmetry.
# Maps to vol_asym_window.
FE_VOL_ASYM_WINDOW: int = 65

# Smoothing window for the Internal Close Position. Maps to icp_period.
FE_ICP_PERIOD: int = 13

# Donchian-channel lookback for local structure (~5 days on 30-min NIFTY).
FE_LOCAL_STRUCTURE_BARS: int = 65

# Fast and slow ATR windows for the vol-squeeze ratio.
# Map to vol_squeeze_fast / vol_squeeze_slow.
FE_VOL_SQUEEZE_FAST: int = 5
FE_VOL_SQUEEZE_SLOW: int = 26

# Session time-of-day encoding (NIFTY, 30-minute bars).
FE_SESSION_OPEN: str = "09:15"
FE_SESSION_CLOSE: str = "15:30"
FE_SESSION_TZ: str = "Asia/Kolkata"
FE_ADD_SESSION: bool = True

# ─────────────────────────────────────────────────────────────────────────────
# Sampler
# ─────────────────────────────────────────────────────────────────────────────
# Samples whose |score| is below this threshold are assigned to the Flat class
# in the WeightedRandomSampler.
SAMPLER_THRESHOLD: float = 0.10

# ─────────────────────────────────────────────────────────────────────────────
# Tokenizer (Kronos Hierarchical — Pre-trained Specs)
# ─────────────────────────────────────────────────────────────────────────────
TOKENIZER_D_IN: int = 6
TOKENIZER_D_MODEL: int = 256
TOKENIZER_N_HEADS: int = 4
TOKENIZER_FF_DIM: int = 512
TOKENIZER_N_ENC: int = 4
TOKENIZER_N_DEC: int = 4
TOKENIZER_S1_BITS: int = 10
TOKENIZER_S2_BITS: int = 10
TOKENIZER_GROUP_SIZE: int = 4
# Vocabulary size is 2 to the power of the total bit count (S1 + S2).
VOCAB_SIZE: int = 1 << (TOKENIZER_S1_BITS + TOKENIZER_S2_BITS)

# Tokenizer hyperparameters (kept here for training/loss consistency).
TOKENIZER_BETA: float = 0.05
TOKENIZER_GAMMA0: float = 1.0
TOKENIZER_GAMMA: float = 1.1
TOKENIZER_ZETA: float = 0.05
TOKENIZER_ATTN_DROPOUT: float = 0.0
TOKENIZER_FFN_DROPOUT: float = 0.0
TOKENIZER_RESID_DROPOUT: float = 0.0

TOKENIZER_CHUNK_SIZE: int = 2_048   # reduced for the larger d_model
TOKENIZER_PATH: str = "model.safetensors"

# ─────────────────────────────────────────────────────────────────────────────
# Walk-Forward Validation
# ─────────────────────────────────────────────────────────────────────────────
# Walk-forward validation settings. The *_BARS values are counts of bars.
WFV_ENABLED: bool = True
WFV_TRAIN_BARS: int = 15_000
WFV_VAL_BARS: int = 3_000
WFV_STEP_BARS: int = 3_000
WFV_MIN_FOLDS: int = 3
WFV_PATIENCE: int = 15

# ─────────────────────────────────────────────────────────────────────────────
# Runtime — set dynamically, do not edit
# ─────────────────────────────────────────────────────────────────────────────
# Populated by train.py / evaluate.py after feature columns are resolved.
# Value = len(feature_cols) when USE_TOKENIZER=False, else 1.
# NOTE(review): no USE_TOKENIZER flag is defined in this file; the tokenizer
# path is presumably selected through INPUT_MODE now — confirm and update the
# line above accordingly.
# None is the placeholder until the value is assigned at runtime.
NUM_FEATURES = None