"""
ml_config.py — All hyperparameters for the ML probability filter layer.
Edit here only; never hardcode values in other modules.
"""
from pathlib import Path

# ── PATHS ─────────────────────────────────────────────────────────────────────
# All artifacts live next to this file so the package is relocatable.
ML_DIR = Path(__file__).parent / "ml_artifacts"
MODEL_PATH = ML_DIR / "trade_filter.pkl"          # serialized trained model
THRESHOLD_PATH = ML_DIR / "threshold.json"        # optimized probability cutoff
FEATURE_IMP_PATH = ML_DIR / "feature_importance.csv"
LABEL_PATH = ML_DIR / "label_stats.json"          # stats from the labeling run
# ── LABELING ──────────────────────────────────────────────────────────────────
# How many forward bars to check for target/stop hit.
# 1H timeframe → 24 bars = 1 trading day lookahead. Good balance of
# recency vs enough bars for a 1:2 RR to play out.
LABEL_FORWARD_BARS = 24

# Realistic costs: 0.06% taker fee each side + 0.04% slippage each side.
TRADE_FEE_PCT = 0.0006   # 0.06% taker fee per side
TRADE_SLIP_PCT = 0.0004  # 0.04% slippage per side
ROUND_TRIP_COST = (TRADE_FEE_PCT + TRADE_SLIP_PCT) * 2  # entry + exit combined

# ATR multipliers matching risk_engine.py — keep in sync with that module.
STOP_MULT = 2.5   # stop distance = ATR * STOP_MULT
TARGET_RR = 2.0   # target = stop_distance * TARGET_RR
# ── WALK-FORWARD ──────────────────────────────────────────────────────────────
# Walk-forward validation: the data is split into sequential folds so the
# model is always evaluated on data strictly after its training window.
WF_N_SPLITS = 5         # number of walk-forward folds
WF_TRAIN_FRAC = 0.70    # fraction of each fold used for training
WF_MIN_TRAIN_OBS = 500  # minimum training observations per fold
# ── MODEL HYPERPARAMETERS ─────────────────────────────────────────────────────
# These target LightGBM params; HistGradientBoostingClassifier maps them.
# NOTE(review): the keys deliberately mix LightGBM naming (n_estimators,
# early_stopping_rounds) with sklearn-HGB naming (min_samples_leaf,
# l2_regularization, validation_fraction, n_iter_no_change) — the trainer
# module is expected to translate whichever subset its backend understands.
LGBM_PARAMS = dict(
    n_estimators          = 400,
    learning_rate         = 0.03,
    max_depth             = 5,     # shallow: reduces overfitting
    min_samples_leaf      = 40,    # minimum leaf size: ~1% of 4000 samples
    l2_regularization     = 2.0,   # L2 ridge penalty
    max_features          = 0.70,  # feature bagging per split
    early_stopping_rounds = 30,
    validation_fraction   = 0.15,
    n_iter_no_change      = 30,
    random_state          = 42,
    verbose               = 0,
)
# ── THRESHOLD OPTIMIZATION ────────────────────────────────────────────────────
# Objective to maximize when searching for the optimal probability cutoff.
# Options: "sharpe", "expectancy", "f1", "precision_recall"
THRESHOLD_OBJECTIVE = "expectancy"

# Search grid for threshold sweep.
# 91 evenly spaced points over [0.35, 0.80] → 0.005 increments
# (0.350, 0.355, ..., 0.800). The original comment claimed 0.01 steps,
# which would be 46 points; the value 91 is kept, the comment corrected.
THRESHOLD_MIN = 0.35
THRESHOLD_MAX = 0.80
THRESHOLD_STEPS = 91

# ── INFERENCE ─────────────────────────────────────────────────────────────────
DEFAULT_PROB_THRESHOLD = 0.55  # conservative default before calibration
# ── FEATURE ENGINEERING ───────────────────────────────────────────────────────
# Raw features from the rule engine fed into the model.
# Order here defines column order in the feature matrix — DO NOT CHANGE
# without retraining.
FEATURE_COLUMNS = [
    # Trend / momentum
    "adx",
    "di_plus",
    "di_minus",
    "di_diff",        # engineered: di_plus - di_minus
    "di_ratio",       # engineered: di_plus / (di_plus + di_minus + 1e-9)
    # Volatility
    "atr_pct",
    "vol_ratio",
    "vol_compressed",
    "vol_expanding",
    "vol_expanding_from_base",
    # Volume / order flow
    "absorption",
    "failed_breakout",
    "recent_failed_count",
    "obv_slope_norm",
    "delta_sign",
    "spike",
    "climax",
    # Price context
    "dist_atr",
    "dist_atr_abs",   # engineered: abs(dist_atr)
    # Rule-engine scores (carry human priors into the model)
    "regime_confidence",
    "regime_score",
    "volume_score",
    "structure_score",
    "confidence_score",
    "total_score",
    # Interactions (multiplicative signal combinations)
    "adx_x_regime",   # engineered: adx * regime_score
    "vol_x_obv",      # engineered: vol_ratio * obv_slope_norm
    "score_x_conf",   # engineered: total_score * regime_confidence
]