# Goshawk_Hedge_Pro / ml_config.py
# (source: GoshawkVortexAI — commit 0a82a80, "Update ml_config.py")
"""
ml_config.py β€” All hyperparameters for the ML probability filter layer.
Edit here only; never hardcode values in other modules.
"""
from pathlib import Path
# ── PATHS ─────────────────────────────────────────────────────────────────────
# Every ML artifact lives next to this module, under ml_artifacts/.
_MODULE_DIR = Path(__file__).parent
ML_DIR = _MODULE_DIR / "ml_artifacts"
MODEL_PATH = ML_DIR / "trade_filter.pkl"              # fitted classifier (pickle)
THRESHOLD_PATH = ML_DIR / "threshold.json"            # calibrated probability cutoff
FEATURE_IMP_PATH = ML_DIR / "feature_importance.csv"  # per-feature importances
LABEL_PATH = ML_DIR / "label_stats.json"              # label-distribution summary
# ── LABELING ──────────────────────────────────────────────────────────────────
# Forward-looking window (in bars) scanned for a target/stop hit.
# On the 1H timeframe, 24 bars = one trading day of lookahead — enough room
# for a 1:2 RR trade to resolve while keeping labels recent.
LABEL_FORWARD_BARS = 24

# Realistic execution costs, quoted per side.
TRADE_FEE_PCT = 0.0006   # 0.06% taker fee on each side
TRADE_SLIP_PCT = 0.0004  # 0.04% slippage on each side
ROUND_TRIP_COST = 2 * (TRADE_FEE_PCT + TRADE_SLIP_PCT)  # entry + exit combined

# ATR-based exit geometry — keep in sync with risk_engine.py.
STOP_MULT = 2.5  # stop distance = ATR * STOP_MULT
TARGET_RR = 2.0  # target distance = stop distance * TARGET_RR
# ── WALK-FORWARD ──────────────────────────────────────────────────────────────
# Chronological cross-validation layout used for model evaluation.
WF_N_SPLITS = 5         # how many walk-forward folds to run
WF_TRAIN_FRAC = 0.70    # share of each fold reserved for training
WF_MIN_TRAIN_OBS = 500  # refuse to train a fold on fewer observations
# ── MODEL HYPERPARAMETERS ─────────────────────────────────────────────────────
# Written in LightGBM vocabulary; per the original note, the trainer maps
# these onto HistGradientBoostingClassifier equivalents.
LGBM_PARAMS = {
    "n_estimators": 400,
    "learning_rate": 0.03,
    "max_depth": 5,               # shallow trees curb overfitting
    "min_samples_leaf": 40,       # ~1% of a 4000-sample training set
    "l2_regularization": 2.0,     # ridge penalty
    "max_features": 0.70,         # fraction of features sampled per split
    "early_stopping_rounds": 30,  # LightGBM-style early-stopping patience
    "validation_fraction": 0.15,  # holdout share for early stopping
    "n_iter_no_change": 30,       # sklearn-style early-stopping patience
    "random_state": 42,
    "verbose": 0,
}
# ── THRESHOLD OPTIMIZATION ────────────────────────────────────────────────────
# Objective to maximize when searching for the optimal probability cutoff.
# Options: "sharpe", "expectancy", "f1", "precision_recall"
THRESHOLD_OBJECTIVE = "expectancy"
# Search grid for threshold sweep.
# NOTE: 91 evenly spaced points over [0.35, 0.80] is a 0.005 step
# (0.350, 0.355, ..., 0.800); the previous comment wrongly claimed 0.01
# increments — a 0.01 grid over this range would have only 46 points.
THRESHOLD_MIN = 0.35
THRESHOLD_MAX = 0.80
THRESHOLD_STEPS = 91  # step = (MAX - MIN) / (STEPS - 1) = 0.005
# ── INFERENCE ─────────────────────────────────────────────────────────────────
# Fallback probability cutoff applied until a calibrated threshold exists;
# deliberately conservative.
DEFAULT_PROB_THRESHOLD = 0.55
# ── FEATURE ENGINEERING ───────────────────────────────────────────────────────
# Raw features from the rule engine fed into the model, grouped by theme.
# The concatenation order below fixes the column order of the feature
# matrix — DO NOT CHANGE without retraining.
_TREND_MOMENTUM = [
    "adx",
    "di_plus",
    "di_minus",
    "di_diff",   # engineered: di_plus - di_minus
    "di_ratio",  # engineered: di_plus / (di_plus + di_minus + 1e-9)
]
_VOLATILITY = [
    "atr_pct",
    "vol_ratio",
    "vol_compressed",
    "vol_expanding",
    "vol_expanding_from_base",
]
_VOLUME_ORDER_FLOW = [
    "absorption",
    "failed_breakout",
    "recent_failed_count",
    "obv_slope_norm",
    "delta_sign",
    "spike",
    "climax",
]
_PRICE_CONTEXT = [
    "dist_atr",
    "dist_atr_abs",  # engineered: abs(dist_atr)
]
# Rule-engine scores carry human priors into the model.
_RULE_SCORES = [
    "regime_confidence",
    "regime_score",
    "volume_score",
    "structure_score",
    "confidence_score",
    "total_score",
]
# Multiplicative signal combinations.
_INTERACTIONS = [
    "adx_x_regime",  # engineered: adx * regime_score
    "vol_x_obv",     # engineered: vol_ratio * obv_slope_norm
    "score_x_conf",  # engineered: total_score * regime_confidence
]
FEATURE_COLUMNS = (
    _TREND_MOMENTUM
    + _VOLATILITY
    + _VOLUME_ORDER_FLOW
    + _PRICE_CONTEXT
    + _RULE_SCORES
    + _INTERACTIONS
)