"""
ml_config.py — All hyperparameters for the ML probability filter layer.
Edit here only; never hardcode values in other modules.
"""
from pathlib import Path
# ── PATHS ─────────────────────────────────────────────────────────────────────
# Every ML artifact lives in one directory that sits next to this module.
ML_DIR = Path(__file__).with_name("ml_artifacts")
MODEL_PATH = ML_DIR / "trade_filter.pkl"              # serialized trained model
THRESHOLD_PATH = ML_DIR / "threshold.json"            # tuned probability cutoff
FEATURE_IMP_PATH = ML_DIR / "feature_importance.csv"  # per-feature importances
LABEL_PATH = ML_DIR / "label_stats.json"              # labeling statistics
# ── LABELING ──────────────────────────────────────────────────────────────────
# Forward-looking labeling window: how many subsequent bars are scanned for a
# target or stop hit. On the 1H timeframe, 24 bars span one trading day —
# enough room for a 1:2 RR trade to resolve while keeping labels recent.
LABEL_FORWARD_BARS = 24

# Cost model baked into labels (each value applies per side of the trade).
TRADE_FEE_PCT = 0.0006   # taker fee: 0.06% per side
TRADE_SLIP_PCT = 0.0004  # slippage: 0.04% per side
ROUND_TRIP_COST = (TRADE_FEE_PCT + TRADE_SLIP_PCT) * 2  # entry + exit combined

# ATR-based exit geometry — must stay in sync with risk_engine.py.
STOP_MULT = 2.5  # stop distance = ATR * STOP_MULT
TARGET_RR = 2.0  # target distance = stop distance * TARGET_RR
# ── WALK-FORWARD ──────────────────────────────────────────────────────────────
# Sequential out-of-sample evaluation settings.
WF_N_SPLITS = 5         # how many walk-forward folds the history is cut into
WF_TRAIN_FRAC = 0.70    # share of each fold used to fit the model
WF_MIN_TRAIN_OBS = 500  # minimum rows required to train within a fold
# ── MODEL HYPERPARAMETERS ─────────────────────────────────────────────────────
# Expressed in LightGBM vocabulary; the HistGradientBoostingClassifier
# fallback maps the equivalent keys.
LGBM_PARAMS = {
    "n_estimators": 400,
    "learning_rate": 0.03,
    "max_depth": 5,               # kept shallow to limit overfitting
    "min_samples_leaf": 40,       # ~1% of a 4000-sample training set
    "l2_regularization": 2.0,     # ridge penalty
    "max_features": 0.70,         # fraction of features bagged per split
    "early_stopping_rounds": 30,
    "validation_fraction": 0.15,  # held out internally for early stopping
    "n_iter_no_change": 30,
    "random_state": 42,
    "verbose": 0,
}
# ── THRESHOLD OPTIMIZATION ────────────────────────────────────────────────────
# Metric maximized while sweeping candidate probability cutoffs.
# One of: "sharpe", "expectancy", "f1", "precision_recall".
THRESHOLD_OBJECTIVE = "expectancy"

# Sweep grid: THRESHOLD_STEPS evenly spaced cutoffs over the closed interval
# [THRESHOLD_MIN, THRESHOLD_MAX]. 91 points across [0.35, 0.80] yields a
# 0.005 granularity (linspace-style, both endpoints included).
# NOTE(review): the previous comment ("0.35, 0.36, ..., 0.80") implied a 0.01
# step, which would need 46 points — confirm which granularity is intended.
THRESHOLD_MIN = 0.35
THRESHOLD_MAX = 0.80
THRESHOLD_STEPS = 91

# ── INFERENCE ─────────────────────────────────────────────────────────────────
# Fallback cutoff used before a calibrated threshold.json exists.
DEFAULT_PROB_THRESHOLD = 0.55
# ── FEATURE ENGINEERING ───────────────────────────────────────────────────────
# Raw features from the rule engine fed into the model.
# Order here defines column order in the feature matrix — DO NOT CHANGE
# without retraining (a saved model expects columns in exactly this order).
FEATURE_COLUMNS = [
    # Trend / momentum indicators (ADX / directional movement family)
    "adx",
    "di_plus",
    "di_minus",
    "di_diff",   # engineered: di_plus - di_minus
    "di_ratio",  # engineered: di_plus / (di_plus + di_minus + 1e-9)
    # Volatility state
    "atr_pct",
    "vol_ratio",
    "vol_compressed",
    "vol_expanding",
    "vol_expanding_from_base",
    # Volume / order flow signals
    "absorption",
    "failed_breakout",
    "recent_failed_count",
    "obv_slope_norm",
    "delta_sign",
    "spike",
    "climax",
    # Price context relative to structure
    "dist_atr",
    "dist_atr_abs",  # engineered: abs(dist_atr)
    # Rule-engine scores (carry human priors into the model)
    "regime_confidence",
    "regime_score",
    "volume_score",
    "structure_score",
    "confidence_score",
    "total_score",
    # Interactions (multiplicative signal combinations)
    "adx_x_regime",  # engineered: adx * regime_score
    "vol_x_obv",     # engineered: vol_ratio * obv_slope_norm
    "score_x_conf",  # engineered: total_score * regime_confidence
]