GoshawkVortexAI committed on
Commit
0a82a80
·
verified ·
1 Parent(s): dea1e1a

Update ml_config.py

Browse files
Files changed (1) hide show
  1. ml_config.py +102 -0
ml_config.py CHANGED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
ml_config.py — All hyperparameters for the ML probability filter layer.
Edit here only; never hardcode values in other modules.
"""
from pathlib import Path

# ── PATHS ─────────────────────────────────────────────────────────────────────
# All model artifacts live next to this file so the package is relocatable.
ML_DIR = Path(__file__).parent / "ml_artifacts"
MODEL_PATH = ML_DIR / "trade_filter.pkl"
THRESHOLD_PATH = ML_DIR / "threshold.json"
FEATURE_IMP_PATH = ML_DIR / "feature_importance.csv"
LABEL_PATH = ML_DIR / "label_stats.json"

# ── LABELING ──────────────────────────────────────────────────────────────────
# How many forward bars to check for target/stop hit.
# 1H timeframe → 24 bars = 1 trading day lookahead. Good balance of
# recency vs enough bars for a 1:2 RR to play out.
LABEL_FORWARD_BARS = 24

# Realistic costs: 0.06% taker fee each side + 0.04% slippage each side
TRADE_FEE_PCT = 0.0006   # 0.06% taker fee per side
TRADE_SLIP_PCT = 0.0004  # 0.04% slippage per side
ROUND_TRIP_COST = (TRADE_FEE_PCT + TRADE_SLIP_PCT) * 2  # both sides

# ATR multipliers matching risk_engine.py
STOP_MULT = 2.5
TARGET_RR = 2.0  # target = stop_distance * TARGET_RR

# ── WALK-FORWARD ──────────────────────────────────────────────────────────────
WF_N_SPLITS = 5         # number of walk-forward folds
WF_TRAIN_FRAC = 0.70    # fraction of each fold used for training
WF_MIN_TRAIN_OBS = 500  # minimum training observations per fold

# ── MODEL HYPERPARAMETERS ─────────────────────────────────────────────────────
# These target LightGBM params; HistGradientBoostingClassifier maps them.
LGBM_PARAMS = dict(
    n_estimators=400,
    learning_rate=0.03,
    max_depth=5,                # shallow: reduces overfitting
    min_samples_leaf=40,        # minimum leaf size: ~1% of 4000 samples
    l2_regularization=2.0,      # L2 ridge penalty
    max_features=0.70,          # feature bagging per split
    early_stopping_rounds=30,
    validation_fraction=0.15,
    n_iter_no_change=30,
    random_state=42,
    verbose=0,
)

# ── THRESHOLD OPTIMIZATION ────────────────────────────────────────────────────
# Objective to maximize when searching for the optimal probability cutoff.
# Options: "sharpe", "expectancy", "f1", "precision_recall"
THRESHOLD_OBJECTIVE = "expectancy"

# Search grid for threshold sweep.
# NOTE: 91 evenly spaced points over [0.35, 0.80] means a step of 0.005
# (0.350, 0.355, ..., 0.800) — not 0.01 as a quick read might suggest.
THRESHOLD_MIN = 0.35
THRESHOLD_MAX = 0.80
THRESHOLD_STEPS = 91  # 0.350, 0.355, ..., 0.800 (step 0.005)

# ── INFERENCE ─────────────────────────────────────────────────────────────────
DEFAULT_PROB_THRESHOLD = 0.55  # conservative default before calibration

# ── FEATURE ENGINEERING ───────────────────────────────────────────────────────
# Raw features from the rule engine fed into the model.
# Order here defines column order in the feature matrix — DO NOT CHANGE
# without retraining.
FEATURE_COLUMNS = [
    # Trend / momentum
    "adx",
    "di_plus",
    "di_minus",
    "di_diff",       # engineered: di_plus - di_minus
    "di_ratio",      # engineered: di_plus / (di_plus + di_minus + 1e-9)
    # Volatility
    "atr_pct",
    "vol_ratio",
    "vol_compressed",
    "vol_expanding",
    "vol_expanding_from_base",
    # Volume / order flow
    "absorption",
    "failed_breakout",
    "recent_failed_count",
    "obv_slope_norm",
    "delta_sign",
    "spike",
    "climax",
    # Price context
    "dist_atr",
    "dist_atr_abs",  # engineered: abs(dist_atr)
    # Rule-engine scores (carry human priors into the model)
    "regime_confidence",
    "regime_score",
    "volume_score",
    "structure_score",
    "confidence_score",
    "total_score",
    # Interactions (multiplicative signal combinations)
    "adx_x_regime",  # engineered: adx * regime_score
    "vol_x_obv",     # engineered: vol_ratio * obv_slope_norm
    "score_x_conf",  # engineered: total_score * regime_confidence
]