Spaces:
Sleeping
Sleeping
Update ml_config.py
Browse files- ml_config.py +102 -0
ml_config.py
CHANGED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
ml_config.py – All hyperparameters for the ML probability filter layer.
Edit here only; never hardcode values in other modules.
"""
from pathlib import Path

# ── PATHS ──────────────────────────────────────────────────────────────────────
# All trained artifacts live beside this file in ml_artifacts/.
ML_DIR = Path(__file__).parent / "ml_artifacts"
MODEL_PATH = ML_DIR / "trade_filter.pkl"
THRESHOLD_PATH = ML_DIR / "threshold.json"
FEATURE_IMP_PATH = ML_DIR / "feature_importance.csv"
LABEL_PATH = ML_DIR / "label_stats.json"

# ── LABELING ───────────────────────────────────────────────────────────────────
# How many forward bars to check for target/stop hit.
# 1H timeframe → 24 bars = 1 trading day lookahead. Good balance of
# recency vs enough bars for a 1:2 RR to play out.
LABEL_FORWARD_BARS = 24

# Realistic costs: 0.06% taker fee each side + 0.04% slippage each side
TRADE_FEE_PCT = 0.0006     # 0.06% taker fee per side
TRADE_SLIP_PCT = 0.0004    # 0.04% slippage per side
ROUND_TRIP_COST = (TRADE_FEE_PCT + TRADE_SLIP_PCT) * 2   # both sides = 0.20%

# ATR multipliers matching risk_engine.py
STOP_MULT = 2.5
TARGET_RR = 2.0            # target = stop_distance * TARGET_RR

# ── WALK-FORWARD ───────────────────────────────────────────────────────────────
WF_N_SPLITS = 5            # number of walk-forward folds
WF_TRAIN_FRAC = 0.70       # fraction of each fold used for training
WF_MIN_TRAIN_OBS = 500     # minimum training observations per fold

# ── MODEL HYPERPARAMETERS ──────────────────────────────────────────────────────
# These target LightGBM params; HistGradientBoostingClassifier maps them.
LGBM_PARAMS = dict(
    n_estimators          = 400,
    learning_rate         = 0.03,
    max_depth             = 5,     # shallow: reduces overfitting
    min_samples_leaf      = 40,    # minimum leaf size: ~1% of 4000 samples
    l2_regularization     = 2.0,   # L2 ridge penalty
    max_features          = 0.70,  # feature bagging per split
    early_stopping_rounds = 30,
    validation_fraction   = 0.15,
    n_iter_no_change      = 30,
    random_state          = 42,
    verbose               = 0,
)

# ── THRESHOLD OPTIMIZATION ─────────────────────────────────────────────────────
# Objective to maximize when searching for the optimal probability cutoff.
# Options: "sharpe", "expectancy", "f1", "precision_recall"
THRESHOLD_OBJECTIVE = "expectancy"

# Search grid for threshold sweep.
# 91 evenly spaced points over [0.35, 0.80] → 0.35, 0.355, ..., 0.80
# (0.005 increments; 46 points would give 0.01 increments).
THRESHOLD_MIN = 0.35
THRESHOLD_MAX = 0.80
THRESHOLD_STEPS = 91

# ── INFERENCE ──────────────────────────────────────────────────────────────────
DEFAULT_PROB_THRESHOLD = 0.55   # conservative default before calibration

# ── FEATURE ENGINEERING ────────────────────────────────────────────────────────
# Raw features from the rule engine fed into the model.
# Order here defines column order in the feature matrix — DO NOT CHANGE
# without retraining.
FEATURE_COLUMNS = [
    # Trend / momentum
    "adx",
    "di_plus",
    "di_minus",
    "di_diff",                  # engineered: di_plus - di_minus
    "di_ratio",                 # engineered: di_plus / (di_plus + di_minus + 1e-9)
    # Volatility
    "atr_pct",
    "vol_ratio",
    "vol_compressed",
    "vol_expanding",
    "vol_expanding_from_base",
    # Volume / order flow
    "absorption",
    "failed_breakout",
    "recent_failed_count",
    "obv_slope_norm",
    "delta_sign",
    "spike",
    "climax",
    # Price context
    "dist_atr",
    "dist_atr_abs",             # engineered: abs(dist_atr)
    # Rule-engine scores (carry human priors into the model)
    "regime_confidence",
    "regime_score",
    "volume_score",
    "structure_score",
    "confidence_score",
    "total_score",
    # Interactions (multiplicative signal combinations)
    "adx_x_regime",             # engineered: adx * regime_score
    "vol_x_obv",                # engineered: vol_ratio * obv_slope_norm
    "score_x_conf",             # engineered: total_score * regime_confidence
]