"""
ml_config.py — All hyperparameters for the ML probability filter layer.
Edit here only; never hardcode values in other modules.
"""
from pathlib import Path

# ── PATHS ─────────────────────────────────────────────────────────────────────
# All artifacts live next to this file so the package is relocatable.
ML_DIR = Path(__file__).parent / "ml_artifacts"
MODEL_PATH = ML_DIR / "trade_filter.pkl"          # serialized trained model
THRESHOLD_PATH = ML_DIR / "threshold.json"        # optimized probability cutoff
FEATURE_IMP_PATH = ML_DIR / "feature_importance.csv"
LABEL_PATH = ML_DIR / "label_stats.json"          # stats from the labeling run
# ── LABELING ──────────────────────────────────────────────────────────────────
# How many forward bars to check for target/stop hit.
# 1H timeframe → 24 bars = 1 trading day lookahead. Good balance of
# recency vs enough bars for a 1:2 RR to play out.
LABEL_FORWARD_BARS = 24

# Realistic costs: 0.06% taker fee each side + 0.04% slippage each side.
TRADE_FEE_PCT = 0.0006   # 0.06% taker fee per side
TRADE_SLIP_PCT = 0.0004  # 0.04% slippage per side
ROUND_TRIP_COST = (TRADE_FEE_PCT + TRADE_SLIP_PCT) * 2  # entry + exit combined

# ATR multipliers matching risk_engine.py — keep in sync with that module.
STOP_MULT = 2.5   # stop distance = ATR * STOP_MULT
TARGET_RR = 2.0   # target = stop_distance * TARGET_RR
# ── WALK-FORWARD ──────────────────────────────────────────────────────────────
# Walk-forward validation: the data is split into sequential folds so the
# model is always evaluated on data strictly after its training window.
WF_N_SPLITS = 5         # number of walk-forward folds
WF_TRAIN_FRAC = 0.70    # fraction of each fold used for training
WF_MIN_TRAIN_OBS = 500  # minimum training observations per fold
# ── MODEL HYPERPARAMETERS ─────────────────────────────────────────────────────
# These target LightGBM params; HistGradientBoostingClassifier maps them.
# NOTE(review): the keys deliberately mix LightGBM naming (n_estimators,
# early_stopping_rounds) with sklearn-HGB naming (min_samples_leaf,
# l2_regularization, validation_fraction, n_iter_no_change) — the trainer
# module is expected to translate whichever subset its backend understands.
LGBM_PARAMS = dict(
    n_estimators          = 400,
    learning_rate         = 0.03,
    max_depth             = 5,     # shallow: reduces overfitting
    min_samples_leaf      = 40,    # minimum leaf size: ~1% of 4000 samples
    l2_regularization     = 2.0,   # L2 ridge penalty
    max_features          = 0.70,  # feature bagging per split
    early_stopping_rounds = 30,
    validation_fraction   = 0.15,
    n_iter_no_change      = 30,
    random_state          = 42,
    verbose               = 0,
)
# ── THRESHOLD OPTIMIZATION ────────────────────────────────────────────────────
# Objective to maximize when searching for the optimal probability cutoff.
# Options: "sharpe", "expectancy", "f1", "precision_recall"
THRESHOLD_OBJECTIVE = "expectancy"

# Search grid for threshold sweep.
# 91 evenly spaced points over [0.35, 0.80] → 0.005 increments
# (0.350, 0.355, ..., 0.800). The original comment claimed 0.01 steps,
# which would be 46 points; the value 91 is kept, the comment corrected.
THRESHOLD_MIN = 0.35
THRESHOLD_MAX = 0.80
THRESHOLD_STEPS = 91

# ── INFERENCE ─────────────────────────────────────────────────────────────────
DEFAULT_PROB_THRESHOLD = 0.55  # conservative default before calibration
# ── FEATURE ENGINEERING ───────────────────────────────────────────────────────
# Raw features from the rule engine fed into the model.
# Order here defines column order in the feature matrix — DO NOT CHANGE
# without retraining.
FEATURE_COLUMNS = [
    # Trend / momentum
    "adx",
    "di_plus",
    "di_minus",
    "di_diff",        # engineered: di_plus - di_minus
    "di_ratio",       # engineered: di_plus / (di_plus + di_minus + 1e-9)
    # Volatility
    "atr_pct",
    "vol_ratio",
    "vol_compressed",
    "vol_expanding",
    "vol_expanding_from_base",
    # Volume / order flow
    "absorption",
    "failed_breakout",
    "recent_failed_count",
    "obv_slope_norm",
    "delta_sign",
    "spike",
    "climax",
    # Price context
    "dist_atr",
    "dist_atr_abs",   # engineered: abs(dist_atr)
    # Rule-engine scores (carry human priors into the model)
    "regime_confidence",
    "regime_score",
    "volume_score",
    "structure_score",
    "confidence_score",
    "total_score",
    # Interactions (multiplicative signal combinations)
    "adx_x_regime",   # engineered: adx * regime_score
    "vol_x_obv",      # engineered: vol_ratio * obv_slope_norm
    "score_x_conf",   # engineered: total_score * regime_confidence
]