Upload 61 files

9cb5a00 verified 2 months ago

4.66 kB

	"""
	ML-3m-trader Labeling Engine
	==============================
	Generates supervised-learning labels by simulating potential trades
	at each bar and checking 1:1 RR outcomes over a lookahead window.

	Labels
	------
	0 = DO_NOTHING (spread filter failed, or ATR stop distance not yet available)
	1 = BUY (long setup that would hit TP before SL)
	2 = SELL (short setup that would hit TP before SL)
	3 = HOLD (neither BUY nor SELL produced a winner)
	"""

	import numpy as np
	import pandas as pd

	import config as cfg


	def _compute_atr(high: np.ndarray, low: np.ndarray, close: np.ndarray,
	period: int) -> np.ndarray:
	"""Vectorized ATR (Wilder) for the labeler."""
	n = len(high)
	tr = np.empty(n, dtype=np.float64)
	tr[0] = high[0] - low[0]
	for i in range(1, n):
	tr[i] = max(high[i] - low[i],
	abs(high[i] - close[i - 1]),
	abs(low[i] - close[i - 1]))
	atr = np.empty(n, dtype=np.float64)
	atr[:] = np.nan
	atr[period - 1] = np.mean(tr[:period])
	alpha = 1.0 / period
	for i in range(period, n):
	atr[i] = atr[i - 1] * (1 - alpha) + tr[i] * alpha
	return atr


	def generate_labels(df: pd.DataFrame) -> np.ndarray:
	    """Label every bar by simulating a 1:1 risk/reward trade from its close.

	    For each bar:

	    1. Compute the stop distance ``sl_dist = ATR * cfg.ATR_SL_MULTIPLIER``.
	    2. If ``sl_dist`` is NaN, non-positive, or smaller than
	       ``spread * cfg.SPREAD_FILTER_MULTIPLIER`` -> DO_NOTHING.
	    3. Place two symmetric barriers around the entry (the bar's close):
	       ``close + sl_dist`` (BUY take-profit / SELL stop-loss) and
	       ``close - sl_dist`` (BUY stop-loss / SELL take-profit).
	    4. Walk forward up to ``cfg.LABEL_LOOKAHEAD_BARS`` bars and find the
	       first bar that touches either barrier:
	       * only the upper barrier touched -> BUY
	       * only the lower barrier touched -> SELL
	       * both touched within that same bar -> HOLD (the intrabar order is
	         unknown, so each direction is assumed to hit its stop first)
	    5. If no barrier is touched inside the window -> HOLD.

	    Because the barriers are shared, a BUY win and a SELL win can never
	    both occur for the same bar, so no tie-break is needed.

	    Parameters
	    ----------
	    df : pd.DataFrame
	        Must contain: high, low, close, spread. ``spread`` may be given in
	        points (whole counts, median >= 1) or already in price units
	        (median < 1); see the heuristic in the body.

	    Returns
	    -------
	    np.ndarray of int32
	        Label array aligned with the rows of ``df``.
	    """
	    high = df["high"].values.astype(np.float64)
	    low = df["low"].values.astype(np.float64)
	    close = df["close"].values.astype(np.float64)
	    spread_raw = df["spread"].values.astype(np.float64)

	    n = len(close)
	    labels = np.full(n, cfg.LABEL_HOLD, dtype=np.int32)

	    if n > 0:
	        # MT5 reports spread in points (whole counts such as 25), which must
	        # be scaled by the point size to be comparable with price-based ATR.
	        # A median below 1 means the column is already in price units.
	        # NOTE: the original check was inverted (it scaled only when the
	        # median was < 1), leaving real point spreads unscaled.
	        point = 0.01  # XAUUSDc standard point
	        if np.nanmedian(spread_raw) >= 1.0:
	            spread = spread_raw * point
	        else:
	            spread = spread_raw

	        atr = _compute_atr(high, low, close, cfg.ATR_PERIOD)
	        sl_dist = atr * cfg.ATR_SL_MULTIPLIER

	        lookahead = cfg.LABEL_LOOKAHEAD_BARS
	        spread_mult = cfg.SPREAD_FILTER_MULTIPLIER

	        for i in range(n):
	            sd = sl_dist[i]

	            # No usable stop distance (ATR warm-up) or the stop is too tight
	            # relative to the spread: not a tradable bar.
	            if np.isnan(sd) or sd <= 0 or sd < spread[i] * spread_mult:
	                labels[i] = cfg.LABEL_DO_NOTHING
	                continue

	            upper = close[i] + sd  # BUY take-profit == SELL stop-loss
	            lower = close[i] - sd  # BUY stop-loss == SELL take-profit

	            stop = min(i + lookahead + 1, n)
	            up_hit = high[i + 1:stop] >= upper
	            down_hit = low[i + 1:stop] <= lower
	            touched = up_hit | down_hit

	            # Neither barrier reached inside the window -> stays HOLD.
	            if not touched.any():
	                continue

	            first = int(np.argmax(touched))  # first bar touching a barrier
	            if up_hit[first] and down_hit[first]:
	                # Both barriers inside one bar: both directions are treated
	                # as stopped out, so the label stays HOLD.
	                continue

	            labels[i] = cfg.LABEL_BUY if up_hit[first] else cfg.LABEL_SELL

	    # Summary
	    unique, counts = np.unique(labels, return_counts=True)
	    dist = {cfg.LABEL_NAMES.get(u, u): int(c) for u, c in zip(unique, counts)}
	    print(f"[INFO] Label distribution: {dist}")

	    return labels