Spaces:
Sleeping
Sleeping
File size: 1,883 Bytes
8ba081b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | """XGBoost classifier for triple-barrier labels.
Per Jansen Ch.12, gradient-boosted trees are the natural baseline for tabular
financial features and routinely beat LSTMs on these problems. The hyper-
parameters here are conservative (shallow trees, moderate n_estimators) to
avoid overfitting on small per-fold training sets in the purged CV scheme.
"""
from __future__ import annotations
import numpy as np
from xgboost import XGBClassifier
def build_xgb_classifier(random_state: int = 42) -> XGBClassifier:
"""Returns a fresh XGBClassifier for one CV fold.
Output classes use the XGBoost-internal indexing ``{0, 1, 2}`` for
``{-1, 0, +1}`` since XGBoost requires non-negative integer labels. The
training driver wraps this with an encoder.
"""
return XGBClassifier(
objective="multi:softprob",
num_class=3,
max_depth=4,
n_estimators=300,
learning_rate=0.05,
subsample=0.8,
colsample_bytree=0.8,
reg_lambda=1.0,
eval_metric="mlogloss",
random_state=random_state,
n_jobs=-1,
tree_method="hist",
)
class XGBTripleBarrier:
"""Thin wrapper that owns the label encoding from ``{-1, 0, 1}`` ↔ ``{0, 1, 2}``."""
def __init__(self, random_state: int = 42):
self.model = build_xgb_classifier(random_state=random_state)
self.classes_ = np.array([-1, 0, 1])
def fit(self, X, y, sample_weight=None):
y_enc = np.asarray(y).astype(int) + 1 # {-1, 0, 1} -> {0, 1, 2}
self.model.fit(X, y_enc, sample_weight=sample_weight)
return self
def predict(self, X):
y_pred_enc = self.model.predict(X)
return y_pred_enc - 1
def predict_proba(self, X):
return self.model.predict_proba(X)
@property
def feature_importances_(self):
return self.model.feature_importances_
|