moccaram's picture
Replace v1 demo with v2 XGBoost-backed Gradio app (reference-backed rebuild)
8ba081b verified
Raw
History Blame Contribute Delete
1.88 kB
"""XGBoost classifier for triple-barrier labels.
Per Jansen Ch.12, gradient-boosted trees are the natural baseline for tabular
financial features and routinely beat LSTMs on these problems. The hyper-
parameters here are conservative (shallow trees, moderate n_estimators) to
avoid overfitting on small per-fold training sets in the purged CV scheme.
"""
from __future__ import annotations
import numpy as np
from xgboost import XGBClassifier
def build_xgb_classifier(random_state: int = 42) -> XGBClassifier:
    """Create a new, unfitted XGBClassifier intended for a single CV fold.

    The model is configured for three-class soft-probability output. Classes
    are expressed in XGBoost's own indexing ``{0, 1, 2}``, standing in for the
    triple-barrier labels ``{-1, 0, +1}``, since XGBoost requires non-negative
    integer labels. Translating between the two label spaces is left to the
    caller (the training driver wraps this model with an encoder).

    Args:
        random_state: Seed forwarded to the classifier.

    Returns:
        An unfitted ``XGBClassifier``; every call builds a separate instance.
    """
    hyperparams = {
        # Three-way softmax that yields one probability per class.
        "objective": "multi:softprob",
        "num_class": 3,
        # Deliberately conservative capacity: shallow trees and a moderate
        # number of boosting rounds at a small learning rate.
        "max_depth": 4,
        "n_estimators": 300,
        "learning_rate": 0.05,
        # Row / column subsampling plus L2 regularisation.
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "reg_lambda": 1.0,
        "eval_metric": "mlogloss",
        "random_state": random_state,
        "n_jobs": -1,
        "tree_method": "hist",
    }
    return XGBClassifier(**hyperparams)
class XGBTripleBarrier:
    """Thin wrapper that owns the label encoding from ``{-1, 0, 1}`` ↔ ``{0, 1, 2}``.

    Callers work exclusively with the triple-barrier labels ``-1``, ``0`` and
    ``1``; the wrapped model only ever sees the shifted indices ``0``, ``1``
    and ``2``. ``classes_`` lists the caller-facing labels in encoded order,
    so ``classes_[k]`` is the label behind encoded index ``k``.
    """

    # Labels accepted by ``fit``; anything else cannot be represented by the
    # three-entry ``classes_`` mapping.
    _VALID_LABELS = (-1, 0, 1)
    # Added to a label to obtain the non-negative index handed to the model.
    _LABEL_OFFSET = 1

    def __init__(self, random_state: int = 42):
        """Build the underlying classifier.

        Args:
            random_state: Seed forwarded to ``build_xgb_classifier``.
        """
        self.model = build_xgb_classifier(random_state=random_state)
        self.classes_ = np.array([-1, 0, 1])

    def fit(self, X, y, sample_weight=None):
        """Encode ``y`` to ``{0, 1, 2}`` and fit the wrapped model.

        Args:
            X: Feature matrix, passed through to the model untouched.
            y: Labels drawn from ``{-1, 0, 1}``. Integer-valued floats such as
                ``-1.0`` are accepted.
            sample_weight: Optional per-sample weights, passed through as-is.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``y`` contains anything other than ``-1``, ``0`` or
                ``1``. The wrapped model is not touched in that case.
        """
        raw = np.asarray(y)
        # Floats are checked before the cast because ``astype(int)`` truncates
        # (0.9 would silently become the neutral label 0).
        if raw.dtype.kind == "f" and not np.isin(raw, self._VALID_LABELS).all():
            raise ValueError(
                "y must contain only the triple-barrier labels -1, 0 and 1"
            )
        labels = raw.astype(int)
        # Out-of-domain labels would otherwise be shifted and handed to the
        # model unchecked, leaving predictions that no longer line up with
        # ``classes_``.
        if not np.isin(labels, self._VALID_LABELS).all():
            raise ValueError(
                "y must contain only the triple-barrier labels -1, 0 and 1"
            )
        encoded = labels + self._LABEL_OFFSET  # {-1, 0, 1} -> {0, 1, 2}
        self.model.fit(X, encoded, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Predict labels for ``X`` in the caller-facing ``{-1, 0, 1}`` space."""
        encoded = self.model.predict(X)
        return encoded - self._LABEL_OFFSET  # {0, 1, 2} -> {-1, 0, 1}

    def predict_proba(self, X):
        """Return the wrapped model's class probabilities unchanged.

        Columns are in the model's encoded class order, i.e. column ``k``
        belongs to ``classes_[k]``.
        """
        return self.model.predict_proba(X)

    @property
    def feature_importances_(self):
        """Feature importances as reported by the wrapped model."""
        return self.model.feature_importances_