DataSynthis_ML_JobTask / src /models /arima_model.py
moccaram's picture
Replace v1 demo with v2 XGBoost-backed Gradio app (reference-backed rebuild)
8ba081b verified
Raw
History Blame Contribute Delete
2.49 kB
"""ARIMA wrapper for triple-barrier classification.
ARIMA forecasts a continuous next-step return; we threshold it into ``{-1, 0, +1}``
using ``±k·σ`` where ``σ`` is the daily-vol estimate at the event time. The
``k`` factor matches the profit-taking / stop-loss multiplier used for labeling
so that the discretization is consistent with the label scheme.
"""
from __future__ import annotations
import warnings
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
class ARIMAClassifier:
    """Wrap statsmodels ARIMA so it can sit in the same fit/predict loop as XGB/LSTM.

    The ARIMA model is fit on the ``frac_diff_close`` column of the training
    rows. At prediction time an ``n``-step forecast is produced, differenced
    into per-step changes, and each change is discretized into ``{-1, 0, +1}``
    by comparing it against ``±k·σ``, where ``σ`` is the row's ``target_vol``
    and ``k`` is ``threshold_k``.

    Required X columns:
        ``frac_diff_close``: the series the ARIMA model is fit on (``fit`` only).
        ``target_vol``: per-row volatility used to build the ``±k·σ`` threshold
            (``predict`` / ``predict_proba`` only).
    """

    def __init__(self, order: tuple[int, int, int] = (1, 1, 1), threshold_k: float = 0.5):
        """Store hyper-parameters; no model is fit until ``fit`` is called.

        Args:
            order: ARIMA ``(p, d, q)`` order handed to statsmodels.
            threshold_k: multiplier ``k`` applied to ``target_vol`` to form the
                classification threshold.
        """
        self.order = order
        self.threshold_k = threshold_k
        # Fitted statsmodels results object; None until fit() has run.
        self.fitted_ = None
        # Last value of the training series; anchors the first forecast step.
        self.train_tail_value_: float = 0.0
        # Column order used by predict_proba.
        self.classes_: np.ndarray = np.array([-1, 0, 1])

    def fit(self, X: pd.DataFrame, y=None, sample_weight=None) -> "ARIMAClassifier":
        """Fit ARIMA on ``X["frac_diff_close"]``.

        ``y`` and ``sample_weight`` are accepted only for interface
        compatibility with the other classifiers and are not used.

        Args:
            X: training frame containing a ``frac_diff_close`` column.
            y: ignored.
            sample_weight: ignored.

        Returns:
            ``self``, to allow call chaining.
        """
        series = X["frac_diff_close"].astype(float).to_numpy()
        # All warnings raised while fitting are deliberately silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.fitted_ = ARIMA(series, order=self.order).fit()
        self.train_tail_value_ = float(series[-1])
        return self

    def predict(self, X: pd.DataFrame) -> np.ndarray:
        """Return one hard label in ``{-1, 0, +1}`` per row of ``X``.

        Args:
            X: frame containing a ``target_vol`` column; its length sets the
                forecast horizon.

        Returns:
            Integer array of length ``len(X)``.

        Raises:
            AttributeError: if called before ``fit``.
        """
        if self.fitted_ is None:
            # Same exception type the bare attribute access would raise, but
            # with a message that tells the caller what to do.
            raise AttributeError(
                "ARIMAClassifier is not fitted yet; call fit() before predict()."
            )

        n = len(X)
        if n == 0:
            # Nothing to label; avoid asking the model for a zero-step
            # forecast, which statsmodels is expected to reject.
            return np.zeros(0, dtype=int)

        forecast = np.asarray(self.fitted_.forecast(steps=n))
        # Step-to-step change of the forecast path, with the first step
        # measured against the last training observation.
        per_step_return = np.diff(
            np.concatenate([[self.train_tail_value_], forecast])
        )
        thresholds = self.threshold_k * X["target_vol"].astype(float).to_numpy()

        preds = np.zeros(n, dtype=int)
        preds[per_step_return > thresholds] = 1
        preds[per_step_return < -thresholds] = -1
        return preds

    def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
        """Return one-hot "probabilities" with columns ordered as ``classes_``.

        ARIMA isn't probabilistic in the triple-barrier sense, so the hard
        predictions are collapsed into a one-hot matrix for log-loss
        calculation.

        Args:
            X: frame containing a ``target_vol`` column.

        Returns:
            Float array of shape ``(len(X), 3)`` with exactly one ``1.0`` per row.

        Raises:
            AttributeError: if called before ``fit``.
        """
        preds = self.predict(X)
        # Broadcast each prediction against the class vector to get the one-hot.
        return (preds[:, None] == self.classes_[None, :]).astype(float)