arbintel/tests/test_study.py
AJAY KASU
Add microstructure signal study pipeline
c5555e5
Raw
History Blame Contribute Delete
2.45 kB
import numpy as np
import pandas as pd
from src.microstructure.study import build_dataset, evaluate_signal
def _ticks(ticker, mids):
    """Build a minimal raw-tick frame with the columns build_dataset consumes.

    One row is produced per entry in ``mids``. Timestamps are UTC, one second
    apart, starting at 2026-01-01. ``ticker`` is broadcast to every row, and
    ``imbalance`` / ``spread`` are fixed constants.
    """
    timestamps = pd.date_range(
        "2026-01-01", periods=len(mids), freq="s", tz="UTC"
    )
    columns = {
        "market_ticker": ticker,
        "ts": timestamps,
        "mid": mids,
        # microprice == mid gives a zero edge — irrelevant for these tests
        "microprice": mids,
        "imbalance": 0.5,
        "spread": 0.02,
    }
    return pd.DataFrame(columns)
def test_build_dataset_no_lookahead_and_no_leakage():
    """Check that forward-return labels are computed per market.

    Two five-tick markets are stacked into one frame: "A" rises by 0.01 per
    tick and "B" is flat. With ``horizon=2`` the test expects three labelled
    rows for "A", each with a forward return of 0.02, and only zero forward
    returns for "B".
    """
    ticks = pd.concat([
        _ticks("A", [0.50, 0.51, 0.52, 0.53, 0.54]),
        _ticks("B", [0.20, 0.20, 0.20, 0.20, 0.20]),
    ], ignore_index=True)

    data = build_dataset(ticks, horizon=2)

    a = data[data.market_ticker == "A"].reset_index(drop=True)
    # 5 ticks, horizon 2 -> 3 labelled rows; fwd_return = mid[t+2] - mid[t]
    assert len(a) == 3
    assert a["fwd_return"].round(4).tolist() == [0.02, 0.02, 0.02]

    # market B's labels must never borrow market A's rising mids
    b = data[data.market_ticker == "B"]
    # `.all()` on an empty selection is True, so without this guard the check
    # below would pass vacuously if market B's rows were dropped entirely.
    assert not b.empty
    assert (b["fwd_return"].round(6) == 0.0).all()
def test_build_dataset_skips_markets_shorter_than_horizon():
    """A market with only two ticks must produce an empty dataset at horizon 2."""
    two_tick_market = _ticks("SHORT", [0.5, 0.5])  # only 2 ticks
    labelled = build_dataset(two_tick_market, horizon=2)
    assert labelled.empty
def test_evaluate_detects_a_real_signal():
    """evaluate_signal must report a strong signal when one is planted.

    The forward return is built as ``0.5 * imbalance`` plus Gaussian noise
    (seeded, 4000 samples), so the reported imbalance correlation and hit
    rate are expected to clear 0.5 and 0.6 respectively.
    """
    sample_count = 4000
    generator = np.random.default_rng(0)
    imbalance = generator.uniform(-0.5, 0.5, sample_count)
    # imbalance genuinely predicts the forward return
    forward_return = 0.5 * imbalance + generator.normal(0, 0.05, sample_count)

    columns = {
        "market_ticker": "X",
        "ts": pd.Timestamp("2026-01-01", tz="UTC"),
        "imbalance": imbalance,
        "microprice_edge": imbalance,
        "spread": 0.02,
        "mid": 0.5,
        "fwd_return": forward_return,
    }
    report = evaluate_signal(pd.DataFrame(columns))

    assert report["corr_imbalance"] > 0.5
    assert report["hit_rate"] > 0.6
def test_evaluate_reports_flat_on_pure_noise():
    """evaluate_signal must report near-zero correlation on unrelated inputs.

    Imbalance, microprice edge and forward return are drawn independently
    (seeded, 4000 samples), so the absolute imbalance correlation is expected
    to stay below 0.1.
    """
    sample_count = 4000
    generator = np.random.default_rng(1)
    # Draw order matters for reproducibility: imbalance, edge, then returns.
    imbalance = generator.uniform(-0.5, 0.5, sample_count)
    edge = generator.uniform(-0.02, 0.02, sample_count)
    forward_return = generator.normal(0, 0.05, sample_count)  # no relationship at all

    columns = {
        "market_ticker": "X",
        "ts": pd.Timestamp("2026-01-01", tz="UTC"),
        "imbalance": imbalance,
        "microprice_edge": edge,
        "spread": 0.02,
        "mid": 0.5,
        "fwd_return": forward_return,
    }
    report = evaluate_signal(pd.DataFrame(columns))

    assert abs(report["corr_imbalance"]) < 0.1
def test_evaluate_handles_empty_input():
    """An empty frame must yield exactly ``{"n": 0}``."""
    empty_frame = pd.DataFrame()
    report = evaluate_signal(empty_frame)
    assert report == {"n": 0}