"""Tests for the dataset builder and signal evaluator in the microstructure study."""

import numpy as np
import pandas as pd

from src.microstructure.study import build_dataset, evaluate_signal


def _ticks(ticker, mids):
    """Minimal raw-tick frame with the columns build_dataset consumes.

    Args:
        ticker: Value broadcast into the ``market_ticker`` column.
        mids: Sequence of mid prices, one per tick; also reused as the
            ``microprice`` column.

    Returns:
        A DataFrame with one row per entry of ``mids``, timestamped at
        one-second intervals (UTC) starting 2026-01-01.
    """
    n = len(mids)
    return pd.DataFrame({
        "market_ticker": ticker,
        "ts": pd.date_range("2026-01-01", periods=n, freq="s", tz="UTC"),
        "mid": mids,
        "microprice": mids,  # zero edge — irrelevant for these tests
        "imbalance": 0.5,
        "spread": 0.02,
    })


def test_build_dataset_no_lookahead_and_no_leakage():
    """Forward returns are computed per market and only from later ticks."""
    ticks = pd.concat(
        [
            _ticks("A", [0.50, 0.51, 0.52, 0.53, 0.54]),
            _ticks("B", [0.20, 0.20, 0.20, 0.20, 0.20]),
        ],
        ignore_index=True,
    )
    data = build_dataset(ticks, horizon=2)

    a = data[data.market_ticker == "A"].reset_index(drop=True)
    # 5 ticks, horizon 2 -> 3 labelled rows; fwd_return = mid[t+2] - mid[t]
    assert len(a) == 3
    assert a["fwd_return"].round(4).tolist() == [0.02, 0.02, 0.02]

    # market B's labels must never borrow market A's rising mids
    b = data[data.market_ticker == "B"]
    # Guard against a vacuous pass: `.all()` on an empty selection is True,
    # so without this the leakage check would succeed if B were dropped.
    assert not b.empty
    assert (b["fwd_return"].round(6) == 0.0).all()


def test_build_dataset_skips_markets_shorter_than_horizon():
    """A market with too few ticks to label contributes no rows."""
    ticks = _ticks("SHORT", [0.5, 0.5])  # only 2 ticks
    assert build_dataset(ticks, horizon=2).empty


def test_evaluate_detects_a_real_signal():
    """A strongly predictive imbalance shows up in correlation and hit rate."""
    rng = np.random.default_rng(0)
    n = 4000
    imb = rng.uniform(-0.5, 0.5, n)
    fwd = 0.5 * imb + rng.normal(0, 0.05, n)  # imbalance genuinely predicts
    data = pd.DataFrame({
        "market_ticker": "X",
        "ts": pd.Timestamp("2026-01-01", tz="UTC"),
        "imbalance": imb,
        "microprice_edge": imb,
        "spread": 0.02,
        "mid": 0.5,
        "fwd_return": fwd,
    })
    rep = evaluate_signal(data)
    assert rep["corr_imbalance"] > 0.5
    assert rep["hit_rate"] > 0.6


def test_evaluate_reports_flat_on_pure_noise():
    """Independent features and returns yield a near-zero correlation."""
    rng = np.random.default_rng(1)
    n = 4000
    data = pd.DataFrame({
        "market_ticker": "X",
        "ts": pd.Timestamp("2026-01-01", tz="UTC"),
        "imbalance": rng.uniform(-0.5, 0.5, n),
        "microprice_edge": rng.uniform(-0.02, 0.02, n),
        "spread": 0.02,
        "mid": 0.5,
        "fwd_return": rng.normal(0, 0.05, n),  # no relationship at all
    })
    rep = evaluate_signal(data)
    assert abs(rep["corr_imbalance"]) < 0.1


def test_evaluate_handles_empty_input():
    """An empty frame produces the minimal ``{"n": 0}`` report."""
    assert evaluate_signal(pd.DataFrame()) == {"n": 0}