| import numpy as np |
| import pandas as pd |
|
|
| from src.microstructure.study import build_dataset, evaluate_signal |
|
|
|
|
def _ticks(ticker, mids):
    """Build a small synthetic raw-tick frame for a single market.

    Args:
        ticker: Value broadcast into the ``market_ticker`` column.
        mids: Sequence of mid prices; one row is produced per entry.

    Returns:
        A DataFrame with columns ``market_ticker``, ``ts``, ``mid``,
        ``microprice``, ``imbalance`` and ``spread``. Timestamps are one
        second apart starting at 2026-01-01 UTC, ``microprice`` mirrors
        ``mid``, and ``imbalance`` / ``spread`` are the constants 0.5 / 0.02.
        These are the columns the tests hand to ``build_dataset``.
    """
    # One timestamp per mid, spaced a second apart, timezone-aware (UTC).
    stamps = pd.date_range("2026-01-01", periods=len(mids), freq="s", tz="UTC")
    columns = {
        "market_ticker": ticker,
        "ts": stamps,
        "mid": mids,
        "microprice": mids,
        "imbalance": 0.5,
        "spread": 0.02,
    }
    return pd.DataFrame(columns)
|
|
|
|
def test_build_dataset_no_lookahead_and_no_leakage():
    """Check forward returns per market for a rising and a flat series.

    Market "A" climbs 0.01 per tick and market "B" is constant at 0.20; both
    have five ticks and are passed together to ``build_dataset`` with
    ``horizon=2``.
    """
    rising = _ticks("A", [0.50, 0.51, 0.52, 0.53, 0.54])
    flat = _ticks("B", [0.20, 0.20, 0.20, 0.20, 0.20])
    combined = pd.concat([rising, flat], ignore_index=True)
    dataset = build_dataset(combined, horizon=2)

    # Market A: expect exactly three rows, each with a forward return of 0.02
    # (presumably only rows with a tick two steps ahead survive).
    rows_a = dataset[dataset["market_ticker"] == "A"].reset_index(drop=True)
    assert len(rows_a) == 3
    assert rows_a["fwd_return"].round(4).tolist() == [0.02, 0.02, 0.02]

    # Market B is flat, so every forward return must round to zero; a nonzero
    # value would suggest prices from the other market bled in.
    rows_b = dataset[dataset["market_ticker"] == "B"]
    assert (rows_b["fwd_return"].round(6) == 0.0).all()
|
|
|
|
def test_build_dataset_skips_markets_shorter_than_horizon():
    """A two-tick market with ``horizon=2`` must yield an empty dataset."""
    short_market = _ticks("SHORT", [0.5, 0.5])
    result = build_dataset(short_market, horizon=2)
    assert result.empty
|
|
|
|
def test_evaluate_detects_a_real_signal():
    """Feed ``evaluate_signal`` a forward return driven by imbalance.

    The forward return is ``0.5 * imbalance`` plus Gaussian noise (sigma 0.05),
    so the report is expected to show a correlation above 0.5 and a hit rate
    above 0.6.
    """
    rng = np.random.default_rng(0)
    n_rows = 4000
    # Draw order matters for reproducibility: imbalance first, then the noise.
    imbalance = rng.uniform(-0.5, 0.5, n_rows)
    noise = rng.normal(0, 0.05, n_rows)
    fwd_return = 0.5 * imbalance + noise

    columns = {
        "market_ticker": "X",
        "ts": pd.Timestamp("2026-01-01", tz="UTC"),
        "imbalance": imbalance,
        "microprice_edge": imbalance,
        "spread": 0.02,
        "mid": 0.5,
        "fwd_return": fwd_return,
    }
    report = evaluate_signal(pd.DataFrame(columns))

    assert report["corr_imbalance"] > 0.5
    assert report["hit_rate"] > 0.6
|
|
|
|
def test_evaluate_reports_flat_on_pure_noise():
    """With independent random features and returns, correlation stays small.

    Imbalance, microprice edge and forward return are drawn independently, so
    the reported ``corr_imbalance`` must lie within (-0.1, 0.1).
    """
    rng = np.random.default_rng(1)
    n_rows = 4000
    # Keep the draw order (imbalance, edge, forward return) so the seeded
    # stream produces the same values for each column.
    imbalance = rng.uniform(-0.5, 0.5, n_rows)
    edge = rng.uniform(-0.02, 0.02, n_rows)
    fwd_return = rng.normal(0, 0.05, n_rows)

    columns = {
        "market_ticker": "X",
        "ts": pd.Timestamp("2026-01-01", tz="UTC"),
        "imbalance": imbalance,
        "microprice_edge": edge,
        "spread": 0.02,
        "mid": 0.5,
        "fwd_return": fwd_return,
    }
    report = evaluate_signal(pd.DataFrame(columns))

    assert abs(report["corr_imbalance"]) < 0.1
|
|
|
|
def test_evaluate_handles_empty_input():
    """An empty DataFrame must produce exactly the report ``{"n": 0}``."""
    report = evaluate_signal(pd.DataFrame())
    assert report == {"n": 0}
|
|