Spaces:

AJAYKASU
/

arbintel

Sleeping

AJAY KASU

Add microstructure signal study pipeline

c5555e5 about 1 month ago

2.45 kB

	import numpy as np
	import pandas as pd

	from src.microstructure.study import build_dataset, evaluate_signal


	def _ticks(ticker: str, mids: list[float]) -> pd.DataFrame:
	    """Build a minimal raw-tick frame with the columns build_dataset consumes.

	    Args:
	        ticker: Market identifier written to every row's ``market_ticker``.
	        mids: Mid prices, one per tick; the same values are reused as the
	            microprice.

	    Returns:
	        A frame with one row per entry in ``mids`` and the columns
	        ``market_ticker``, ``ts``, ``mid``, ``microprice``, ``imbalance``
	        and ``spread``. Timestamps are UTC, one second apart, starting at
	        2026-01-01; ``imbalance`` (0.5) and ``spread`` (0.02) are constant.
	    """
	    n = len(mids)
	    return pd.DataFrame({
	        "market_ticker": ticker,
	        "ts": pd.date_range("2026-01-01", periods=n, freq="s", tz="UTC"),
	        "mid": mids,
	        "microprice": mids,  # zero edge — irrelevant for these tests
	        "imbalance": 0.5,
	        "spread": 0.02,
	    })


	def test_build_dataset_no_lookahead_and_no_leakage() -> None:
	    """Check forward-return labels per market for a two-market tick frame.

	    Market A's mids rise by 0.01 per tick and market B's stay flat. With
	    ``horizon=2`` the test expects three labelled rows for A, each with a
	    forward return of 0.02, and only zero forward returns for B.
	    """
	    ticks = pd.concat([
	        _ticks("A", [0.50, 0.51, 0.52, 0.53, 0.54]),
	        _ticks("B", [0.20, 0.20, 0.20, 0.20, 0.20]),
	    ], ignore_index=True)
	    data = build_dataset(ticks, horizon=2)

	    a = data[data.market_ticker == "A"].reset_index(drop=True)
	    # 5 ticks, horizon 2 -> 3 labelled rows; fwd_return = mid[t+2] - mid[t]
	    assert len(a) == 3
	    assert a["fwd_return"].round(4).tolist() == [0.02, 0.02, 0.02]

	    # market B's labels must never borrow market A's rising mids
	    b = data[data.market_ticker == "B"]
	    # .all() is True on an empty selection, so the leakage check below would
	    # pass vacuously if B's rows were missing; require them to exist first.
	    assert not b.empty
	    assert (b["fwd_return"].round(6) == 0.0).all()


	def test_build_dataset_skips_markets_shorter_than_horizon() -> None:
	    """Expect an empty dataset when a market has too few ticks to label.

	    The fixture has exactly two ticks and the horizon is 2, so this pins
	    the boundary case where the tick count equals the horizon.
	    """
	    ticks = _ticks("SHORT", [0.5, 0.5])  # only 2 ticks
	    assert build_dataset(ticks, horizon=2).empty


	def test_evaluate_detects_a_real_signal() -> None:
	    """Expect a strong report when imbalance truly drives forward returns.

	    The synthetic data sets ``fwd_return = 0.5 * imbalance + noise`` with a
	    fixed seed, then requires ``corr_imbalance`` above 0.5 and ``hit_rate``
	    above 0.6 in the report returned by ``evaluate_signal``.
	    """
	    rng = np.random.default_rng(0)  # fixed seed keeps the thresholds stable
	    n = 4000
	    imb = rng.uniform(-0.5, 0.5, n)
	    fwd = 0.5 * imb + rng.normal(0, 0.05, n)  # imbalance genuinely predicts
	    data = pd.DataFrame({
	        "market_ticker": "X",
	        "ts": pd.Timestamp("2026-01-01", tz="UTC"),
	        "imbalance": imb,
	        "microprice_edge": imb,
	        "spread": 0.02,
	        "mid": 0.5,
	        "fwd_return": fwd,
	    })
	    rep = evaluate_signal(data)
	    assert rep["corr_imbalance"] > 0.5
	    assert rep["hit_rate"] > 0.6


	def test_evaluate_reports_flat_on_pure_noise() -> None:
	    """Expect a near-zero imbalance correlation on unrelated random data.

	    Imbalance, microprice edge and forward return are drawn independently
	    with a fixed seed, and ``|corr_imbalance|`` must stay below 0.1.
	    """
	    rng = np.random.default_rng(1)  # fixed seed keeps the threshold stable
	    n = 4000
	    data = pd.DataFrame({
	        "market_ticker": "X",
	        "ts": pd.Timestamp("2026-01-01", tz="UTC"),
	        "imbalance": rng.uniform(-0.5, 0.5, n),
	        "microprice_edge": rng.uniform(-0.02, 0.02, n),
	        "spread": 0.02,
	        "mid": 0.5,
	        "fwd_return": rng.normal(0, 0.05, n),  # no relationship at all
	    })
	    rep = evaluate_signal(data)
	    assert abs(rep["corr_imbalance"]) < 0.1


	def test_evaluate_handles_empty_input() -> None:
	    """Expect ``evaluate_signal`` to return ``{"n": 0}`` for an empty frame."""
	    assert evaluate_signal(pd.DataFrame()) == {"n": 0}