"""Pure order-book reconstruction for Kalshi binary markets. Kalshi exposes a market's book as two arrays of *bids*: - ``yes``: resting bids to buy YES contracts, ``[price_cents, count]`` - ``no`` : resting bids to buy NO contracts, ``[price_cents, count]`` A NO bid at price ``p`` is economically an offer to *sell YES* at ``100 - p``. So the YES book is: - YES bid side = the ``yes`` array directly (best bid = highest yes price) - YES ask side = the ``no`` array mapped to ``ask = 100 - no_price`` (best ask = ``100 - highest no price``) Prices are integer cents in [1, 99]; the book is kept in cents and features are returned in dollars (0-1). No I/O and no network here on purpose — the reconstruction logic is unit-tested independently of the live collector. """ from dataclasses import dataclass, field @dataclass class KalshiOrderBook: market_ticker: str # price_cents -> total resting contract count yes_bids: dict[int, int] = field(default_factory=dict) no_bids: dict[int, int] = field(default_factory=dict) def apply_snapshot(self, yes, no): """Replace the whole book. ``yes``/``no`` are lists of ``[price, count]``.""" self.yes_bids = {int(p): int(c) for p, c in yes if int(c) > 0} self.no_bids = {int(p): int(c) for p, c in no if int(c) > 0} def apply_delta(self, price, delta, side): """Apply an incremental count change to one price level on one side.""" book = self.yes_bids if side == "yes" else self.no_bids price = int(price) new_count = book.get(price, 0) + int(delta) if new_count > 0: book[price] = new_count else: book.pop(price, None) def features(self): """Microstructure features for the current top of book. Returns ``None`` when the book is one-sided or crossed — those states are transient artifacts, not tradeable prices, and would only add noise to a training set. """ if not self.yes_bids or not self.no_bids: return None best_yes = max(self.yes_bids) best_no = max(self.no_bids) bid = best_yes # highest price to SELL a YES into ask = 100 - best_no # lowest price to BUY a YES from if bid > ask: return None # crossed book — drop it bid_size = self.yes_bids[best_yes] ask_size = self.no_bids[best_no] total = bid_size + ask_size # Microprice: the size-weighted fair value. Heavy bid size pulls it # toward the ask — a classic short-horizon directional predictor and # the core feature this whole collection effort exists to study. microprice = (bid * ask_size + ask * bid_size) / total return { "market_ticker": self.market_ticker, "best_bid": bid / 100, "best_ask": ask / 100, "bid_size": bid_size, "ask_size": ask_size, "mid": (bid + ask) / 200, "spread": (ask - bid) / 100, "microprice": microprice / 100, "imbalance": bid_size / total, "yes_depth": sum(self.yes_bids.values()), "no_depth": sum(self.no_bids.values()), "n_yes_levels": len(self.yes_bids), "n_no_levels": len(self.no_bids), }