Premchan369 committed on
Commit
2b738f4
·
verified ·
1 Parent(s): b470e42

Add Level 2 LOB reconstruction with full order book, queue position, depth profile, spread dynamics

Browse files
Files changed (1) hide show
  1. limit_order_book.py +630 -0
limit_order_book.py ADDED
@@ -0,0 +1,630 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Limit Order Book (LOB) Reconstruction and Level 2 Features
2
+
3
+ What Jane Street sees that retail doesn't:
4
+ - Full Level 2 order book (10+ price levels, not just best bid/ask)
5
+ - Queue position for each order
6
+ - Order arrival/cancel rates
7
+ - Market depth profile
8
+ - Spread dynamics (widening = informed trading)
9
+ - Large order detection
10
+
11
+ This is the foundation of HIGH-FREQUENCY alpha.
12
+ """
13
+ import numpy as np
14
+ import pandas as pd
15
+ from typing import Dict, List, Tuple, Optional, NamedTuple
16
+ from collections import defaultdict
17
+ import bisect
18
+ import warnings
19
+ warnings.filterwarnings('ignore')
20
+
21
+
22
class OrderBookEntry:
    """A single resting limit order held in the book.

    Attributes:
        price: Limit price of the order.
        quantity: Remaining size of the order.
        order_id: Identifier used to find the order again (e.g. to cancel it).
        side: 'bid' or 'ask'.
        timestamp: Arrival time exactly as supplied by the caller.
    """

    def __init__(self, price: float, quantity: int, order_id: str,
                 side: str, timestamp: float):
        self.price = price
        self.quantity = quantity
        self.order_id = order_id
        self.side = side  # 'bid' or 'ask'
        self.timestamp = timestamp

    def __repr__(self):
        # Compact "SIDE qty@price" form, price shown with two decimals.
        return f"{self.side.upper()} {self.quantity}@{self.price:.2f}"
34
+
35
+
36
class LimitOrderBook:
    """Price-level limit order book rebuilt from add / cancel / trade events.

    Each side keeps two structures that must stay in sync:

    * ``bids`` / ``asks``: dict mapping price -> list of orders at that price,
      in arrival order (the front of the list is matched first).
    * ``bid_prices`` / ``ask_prices``: sorted price lists whose element 0 is
      the best price (bids descending, asks ascending).

    ``order_map`` maps ``order_id -> (side, price)`` so a cancel can find its
    level without scanning the book.
    """

    def __init__(self, max_depth: int = 10):
        """Create an empty book.

        Args:
            max_depth: Default number of price levels per side returned by
                ``get_book_snapshot`` when no explicit depth is given.
        """
        self.max_depth = max_depth
        self.bids = {}        # price -> list of OrderBookEntry
        self.asks = {}        # price -> list of OrderBookEntry
        self.bid_prices = []  # Sorted descending
        self.ask_prices = []  # Sorted ascending
        self.order_map = {}   # order_id -> (side, price)

        # Statistics
        self.trade_history = []
        self.imbalance_history = []
        self.spread_history = []
        self.depth_history = []

    def _side(self, side: str):
        """Return ``(levels_dict, sorted_prices)`` for *side*.

        Any value other than 'bid' selects the ask side.
        """
        if side == 'bid':
            return self.bids, self.bid_prices
        return self.asks, self.ask_prices

    @staticmethod
    def _level_quantity(levels: Dict, price: float) -> int:
        """Total resting quantity at *price*; 0 when the level is absent."""
        return sum(o.quantity for o in levels.get(price, ()))

    def _snapshot_side(self, side: str, depth: int) -> List[Dict]:
        """Aggregate the best *depth* levels of one side into summary dicts."""
        levels, prices = self._side(side)
        return [
            {
                'price': p,
                'quantity': sum(o.quantity for o in levels[p]),
                'num_orders': len(levels[p]),
                'side': side,
            }
            for p in prices[:depth] if p in levels
        ]

    def add_order(self, order: "OrderBookEntry"):
        """Add a limit order to the back of the queue at its price level.

        The order is only rested; it is not matched against the opposite side.
        """
        levels, prices = self._side(order.side)

        if order.price not in levels:
            levels[order.price] = []
            if order.side == 'bid':
                # bid_prices is descending, which bisect cannot search
                # directly, so locate the slot with an explicit binary search
                # instead of inserting and re-sorting the whole list.
                lo, hi = 0, len(prices)
                while lo < hi:
                    mid = (lo + hi) // 2
                    if prices[mid] > order.price:
                        lo = mid + 1
                    else:
                        hi = mid
                prices.insert(lo, order.price)
            else:
                bisect.insort(prices, order.price)

        levels[order.price].append(order)
        self.order_map[order.order_id] = (order.side, order.price)

    def cancel_order(self, order_id: str):
        """Cancel a limit order.

        Returns:
            True if the id was known to the book, False otherwise.
        """
        location = self.order_map.pop(order_id, None)
        if location is None:
            return False

        side, price = location
        levels, prices = self._side(side)

        if price in levels:
            survivors = [o for o in levels[price] if o.order_id != order_id]
            if survivors:
                levels[price] = survivors
            else:
                # Last order at this price: drop the whole level.
                del levels[price]
                prices.remove(price)

        return True

    def execute_trade(self, side: str, quantity: int,
                      aggressive: bool = True) -> Tuple[float, int]:
        """Match an incoming order against the opposite side of the book.

        Liquidity is consumed from the best price outward and, within a
        level, from the front of the queue.

        Args:
            side: Side of the incoming order. 'bid' consumes asks; any other
                value consumes bids.
            quantity: Maximum quantity to execute.
            aggressive: Stored verbatim in the trade record; it does not
                affect matching.

        Returns:
            ``(avg_price, executed_qty)``. ``avg_price`` is 0.0 when nothing
            was executed.
        """
        remaining = quantity
        total_cost = 0.0

        if side == 'bid':
            levels, prices = self.asks, self.ask_prices
        else:
            levels, prices = self.bids, self.bid_prices

        while remaining > 0 and prices:
            best_price = prices[0]
            queue = levels.get(best_price)

            if queue is None:
                # Price list and level dict disagree; discard the stale price.
                prices.pop(0)
                continue

            while remaining > 0 and queue:
                resting = queue[0]
                exec_qty = min(remaining, resting.quantity)

                total_cost += exec_qty * best_price
                remaining -= exec_qty
                resting.quantity -= exec_qty

                if resting.quantity <= 0:
                    # Fully filled: remove from the queue and the id index.
                    queue.pop(0)
                    self.order_map.pop(resting.order_id, None)

            if not queue:
                del levels[best_price]
                prices.pop(0)

        executed = quantity - remaining
        avg_price = total_cost / executed if executed > 0 else 0.0

        # Record trade
        if executed > 0:
            self.trade_history.append({
                'side': side,
                'quantity': executed,
                'avg_price': avg_price,
                'aggressive': aggressive
            })

        return avg_price, executed

    def get_best_bid(self) -> Optional[float]:
        """Highest bid price, or None when there are no bids."""
        return self.bid_prices[0] if self.bid_prices else None

    def get_best_ask(self) -> Optional[float]:
        """Lowest ask price, or None when there are no asks."""
        return self.ask_prices[0] if self.ask_prices else None

    def get_mid_price(self) -> Optional[float]:
        """Midpoint of best bid and best ask, or None if either side is empty."""
        bb = self.get_best_bid()
        ba = self.get_best_ask()
        if bb is not None and ba is not None:
            return (bb + ba) / 2
        return None

    def get_spread(self) -> Optional[float]:
        """Best ask minus best bid, or None if either side is empty."""
        bb = self.get_best_bid()
        ba = self.get_best_ask()
        if bb is not None and ba is not None:
            return ba - bb
        return None

    def get_spread_bps(self) -> Optional[float]:
        """Spread relative to the mid price in basis points.

        Returns None when either side is empty or the mid price is zero
        (the ratio is undefined in that case).
        """
        spread = self.get_spread()
        mid = self.get_mid_price()
        if spread is None or not mid:
            return None
        return (spread / mid) * 10000

    def get_book_snapshot(self, depth: Optional[int] = None) -> Dict:
        """Summarise the book.

        Args:
            depth: Number of levels per side to list under 'bids' / 'asks'.
                ``None`` means ``self.max_depth``; 0 yields empty level lists.

        Returns:
            Dict with per-level lists ('bids', 'asks'), 'mid_price', 'spread',
            'spread_bps', the number of price levels per side ('bid_depth',
            'ask_depth') and the total resting quantity per side. The depth
            counts and totals cover the whole book, not just *depth* levels.
        """
        if depth is None:
            depth = self.max_depth
        depth = max(depth, 0)  # a negative depth would slice from the end

        return {
            'bids': self._snapshot_side('bid', depth),
            'asks': self._snapshot_side('ask', depth),
            'mid_price': self.get_mid_price(),
            'spread': self.get_spread(),
            'spread_bps': self.get_spread_bps(),
            'bid_depth': len(self.bid_prices),
            'ask_depth': len(self.ask_prices),
            'total_bid_quantity': sum(self._level_quantity(self.bids, p)
                                      for p in self.bid_prices),
            'total_ask_quantity': sum(self._level_quantity(self.asks, p)
                                      for p in self.ask_prices)
        }

    def get_order_imbalance(self, levels: int = 5) -> float:
        """Quantity imbalance over the best *levels* price levels of each side.

        Computed as ``(bid_qty - ask_qty) / (bid_qty + ask_qty)``: positive
        when more quantity rests on the bid side, negative when more rests on
        the ask side, and 0.0 when both sides are empty.
        """
        bid_qty = sum(self._level_quantity(self.bids, p)
                      for p in self.bid_prices[:levels])
        ask_qty = sum(self._level_quantity(self.asks, p)
                      for p in self.ask_prices[:levels])

        total = bid_qty + ask_qty
        if total == 0:
            return 0.0

        return (bid_qty - ask_qty) / total
244
+
245
+
246
class LOBFeatures:
    """Stateless feature extractors over a reconstructed ``LimitOrderBook``.

    Every method is a static method that reads the book (or a list of books)
    and returns plain dicts / lists / DataFrames; none of them mutate the book.
    """

    @staticmethod
    def price_levels(book: "LimitOrderBook", n: int = 10) -> pd.DataFrame:
        """Return the top *n* levels of each side as one DataFrame.

        Rows are bids (best first) followed by asks (best first); 'level' is
        the 1-based rank within its side.
        """
        snapshot = book.get_book_snapshot(depth=n)

        rows = [
            {
                'side': side,
                'level': rank,
                'price': level['price'],
                'quantity': level['quantity'],
                'num_orders': level['num_orders']
            }
            for side, key in (('bid', 'bids'), ('ask', 'asks'))
            for rank, level in enumerate(snapshot[key], start=1)
        ]

        return pd.DataFrame(rows)

    @staticmethod
    def depth_profile(book: "LimitOrderBook") -> Dict:
        """Cumulative depth at 1, 5 and 10 levels plus bid/ask depth ratios.

        Uses the book's default snapshot depth, so the "10 level" figures
        cover at most ``book.max_depth`` levels. When a side has fewer levels
        than requested, the cumulative quantity of all its listed levels is
        used. A ratio defaults to 1.0 when either side lacks that many levels
        or the ask depth is zero.
        """
        snapshot = book.get_book_snapshot()

        # Cumulative depth, best level first
        cum_bid_qty = np.cumsum([b['quantity'] for b in snapshot['bids']])
        cum_ask_qty = np.cumsum([a['quantity'] for a in snapshot['asks']])

        def depth_at(cum, n_levels):
            # Cumulative quantity through n_levels, clamped to what exists.
            if len(cum) == 0:
                return 0
            return cum[min(n_levels, len(cum)) - 1]

        def ratio_at(n_levels):
            idx = n_levels - 1
            if (len(cum_bid_qty) > idx and len(cum_ask_qty) > idx
                    and cum_ask_qty[idx] > 0):
                return cum_bid_qty[idx] / cum_ask_qty[idx]
            return 1.0

        total_bid = snapshot['total_bid_quantity']
        total_ask = snapshot['total_ask_quantity']

        return {
            'bid_depth_1': depth_at(cum_bid_qty, 1),
            'bid_depth_5': depth_at(cum_bid_qty, 5),
            'bid_depth_10': depth_at(cum_bid_qty, 10),
            'ask_depth_1': depth_at(cum_ask_qty, 1),
            'ask_depth_5': depth_at(cum_ask_qty, 5),
            'ask_depth_10': depth_at(cum_ask_qty, 10),
            'depth_ratio_1': ratio_at(1),
            'depth_ratio_5': ratio_at(5),
            # +1 in the denominator keeps the skew defined for an empty book.
            'depth_skew': (total_bid - total_ask) / (total_bid + total_ask + 1)
        }

    @staticmethod
    def queue_features(book: "LimitOrderBook") -> Dict:
        """Order count and quantity queued at the best bid and best ask.

        'queue_imbalance' is the bid order count minus the ask order count.
        A missing side contributes zeros.
        """
        snapshot = book.get_book_snapshot(depth=1)

        best_bid = snapshot['bids'][0] if snapshot['bids'] else None
        best_ask = snapshot['asks'][0] if snapshot['asks'] else None

        bid_orders = best_bid['num_orders'] if best_bid else 0
        ask_orders = best_ask['num_orders'] if best_ask else 0

        return {
            'bid_queue_length': bid_orders,
            'ask_queue_length': ask_orders,
            'bid_queue_qty': best_bid['quantity'] if best_bid else 0,
            'ask_queue_qty': best_ask['quantity'] if best_ask else 0,
            'queue_imbalance': bid_orders - ask_orders
        }

    @staticmethod
    def large_order_detection(book: "LimitOrderBook",
                              threshold_qty: float = 1000,
                              threshold_pct: float = 0.3) -> List[Dict]:
        """List resting orders whose quantity is at least *threshold_qty*.

        Args:
            book: Book to scan (all levels on both sides).
            threshold_qty: Minimum order quantity to report.
            threshold_pct: Accepted for interface compatibility but currently
                not applied to the selection.

        Returns:
            One dict per large order, sorted by quantity descending.
            'pct_of_level' is the order's share of its price level;
            'is_iceberg' is a heuristic flag set when the order exceeds three
            times the mean order size at its level.
        """
        large_orders = []

        for side, side_name in [(book.bids, 'bid'), (book.asks, 'ask')]:
            for price, orders in side.items():
                total_at_price = sum(o.quantity for o in orders)
                avg_qty = np.mean([o.quantity for o in orders]) if orders else 0

                for order in orders:
                    if order.quantity < threshold_qty:
                        continue
                    large_orders.append({
                        'side': side_name,
                        'price': price,
                        'quantity': order.quantity,
                        'pct_of_level': order.quantity / total_at_price if total_at_price > 0 else 0,
                        # bool() so callers get a plain Python bool, not numpy.bool_
                        'is_iceberg': bool(order.quantity > avg_qty * 3)
                    })

        return sorted(large_orders, key=lambda x: x['quantity'], reverse=True)

    @staticmethod
    def spread_dynamics(book_history: List["LimitOrderBook"],
                        window: int = 10) -> Dict:
        """Summarise spread, mid price and imbalance over the last *window* books.

        Returns an empty dict when fewer than two of those books have a
        defined spread. 'spread_trend' and 'imbalance_trend' are last minus
        first; 'spread_percentile' is the fraction of observed spreads that
        are <= the latest one.
        """
        spreads = []
        mids = []
        imbalances = []

        for book in book_history[-window:]:
            s = book.get_spread_bps()
            m = book.get_mid_price()

            if s is not None:
                spreads.append(s)
            if m is not None:
                mids.append(m)
            imbalances.append(book.get_order_imbalance())

        if len(spreads) < 2:
            return {}

        latest_spread = spreads[-1]

        return {
            'avg_spread_bps': np.mean(spreads),
            'spread_volatility': np.std(spreads),
            'spread_trend': latest_spread - spreads[0],
            'spread_percentile': sum(1 for s in spreads if s <= latest_spread) / len(spreads),
            'mid_price_change_pct': (mids[-1] / mids[0] - 1) * 100 if len(mids) >= 2 and mids[0] > 0 else 0,
            'avg_imbalance': np.mean(imbalances),
            'imbalance_trend': imbalances[-1] - imbalances[0]
        }

    @staticmethod
    def order_flow_toxicity(book: "LimitOrderBook",
                            trade_history: List[Dict],
                            window: int = 50) -> Dict:
        """Signed-volume imbalance over the last *window* trades (VPIN-like).

        Each trade dict needs 'quantity' and 'side'. Volume with side 'bid'
        counts as buy volume and side 'ask' as sell volume; trades with any
        other side are ignored. Each trade is counted on exactly one side.

        Args:
            book: Unused; kept so the signature parallels the other extractors.
            trade_history: Trade dicts, oldest first.
            window: Number of most recent trades to consider.

        Returns:
            ``{'vpin_approx': 0.0, 'toxicity': 0.0}`` when there is no
            classified volume; otherwise those two keys (both equal to
            ``|buy - sell| / (buy + sell)``) plus 'buy_volume', 'sell_volume'
            and 'total_volume'.
        """
        if not trade_history:
            return {'vpin_approx': 0.0, 'toxicity': 0.0}

        recent_trades = trade_history[-window:]

        buy_volume = sum(t['quantity'] for t in recent_trades
                         if t.get('side') == 'bid')
        sell_volume = sum(t['quantity'] for t in recent_trades
                          if t.get('side') == 'ask')

        total = buy_volume + sell_volume
        if total == 0:
            return {'vpin_approx': 0.0, 'toxicity': 0.0}

        # Toxicity = |buy_vol - sell_vol| / total
        vpin = abs(buy_volume - sell_volume) / total

        return {
            'vpin_approx': vpin,
            'toxicity': vpin,
            'buy_volume': buy_volume,
            'sell_volume': sell_volume,
            'total_volume': total
        }

    @staticmethod
    def all_features(book: "LimitOrderBook",
                     book_history: Optional[List["LimitOrderBook"]] = None) -> Dict:
        """Compute the snapshot, depth, queue and large-order features at once.

        Depth-profile keys are prefixed with 'depth_' and queue keys with
        'queue_'. Spread-dynamics keys (prefixed 'spread_dyn_') are added only
        when *book_history* holds at least two books.
        """
        features = {}

        # Basic features
        snapshot = book.get_book_snapshot()
        features['mid_price'] = snapshot['mid_price']
        features['spread'] = snapshot['spread']
        features['spread_bps'] = snapshot['spread_bps']
        features['bid_depth_total'] = snapshot['total_bid_quantity']
        features['ask_depth_total'] = snapshot['total_ask_quantity']
        features['depth_imbalance'] = book.get_order_imbalance()

        # Depth profile
        depth = LOBFeatures.depth_profile(book)
        features.update({f'depth_{k}': v for k, v in depth.items()})

        # Queue features
        queue = LOBFeatures.queue_features(book)
        features.update({f'queue_{k}': v for k, v in queue.items()})

        # Large orders
        large = LOBFeatures.large_order_detection(book)
        features['n_large_orders'] = len(large)
        features['large_order_total_qty'] = sum(o['quantity'] for o in large)

        # Spread dynamics
        if book_history and len(book_history) >= 2:
            dynamics = LOBFeatures.spread_dynamics(book_history)
            features.update({f'spread_dyn_{k}': v for k, v in dynamics.items()})

        return features
479
+
480
+
481
def generate_synthetic_lob_feed(n_messages: int = 1000,
                                base_price: float = 100.0,
                                tick_size: float = 0.01,
                                seed: Optional[int] = 42) -> List[Dict]:
    """Generate a synthetic LOB message feed for testing.

    The feed starts with up to 50 'add' messages that seed the book, followed
    by a random mix of 'add' (50%), 'cancel' (30%) and 'trade' (20%) messages.

    Args:
        n_messages: Total number of messages to produce.
        base_price: Reference price; bids are placed at or below it and asks
            at or above it.
        tick_size: Price increment used for the random offsets.
        seed: Seed for the private random generator. The default reproduces
            the same feed on every call; pass None for a non-deterministic
            feed.

    Returns:
        List of message dicts. Every message has 'type' and 'timestamp'
        (its index / 1000). 'add' messages also carry 'order_id', 'side',
        'price' and 'quantity'; 'cancel' carries 'order_id'; 'trade' carries
        'side' and 'quantity'. A cancel may reference an order that is no
        longer live, and both sides can rest at exactly ``base_price``.
    """
    # Private generator: same stream as seeding the global numpy RNG, but
    # without clobbering the caller's global random state.
    rng = np.random.RandomState(seed)

    messages = []
    order_counter = 0

    def stamp():
        return len(messages) / 1000.0

    # Seed the book, never producing more messages than were requested.
    n_initial = max(0, min(50, n_messages))
    for _ in range(n_initial):
        side = 'bid' if rng.rand() < 0.5 else 'ask'
        price = base_price + rng.randint(-50, 50) * tick_size
        # Clamp so bids never sit above, and asks never below, the base price.
        if side == 'ask':
            price = max(price, base_price)
        else:
            price = min(price, base_price)

        messages.append({
            'type': 'add',
            'order_id': f'order_{order_counter}',
            'side': side,
            'price': round(price, 2),
            'quantity': rng.randint(100, 1000),
            'timestamp': stamp()
        })
        order_counter += 1

    # Generate flowing messages
    for _ in range(n_messages - n_initial):
        msg_type = rng.choice(['add', 'cancel', 'trade'], p=[0.5, 0.3, 0.2])

        if msg_type == 'add':
            side = 'bid' if rng.rand() < 0.5 else 'ask'
            offset = rng.exponential(10) * tick_size
            price = base_price + (offset if side == 'ask' else -offset)
            price = round(max(price, 0.01), 2)

            messages.append({
                'type': 'add',
                'order_id': f'order_{order_counter}',
                'side': side,
                'price': price,
                'quantity': rng.randint(100, 2000),
                'timestamp': stamp()
            })
            order_counter += 1

        elif msg_type == 'cancel' and order_counter > 0:
            # Cancel a random previously issued order id
            messages.append({
                'type': 'cancel',
                'order_id': f'order_{rng.randint(0, order_counter)}',
                'timestamp': stamp()
            })

        else:
            # Trade
            side = 'bid' if rng.rand() < 0.5 else 'ask'
            messages.append({
                'type': 'trade',
                'side': side,
                'quantity': rng.randint(100, 500),
                'timestamp': stamp()
            })

    return messages
548
+
549
+
550
def process_message_feed(messages: List[Dict],
                         snapshot_every: int = 100
                         ) -> Tuple["LimitOrderBook", List[Dict], List[Dict]]:
    """Replay a message feed into a fresh book and collect periodic features.

    Args:
        messages: Feed messages. 'add' needs 'price', 'quantity', 'order_id',
            'side' and 'timestamp'; 'cancel' needs 'order_id'; 'trade' needs
            'side', 'quantity' and 'timestamp'. Other types are ignored.
        snapshot_every: Capture features after every this-many messages.

    Returns:
        ``(book, trades, features_history)``: the final book, one dict per
        trade that actually executed, and one feature dict per snapshot
        (each tagged with the 'timestamp' of the triggering message).

    Raises:
        ValueError: If *snapshot_every* is not positive.
    """
    import copy  # local import: only this function needs it

    if snapshot_every <= 0:
        raise ValueError("snapshot_every must be a positive integer")

    # Enough history for the default window of LOBFeatures.spread_dynamics
    # without keeping every snapshot alive.
    max_history = 10

    book = LimitOrderBook(max_depth=20)
    trades = []
    book_history = []
    features_history = []

    for count, msg in enumerate(messages, start=1):
        msg_type = msg['type']

        if msg_type == 'add':
            book.add_order(OrderBookEntry(
                price=msg['price'],
                quantity=msg['quantity'],
                order_id=msg['order_id'],
                side=msg['side'],
                timestamp=msg['timestamp']
            ))

        elif msg_type == 'cancel':
            book.cancel_order(msg['order_id'])

        elif msg_type == 'trade':
            # execute_trade consumes the side opposite to its argument, so
            # after this flip the liquidity resting on msg['side'] is what
            # gets hit. NOTE(review): confirm this matches the feed's meaning
            # of a trade's 'side'.
            side = 'bid' if msg['side'] == 'ask' else 'ask'
            avg_price, qty = book.execute_trade(side, msg['quantity'], aggressive=True)
            if qty > 0:  # nothing to record when the book had no liquidity
                trades.append({
                    'timestamp': msg['timestamp'],
                    'side': msg['side'],
                    'quantity': qty,
                    'avg_price': avg_price
                })

        # Snapshot on the message count, not on the history length (which
        # would only ever trigger once).
        if count % snapshot_every == 0:
            # Deep copy so later messages do not rewrite earlier snapshots.
            book_history.append(copy.deepcopy(book))
            del book_history[:-max_history]
            features = LOBFeatures.all_features(book, book_history)
            features['timestamp'] = msg['timestamp']
            features_history.append(features)

    return book, trades, features_history
589
+
590
+
591
if __name__ == '__main__':
    # Demo: build a synthetic feed, replay it, and print the resulting book.

    def _fmt(value, spec):
        """Format *value* with *spec*, or 'n/a' when the book side is empty."""
        return format(value, spec) if value is not None else 'n/a'

    print("=" * 70)
    print(" LIMIT ORDER BOOK RECONSTRUCTION")
    print("=" * 70)

    # Generate synthetic data
    messages = generate_synthetic_lob_feed(n_messages=5000)

    # Process
    book, trades, features = process_message_feed(messages)

    # Final snapshot
    snapshot = book.get_book_snapshot(depth=5)

    # mid_price / spread_bps are None when either side is empty, so they go
    # through _fmt instead of a bare format spec.
    print("\nFinal LOB State:")
    print(f" Mid Price: ${_fmt(snapshot['mid_price'], '.2f')}")
    print(f" Spread: {_fmt(snapshot['spread_bps'], '.1f')} bps")
    print(f" Bid Depth: {snapshot['bid_depth']} levels")
    print(f" Ask Depth: {snapshot['ask_depth']} levels")
    print(f" Total Bid Qty: {snapshot['total_bid_quantity']:,}")
    print(f" Total Ask Qty: {snapshot['total_ask_quantity']:,}")
    print(f" Order Imbalance: {book.get_order_imbalance():.3f}")

    # Level 2
    print("\nLevel 2 Book (top 5):")
    levels = LOBFeatures.price_levels(book, n=5)
    print(levels.to_string())

    # Features
    if features:
        print("\nLatest LOB Features:")
        latest = features[-1]
        for k, v in latest.items():
            # Include numpy scalars: several features come from numpy and
            # are not instances of the builtin int.
            if isinstance(v, (int, float, np.integer, np.floating)):
                print(f" {k}: {v:.4f}")

    print(f"\n Trades executed: {len(trades)}")
    print(f" Total messages processed: {len(messages)}")
    print("\n This is what Jane Street sees every microsecond.")
    print(" Order imbalance, queue position, depth profile = PURE ALPHA.")