Syntrex Claude Sonnet 4.6 committed on
Commit
c884cb3
·
1 Parent(s): 2099da5

CockroachDB cutover + XGBoost evaluation fix + requirements cleanup

Browse files

- All 6 runtime tables now CockroachDB-backed via SQLAlchemy (db.py full rewrite, remote_db.py lazy engine)
- XGBoost evaluation pipeline: pd.read_sql(text(...)) read path fix + lineup_slot JOIN fix (xgb_hr_evaluation.py)
- config/settings.py: removed dead DUCKDB_PATH constant
- app.py: updated caption DuckDB → CockroachDB
- requirements.txt: remove duckdb==1.1.2 (unused), add xgboost (required by shadow model)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

app.py CHANGED
@@ -507,7 +507,7 @@ def render_header() -> None:
507
  st.title("⚾ World Baseball Classic Analytics Assistant")
508
  st.caption(
509
  "WBC-first app using official WBC schedule pages, WBC Statcast from Baseball Savant, "
510
- "The Odds API, weather overlays, DuckDB storage, and a modern Streamlit UI."
511
  )
512
  secret_status = []
513
  secret_status.append("ODDS_API_KEY ✓" if ODDS_API_KEY else "ODDS_API_KEY missing")
 
507
  st.title("⚾ World Baseball Classic Analytics Assistant")
508
  st.caption(
509
  "WBC-first app using official WBC schedule pages, WBC Statcast from Baseball Savant, "
510
+ "The Odds API, weather overlays, CockroachDB persistence, and a modern Streamlit UI."
511
  )
512
  secret_status = []
513
  secret_status.append("ODDS_API_KEY ✓" if ODDS_API_KEY else "ODDS_API_KEY missing")
config/settings.py CHANGED
@@ -10,8 +10,6 @@ SCORES_TTL_SECONDS = 8
10
  SCHEDULE_TTL_SECONDS = 300
11
  STATCAST_TTL_SECONDS = 600
12
 
13
- DUCKDB_PATH = "data/wbc.duckdb"
14
-
15
  ENABLE_ENTERPRISE_PROVIDER = False
16
 
17
  # Batch 12.5C: XGBoost shadow inference
 
10
  SCHEDULE_TTL_SECONDS = 300
11
  STATCAST_TTL_SECONDS = 600
12
 
 
 
13
  ENABLE_ENTERPRISE_PROVIDER = False
14
 
15
  # Batch 12.5C: XGBoost shadow inference
database/db.py CHANGED
@@ -1,112 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
- from pathlib import Path
4
- from typing import Iterable, Mapping, Any
5
 
6
- import duckdb
7
  import pandas as pd
 
 
 
 
8
 
9
- from config.settings import DUCKDB_PATH
 
 
10
 
 
11
 
12
- def get_connection() -> duckdb.DuckDBPyConnection:
13
- Path("data").mkdir(parents=True, exist_ok=True)
14
- conn = duckdb.connect(DUCKDB_PATH)
 
 
 
 
 
 
 
 
 
 
15
  initialize_schema(conn)
16
  return conn
17
 
18
 
19
- def initialize_schema(conn: duckdb.DuckDBPyConnection) -> None:
20
- conn.execute(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  """
22
  CREATE TABLE IF NOT EXISTS bets (
23
- bet_id BIGINT,
24
- created_at TIMESTAMP,
25
- sportsbook TEXT,
26
- market TEXT,
27
- selection TEXT,
28
- odds INTEGER,
29
- stake DOUBLE,
30
- result TEXT,
31
- profit DOUBLE,
32
- game_id TEXT,
33
- notes TEXT
34
- );
35
  """
36
- )
37
 
38
- conn.execute(
39
  """
40
  CREATE TABLE IF NOT EXISTS cached_schedule (
41
- fetched_at TIMESTAMP,
42
- game_id TEXT,
43
- game_date TIMESTAMP,
44
- status TEXT,
45
- away_team TEXT,
46
- home_team TEXT,
47
- away_score INTEGER,
48
- home_score INTEGER,
49
- venue TEXT
50
- );
51
  """
52
- )
53
 
54
- conn.execute(
55
  """
56
  CREATE TABLE IF NOT EXISTS cached_odds (
57
- fetched_at TIMESTAMP,
58
- event_id TEXT,
59
- commence_time TIMESTAMP,
60
- home_team TEXT,
61
- away_team TEXT,
62
- sportsbook TEXT,
63
- market_key TEXT,
64
- outcome_name TEXT,
65
- price INTEGER,
66
- point DOUBLE
67
- );
68
  """
69
- )
70
 
71
- conn.execute(
72
  """
73
  CREATE TABLE IF NOT EXISTS cached_weather (
74
- fetched_at TIMESTAMP,
75
- venue_key TEXT,
76
- location_name TEXT,
77
- temperature_f DOUBLE,
78
- humidity INTEGER,
79
- wind_speed_mph DOUBLE,
80
- wind_deg INTEGER,
81
- description TEXT
82
- );
83
  """
84
- )
 
85
 
 
 
 
86
 
87
  def upsert_dataframe(
88
- conn: duckdb.DuckDBPyConnection,
89
  table_name: str,
90
  df: pd.DataFrame,
91
  replace: bool = True,
92
  ) -> None:
93
- if df.empty:
94
  return
95
-
96
- temp_name = f"tmp_{table_name}"
97
- conn.register(temp_name, df)
98
  if replace:
99
- conn.execute(f"DELETE FROM {table_name}")
100
- conn.execute(f"INSERT INTO {table_name} SELECT * FROM {temp_name}")
101
- conn.unregister(temp_name)
102
 
103
 
104
- def read_table(conn: duckdb.DuckDBPyConnection, table_name: str) -> pd.DataFrame:
105
- return conn.execute(f"SELECT * FROM {table_name}").df()
106
 
107
 
 
 
 
 
108
  def insert_bet(
109
- conn: duckdb.DuckDBPyConnection,
110
  bet_id: int,
111
  created_at: str,
112
  sportsbook: str,
@@ -120,27 +190,56 @@ def insert_bet(
120
  notes: str,
121
  ) -> None:
122
  conn.execute(
123
- """
124
- INSERT INTO bets (
125
- bet_id, created_at, sportsbook, market, selection, odds, stake,
126
- result, profit, game_id, notes
127
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
128
- """,
129
- [
130
- bet_id,
131
- created_at,
132
- sportsbook,
133
- market,
134
- selection,
135
- odds,
136
- stake,
137
- result,
138
- profit,
139
- game_id,
140
- notes,
141
- ],
 
 
 
 
 
142
  )
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  def read_recommendation_audit_view(conn) -> pd.DataFrame:
145
  ensure_recommendation_logs_table(conn)
146
  ensure_recommendation_outcomes_table(conn)
@@ -192,31 +291,36 @@ def read_recommendation_audit_view(conn) -> pd.DataFrame:
192
  AND l.slot = o.slot
193
  ORDER BY l.created_at DESC
194
  """
195
- return conn.execute(query).df()
 
 
 
 
 
196
 
197
  def ensure_recommendation_outcomes_table(conn) -> None:
198
- conn.execute(
199
  """
200
  CREATE TABLE IF NOT EXISTS recommendation_outcomes (
201
- created_at TEXT,
202
- game_pk TEXT,
203
- away_team TEXT,
204
- home_team TEXT,
205
- batter_name TEXT,
206
- slot TEXT,
207
- market TEXT,
208
- realized_hit INTEGER,
209
- realized_hr INTEGER,
210
- realized_tb2p INTEGER,
211
- graded_at TEXT,
212
  outcome_source TEXT,
213
- lineup_slot TEXT
214
  )
215
  """
216
- )
217
 
218
  try:
219
- conn.execute("ALTER TABLE recommendation_outcomes ADD COLUMN lineup_slot TEXT")
220
  except Exception:
221
  pass # Column already exists
222
 
@@ -224,75 +328,68 @@ def ensure_recommendation_outcomes_table(conn) -> None:
224
  def insert_recommendation_outcomes(conn, df: pd.DataFrame) -> None:
225
  if df is None or df.empty:
226
  return
227
-
228
  ensure_recommendation_outcomes_table(conn)
229
- conn.register("recommendation_outcomes_df", df)
230
- conn.execute(
231
- """
232
- INSERT INTO recommendation_outcomes
233
- SELECT * FROM recommendation_outcomes_df
234
- """
235
- )
236
- conn.unregister("recommendation_outcomes_df")
237
 
238
- def next_bet_id(conn: duckdb.DuckDBPyConnection) -> int:
239
- value = conn.execute("SELECT COALESCE(MAX(bet_id), 0) + 1 FROM bets").fetchone()[0]
240
- return int(value)
241
 
242
  def ensure_recommendation_logs_table(conn) -> None:
243
- conn.execute(
244
  """
245
  CREATE TABLE IF NOT EXISTS recommendation_logs (
246
- created_at TEXT,
247
- game_pk TEXT,
248
- away_team TEXT,
249
- home_team TEXT,
250
- status TEXT,
251
- slot TEXT,
252
- batter_name TEXT,
253
- pitcher_name TEXT,
254
- ev90 DOUBLE,
255
- hit_prob DOUBLE,
256
- hr_prob DOUBLE,
257
- tb2p_prob DOUBLE,
258
- fair_hit_odds DOUBLE,
259
- fair_hr_odds DOUBLE,
260
- fair_tb2p_odds DOUBLE,
261
- book_hit_odds DOUBLE,
262
- book_hr_odds DOUBLE,
263
- book_tb2p_odds DOUBLE,
264
- hit_edge DOUBLE,
265
- hr_edge DOUBLE,
266
- tb2p_edge DOUBLE,
267
- adjusted_edge DOUBLE,
268
- hit_bet_ev DOUBLE,
269
- hr_bet_ev DOUBLE,
270
- tb2p_bet_ev DOUBLE,
271
- confidence DOUBLE,
272
- confidence_bucket TEXT,
273
- recommendation_tier TEXT,
274
- priority_score DOUBLE,
275
- reason_tags TEXT,
276
- starter_stays_next_batter_prob DOUBLE,
277
- starter_stays_next_inning_prob DOUBLE,
278
- bullpen_entry_prob DOUBLE,
279
- xgb_hr_delta DOUBLE,
280
- xgb_hr_adjusted DOUBLE,
281
- xgb_shadow_active BOOLEAN,
282
- lineup_slot TEXT
283
  )
284
  """
285
- )
286
 
287
- # Migrate existing tables missing shadow columns
288
  for _col, _dtype in [
289
- ("xgb_hr_delta", "DOUBLE"),
290
- ("xgb_hr_adjusted", "DOUBLE"),
291
  ("xgb_shadow_active", "BOOLEAN"),
292
  ("lineup_slot", "TEXT"),
293
  ]:
294
  try:
295
- conn.execute(f"ALTER TABLE recommendation_logs ADD COLUMN {_col} {_dtype}")
296
  except Exception:
297
  pass # Column already exists
298
 
@@ -300,142 +397,107 @@ def ensure_recommendation_logs_table(conn) -> None:
300
  def insert_recommendation_logs(conn, df: pd.DataFrame) -> None:
301
  if df is None or df.empty:
302
  return
303
-
304
  ensure_recommendation_logs_table(conn)
305
- conn.register("recommendation_logs_df", df)
306
- conn.execute(
307
- """
308
- INSERT INTO recommendation_logs
309
- SELECT * FROM recommendation_logs_df
310
- """
311
- )
312
- conn.unregister("recommendation_logs_df")
313
 
314
- def update_bet_result(
315
- conn: duckdb.DuckDBPyConnection,
316
- bet_id: int,
317
- result: str,
318
- profit: float,
319
- ) -> None:
320
- conn.execute(
321
- """
322
- UPDATE bets
323
- SET result = ?, profit = ?
324
- WHERE bet_id = ?
325
- """,
326
- [result, profit, bet_id],
327
- )
328
 
329
  def ensure_game_outcomes_table(conn) -> None:
330
- conn.execute(
331
  """
332
  CREATE TABLE IF NOT EXISTS game_outcomes (
333
- graded_at TEXT,
334
- game_pk TEXT,
335
- away_team TEXT,
336
- home_team TEXT,
337
- away_score INTEGER,
338
- home_score INTEGER,
339
- status TEXT,
340
  outcome_source TEXT
341
  )
342
  """
343
- )
344
 
345
 
346
  def insert_game_outcomes(conn, df: pd.DataFrame) -> None:
347
  if df is None or df.empty:
348
  return
349
-
350
  ensure_game_outcomes_table(conn)
351
- conn.register("game_outcomes_df", df)
352
- conn.execute(
353
- """
354
- INSERT INTO game_outcomes
355
- SELECT * FROM game_outcomes_df
356
- """
357
- )
358
- conn.unregister("game_outcomes_df")
359
 
360
  def read_game_outcomes(conn) -> pd.DataFrame:
361
  ensure_game_outcomes_table(conn)
362
- return conn.execute(
363
- """
364
- SELECT *
365
- FROM game_outcomes
366
- ORDER BY graded_at DESC
367
- """
368
- ).df()
 
 
369
 
370
  def ensure_batter_prop_outcomes_table(conn) -> None:
371
- conn.execute(
372
  """
373
  CREATE TABLE IF NOT EXISTS batter_prop_outcomes (
374
- created_at TEXT,
375
- graded_at TEXT,
376
- game_pk TEXT,
377
- away_team TEXT,
378
- home_team TEXT,
379
- slot TEXT,
380
- batter_name TEXT,
381
- pitcher_name TEXT,
382
- market TEXT,
383
- fair_hr_odds DOUBLE,
384
- book_hr_odds DOUBLE,
385
- adjusted_edge DOUBLE,
386
- confidence DOUBLE,
387
  recommendation_tier TEXT,
388
- realized_hit INTEGER,
389
- realized_hr INTEGER,
390
- realized_tb2p INTEGER,
391
- grade_status TEXT,
392
- outcome_source TEXT
393
  )
394
  """
395
- )
396
 
397
 
398
  def insert_batter_prop_outcomes(conn, df: pd.DataFrame) -> None:
399
  if df is None or df.empty:
400
  return
401
-
402
  ensure_batter_prop_outcomes_table(conn)
403
- conn.register("batter_prop_outcomes_df", df)
404
- conn.execute(
405
- """
406
- INSERT INTO batter_prop_outcomes
407
- SELECT * FROM batter_prop_outcomes_df
408
- """
409
- )
410
- conn.unregister("batter_prop_outcomes_df")
411
 
412
  def read_batter_prop_outcomes(conn) -> pd.DataFrame:
413
  ensure_batter_prop_outcomes_table(conn)
414
- return conn.execute(
415
- """
416
- SELECT *
417
- FROM batter_prop_outcomes
418
- ORDER BY graded_at DESC, created_at DESC
419
- """
420
- ).df()
421
 
422
  def replace_batter_prop_outcomes(conn, df: pd.DataFrame) -> None:
423
  if df is None or df.empty:
424
  return
425
-
426
  ensure_batter_prop_outcomes_table(conn)
427
- conn.execute("DELETE FROM batter_prop_outcomes")
428
- conn.register("batter_prop_outcomes_replace_df", df)
429
- conn.execute(
430
- """
431
- INSERT INTO batter_prop_outcomes
432
- SELECT * FROM batter_prop_outcomes_replace_df
433
- """
434
- )
435
- conn.unregister("batter_prop_outcomes_replace_df")
436
 
437
  def ensure_upcoming_hr_props_table(conn) -> None:
438
- conn.execute(
439
  """
440
  CREATE TABLE IF NOT EXISTS upcoming_hr_props (
441
  fetched_at TEXT,
@@ -448,20 +510,21 @@ def ensure_upcoming_hr_props_table(conn) -> None:
448
  player_name_raw TEXT,
449
  player_name TEXT,
450
  odds_american INTEGER,
451
- line DOUBLE,
452
- implied_prob DOUBLE,
453
- model_hr_prob DOUBLE,
454
  model_hr_prob_source TEXT,
455
- edge DOUBLE
456
  )
457
  """
458
- )
 
459
  for _col, _dtype in [
460
  ("model_hr_prob_source", "TEXT"),
461
- ("edge", "DOUBLE"),
462
  ]:
463
  try:
464
- conn.execute(f"ALTER TABLE upcoming_hr_props ADD COLUMN {_col} {_dtype}")
465
  except Exception:
466
  pass # Column already exists
467
 
@@ -470,27 +533,28 @@ def insert_upcoming_hr_props(conn, df: pd.DataFrame) -> None:
470
  if df is None or df.empty:
471
  return
472
  ensure_upcoming_hr_props_table(conn)
473
- conn.register("upcoming_hr_props_df", df)
474
- conn.execute(
475
- """
476
- INSERT INTO upcoming_hr_props
477
- SELECT
478
- fetched_at, event_id, commence_time, away_team, home_team,
479
- sportsbook, market, player_name_raw, player_name,
480
- odds_american, line, implied_prob, model_hr_prob,
481
- model_hr_prob_source, edge
482
- FROM upcoming_hr_props_df
483
- """
484
- )
485
- conn.unregister("upcoming_hr_props_df")
486
 
487
 
488
  def read_upcoming_hr_props(conn) -> pd.DataFrame:
489
  ensure_upcoming_hr_props_table(conn)
490
- return conn.execute(
491
- "SELECT * FROM upcoming_hr_props ORDER BY fetched_at DESC"
492
- ).df()
 
 
493
 
 
 
 
494
 
495
  def read_batter_prop_audit_view(conn) -> pd.DataFrame:
496
  ensure_batter_prop_outcomes_table(conn)
@@ -519,4 +583,4 @@ def read_batter_prop_audit_view(conn) -> pd.DataFrame:
519
  FROM batter_prop_outcomes
520
  ORDER BY graded_at DESC, created_at DESC
521
  """
522
- return conn.execute(query).df()
 
1
+ """
2
+ database/db.py
3
+
4
+ CockroachDB-backed persistence layer (cutover from DuckDB).
5
+
6
+ All table management for the active runtime tables:
7
+ recommendation_logs, recommendation_outcomes, upcoming_hr_props,
8
+ game_outcomes, batter_prop_outcomes, bets,
9
+ cached_schedule, cached_odds, cached_weather (schema artifacts, no active inserts)
10
+
11
+ Public API is unchanged — all callers import from this module.
12
+ Connection is a long-lived SQLAlchemy connection opened with AUTOCOMMIT isolation
13
+ (matches DuckDB's default auto-commit-per-statement behavior).
14
+
15
+ Bulk inserts are chunked at _INSERT_CHUNK_SIZE rows per execute call to prevent
16
+ large single-payload issues in CockroachDB. SQLAlchemy passes list-of-dicts to
17
+ execute() as executemany — no Python-level per-row loops.
18
+
19
+ No primary keys or unique constraints are added in this batch. Schema is designed
20
+ so they can be added later without structural changes.
21
+ """
22
+
23
  from __future__ import annotations
24
 
25
+ from typing import Any, Iterable, Mapping
 
26
 
 
27
  import pandas as pd
28
+ from sqlalchemy import text
29
+
30
+ from database import remote_db
31
+
32
 
33
+ # ---------------------------------------------------------------------------
34
+ # Chunk size for bulk inserts
35
+ # ---------------------------------------------------------------------------
36
 
37
+ _INSERT_CHUNK_SIZE = 500
38
 
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Connection
42
+ # ---------------------------------------------------------------------------
43
+
44
+ def get_connection():
45
+ """
46
+ Returns a CockroachDB SQLAlchemy connection with AUTOCOMMIT isolation.
47
+ Schema is initialized on every new connection (idempotent CREATE IF NOT EXISTS).
48
+ The connection is long-lived (module-level in app.py); the pool handles
49
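+ stale connection detection via pool_pre_ping=True and pool_recycle=300 (both take effect only when a connection is checked out of the pool, not while this one is held open).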
50
+ """
51
+ conn = remote_db.get_connection().execution_options(isolation_level="AUTOCOMMIT")
52
  initialize_schema(conn)
53
  return conn
54
 
55
 
56
+ # ---------------------------------------------------------------------------
57
+ # Private helpers
58
+ # ---------------------------------------------------------------------------
59
+
60
+ def _bulk_insert(conn, table: str, df: pd.DataFrame) -> None:
61
+ """
62
+ Insert all rows from df into table using chunked executemany.
63
+ Column names in df must match table column names exactly.
64
+ NaN values are converted to None (SQL NULL) before insertion.
65
+ """
66
+ if df is None or df.empty:
67
+ return
68
+ cols = list(df.columns)
69
+ col_list = ", ".join(cols)
70
+ placeholders = ", ".join(f":{c}" for c in cols)
71
+ sql = text(f"INSERT INTO {table} ({col_list}) VALUES ({placeholders})")
72
+ records = df.where(df.notna(), other=None).to_dict("records")
73
+ for i in range(0, len(records), _INSERT_CHUNK_SIZE):
74
+ conn.execute(sql, records[i : i + _INSERT_CHUNK_SIZE])
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # Schema initialization
79
+ # ---------------------------------------------------------------------------
80
+
81
+ def initialize_schema(conn) -> None:
82
+ """
83
+ Create base tables if they do not exist.
84
+ All DDL is idempotent (CREATE TABLE IF NOT EXISTS).
85
+ """
86
+ conn.execute(text(
87
  """
88
  CREATE TABLE IF NOT EXISTS bets (
89
+ bet_id BIGINT,
90
+ created_at TEXT,
91
+ sportsbook TEXT,
92
+ market TEXT,
93
+ selection TEXT,
94
+ odds INTEGER,
95
+ stake DOUBLE PRECISION,
96
+ result TEXT,
97
+ profit DOUBLE PRECISION,
98
+ game_id TEXT,
99
+ notes TEXT
100
+ )
101
  """
102
+ ))
103
 
104
+ conn.execute(text(
105
  """
106
  CREATE TABLE IF NOT EXISTS cached_schedule (
107
+ fetched_at TEXT,
108
+ game_id TEXT,
109
+ game_date TEXT,
110
+ status TEXT,
111
+ away_team TEXT,
112
+ home_team TEXT,
113
+ away_score INTEGER,
114
+ home_score INTEGER,
115
+ venue TEXT
116
+ )
117
  """
118
+ ))
119
 
120
+ conn.execute(text(
121
  """
122
  CREATE TABLE IF NOT EXISTS cached_odds (
123
+ fetched_at TEXT,
124
+ event_id TEXT,
125
+ commence_time TEXT,
126
+ home_team TEXT,
127
+ away_team TEXT,
128
+ sportsbook TEXT,
129
+ market_key TEXT,
130
+ outcome_name TEXT,
131
+ price INTEGER,
132
+ point DOUBLE PRECISION
133
+ )
134
  """
135
+ ))
136
 
137
+ conn.execute(text(
138
  """
139
  CREATE TABLE IF NOT EXISTS cached_weather (
140
+ fetched_at TEXT,
141
+ venue_key TEXT,
142
+ location_name TEXT,
143
+ temperature_f DOUBLE PRECISION,
144
+ humidity INTEGER,
145
+ wind_speed_mph DOUBLE PRECISION,
146
+ wind_deg INTEGER,
147
+ description TEXT
148
+ )
149
  """
150
+ ))
151
+
152
 
153
+ # ---------------------------------------------------------------------------
154
+ # Generic helpers
155
+ # ---------------------------------------------------------------------------
156
 
157
  def upsert_dataframe(
158
+ conn,
159
  table_name: str,
160
  df: pd.DataFrame,
161
  replace: bool = True,
162
  ) -> None:
163
+ if df is None or df.empty:
164
  return
 
 
 
165
  if replace:
166
+ conn.execute(text(f"DELETE FROM {table_name}"))
167
+ _bulk_insert(conn, table_name, df)
 
168
 
169
 
170
+ def read_table(conn, table_name: str) -> pd.DataFrame:
171
+ return pd.read_sql(text(f"SELECT * FROM {table_name}"), conn)
172
 
173
 
174
+ # ---------------------------------------------------------------------------
175
+ # Bets
176
+ # ---------------------------------------------------------------------------
177
+
178
  def insert_bet(
179
+ conn,
180
  bet_id: int,
181
  created_at: str,
182
  sportsbook: str,
 
190
  notes: str,
191
  ) -> None:
192
  conn.execute(
193
+ text(
194
+ """
195
+ INSERT INTO bets (
196
+ bet_id, created_at, sportsbook, market, selection, odds, stake,
197
+ result, profit, game_id, notes
198
+ ) VALUES (
199
+ :bet_id, :created_at, :sportsbook, :market, :selection, :odds, :stake,
200
+ :result, :profit, :game_id, :notes
201
+ )
202
+ """
203
+ ),
204
+ {
205
+ "bet_id": bet_id,
206
+ "created_at": created_at,
207
+ "sportsbook": sportsbook,
208
+ "market": market,
209
+ "selection": selection,
210
+ "odds": odds,
211
+ "stake": stake,
212
+ "result": result,
213
+ "profit": profit,
214
+ "game_id": game_id,
215
+ "notes": notes,
216
+ },
217
  )
218
 
219
+
220
+ def next_bet_id(conn) -> int:
221
+ return int(
222
+ conn.execute(text("SELECT COALESCE(MAX(bet_id), 0) + 1 FROM bets")).scalar()
223
+ )
224
+
225
+
226
+ def update_bet_result(conn, bet_id: int, result: str, profit: float) -> None:
227
+ conn.execute(
228
+ text(
229
+ """
230
+ UPDATE bets
231
+ SET result = :result, profit = :profit
232
+ WHERE bet_id = :bet_id
233
+ """
234
+ ),
235
+ {"result": result, "profit": profit, "bet_id": bet_id},
236
+ )
237
+
238
+
239
+ # ---------------------------------------------------------------------------
240
+ # Recommendation audit view
241
+ # ---------------------------------------------------------------------------
242
+
243
  def read_recommendation_audit_view(conn) -> pd.DataFrame:
244
  ensure_recommendation_logs_table(conn)
245
  ensure_recommendation_outcomes_table(conn)
 
291
  AND l.slot = o.slot
292
  ORDER BY l.created_at DESC
293
  """
294
+ return pd.read_sql(text(query), conn)
295
+
296
+
297
+ # ---------------------------------------------------------------------------
298
+ # Recommendation outcomes
299
+ # ---------------------------------------------------------------------------
300
 
301
  def ensure_recommendation_outcomes_table(conn) -> None:
302
+ conn.execute(text(
303
  """
304
  CREATE TABLE IF NOT EXISTS recommendation_outcomes (
305
+ created_at TEXT,
306
+ game_pk TEXT,
307
+ away_team TEXT,
308
+ home_team TEXT,
309
+ batter_name TEXT,
310
+ slot TEXT,
311
+ market TEXT,
312
+ realized_hit INTEGER,
313
+ realized_hr INTEGER,
314
+ realized_tb2p INTEGER,
315
+ graded_at TEXT,
316
  outcome_source TEXT,
317
+ lineup_slot TEXT
318
  )
319
  """
320
+ ))
321
 
322
  try:
323
+ conn.execute(text("ALTER TABLE recommendation_outcomes ADD COLUMN lineup_slot TEXT"))
324
  except Exception:
325
  pass # Column already exists
326
 
 
328
  def insert_recommendation_outcomes(conn, df: pd.DataFrame) -> None:
329
  if df is None or df.empty:
330
  return
 
331
  ensure_recommendation_outcomes_table(conn)
332
+ _bulk_insert(conn, "recommendation_outcomes", df)
333
+
 
 
 
 
 
 
334
 
335
+ # ---------------------------------------------------------------------------
336
+ # Recommendation logs
337
+ # ---------------------------------------------------------------------------
338
 
339
  def ensure_recommendation_logs_table(conn) -> None:
340
+ conn.execute(text(
341
  """
342
  CREATE TABLE IF NOT EXISTS recommendation_logs (
343
+ created_at TEXT,
344
+ game_pk TEXT,
345
+ away_team TEXT,
346
+ home_team TEXT,
347
+ status TEXT,
348
+ slot TEXT,
349
+ batter_name TEXT,
350
+ pitcher_name TEXT,
351
+ ev90 DOUBLE PRECISION,
352
+ hit_prob DOUBLE PRECISION,
353
+ hr_prob DOUBLE PRECISION,
354
+ tb2p_prob DOUBLE PRECISION,
355
+ fair_hit_odds DOUBLE PRECISION,
356
+ fair_hr_odds DOUBLE PRECISION,
357
+ fair_tb2p_odds DOUBLE PRECISION,
358
+ book_hit_odds DOUBLE PRECISION,
359
+ book_hr_odds DOUBLE PRECISION,
360
+ book_tb2p_odds DOUBLE PRECISION,
361
+ hit_edge DOUBLE PRECISION,
362
+ hr_edge DOUBLE PRECISION,
363
+ tb2p_edge DOUBLE PRECISION,
364
+ adjusted_edge DOUBLE PRECISION,
365
+ hit_bet_ev DOUBLE PRECISION,
366
+ hr_bet_ev DOUBLE PRECISION,
367
+ tb2p_bet_ev DOUBLE PRECISION,
368
+ confidence DOUBLE PRECISION,
369
+ confidence_bucket TEXT,
370
+ recommendation_tier TEXT,
371
+ priority_score DOUBLE PRECISION,
372
+ reason_tags TEXT,
373
+ starter_stays_next_batter_prob DOUBLE PRECISION,
374
+ starter_stays_next_inning_prob DOUBLE PRECISION,
375
+ bullpen_entry_prob DOUBLE PRECISION,
376
+ xgb_hr_delta DOUBLE PRECISION,
377
+ xgb_hr_adjusted DOUBLE PRECISION,
378
+ xgb_shadow_active BOOLEAN,
379
+ lineup_slot TEXT
380
  )
381
  """
382
+ ))
383
 
384
+ # Safe migration: add columns missing from older schema
385
  for _col, _dtype in [
386
+ ("xgb_hr_delta", "DOUBLE PRECISION"),
387
+ ("xgb_hr_adjusted", "DOUBLE PRECISION"),
388
  ("xgb_shadow_active", "BOOLEAN"),
389
  ("lineup_slot", "TEXT"),
390
  ]:
391
  try:
392
+ conn.execute(text(f"ALTER TABLE recommendation_logs ADD COLUMN {_col} {_dtype}"))
393
  except Exception:
394
  pass # Column already exists
395
 
 
397
  def insert_recommendation_logs(conn, df: pd.DataFrame) -> None:
398
  if df is None or df.empty:
399
  return
 
400
  ensure_recommendation_logs_table(conn)
401
+ _bulk_insert(conn, "recommendation_logs", df)
 
 
 
 
 
 
 
402
 
403
+
404
+ # ---------------------------------------------------------------------------
405
+ # Game outcomes
406
+ # ---------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
407
 
408
  def ensure_game_outcomes_table(conn) -> None:
409
+ conn.execute(text(
410
  """
411
  CREATE TABLE IF NOT EXISTS game_outcomes (
412
+ graded_at TEXT,
413
+ game_pk TEXT,
414
+ away_team TEXT,
415
+ home_team TEXT,
416
+ away_score INTEGER,
417
+ home_score INTEGER,
418
+ status TEXT,
419
  outcome_source TEXT
420
  )
421
  """
422
+ ))
423
 
424
 
425
  def insert_game_outcomes(conn, df: pd.DataFrame) -> None:
426
  if df is None or df.empty:
427
  return
 
428
  ensure_game_outcomes_table(conn)
429
+ _bulk_insert(conn, "game_outcomes", df)
430
+
 
 
 
 
 
 
431
 
432
  def read_game_outcomes(conn) -> pd.DataFrame:
433
  ensure_game_outcomes_table(conn)
434
+ return pd.read_sql(
435
+ text("SELECT * FROM game_outcomes ORDER BY graded_at DESC"),
436
+ conn,
437
+ )
438
+
439
+
440
+ # ---------------------------------------------------------------------------
441
+ # Batter prop outcomes
442
+ # ---------------------------------------------------------------------------
443
 
444
  def ensure_batter_prop_outcomes_table(conn) -> None:
445
+ conn.execute(text(
446
  """
447
  CREATE TABLE IF NOT EXISTS batter_prop_outcomes (
448
+ created_at TEXT,
449
+ graded_at TEXT,
450
+ game_pk TEXT,
451
+ away_team TEXT,
452
+ home_team TEXT,
453
+ slot TEXT,
454
+ batter_name TEXT,
455
+ pitcher_name TEXT,
456
+ market TEXT,
457
+ fair_hr_odds DOUBLE PRECISION,
458
+ book_hr_odds DOUBLE PRECISION,
459
+ adjusted_edge DOUBLE PRECISION,
460
+ confidence DOUBLE PRECISION,
461
  recommendation_tier TEXT,
462
+ realized_hit INTEGER,
463
+ realized_hr INTEGER,
464
+ realized_tb2p INTEGER,
465
+ grade_status TEXT,
466
+ outcome_source TEXT
467
  )
468
  """
469
+ ))
470
 
471
 
472
  def insert_batter_prop_outcomes(conn, df: pd.DataFrame) -> None:
473
  if df is None or df.empty:
474
  return
 
475
  ensure_batter_prop_outcomes_table(conn)
476
+ _bulk_insert(conn, "batter_prop_outcomes", df)
477
+
 
 
 
 
 
 
478
 
479
  def read_batter_prop_outcomes(conn) -> pd.DataFrame:
480
  ensure_batter_prop_outcomes_table(conn)
481
+ return pd.read_sql(
482
+ text("SELECT * FROM batter_prop_outcomes ORDER BY graded_at DESC, created_at DESC"),
483
+ conn,
484
+ )
485
+
 
 
486
 
487
  def replace_batter_prop_outcomes(conn, df: pd.DataFrame) -> None:
488
  if df is None or df.empty:
489
  return
 
490
  ensure_batter_prop_outcomes_table(conn)
491
+ conn.execute(text("DELETE FROM batter_prop_outcomes"))
492
+ _bulk_insert(conn, "batter_prop_outcomes", df)
493
+
494
+
495
+ # ---------------------------------------------------------------------------
496
+ # Upcoming HR props
497
+ # ---------------------------------------------------------------------------
 
 
498
 
499
  def ensure_upcoming_hr_props_table(conn) -> None:
500
+ conn.execute(text(
501
  """
502
  CREATE TABLE IF NOT EXISTS upcoming_hr_props (
503
  fetched_at TEXT,
 
510
  player_name_raw TEXT,
511
  player_name TEXT,
512
  odds_american INTEGER,
513
+ line DOUBLE PRECISION,
514
+ implied_prob DOUBLE PRECISION,
515
+ model_hr_prob DOUBLE PRECISION,
516
  model_hr_prob_source TEXT,
517
+ edge DOUBLE PRECISION
518
  )
519
  """
520
+ ))
521
+
522
  for _col, _dtype in [
523
  ("model_hr_prob_source", "TEXT"),
524
+ ("edge", "DOUBLE PRECISION"),
525
  ]:
526
  try:
527
+ conn.execute(text(f"ALTER TABLE upcoming_hr_props ADD COLUMN {_col} {_dtype}"))
528
  except Exception:
529
  pass # Column already exists
530
 
 
533
  if df is None or df.empty:
534
  return
535
  ensure_upcoming_hr_props_table(conn)
536
+ # Select only the expected columns in the correct order
537
+ log_cols = [
538
+ "fetched_at", "event_id", "commence_time", "away_team", "home_team",
539
+ "sportsbook", "market", "player_name_raw", "player_name",
540
+ "odds_american", "line", "implied_prob", "model_hr_prob",
541
+ "model_hr_prob_source", "edge",
542
+ ]
543
+ present = [c for c in log_cols if c in df.columns]
544
+ _bulk_insert(conn, "upcoming_hr_props", df[present])
 
 
 
 
545
 
546
 
547
  def read_upcoming_hr_props(conn) -> pd.DataFrame:
548
  ensure_upcoming_hr_props_table(conn)
549
+ return pd.read_sql(
550
+ text("SELECT * FROM upcoming_hr_props ORDER BY fetched_at DESC"),
551
+ conn,
552
+ )
553
+
554
 
555
+ # ---------------------------------------------------------------------------
556
+ # Batter prop audit view
557
+ # ---------------------------------------------------------------------------
558
 
559
  def read_batter_prop_audit_view(conn) -> pd.DataFrame:
560
  ensure_batter_prop_outcomes_table(conn)
 
583
  FROM batter_prop_outcomes
584
  ORDER BY graded_at DESC, created_at DESC
585
  """
586
+ return pd.read_sql(text(query), conn)
database/remote_db.py CHANGED
@@ -4,24 +4,36 @@ import os
4
 
5
  from sqlalchemy import create_engine
6
 
 
 
 
 
 
 
7
 
8
- DATABASE_URL = os.getenv("DATABASE_URL")
9
 
10
- if not DATABASE_URL:
11
- raise RuntimeError("DATABASE_URL is not set")
12
 
13
- DATABASE_URL = DATABASE_URL.strip()
 
 
 
14
 
15
- # Cockroach needs the cockroachdb dialect for SQLAlchemy
16
- if DATABASE_URL.startswith("postgresql://"):
17
- DATABASE_URL = "cockroachdb://" + DATABASE_URL[len("postgresql://"):]
18
 
19
- engine = create_engine(
20
- DATABASE_URL,
21
- pool_pre_ping=True,
22
- pool_recycle=300,
23
- )
 
 
 
 
 
24
 
25
 
26
  def get_connection():
27
- return engine.connect()
 
4
 
5
  from sqlalchemy import create_engine
6
 
7
+ # ---------------------------------------------------------------------------
8
+ # Lazy engine — built on first get_connection() call, not at import time.
9
+ # This allows db.py (and other modules) to import remote_db safely even when
10
+ # DATABASE_URL is not yet in the environment at import time.
11
+ # RuntimeError is raised when get_connection() is first called without a URL.
12
+ # ---------------------------------------------------------------------------
13
 
14
+ _engine = None
15
 
 
 
16
 
17
+ def _get_engine():
18
+ global _engine
19
+ if _engine is not None:
20
+ return _engine
21
 
22
+ database_url = os.getenv("DATABASE_URL", "").strip()
23
+ if not database_url:
24
+ raise RuntimeError("DATABASE_URL is not set")
25
 
26
+ # CockroachDB requires the cockroachdb:// dialect for SQLAlchemy
27
+ if database_url.startswith("postgresql://"):
28
+ database_url = "cockroachdb://" + database_url[len("postgresql://"):]
29
+
30
+ _engine = create_engine(
31
+ database_url,
32
+ pool_pre_ping=True,
33
+ pool_recycle=300,
34
+ )
35
+ return _engine
36
 
37
 
38
  def get_connection():
39
+ return _get_engine().connect()
evaluation/xgb_hr_evaluation.py CHANGED
@@ -10,6 +10,7 @@ READ-ONLY relative to the simulator and production probabilities.
10
  from __future__ import annotations
11
 
12
  import pandas as pd
 
13
 
14
 
15
  # ---------------------------------------------------------------------------
@@ -50,8 +51,7 @@ def _load_evaluation_df(conn) -> pd.DataFrame:
50
  AND ro.realized_hr IS NOT NULL
51
  """
52
  try:
53
- df = conn.execute(query).df()
54
- return df
55
  except Exception:
56
  return pd.DataFrame()
57
 
 
10
  from __future__ import annotations
11
 
12
  import pandas as pd
13
+ from sqlalchemy import text
14
 
15
 
16
  # ---------------------------------------------------------------------------
 
51
  AND ro.realized_hr IS NOT NULL
52
  """
53
  try:
54
+ return pd.read_sql(text(query), conn)
 
55
  except Exception:
56
  return pd.DataFrame()
57
 
requirements.txt CHANGED
@@ -3,7 +3,7 @@ pandas==2.2.3
3
  numpy==2.1.2
4
  plotly==5.24.1
5
  requests==2.32.3
6
- duckdb==1.1.2
7
  pyarrow==18.0.0
8
  scikit-learn==1.5.2
9
  torch==2.4.1
 
3
  numpy==2.1.2
4
  plotly==5.24.1
5
  requests==2.32.3
6
+ xgboost
7
  pyarrow==18.0.0
8
  scikit-learn==1.5.2
9
  torch==2.4.1