Syntrex Claude Sonnet 4.6 committed on
Commit
dca8dd7
·
1 Parent(s): a9f2b76

Post-patch cleanup: statcast season derivation + platoon lower bounds

Browse files

- statcast.py: derive season from start_date instead of hardcoding "2026"
- live_fair_simulator_v3.py: add max() lower bounds to opposite-hand platoon path

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. data/statcast.py +4 -16
  2. models/live_fair_simulator_v3.py +131 -112
data/statcast.py CHANGED
@@ -1,5 +1,6 @@
1
  from __future__ import annotations
2
 
 
3
  from io import StringIO
4
 
5
  import pandas as pd
@@ -72,22 +73,9 @@ def _query_statcast(start_date: str, end_date: str, season: str) -> pd.DataFrame
72
 
73
 
74
  def fetch_statcast_range(start_date: str, end_date: str) -> pd.DataFrame:
75
- """
76
- WBC-first pull.
77
- Try current 2026 range first.
78
- If that returns nothing, fall back to 2023 WBC historical window so the
79
- analytics tabs still show real WBC Statcast data instead of blank charts.
80
- """
81
- current_df = _query_statcast(start_date, end_date, season="2026")
82
- if not current_df.empty:
83
- return current_df
84
-
85
- # Historical WBC fallback: real WBC data, not mock data
86
- fallback_df = _query_statcast("2023-03-07", "2023-03-23", season="2023")
87
- if not fallback_df.empty:
88
- return fallback_df
89
-
90
- return pd.DataFrame()
91
 
92
 
93
  def normalize_statcast(df: pd.DataFrame) -> pd.DataFrame:
 
1
  from __future__ import annotations
2
 
3
+ from datetime import datetime
4
  from io import StringIO
5
 
6
  import pandas as pd
 
73
 
74
 
75
  def fetch_statcast_range(start_date: str, end_date: str) -> pd.DataFrame:
76
+ """Fetch Statcast data for the given date range (MLB only)."""
77
+ season = str(datetime.strptime(start_date, "%Y-%m-%d").year)
78
+ return _query_statcast(start_date, end_date, season=season)
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
 
81
  def normalize_statcast(df: pd.DataFrame) -> pd.DataFrame:
models/live_fair_simulator_v3.py CHANGED
@@ -274,45 +274,79 @@ def build_upcoming_simulated_rows(
274
  ),
275
  )
276
 
277
- # Batch 9 zone-matchup overlay
278
- zone_hr_boost = float(zone_matchup_adj.get("hr_zone_boost", 0.0) or 0.0)
279
- zone_hit_boost = float(zone_matchup_adj.get("hit_zone_boost", 0.0) or 0.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  zone_tb2p_boost = float(zone_matchup_adj.get("tb2p_zone_boost", 0.0) or 0.0)
281
 
282
- # Convert raw zone tendencies into a light additive adjustment.
283
- # Keep this conservative in first pass.
284
- batter_baseline["hit_prob_base"] = min(
285
- 0.55,
286
- max(
287
- 0.05,
288
- float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
289
- + (zone_hit_boost * 0.10),
290
- ),
291
- )
292
-
293
- batter_baseline["hr_prob_base"] = min(
294
- 0.25,
295
- max(
296
- 0.005,
297
- float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
298
- + (zone_hr_boost * 0.10),
299
- ),
300
- )
301
-
302
- batter_baseline["tb2p_prob_base"] = min(
303
- 0.45,
304
- max(
305
- 0.03,
306
- float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10)
307
- + (zone_tb2p_boost * 0.10),
308
- ),
309
- )
310
-
311
- _snap_after_zone_hr = float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
312
- _snap_after_zone_hit = float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
313
- _snap_after_zone_tb2p = float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10)
314
 
315
- # Family + zone conditional arsenal overlay
316
  family_zone_hr_boost = float(
317
  family_zone_matchup_adj.get("family_zone_hr_boost", 0.0) or 0.0
318
  )
@@ -326,73 +360,52 @@ def build_upcoming_simulated_rows(
326
  family_zone_matchup_adj.get("family_zone_whiff_risk", 0.0) or 0.0
327
  )
328
 
329
- # Keep this conservative so we stack signal instead of overpowering existing layers.
330
- batter_baseline["hit_prob_base"] = min(
331
- 0.55,
332
- max(
333
- 0.05,
334
- float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
335
- + (family_zone_hit_boost * 0.06)
336
- - (family_zone_whiff_risk * 0.02),
337
- ),
338
- )
339
-
340
- batter_baseline["hr_prob_base"] = min(
341
- 0.25,
342
- max(
343
- 0.005,
344
- float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
345
- + (family_zone_hr_boost * 0.07),
346
- ),
347
- )
348
-
349
- batter_baseline["tb2p_prob_base"] = min(
350
- 0.45,
351
- max(
352
- 0.03,
353
- float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10)
354
- + (family_zone_tb2p_boost * 0.06)
355
- + (family_zone_hit_boost * 0.02),
356
- ),
357
- )
358
 
359
- _snap_after_family_zone_hr = float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
360
- _snap_after_family_zone_hit = float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
361
- _snap_after_family_zone_tb2p = float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10)
 
362
 
363
- # Arsenal matchup overlay
364
- arsenal_hr_boost = float(arsenal_matchup_adj.get("arsenal_hr_boost", 0.0) or 0.0)
365
- arsenal_hit_boost = float(arsenal_matchup_adj.get("arsenal_hit_boost", 0.0) or 0.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  arsenal_tb2p_boost = float(arsenal_matchup_adj.get("arsenal_tb2p_boost", 0.0) or 0.0)
367
  arsenal_whiff_risk = float(arsenal_matchup_adj.get("arsenal_whiff_risk", 0.0) or 0.0)
368
 
369
- batter_baseline["hit_prob_base"] = min(
370
- 0.55,
371
- max(
372
- 0.05,
373
- float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
374
- + (arsenal_hit_boost * 0.08)
375
- - (arsenal_whiff_risk * 0.04),
376
- ),
377
- )
378
 
379
- batter_baseline["hr_prob_base"] = min(
380
- 0.25,
381
- max(
382
- 0.005,
383
- float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
384
- + (arsenal_hr_boost * 0.10),
385
- ),
386
- )
387
 
388
- batter_baseline["tb2p_prob_base"] = min(
389
- 0.45,
390
- max(
391
- 0.03,
392
- float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10)
393
- + (arsenal_tb2p_boost * 0.08),
394
- ),
395
- )
396
 
397
  _snap_after_arsenal_hr = float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
398
  _snap_after_arsenal_hit = float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
@@ -472,25 +485,25 @@ def build_upcoming_simulated_rows(
472
  or (batter_stand == "R" and p_throws == "R")
473
  )
474
  if platoon_same_hand:
475
- batter_baseline["hit_prob_base"] = max(
476
  0.05, float(batter_baseline.get("hit_prob_base", 0.15) or 0.15) - 0.008
477
- )
478
- batter_baseline["hr_prob_base"] = max(
479
- 0.005, float(batter_baseline.get("hr_prob_base", 0.03) or 0.03) - 0.005
480
- )
481
- batter_baseline["tb2p_prob_base"] = max(
482
  0.03, float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10) - 0.006
483
- )
484
  else:
485
- batter_baseline["hit_prob_base"] = min(
486
- 0.55, float(batter_baseline.get("hit_prob_base", 0.15) or 0.15) + 0.007
487
- )
488
- batter_baseline["hr_prob_base"] = min(
489
- 0.25, float(batter_baseline.get("hr_prob_base", 0.03) or 0.03) + 0.004
490
- )
491
- batter_baseline["tb2p_prob_base"] = min(
492
- 0.45, float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10) + 0.005
493
- )
494
 
495
  _snap_after_platoon_hr = float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
496
  _snap_after_platoon_hit = float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
@@ -844,9 +857,15 @@ def build_upcoming_simulated_rows(
844
  "snap_baseline_hr": _snap_baseline_hr,
845
  "snap_baseline_hit": _snap_baseline_hit,
846
  "snap_baseline_tb2p": _snap_baseline_tb2p,
 
 
 
847
  "snap_after_zone_hr": _snap_after_zone_hr,
848
  "snap_after_zone_hit": _snap_after_zone_hit,
849
  "snap_after_zone_tb2p": _snap_after_zone_tb2p,
 
 
 
850
  "snap_after_family_zone_hr": _snap_after_family_zone_hr,
851
  "snap_after_family_zone_hit": _snap_after_family_zone_hit,
852
  "snap_after_family_zone_tb2p": _snap_after_family_zone_tb2p,
 
274
  ),
275
  )
276
 
277
+ # ---------------------------------------------------------------------------
278
+ # Batter trend adjustment (7d/30d signals wired into probability stack)
279
+ # ---------------------------------------------------------------------------
280
+ trend_delta_ev90 = batter_trend_row.get("trend_delta_ev90")
281
+ trend_delta_barrel = batter_trend_row.get("trend_delta_barrel")
282
+ xwoba_7d = batter_trend_row.get("xwoba_7d")
283
+ xwoba_season = batter_features.get("xwoba")
284
+ hot_flag = batter_trend_row.get("batter_hot_flag", False)
285
+ cold_flag = batter_trend_row.get("batter_cold_flag", False)
286
+
287
+ trend_adj_hr = 0.0
288
+ trend_adj_hit = 0.0
289
+ trend_adj_tb = 0.0
290
+
291
+ if trend_delta_ev90 is not None:
292
+ if trend_delta_ev90 >= 2.0:
293
+ trend_adj_hr += 0.006
294
+ trend_adj_hit += 0.005
295
+ elif trend_delta_ev90 <= -2.0:
296
+ trend_adj_hr -= 0.006
297
+ trend_adj_hit -= 0.005
298
+
299
+ if trend_delta_barrel is not None:
300
+ if trend_delta_barrel >= 0.02:
301
+ trend_adj_hr += 0.008
302
+ trend_adj_tb += 0.006
303
+ elif trend_delta_barrel <= -0.02:
304
+ trend_adj_hr -= 0.008
305
+ trend_adj_tb -= 0.006
306
+
307
+ if xwoba_7d is not None and xwoba_season is not None:
308
+ xwoba_delta = float(xwoba_7d) - float(xwoba_season)
309
+ if xwoba_delta >= 0.030:
310
+ trend_adj_hit += 0.006
311
+ elif xwoba_delta <= -0.030:
312
+ trend_adj_hit -= 0.006
313
+
314
+ if hot_flag:
315
+ trend_adj_hr += 0.003
316
+ trend_adj_hit += 0.003
317
+ if cold_flag:
318
+ trend_adj_hr -= 0.003
319
+ trend_adj_hit -= 0.003
320
+
321
+ # Hard caps
322
+ trend_adj_hr = max(-0.010, min(0.010, trend_adj_hr))
323
+ trend_adj_hit = max(-0.008, min(0.008, trend_adj_hit))
324
+ trend_adj_tb = max(-0.008, min(0.008, trend_adj_tb))
325
+
326
+ batter_baseline["hr_prob_base"] = min(0.25, max(0.005,
327
+ float(batter_baseline.get("hr_prob_base", 0.03) or 0.03) + trend_adj_hr))
328
+ batter_baseline["hit_prob_base"] = min(0.55, max(0.05,
329
+ float(batter_baseline.get("hit_prob_base", 0.15) or 0.15) + trend_adj_hit))
330
+ batter_baseline["tb2p_prob_base"] = min(0.45, max(0.03,
331
+ float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10) + trend_adj_tb))
332
+
333
+ _snap_after_trend_hr = float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
334
+ _snap_after_trend_hit = float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
335
+ _snap_after_trend_tb2p = float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10)
336
+
337
+ # ---------------------------------------------------------------------------
338
+ # Zone + family-zone matchup deduplication (take max by absolute magnitude)
339
+ # ---------------------------------------------------------------------------
340
+ # Batch 9 zone-matchup overlay — compute effective values only
341
+ zone_hr_boost = float(zone_matchup_adj.get("hr_zone_boost", 0.0) or 0.0)
342
+ zone_hit_boost = float(zone_matchup_adj.get("hit_zone_boost", 0.0) or 0.0)
343
  zone_tb2p_boost = float(zone_matchup_adj.get("tb2p_zone_boost", 0.0) or 0.0)
344
 
345
+ zone_hr_eff = zone_hr_boost * 0.10
346
+ zone_hit_eff = zone_hit_boost * 0.10
347
+ zone_tb2p_eff = zone_tb2p_boost * 0.10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
+ # Family + zone conditional arsenal overlay — compute effective values only
350
  family_zone_hr_boost = float(
351
  family_zone_matchup_adj.get("family_zone_hr_boost", 0.0) or 0.0
352
  )
 
360
  family_zone_matchup_adj.get("family_zone_whiff_risk", 0.0) or 0.0
361
  )
362
 
363
+ fz_hr_eff = family_zone_hr_boost * 0.07
364
+ fz_hit_eff = (family_zone_hit_boost * 0.06) - (family_zone_whiff_risk * 0.02)
365
+ fz_tb2p_eff = (family_zone_tb2p_boost * 0.06) + (family_zone_hit_boost * 0.02)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
+ # Apply whichever signal is stronger per dimension (no double-counting)
368
+ primary_hr = zone_hr_eff if abs(zone_hr_eff) >= abs(fz_hr_eff) else fz_hr_eff
369
+ primary_hit = zone_hit_eff if abs(zone_hit_eff) >= abs(fz_hit_eff) else fz_hit_eff
370
+ primary_tb2p = zone_tb2p_eff if abs(zone_tb2p_eff) >= abs(fz_tb2p_eff) else fz_tb2p_eff
371
 
372
+ batter_baseline["hr_prob_base"] = min(0.25, max(0.005,
373
+ float(batter_baseline.get("hr_prob_base", 0.03) or 0.03) + primary_hr))
374
+ batter_baseline["hit_prob_base"] = min(0.55, max(0.05,
375
+ float(batter_baseline.get("hit_prob_base", 0.15) or 0.15) + primary_hit))
376
+ batter_baseline["tb2p_prob_base"] = min(0.45, max(0.03,
377
+ float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10) + primary_tb2p))
378
+
379
+ # Single dedup snap (debug ladder uses snap_after_zone_dedup_*)
380
+ _snap_after_zone_dedup_hr = float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
381
+ _snap_after_zone_dedup_hit = float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
382
+ _snap_after_zone_dedup_tb2p = float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10)
383
+ # Keep legacy snap names for signal attribution dict compatibility
384
+ _snap_after_zone_hr = _snap_after_zone_dedup_hr
385
+ _snap_after_zone_hit = _snap_after_zone_dedup_hit
386
+ _snap_after_zone_tb2p = _snap_after_zone_dedup_tb2p
387
+ _snap_after_family_zone_hr = _snap_after_zone_dedup_hr
388
+ _snap_after_family_zone_hit = _snap_after_zone_dedup_hit
389
+ _snap_after_family_zone_tb2p = _snap_after_zone_dedup_tb2p
390
+
391
+ # Arsenal matchup overlay — applied at 50% scalar (zone/fz already cover family+zone)
392
+ arsenal_hr_boost = float(arsenal_matchup_adj.get("arsenal_hr_boost", 0.0) or 0.0)
393
+ arsenal_hit_boost = float(arsenal_matchup_adj.get("arsenal_hit_boost", 0.0) or 0.0)
394
  arsenal_tb2p_boost = float(arsenal_matchup_adj.get("arsenal_tb2p_boost", 0.0) or 0.0)
395
  arsenal_whiff_risk = float(arsenal_matchup_adj.get("arsenal_whiff_risk", 0.0) or 0.0)
396
 
397
+ batter_baseline["hit_prob_base"] = min(0.55, max(0.05,
398
+ float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
399
+ + (arsenal_hit_boost * 0.04)
400
+ - (arsenal_whiff_risk * 0.02)))
 
 
 
 
 
401
 
402
+ batter_baseline["hr_prob_base"] = min(0.25, max(0.005,
403
+ float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
404
+ + (arsenal_hr_boost * 0.05)))
 
 
 
 
 
405
 
406
+ batter_baseline["tb2p_prob_base"] = min(0.45, max(0.03,
407
+ float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10)
408
+ + (arsenal_tb2p_boost * 0.04)))
 
 
 
 
 
409
 
410
  _snap_after_arsenal_hr = float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
411
  _snap_after_arsenal_hit = float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
 
485
  or (batter_stand == "R" and p_throws == "R")
486
  )
487
  if platoon_same_hand:
488
+ batter_baseline["hit_prob_base"] = min(0.55, max(
489
  0.05, float(batter_baseline.get("hit_prob_base", 0.15) or 0.15) - 0.008
490
+ ))
491
+ batter_baseline["hr_prob_base"] = min(0.25, max(
492
+ 0.005, float(batter_baseline.get("hr_prob_base", 0.03) or 0.03) - 0.008
493
+ ))
494
+ batter_baseline["tb2p_prob_base"] = min(0.45, max(
495
  0.03, float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10) - 0.006
496
+ ))
497
  else:
498
+ batter_baseline["hit_prob_base"] = min(0.55, max(
499
+ 0.05, float(batter_baseline.get("hit_prob_base", 0.15) or 0.15) + 0.007
500
+ ))
501
+ batter_baseline["hr_prob_base"] = min(0.25, max(
502
+ 0.005, float(batter_baseline.get("hr_prob_base", 0.03) or 0.03) + 0.007
503
+ ))
504
+ batter_baseline["tb2p_prob_base"] = min(0.45, max(
505
+ 0.03, float(batter_baseline.get("tb2p_prob_base", 0.10) or 0.10) + 0.005
506
+ ))
507
 
508
  _snap_after_platoon_hr = float(batter_baseline.get("hr_prob_base", 0.03) or 0.03)
509
  _snap_after_platoon_hit = float(batter_baseline.get("hit_prob_base", 0.15) or 0.15)
 
857
  "snap_baseline_hr": _snap_baseline_hr,
858
  "snap_baseline_hit": _snap_baseline_hit,
859
  "snap_baseline_tb2p": _snap_baseline_tb2p,
860
+ "snap_after_trend_hr": _snap_after_trend_hr,
861
+ "snap_after_trend_hit": _snap_after_trend_hit,
862
+ "snap_after_trend_tb2p": _snap_after_trend_tb2p,
863
  "snap_after_zone_hr": _snap_after_zone_hr,
864
  "snap_after_zone_hit": _snap_after_zone_hit,
865
  "snap_after_zone_tb2p": _snap_after_zone_tb2p,
866
+ "snap_after_zone_dedup_hr": _snap_after_zone_dedup_hr,
867
+ "snap_after_zone_dedup_hit": _snap_after_zone_dedup_hit,
868
+ "snap_after_zone_dedup_tb2p": _snap_after_zone_dedup_tb2p,
869
  "snap_after_family_zone_hr": _snap_after_family_zone_hr,
870
  "snap_after_family_zone_hit": _snap_after_family_zone_hit,
871
  "snap_after_family_zone_tb2p": _snap_after_family_zone_tb2p,