Syntrex committed on
Commit
4c8e6d6
·
1 Parent(s): 6989587

Personalize props model voice

Browse files
analytics/model_voice.py CHANGED
@@ -25,49 +25,110 @@ def _dedupe(items: list[str]) -> list[str]:
25
  return ordered
26
 
27
 
28
- def build_hr_model_voice(row: dict[str, Any]) -> dict[str, str]:
29
- positives: list[str] = []
30
- negatives: list[str] = []
31
-
32
- if float(row.get("arsenal_hr_adjustment") or 0.0) >= 0.006:
33
- positives.append("Crushes this arsenal mix")
34
- if float(row.get("zone_hr_adjustment") or 0.0) >= 0.006:
35
- positives.append("Gets a favorable zone-shape matchup")
36
- if float(row.get("family_zone_hr_adjustment") or 0.0) >= 0.006:
37
- positives.append("Family-zone profile boosts the matchup")
38
- if float(row.get("weather_hr_adjustment") or 0.0) >= 0.004 or float(row.get("park_hr_adjustment") or 0.0) >= 0.004:
39
- positives.append("Park or weather helps carry")
40
- if float(row.get("rolling_hr_adjustment") or 0.0) >= 0.004:
41
- positives.append("Recent form is trending up")
42
- if float(row.get("opportunity_hr_adjustment") or 0.0) >= 0.003:
43
- positives.append("Projected lineup slot supports extra opportunities")
44
-
45
- if float(row.get("pitcher_hr_adjustment") or 0.0) <= -0.006:
46
- negatives.append("Pitcher profile suppresses home-run damage")
47
- if float(row.get("trajectory_hr_adjustment") or 0.0) <= -0.004:
48
- negatives.append("Pitch shape and tunneling work against the hitter")
49
- if float(row.get("platoon_hr_adjustment") or 0.0) <= -0.004:
50
- negatives.append("Platoon split is not in his favor")
51
- if float(row.get("rolling_hr_adjustment") or 0.0) <= -0.004:
52
- negatives.append("Recent form has cooled")
53
- if float(row.get("opportunity_hr_adjustment") or 0.0) <= -0.003:
54
- negatives.append("Opportunity projection is light")
55
-
56
- for reason in _listify(row.get("confidence_reasons"))[:2]:
57
- negatives.append(reason)
58
-
59
- positives = _dedupe(positives)
60
- negatives = _dedupe(negatives)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  return {
62
- "model_voice_for": " + ".join(positives[:2]) if positives else "Baseline power keeps the play in range",
63
- "model_voice_against": " + ".join(negatives[:2]) if negatives else "Market price is the main pushback",
 
 
 
 
64
  }
65
 
66
 
67
- def build_strikeout_model_voice(result: dict[str, Any]) -> dict[str, str]:
68
  positives = _dedupe(_listify(result.get("reason_tags_for")))
69
  negatives = _dedupe(_listify(result.get("reason_tags_against")) + _listify(result.get("confidence_reasons")))
 
 
 
 
 
 
 
 
 
 
 
70
  return {
71
- "model_voice_for": " + ".join(positives[:2]) if positives else "Whiff-oriented strikeout profile",
72
- "model_voice_against": " + ".join(negatives[:2]) if negatives else "Price and volatility keep this close",
 
 
 
 
73
  }
 
25
  return ordered
26
 
27
 
28
+ def _template_text(template_key: str, template_inputs: dict[str, Any] | None = None) -> str:
29
+ inputs = dict(template_inputs or {})
30
+ lineup_slot = inputs.get("lineup_slot_used")
31
+ lineup_slot_txt = f"No. {int(lineup_slot)} slot" if lineup_slot not in {None, "", "nan", "None"} else "projected slot"
32
+ venue = str(inputs.get("venue") or "").strip()
33
+
34
+ templates = {
35
+ "pitcher_attackable": "The opposing pitcher profile is giving up HR-friendly contact",
36
+ "pitcher_suppresses_hr": "The pitcher profile suppresses home-run damage",
37
+ "trend_up": "Recent batted-ball form is trending up",
38
+ "trend_down": "Recent batted-ball form has cooled",
39
+ "zone_favorable": "The zone matchup lines up with his damage areas",
40
+ "zone_tough": "This zone profile limits his best contact lanes",
41
+ "family_zone_favorable": "The family-zone matchup boosts his contact shape",
42
+ "family_zone_tough": "The family-zone matchup works against his usual damage path",
43
+ "arsenal_favorable": "The arsenal mix fits his power profile",
44
+ "arsenal_tough": "This pitch mix is a tougher fit for his power profile",
45
+ "platoon_advantage": "The handedness split helps the matchup",
46
+ "platoon_disadvantage": "Same-handed conditions trim the HR ceiling",
47
+ "pulled_contact_strength": "His pulled-air damage keeps the HR ceiling live",
48
+ "pulled_contact_light": "The pulled-air profile is lighter than ideal for this HR spot",
49
+ "weather_supportive": "Weather conditions add a little extra carry",
50
+ "weather_suppressive": "Weather conditions are holding down carry",
51
+ "park_supportive": f"{venue} plays friendlier for carry" if venue else "The park adds a small carry boost",
52
+ "park_suppressive": f"{venue} suppresses HR carry" if venue else "The park trims carry",
53
+ "trajectory_helpful": "Pitch shape is more hittable than usual here",
54
+ "trajectory_tough": "Pitch shape and tunneling make clean lift harder here",
55
+ "rolling_up": "Recent form is moving in the right direction",
56
+ "rolling_down": "Recent form has cooled",
57
+ "opportunity_strong": f"The {lineup_slot_txt} adds plate-appearance upside",
58
+ "opportunity_light": "The opportunity projection is lighter than usual",
59
+ "pitcher_unresolved": "The opposing pitcher is still unresolved",
60
+ "lineup_unknown": "The lineup slot is still unknown",
61
+ "lineup_projected": "The lineup slot is projected rather than confirmed",
62
+ "strikeout_whiff_profile": "The whiff profile supports the strikeout look",
63
+ "strikeout_price_close": "The price is keeping the strikeout edge tight",
64
+ }
65
+ return templates.get(template_key, template_key.replace("_", " ").capitalize())
66
+
67
+
68
def _voice_clause(sentence: str, proper_noun: str = "") -> str:
    """Recase *sentence* so it can follow ", but " in the middle of a sentence.

    The first character is lowercased unless the sentence opens with
    *proper_noun* (for example a venue name), whose capitalisation must be kept.
    """
    if proper_noun and sentence.startswith(proper_noun):
        return sentence
    return sentence[:1].lower() + sentence[1:]


def build_hr_model_voice(row: dict[str, Any]) -> dict[str, Any]:
    """Build the home-run model-voice fields from a row's reason candidates.

    Args:
        row: Mapping that may carry "model_voice_reason_candidates", a list of
            dicts with "direction", "template_key" and "template_inputs" entries.
            The first matching candidate wins, so the list is used in the order
            given. Anything that is not a list/tuple of dicts is treated as
            "no candidates".

    Returns:
        A dict with:
          - "model_voice": one sentence, "<primary>[, but <caveat>]."
          - "model_voice_primary_reason" / "model_voice_for": the primary sentence
          - "model_voice_caveat" / "model_voice_against": the caveat or None
          - "model_voice_tags": de-duplicated template keys of all candidates
    """
    baseline_reason = "His current power baseline is keeping the matchup in range"

    raw_candidates = row.get("model_voice_reason_candidates")
    # Rows that travelled through a DataFrame can hold NaN/None here.
    candidates = (
        [c for c in raw_candidates if isinstance(c, dict)]
        if isinstance(raw_candidates, (list, tuple))
        else []
    )

    def _direction(candidate: dict[str, Any]) -> str:
        return str(candidate.get("direction") or "").strip().lower()

    supportive = [c for c in candidates if _direction(c) == "supportive"]
    cautions = [c for c in candidates if _direction(c) == "caution"]

    # Prefer the first supportive reason; otherwise lead with the first candidate.
    primary = supportive[0] if supportive else (candidates[0] if candidates else None)
    if primary is not None and _direction(primary) == "caution":
        # The primary is already the first caution, so the caveat is the next one.
        caveat = cautions[1] if len(cautions) > 1 else None
    else:
        caveat = cautions[0] if cautions else None

    primary_reason = ""
    if primary is not None:
        primary_reason = _template_text(str(primary.get("template_key") or ""), primary.get("template_inputs"))
    if not primary_reason:
        # A candidate with a blank template key yields no text; never emit ".".
        primary_reason = baseline_reason

    caveat_reason = ""
    caveat_venue = ""
    if caveat is not None:
        caveat_inputs = caveat.get("template_inputs")
        caveat_reason = _template_text(str(caveat.get("template_key") or ""), caveat_inputs)
        if isinstance(caveat_inputs, dict):
            caveat_venue = str(caveat_inputs.get("venue") or "").strip()

    voice = primary_reason
    if caveat_reason:
        # Park caveats start with the venue name; keep it capitalised.
        voice = f"{primary_reason}, but {_voice_clause(caveat_reason, caveat_venue)}"

    tags = _dedupe(
        [
            str(candidate.get("template_key") or "").strip()
            for candidate in candidates
            if str(candidate.get("template_key") or "").strip()
        ]
    )

    return {
        "model_voice": voice.rstrip(".") + ".",
        "model_voice_primary_reason": primary_reason,
        "model_voice_caveat": caveat_reason or None,
        "model_voice_tags": tags,
        "model_voice_for": primary_reason,
        "model_voice_against": caveat_reason or None,
    }
111
 
112
 
113
def build_strikeout_model_voice(result: dict[str, Any]) -> dict[str, Any]:
    """Build the strikeout model-voice fields from a result's reason tags.

    Args:
        result: Mapping read for "reason_tags_for", "reason_tags_against" and
            "confidence_reasons"; each is normalised through ``_listify`` and
            de-duplicated through ``_dedupe``.

    Returns:
        A dict with:
          - "model_voice": one sentence, "<primary>[, but <caveat>]."
          - "model_voice_primary_reason" / "model_voice_for": the first
            supporting reason, or a generic whiff-profile sentence
          - "model_voice_caveat" / "model_voice_against": the first opposing
            reason, or None
          - "model_voice_tags": "strikeout_whiff_profile" and/or
            "strikeout_price_close" depending on which reason lists are non-empty
    """
    positives = _dedupe(_listify(result.get("reason_tags_for")))
    negatives = _dedupe(_listify(result.get("reason_tags_against")) + _listify(result.get("confidence_reasons")))

    primary_reason = positives[0] if positives else "The whiff profile supports the strikeout look"
    caveat = negatives[0] if negatives else ""

    voice = primary_reason
    if caveat:
        # Lowercase the lead only for an ordinary capitalised word (or the
        # article "A"); acronym- or symbol-led reasons such as "K% ..." or
        # "HR ..." keep their casing.
        starts_plain_word = caveat[1:2].islower() or caveat[:2] == "A "
        clause = caveat[:1].lower() + caveat[1:] if starts_plain_word else caveat
        voice = f"{primary_reason}, but {clause}"

    tags: list[str] = []
    if positives:
        tags.append("strikeout_whiff_profile")
    if negatives:
        tags.append("strikeout_price_close")

    return {
        "model_voice": voice.rstrip(".") + ".",
        "model_voice_primary_reason": primary_reason,
        "model_voice_caveat": caveat or None,
        "model_voice_tags": tags,
        "model_voice_for": primary_reason,
        "model_voice_against": caveat or None,
    }
analytics/props_view_model.py CHANGED
@@ -288,6 +288,9 @@ def build_player_prop_detail_map(mapped_df: pd.DataFrame) -> dict[str, dict[str,
288
  "best_primary": best_primary,
289
  "best_primary_row": best_primary,
290
  "best_verdict": best_primary.get("verdict") if best_primary else None,
 
 
 
291
  "model_voice_for": best_primary.get("model_voice_for") if best_primary else None,
292
  "model_voice_against": best_primary.get("model_voice_against") if best_primary else None,
293
  "primary_rows": primary_rows.to_dict("records"),
@@ -332,6 +335,9 @@ def build_game_player_props_map(mapped_df: pd.DataFrame) -> dict[str, dict[str,
332
  "best_bet_ev": best_primary.get("bet_ev"),
333
  "best_confidence_score": best_primary.get("confidence_score"),
334
  "best_verdict": best_primary.get("verdict"),
 
 
 
335
  "model_voice_for": best_primary.get("model_voice_for"),
336
  "model_voice_against": best_primary.get("model_voice_against"),
337
  "details": detail,
 
288
  "best_primary": best_primary,
289
  "best_primary_row": best_primary,
290
  "best_verdict": best_primary.get("verdict") if best_primary else None,
291
+ "model_voice": best_primary.get("model_voice") if best_primary else None,
292
+ "model_voice_primary_reason": best_primary.get("model_voice_primary_reason") if best_primary else None,
293
+ "model_voice_caveat": best_primary.get("model_voice_caveat") if best_primary else None,
294
  "model_voice_for": best_primary.get("model_voice_for") if best_primary else None,
295
  "model_voice_against": best_primary.get("model_voice_against") if best_primary else None,
296
  "primary_rows": primary_rows.to_dict("records"),
 
335
  "best_bet_ev": best_primary.get("bet_ev"),
336
  "best_confidence_score": best_primary.get("confidence_score"),
337
  "best_verdict": best_primary.get("verdict"),
338
+ "model_voice": best_primary.get("model_voice"),
339
+ "model_voice_primary_reason": best_primary.get("model_voice_primary_reason"),
340
+ "model_voice_caveat": best_primary.get("model_voice_caveat"),
341
  "model_voice_for": best_primary.get("model_voice_for"),
342
  "model_voice_against": best_primary.get("model_voice_against"),
343
  "details": detail,
database/db.py CHANGED
@@ -808,6 +808,10 @@ def ensure_upcoming_hr_props_table(conn) -> None:
808
  model_hr_prob_source TEXT,
809
  edge DOUBLE PRECISION,
810
  verdict TEXT,
 
 
 
 
811
  model_voice_for TEXT,
812
  model_voice_against TEXT
813
  )
@@ -832,6 +836,10 @@ def ensure_upcoming_hr_props_table(conn) -> None:
832
  ("confidence_bucket", "TEXT"),
833
  ("opportunity_hr_adjustment", "DOUBLE PRECISION"),
834
  ("verdict", "TEXT"),
 
 
 
 
835
  ("model_voice_for", "TEXT"),
836
  ("model_voice_against", "TEXT"),
837
  ]:
@@ -854,7 +862,7 @@ def insert_upcoming_hr_props(conn, df: pd.DataFrame) -> None:
854
  "odds_american", "line", "implied_prob", "raw_hr_prob",
855
  "calibrated_hr_prob", "model_hr_prob", "fair_prob", "bet_ev", "confidence_score",
856
  "confidence_bucket", "opportunity_hr_adjustment",
857
- "model_hr_prob_source", "edge", "verdict", "model_voice_for", "model_voice_against",
858
  ]
859
  present = [c for c in log_cols if c in df.columns]
860
  _bulk_insert(conn, "upcoming_hr_props", df[present])
 
808
  model_hr_prob_source TEXT,
809
  edge DOUBLE PRECISION,
810
  verdict TEXT,
811
+ model_voice TEXT,
812
+ model_voice_primary_reason TEXT,
813
+ model_voice_caveat TEXT,
814
+ model_voice_tags TEXT,
815
  model_voice_for TEXT,
816
  model_voice_against TEXT
817
  )
 
836
  ("confidence_bucket", "TEXT"),
837
  ("opportunity_hr_adjustment", "DOUBLE PRECISION"),
838
  ("verdict", "TEXT"),
839
+ ("model_voice", "TEXT"),
840
+ ("model_voice_primary_reason", "TEXT"),
841
+ ("model_voice_caveat", "TEXT"),
842
+ ("model_voice_tags", "TEXT"),
843
  ("model_voice_for", "TEXT"),
844
  ("model_voice_against", "TEXT"),
845
  ]:
 
862
  "odds_american", "line", "implied_prob", "raw_hr_prob",
863
  "calibrated_hr_prob", "model_hr_prob", "fair_prob", "bet_ev", "confidence_score",
864
  "confidence_bucket", "opportunity_hr_adjustment",
865
+ "model_hr_prob_source", "edge", "verdict", "model_voice", "model_voice_primary_reason", "model_voice_caveat", "model_voice_tags", "model_voice_for", "model_voice_against",
866
  ]
867
  present = [c for c in log_cols if c in df.columns]
868
  _bulk_insert(conn, "upcoming_hr_props", df[present])
models/hr_probability_engine.py CHANGED
@@ -94,6 +94,8 @@ def _empty_result(player_name: str, mode: str) -> dict[str, Any]:
94
  "trajectory_reliability": 0.0,
95
  "rolling_reliability": 0.0,
96
  "opportunity_reliability": 0.0,
 
 
97
  }
98
 
99
 
@@ -109,6 +111,30 @@ def _apply_reliability(raw_adjustment: float, reliability: float) -> float:
109
  return raw_adjustment * _clamp(reliability, 0.0, 1.0)
110
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def _compute_environment_reliability(game_row: dict[str, Any], weather_row: dict[str, Any] | None) -> float:
113
  has_venue = bool(str(game_row.get("venue") or "").strip())
114
  weather_row = dict(weather_row or {})
@@ -346,6 +372,7 @@ def build_hr_probability_result(
346
 
347
  applied_layers: list[str] = []
348
  skipped_layers = result["skipped_layers"].split("|") if result["skipped_layers"] else []
 
349
 
350
  pitcher_row = build_pitcher_feature_row(
351
  statcast_df=pitcher_df,
@@ -368,6 +395,14 @@ def build_hr_probability_result(
368
  hr_prob = _clamp(hr_prob + result["pitcher_hr_adjustment"], 0.005, 0.25)
369
  if abs(result["pitcher_hr_adjustment"]) > 1e-6:
370
  applied_layers.append("pitcher")
 
 
 
 
 
 
 
 
371
 
372
  reference_date = game_row.get("game_datetime_utc") or game_row.get("game_date")
373
  batter_trend_row = build_batter_trend_row(
@@ -387,6 +422,13 @@ def build_hr_probability_result(
387
  hr_prob = _clamp(hr_prob + result["trend_hr_adjustment"], 0.005, 0.25)
388
  if abs(result["trend_hr_adjustment"]) > 1e-6:
389
  applied_layers.append("trend")
 
 
 
 
 
 
 
390
 
391
  matchup_multiplier = 1.0
392
  if result["pitcher_name"]:
@@ -450,8 +492,22 @@ def build_hr_probability_result(
450
  hr_prob = _clamp(hr_prob + result["family_zone_hr_adjustment"], 0.005, 0.25)
451
  if abs(result["zone_hr_adjustment"]) > 1e-6:
452
  applied_layers.append("zone")
 
 
 
 
 
 
 
453
  if abs(result["family_zone_hr_adjustment"]) > 1e-6:
454
  applied_layers.append("family_zone")
 
 
 
 
 
 
 
455
 
456
  arsenal_eff = 0.0
457
  try:
@@ -476,11 +532,26 @@ def build_hr_probability_result(
476
  hr_prob = _clamp(hr_prob + result["arsenal_hr_adjustment"], 0.005, 0.25)
477
  if abs(result["arsenal_hr_adjustment"]) > 1e-6:
478
  applied_layers.append("arsenal")
 
 
 
 
 
 
 
479
 
480
  result["platoon_hr_adjustment"] = platoon_adj
481
  hr_prob = _clamp(hr_prob + platoon_adj, 0.005, 0.25)
482
  if abs(platoon_adj) > 1e-6:
483
  applied_layers.append("platoon")
 
 
 
 
 
 
 
 
484
  else:
485
  skipped_layers.extend(["pitcher_missing", "zone_matchup_unavailable", "arsenal_matchup_unavailable"])
486
 
@@ -492,6 +563,13 @@ def build_hr_probability_result(
492
  hr_prob = _clamp(hr_prob + result["pulled_contact_hr_adjustment"], 0.005, 0.30)
493
  if abs(result["pulled_contact_hr_adjustment"]) > 1e-6:
494
  applied_layers.append("pulled_contact")
 
 
 
 
 
 
 
495
 
496
  env_adj = compute_environment_adjustment(game_row=game_row, weather_row=weather_row)
497
  result["environment_reliability"] = _compute_environment_reliability(game_row, weather_row)
@@ -512,6 +590,16 @@ def build_hr_probability_result(
512
  hr_prob = _clamp(hr_prob + result["env_hr_adjustment"], 0.005, 0.30)
513
  if abs(result["env_hr_adjustment"]) > 1e-6:
514
  applied_layers.append("environment")
 
 
 
 
 
 
 
 
 
 
515
 
516
  trajectory_row = build_trajectory_features(
517
  statcast_df=pitcher_df,
@@ -527,6 +615,13 @@ def build_hr_probability_result(
527
  hr_prob = _clamp(hr_prob + result["trajectory_hr_adjustment"], 0.005, 0.25)
528
  if abs(result["trajectory_hr_adjustment"]) > 1e-6:
529
  applied_layers.append("trajectory")
 
 
 
 
 
 
 
530
 
531
  pitcher_rolling_row = build_pitcher_rolling_form_row(
532
  statcast_df=pitcher_df,
@@ -557,6 +652,13 @@ def build_hr_probability_result(
557
  hr_prob = _clamp(hr_prob + result["rolling_hr_adjustment"], 0.005, 0.30)
558
  if abs(result["rolling_hr_adjustment"]) > 1e-6:
559
  applied_layers.append("rolling")
 
 
 
 
 
 
 
560
 
561
  lineup_slot = game_row.get("lineup_slot")
562
  try:
@@ -598,6 +700,17 @@ def build_hr_probability_result(
598
  hr_prob = _clamp(hr_prob + result["opportunity_hr_adjustment"], 0.005, 0.30)
599
  if abs(result["opportunity_hr_adjustment"]) > 1e-6:
600
  applied_layers.append("opportunity")
 
 
 
 
 
 
 
 
 
 
 
601
 
602
  result["raw_hr_prob"] = hr_prob
603
  result["adjusted_hr_prob"] = hr_prob
@@ -617,9 +730,40 @@ def build_hr_probability_result(
617
  applied_layers=applied_layers,
618
  )
619
  result.update(confidence)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
620
 
621
  result["applied_layers"] = "|".join(dict.fromkeys(applied_layers))
622
  result["skipped_layers"] = "|".join(dict.fromkeys([s for s in skipped_layers if s]))
 
 
 
 
 
 
 
623
  result["pregame_context_applied"] = any(
624
  abs(_safe_float(result.get(key))) > 1e-6
625
  for key in [
 
94
  "trajectory_reliability": 0.0,
95
  "rolling_reliability": 0.0,
96
  "opportunity_reliability": 0.0,
97
+ "model_voice_reason_candidates": [],
98
+ "model_voice_tags": [],
99
  }
100
 
101
 
 
111
  return raw_adjustment * _clamp(reliability, 0.0, 1.0)
112
 
113
 
114
def _append_reason_candidate(
    reason_candidates: list[dict[str, Any]],
    *,
    category: str,
    direction: str,
    magnitude: float,
    template_key: str,
    template_inputs: dict[str, Any] | None = None,
) -> None:
    """Record one model-voice reason on ``reason_candidates`` (mutated in place).

    The magnitude is passed through ``_safe_float`` with a 0.0 default
    (presumably a tolerant float conversion; confirm against its definition).
    Candidates whose absolute magnitude is at most 1e-6 are dropped.

    Args:
        reason_candidates: List that receives the new candidate dict.
        category: Stored under "category".
        direction: Stored under "direction".
        magnitude: Signed adjustment; stored as "signed_magnitude", with its
            absolute value under "magnitude".
        template_key: Stored under "template_key".
        template_inputs: Optional mapping; a shallow copy is stored under
            "template_inputs" (an empty dict when omitted).
    """
    signed_value = _safe_float(magnitude, 0.0)
    strength = abs(signed_value)
    if strength <= 1e-6:
        # Negligible adjustments are not worth voicing.
        return

    candidate: dict[str, Any] = {
        "category": category,
        "direction": direction,
        "magnitude": strength,
        "signed_magnitude": signed_value,
        "template_key": template_key,
        "template_inputs": dict(template_inputs or {}),
    }
    reason_candidates.append(candidate)
136
+
137
+
138
  def _compute_environment_reliability(game_row: dict[str, Any], weather_row: dict[str, Any] | None) -> float:
139
  has_venue = bool(str(game_row.get("venue") or "").strip())
140
  weather_row = dict(weather_row or {})
 
372
 
373
  applied_layers: list[str] = []
374
  skipped_layers = result["skipped_layers"].split("|") if result["skipped_layers"] else []
375
+ reason_candidates: list[dict[str, Any]] = []
376
 
377
  pitcher_row = build_pitcher_feature_row(
378
  statcast_df=pitcher_df,
 
395
  hr_prob = _clamp(hr_prob + result["pitcher_hr_adjustment"], 0.005, 0.25)
396
  if abs(result["pitcher_hr_adjustment"]) > 1e-6:
397
  applied_layers.append("pitcher")
398
+ _append_reason_candidate(
399
+ reason_candidates,
400
+ category="pitcher",
401
+ direction="supportive" if result["pitcher_hr_adjustment"] > 0 else "caution",
402
+ magnitude=result["pitcher_hr_adjustment"],
403
+ template_key="pitcher_attackable" if result["pitcher_hr_adjustment"] > 0 else "pitcher_suppresses_hr",
404
+ template_inputs={"pitcher_name": result["pitcher_name"]},
405
+ )
406
 
407
  reference_date = game_row.get("game_datetime_utc") or game_row.get("game_date")
408
  batter_trend_row = build_batter_trend_row(
 
422
  hr_prob = _clamp(hr_prob + result["trend_hr_adjustment"], 0.005, 0.25)
423
  if abs(result["trend_hr_adjustment"]) > 1e-6:
424
  applied_layers.append("trend")
425
+ _append_reason_candidate(
426
+ reason_candidates,
427
+ category="trend",
428
+ direction="supportive" if result["trend_hr_adjustment"] > 0 else "caution",
429
+ magnitude=result["trend_hr_adjustment"],
430
+ template_key="trend_up" if result["trend_hr_adjustment"] > 0 else "trend_down",
431
+ )
432
 
433
  matchup_multiplier = 1.0
434
  if result["pitcher_name"]:
 
492
  hr_prob = _clamp(hr_prob + result["family_zone_hr_adjustment"], 0.005, 0.25)
493
  if abs(result["zone_hr_adjustment"]) > 1e-6:
494
  applied_layers.append("zone")
495
+ _append_reason_candidate(
496
+ reason_candidates,
497
+ category="zone",
498
+ direction="supportive" if result["zone_hr_adjustment"] > 0 else "caution",
499
+ magnitude=result["zone_hr_adjustment"],
500
+ template_key="zone_favorable" if result["zone_hr_adjustment"] > 0 else "zone_tough",
501
+ )
502
  if abs(result["family_zone_hr_adjustment"]) > 1e-6:
503
  applied_layers.append("family_zone")
504
+ _append_reason_candidate(
505
+ reason_candidates,
506
+ category="family_zone",
507
+ direction="supportive" if result["family_zone_hr_adjustment"] > 0 else "caution",
508
+ magnitude=result["family_zone_hr_adjustment"],
509
+ template_key="family_zone_favorable" if result["family_zone_hr_adjustment"] > 0 else "family_zone_tough",
510
+ )
511
 
512
  arsenal_eff = 0.0
513
  try:
 
532
  hr_prob = _clamp(hr_prob + result["arsenal_hr_adjustment"], 0.005, 0.25)
533
  if abs(result["arsenal_hr_adjustment"]) > 1e-6:
534
  applied_layers.append("arsenal")
535
+ _append_reason_candidate(
536
+ reason_candidates,
537
+ category="arsenal",
538
+ direction="supportive" if result["arsenal_hr_adjustment"] > 0 else "caution",
539
+ magnitude=result["arsenal_hr_adjustment"],
540
+ template_key="arsenal_favorable" if result["arsenal_hr_adjustment"] > 0 else "arsenal_tough",
541
+ )
542
 
543
  result["platoon_hr_adjustment"] = platoon_adj
544
  hr_prob = _clamp(hr_prob + platoon_adj, 0.005, 0.25)
545
  if abs(platoon_adj) > 1e-6:
546
  applied_layers.append("platoon")
547
+ _append_reason_candidate(
548
+ reason_candidates,
549
+ category="platoon",
550
+ direction="supportive" if platoon_adj > 0 else "caution",
551
+ magnitude=platoon_adj,
552
+ template_key="platoon_advantage" if platoon_adj > 0 else "platoon_disadvantage",
553
+ template_inputs={"matchup_reason": matchup_reason},
554
+ )
555
  else:
556
  skipped_layers.extend(["pitcher_missing", "zone_matchup_unavailable", "arsenal_matchup_unavailable"])
557
 
 
563
  hr_prob = _clamp(hr_prob + result["pulled_contact_hr_adjustment"], 0.005, 0.30)
564
  if abs(result["pulled_contact_hr_adjustment"]) > 1e-6:
565
  applied_layers.append("pulled_contact")
566
+ _append_reason_candidate(
567
+ reason_candidates,
568
+ category="pulled_contact",
569
+ direction="supportive" if result["pulled_contact_hr_adjustment"] > 0 else "caution",
570
+ magnitude=result["pulled_contact_hr_adjustment"],
571
+ template_key="pulled_contact_strength" if result["pulled_contact_hr_adjustment"] > 0 else "pulled_contact_light",
572
+ )
573
 
574
  env_adj = compute_environment_adjustment(game_row=game_row, weather_row=weather_row)
575
  result["environment_reliability"] = _compute_environment_reliability(game_row, weather_row)
 
590
  hr_prob = _clamp(hr_prob + result["env_hr_adjustment"], 0.005, 0.30)
591
  if abs(result["env_hr_adjustment"]) > 1e-6:
592
  applied_layers.append("environment")
593
+ dominant_env_key = "weather_supportive" if abs(result["weather_hr_adjustment"]) >= abs(result["park_hr_adjustment"]) else "park_supportive"
594
+ dominant_env_tough_key = "weather_suppressive" if abs(result["weather_hr_adjustment"]) >= abs(result["park_hr_adjustment"]) else "park_suppressive"
595
+ _append_reason_candidate(
596
+ reason_candidates,
597
+ category="environment",
598
+ direction="supportive" if result["env_hr_adjustment"] > 0 else "caution",
599
+ magnitude=result["env_hr_adjustment"],
600
+ template_key=dominant_env_key if result["env_hr_adjustment"] > 0 else dominant_env_tough_key,
601
+ template_inputs={"venue": game_row.get("venue")},
602
+ )
603
 
604
  trajectory_row = build_trajectory_features(
605
  statcast_df=pitcher_df,
 
615
  hr_prob = _clamp(hr_prob + result["trajectory_hr_adjustment"], 0.005, 0.25)
616
  if abs(result["trajectory_hr_adjustment"]) > 1e-6:
617
  applied_layers.append("trajectory")
618
+ _append_reason_candidate(
619
+ reason_candidates,
620
+ category="trajectory",
621
+ direction="supportive" if result["trajectory_hr_adjustment"] > 0 else "caution",
622
+ magnitude=result["trajectory_hr_adjustment"],
623
+ template_key="trajectory_helpful" if result["trajectory_hr_adjustment"] > 0 else "trajectory_tough",
624
+ )
625
 
626
  pitcher_rolling_row = build_pitcher_rolling_form_row(
627
  statcast_df=pitcher_df,
 
652
  hr_prob = _clamp(hr_prob + result["rolling_hr_adjustment"], 0.005, 0.30)
653
  if abs(result["rolling_hr_adjustment"]) > 1e-6:
654
  applied_layers.append("rolling")
655
+ _append_reason_candidate(
656
+ reason_candidates,
657
+ category="rolling",
658
+ direction="supportive" if result["rolling_hr_adjustment"] > 0 else "caution",
659
+ magnitude=result["rolling_hr_adjustment"],
660
+ template_key="rolling_up" if result["rolling_hr_adjustment"] > 0 else "rolling_down",
661
+ )
662
 
663
  lineup_slot = game_row.get("lineup_slot")
664
  try:
 
700
  hr_prob = _clamp(hr_prob + result["opportunity_hr_adjustment"], 0.005, 0.30)
701
  if abs(result["opportunity_hr_adjustment"]) > 1e-6:
702
  applied_layers.append("opportunity")
703
+ _append_reason_candidate(
704
+ reason_candidates,
705
+ category="opportunity",
706
+ direction="supportive" if result["opportunity_hr_adjustment"] > 0 else "caution",
707
+ magnitude=result["opportunity_hr_adjustment"],
708
+ template_key="opportunity_strong" if result["opportunity_hr_adjustment"] > 0 else "opportunity_light",
709
+ template_inputs={
710
+ "lineup_slot_used": lineup_slot,
711
+ "lineup_slot_source": result["lineup_slot_source"],
712
+ },
713
+ )
714
 
715
  result["raw_hr_prob"] = hr_prob
716
  result["adjusted_hr_prob"] = hr_prob
 
730
  applied_layers=applied_layers,
731
  )
732
  result.update(confidence)
733
+ if "Pitcher unresolved" in result.get("confidence_reasons", []):
734
+ _append_reason_candidate(
735
+ reason_candidates,
736
+ category="confidence",
737
+ direction="caution",
738
+ magnitude=0.004,
739
+ template_key="pitcher_unresolved",
740
+ )
741
+ if "Lineup slot unavailable" in result.get("confidence_reasons", []):
742
+ _append_reason_candidate(
743
+ reason_candidates,
744
+ category="confidence",
745
+ direction="caution",
746
+ magnitude=0.003,
747
+ template_key="lineup_unknown",
748
+ )
749
+ if "Using projected lineup slot" in result.get("confidence_reasons", []):
750
+ _append_reason_candidate(
751
+ reason_candidates,
752
+ category="confidence",
753
+ direction="caution",
754
+ magnitude=0.002,
755
+ template_key="lineup_projected",
756
+ )
757
 
758
  result["applied_layers"] = "|".join(dict.fromkeys(applied_layers))
759
  result["skipped_layers"] = "|".join(dict.fromkeys([s for s in skipped_layers if s]))
760
+ ranked_reasons = sorted(
761
+ reason_candidates,
762
+ key=lambda item: abs(_safe_float(item.get("signed_magnitude"))),
763
+ reverse=True,
764
+ )
765
+ result["model_voice_reason_candidates"] = ranked_reasons
766
+ result["model_voice_tags"] = [str(item.get("template_key") or "") for item in ranked_reasons if str(item.get("template_key") or "").strip()]
767
  result["pregame_context_applied"] = any(
768
  abs(_safe_float(result.get(key))) > 1e-6
769
  for key in [
tests/test_model_voice.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ import unittest
6
+
7
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
8
+
9
+ from analytics.model_voice import build_hr_model_voice
10
+
11
+
12
class TestModelVoice(unittest.TestCase):
    """Checks that the HR model voice is assembled from ranked reason candidates."""

    @staticmethod
    def _reason(
        category: str,
        direction: str,
        signed_magnitude: float,
        template_key: str,
        template_inputs: dict | None = None,
    ) -> dict:
        """Build one reason-candidate dict in the shape the voice builder reads."""
        return {
            "category": category,
            "direction": direction,
            "magnitude": abs(signed_magnitude),
            "signed_magnitude": signed_magnitude,
            "template_key": template_key,
            "template_inputs": dict(template_inputs or {}),
        }

    def test_hr_model_voice_uses_ranked_reason_candidates(self) -> None:
        """A supportive lead plus a caution yields both sentences in the voice."""
        supportive = self._reason("arsenal", "supportive", 0.012, "arsenal_favorable")
        caution = self._reason(
            "pitcher",
            "caution",
            -0.008,
            "pitcher_suppresses_hr",
            {"pitcher_name": "Ace Arm"},
        )

        voice = build_hr_model_voice({"model_voice_reason_candidates": [supportive, caution]})
        spoken = voice["model_voice"]

        self.assertIn("arsenal mix fits his power profile", spoken.lower())
        self.assertIn("pitcher profile suppresses home-run damage", spoken.lower())
        self.assertTrue(spoken.endswith("."))
        self.assertEqual(voice["model_voice_primary_reason"], "The arsenal mix fits his power profile")
        self.assertEqual(voice["model_voice_caveat"], "The pitcher profile suppresses home-run damage")

    def test_hr_model_voice_changes_with_different_reasons(self) -> None:
        """Different single candidates must produce different voice sentences."""
        weather_row = {
            "model_voice_reason_candidates": [
                self._reason("environment", "supportive", 0.010, "weather_supportive"),
            ]
        }
        platoon_row = {
            "model_voice_reason_candidates": [
                self._reason("platoon", "caution", -0.009, "platoon_disadvantage"),
            ]
        }

        favorable = build_hr_model_voice(weather_row)
        caution = build_hr_model_voice(platoon_row)

        self.assertNotEqual(favorable["model_voice"], caution["model_voice"])
        self.assertIn("weather conditions", favorable["model_voice"].lower())
        self.assertIn("same-handed", caution["model_voice"].lower())
76
+
77
+
78
+ if __name__ == "__main__":
79
+ unittest.main()
tests/test_props_mapper.py CHANGED
@@ -95,6 +95,8 @@ class TestPropsMapper(unittest.TestCase):
95
  self.assertAlmostEqual(float(row["model_hr_prob"]), 0.17, places=6)
96
  self.assertTrue(pd.notna(row["bet_ev"]))
97
  self.assertEqual(row["confidence_bucket"], "high")
 
 
98
  self.assertIn("final_recommendation_score", result.columns)
99
  self.assertTrue(pd.notna(row["edge"]))
100
 
 
95
  self.assertAlmostEqual(float(row["model_hr_prob"]), 0.17, places=6)
96
  self.assertTrue(pd.notna(row["bet_ev"]))
97
  self.assertEqual(row["confidence_bucket"], "high")
98
+ self.assertTrue(str(row["model_voice"]).strip())
99
+ self.assertTrue(str(row["model_voice_primary_reason"]).strip())
100
  self.assertIn("final_recommendation_score", result.columns)
101
  self.assertTrue(pd.notna(row["edge"]))
102
 
visualization/debug_page.py CHANGED
@@ -650,6 +650,10 @@ def render_debug(
650
  "fair_prob",
651
  "bet_ev",
652
  "verdict",
 
 
 
 
653
  "model_voice_for",
654
  "model_voice_against",
655
  "confidence_score",
@@ -725,6 +729,9 @@ def render_debug(
725
  "best_bet_ev": player_entry.get("best_bet_ev"),
726
  "best_confidence_score": player_entry.get("best_confidence_score"),
727
  "best_verdict": player_entry.get("best_verdict"),
 
 
 
728
  "model_voice_for": player_entry.get("model_voice_for"),
729
  "model_voice_against": player_entry.get("model_voice_against"),
730
  "best_edge": player_entry.get("best_edge"),
 
650
  "fair_prob",
651
  "bet_ev",
652
  "verdict",
653
+ "model_voice",
654
+ "model_voice_primary_reason",
655
+ "model_voice_caveat",
656
+ "model_voice_tags",
657
  "model_voice_for",
658
  "model_voice_against",
659
  "confidence_score",
 
729
  "best_bet_ev": player_entry.get("best_bet_ev"),
730
  "best_confidence_score": player_entry.get("best_confidence_score"),
731
  "best_verdict": player_entry.get("best_verdict"),
732
+ "model_voice": player_entry.get("model_voice"),
733
+ "model_voice_primary_reason": player_entry.get("model_voice_primary_reason"),
734
+ "model_voice_caveat": player_entry.get("model_voice_caveat"),
735
  "model_voice_for": player_entry.get("model_voice_for"),
736
  "model_voice_against": player_entry.get("model_voice_against"),
737
  "best_edge": player_entry.get("best_edge"),
visualization/props_page.py CHANGED
@@ -211,6 +211,13 @@ def _render_props_ui_styles() -> None:
211
  .props-voice-line strong {
212
  color: #f3f8ff;
213
  }
 
 
 
 
 
 
 
214
  .props-game-card {
215
  border: 1px solid rgba(62, 88, 125, 0.32);
216
  border-radius: 16px;
@@ -473,11 +480,15 @@ def _maybe_log_props(conn, mapped: pd.DataFrame) -> None:
473
  "odds_american", "line", "implied_prob", "raw_hr_prob",
474
  "calibrated_hr_prob", "model_hr_prob", "fair_prob", "bet_ev", "confidence_score",
475
  "confidence_bucket", "opportunity_hr_adjustment", "model_hr_prob_source", "edge",
476
- "verdict", "model_voice_for", "model_voice_against",
477
  ]
478
  for col in log_cols:
479
  if col not in to_log.columns:
480
  to_log[col] = None
 
 
 
 
481
  ensure_upcoming_hr_props_table(conn)
482
  insert_upcoming_hr_props(conn, to_log[log_cols])
483
  except Exception:
@@ -631,6 +642,7 @@ def render_featured_hr_cards(featured_df: pd.DataFrame) -> None:
631
  verdict = str(row.get("verdict") or "tracked").strip().lower()
632
  edge_class = _metric_tone_class("edge", row.get("edge"))
633
  ev_class = _metric_tone_class("ev", row.get("bet_ev"))
 
634
  st.markdown(
635
  f"""
636
  <div class="props-card{' top' if idx == 0 else ''}">
@@ -666,8 +678,7 @@ def render_featured_hr_cards(featured_df: pd.DataFrame) -> None:
666
  </div>
667
  </div>
668
  <div class="props-voice">
669
- <div class="props-voice-line"><strong>Model Voice For:</strong> {str(row.get('model_voice_for') or 'Baseline power keeps this live')}</div>
670
- <div class="props-voice-line"><strong>Model Voice Against:</strong> {str(row.get('model_voice_against') or 'Market price is the main concern')}</div>
671
  </div>
672
  </div>
673
  """,
@@ -730,6 +741,11 @@ def render_player_hr_details(player_details: dict[str, Any]) -> None:
730
  "fair_prob",
731
  "bet_ev",
732
  "verdict",
 
 
 
 
 
733
  "model_voice_for",
734
  "model_voice_against",
735
  "confidence_score",
@@ -773,8 +789,7 @@ def render_player_hr_row(player_entry: dict[str, Any]) -> None:
773
  best_ev = _format_ev(player_entry.get("best_bet_ev"))
774
  best_confidence = _format_confidence(player_entry.get("best_confidence_score"))
775
  verdict = str(player_entry.get("best_verdict") or "tracked").strip().lower()
776
- voice_for = str(player_entry.get("model_voice_for") or "")
777
- voice_against = str(player_entry.get("model_voice_against") or "")
778
 
779
  details_label = f"{player_name} | {best_label} | {best_book} {best_odds} | {best_ev} EV | {best_edge} | {best_confidence} conf"
780
  if player_entry.get("has_alt_ladders"):
@@ -794,10 +809,8 @@ def render_player_hr_row(player_entry: dict[str, Any]) -> None:
794
  value=False,
795
  key=f"props_player_toggle_{player_entry.get('player_key')}",
796
  )
797
- if voice_for:
798
- st.caption(f"For: {voice_for}")
799
- if voice_against:
800
- st.caption(f"Against: {voice_against}")
801
 
802
  if details_open:
803
  st.caption(details_label)
@@ -812,10 +825,19 @@ def render_player_hr_row(player_entry: dict[str, Any]) -> None:
812
  metric_cols[3].metric("EV", best_ev)
813
  metric_cols[4].metric("Edge", best_edge)
814
  metric_cols[5].metric("Confidence", best_confidence)
815
- if voice_for:
816
- st.caption(f"Model Voice For: {voice_for}")
817
- if voice_against:
818
- st.caption(f"Model Voice Against: {voice_against}")
 
 
 
 
 
 
 
 
 
819
  render_player_hr_details(details)
820
  st.divider()
821
 
@@ -880,7 +902,7 @@ def _build_flat_table_rows(display: pd.DataFrame, market_type: str) -> list[dict
880
  "Pregame HR%": _format_pct(row.get("model_hr_prob")),
881
  "EV": _format_ev(row.get("bet_ev")),
882
  "Confidence": _format_confidence(row.get("confidence_score")),
883
- "Model Voice": str(row.get("model_voice_for") or "-"),
884
  "Source": str(row.get("model_hr_prob_source") or "-"),
885
  "Edge": _format_edge(row.get("edge")),
886
  }
@@ -900,7 +922,7 @@ def _build_flat_table_rows(display: pd.DataFrame, market_type: str) -> list[dict
900
  "Fair%": _format_pct(row.get("fair_prob")),
901
  "EV": _format_ev(row.get("bet_ev")),
902
  "Confidence": _format_confidence(row.get("confidence_score")),
903
- "Model Voice": str(row.get("model_voice_for") or "-"),
904
  "Edge": _format_edge(row.get("edge")),
905
  }
906
  for _, row in display.iterrows()
@@ -940,6 +962,9 @@ def render_probability_diagnostics(display: pd.DataFrame) -> None:
940
  "sportsbook",
941
  "display_label",
942
  "verdict",
 
 
 
943
  "model_voice_for",
944
  "model_voice_against",
945
  "baseline_hr_prob",
 
211
  .props-voice-line strong {
212
  color: #f3f8ff;
213
  }
214
+ .props-voice-card {
215
+ display: -webkit-box;
216
+ -webkit-line-clamp: 3;
217
+ -webkit-box-orient: vertical;
218
+ overflow: hidden;
219
+ min-height: 3.7rem;
220
+ }
221
  .props-game-card {
222
  border: 1px solid rgba(62, 88, 125, 0.32);
223
  border-radius: 16px;
 
480
  "odds_american", "line", "implied_prob", "raw_hr_prob",
481
  "calibrated_hr_prob", "model_hr_prob", "fair_prob", "bet_ev", "confidence_score",
482
  "confidence_bucket", "opportunity_hr_adjustment", "model_hr_prob_source", "edge",
483
+ "verdict", "model_voice", "model_voice_primary_reason", "model_voice_caveat", "model_voice_tags", "model_voice_for", "model_voice_against",
484
  ]
485
  for col in log_cols:
486
  if col not in to_log.columns:
487
  to_log[col] = None
488
+ if "model_voice_tags" in to_log.columns:
489
+ to_log["model_voice_tags"] = to_log["model_voice_tags"].apply(
490
+ lambda v: "|".join(v) if isinstance(v, list) else v
491
+ )
492
  ensure_upcoming_hr_props_table(conn)
493
  insert_upcoming_hr_props(conn, to_log[log_cols])
494
  except Exception:
 
642
  verdict = str(row.get("verdict") or "tracked").strip().lower()
643
  edge_class = _metric_tone_class("edge", row.get("edge"))
644
  ev_class = _metric_tone_class("ev", row.get("bet_ev"))
645
+ model_voice = str(row.get("model_voice") or "Model voice is still being assembled for this matchup.")
646
  st.markdown(
647
  f"""
648
  <div class="props-card{' top' if idx == 0 else ''}">
 
678
  </div>
679
  </div>
680
  <div class="props-voice">
681
+ <div class="props-voice-line props-voice-card"><strong>Model Voice:</strong> {model_voice}</div>
 
682
  </div>
683
  </div>
684
  """,
 
741
  "fair_prob",
742
  "bet_ev",
743
  "verdict",
744
+ "model_voice",
745
+ "model_voice_primary_reason",
746
+ "model_voice_caveat",
747
+ "model_voice_reason_candidates",
748
+ "model_voice_tags",
749
  "model_voice_for",
750
  "model_voice_against",
751
  "confidence_score",
 
789
  best_ev = _format_ev(player_entry.get("best_bet_ev"))
790
  best_confidence = _format_confidence(player_entry.get("best_confidence_score"))
791
  verdict = str(player_entry.get("best_verdict") or "tracked").strip().lower()
792
+ model_voice = str(player_entry.get("model_voice") or "")
 
793
 
794
  details_label = f"{player_name} | {best_label} | {best_book} {best_odds} | {best_ev} EV | {best_edge} | {best_confidence} conf"
795
  if player_entry.get("has_alt_ladders"):
 
809
  value=False,
810
  key=f"props_player_toggle_{player_entry.get('player_key')}",
811
  )
812
+ if model_voice:
813
+ st.caption(f"Model Voice: {model_voice}")
 
 
814
 
815
  if details_open:
816
  st.caption(details_label)
 
825
  metric_cols[3].metric("EV", best_ev)
826
  metric_cols[4].metric("Edge", best_edge)
827
  metric_cols[5].metric("Confidence", best_confidence)
828
+ if model_voice:
829
+ st.caption(f"Model Voice: {model_voice}")
830
+ primary_reason = str(player_entry.get("model_voice_primary_reason") or "")
831
+ caveat = str(player_entry.get("model_voice_caveat") or "")
832
+ if primary_reason or caveat:
833
+ why_lines: list[str] = []
834
+ if primary_reason:
835
+ why_lines.append(f"Lead reason: {primary_reason}")
836
+ if caveat:
837
+ why_lines.append(f"Caveat: {caveat}")
838
+ st.caption("Why this rating")
839
+ for line in why_lines:
840
+ st.write(f"- {line}")
841
  render_player_hr_details(details)
842
  st.divider()
843
 
 
902
  "Pregame HR%": _format_pct(row.get("model_hr_prob")),
903
  "EV": _format_ev(row.get("bet_ev")),
904
  "Confidence": _format_confidence(row.get("confidence_score")),
905
+ "Model Voice": str(row.get("model_voice") or "-"),
906
  "Source": str(row.get("model_hr_prob_source") or "-"),
907
  "Edge": _format_edge(row.get("edge")),
908
  }
 
922
  "Fair%": _format_pct(row.get("fair_prob")),
923
  "EV": _format_ev(row.get("bet_ev")),
924
  "Confidence": _format_confidence(row.get("confidence_score")),
925
+ "Model Voice": str(row.get("model_voice") or "-"),
926
  "Edge": _format_edge(row.get("edge")),
927
  }
928
  for _, row in display.iterrows()
 
962
  "sportsbook",
963
  "display_label",
964
  "verdict",
965
+ "model_voice",
966
+ "model_voice_primary_reason",
967
+ "model_voice_caveat",
968
  "model_voice_for",
969
  "model_voice_against",
970
  "baseline_hr_prob",