2026_MLB_Model / analytics /model_voice.py
Syntrex's picture
Promote strikeout v2 and harden telemetry models
50dc123
raw
history blame
7.56 kB
from __future__ import annotations
from typing import Any
def _listify(value: Any) -> list[str]:
if value is None:
return []
if isinstance(value, list):
return [str(v).strip() for v in value if str(v).strip()]
if isinstance(value, str):
return [part.strip() for part in value.split("|") if part.strip()]
return [str(value).strip()] if str(value).strip() else []
def _dedupe(items: list[str]) -> list[str]:
seen: set[str] = set()
ordered: list[str] = []
for item in items:
norm = item.casefold()
if not item or norm in seen:
continue
seen.add(norm)
ordered.append(item)
return ordered
def _template_text(template_key: str, template_inputs: dict[str, Any] | None = None) -> str:
inputs = dict(template_inputs or {})
lineup_slot = inputs.get("lineup_slot_used")
lineup_slot_txt = f"No. {int(lineup_slot)} slot" if lineup_slot not in {None, "", "nan", "None"} else "projected slot"
venue = str(inputs.get("venue") or "").strip()
templates = {
"pitcher_attackable": "The opposing pitcher profile is giving up HR-friendly contact",
"pitcher_suppresses_hr": "The pitcher profile suppresses home-run damage",
"trend_up": "Recent batted-ball form is trending up",
"trend_down": "Recent batted-ball form has cooled",
"zone_favorable": "The zone matchup lines up with his damage areas",
"zone_tough": "This zone profile limits his best contact lanes",
"family_zone_favorable": "The family-zone matchup boosts his contact shape",
"family_zone_tough": "The family-zone matchup works against his usual damage path",
"arsenal_favorable": "The arsenal mix fits his power profile",
"arsenal_tough": "This pitch mix is a tougher fit for his power profile",
"platoon_advantage": "The handedness split helps the matchup",
"platoon_disadvantage": "Same-handed conditions trim the HR ceiling",
"pulled_contact_strength": "His pulled-air damage keeps the HR ceiling live",
"pulled_contact_light": "The pulled-air profile is lighter than ideal for this HR spot",
"weather_supportive": "Weather conditions add a little extra carry",
"weather_suppressive": "Weather conditions are holding down carry",
"park_supportive": f"{venue} plays friendlier for carry" if venue else "The park adds a small carry boost",
"park_suppressive": f"{venue} suppresses HR carry" if venue else "The park trims carry",
"trajectory_helpful": "Pitch shape is more hittable than usual here",
"trajectory_tough": "Pitch shape and tunneling make clean lift harder here",
"rolling_up": "Recent form is moving in the right direction",
"rolling_down": "Recent form has cooled",
"opportunity_strong": f"The {lineup_slot_txt} adds plate-appearance upside",
"opportunity_light": "The opportunity projection is lighter than usual",
"pitcher_unresolved": "The opposing pitcher is still unresolved",
"lineup_unknown": "The lineup slot is still unknown",
"lineup_projected": "The lineup slot is projected rather than confirmed",
"strikeout_whiff_profile": "The whiff profile supports the strikeout look",
"strikeout_price_close": "The price is keeping the strikeout edge tight",
}
return templates.get(template_key, template_key.replace("_", " ").capitalize())
def build_hr_model_voice(row: dict[str, Any]) -> dict[str, Any]:
    """Compose the home-run model-voice sentence and related fields for a row.

    Reads ``row["model_voice_reason_candidates"]``, an ordered collection of
    dicts with ``direction``, ``template_key`` and optional
    ``template_inputs``. The first ``"supportive"`` candidate (or, failing
    that, the first candidate of any direction) becomes the primary reason.
    The first ``"caution"`` candidate that is not the primary becomes the
    caveat.

    Args:
        row: Mapping holding the reason candidates.

    Returns:
        A dict with ``model_voice`` (full sentence ending in a period),
        ``model_voice_primary_reason`` / ``model_voice_for``,
        ``model_voice_caveat`` / ``model_voice_against`` (``None`` when there
        is no caveat) and ``model_voice_tags`` (de-duplicated template keys).
    """
    fallback_reason = "His current power baseline is keeping the matchup in range"

    def direction_of(candidate: dict[str, Any]) -> str:
        return str(candidate.get("direction") or "").strip().lower()

    def template_key_of(candidate: dict[str, Any]) -> str:
        return str(candidate.get("template_key") or "").strip()

    # Skip entries that are not dicts so one malformed candidate cannot
    # break the whole row.
    candidates = [
        candidate
        for candidate in (row.get("model_voice_reason_candidates") or [])
        if isinstance(candidate, dict)
    ]
    supportive = [c for c in candidates if direction_of(c) == "supportive"]
    cautions = [c for c in candidates if direction_of(c) == "caution"]

    primary = supportive[0] if supportive else candidates[0] if candidates else None
    if primary is not None and direction_of(primary) == "caution":
        # The primary is itself the first caution; use the next one as caveat.
        caveat = cautions[1] if len(cautions) > 1 else None
    else:
        caveat = cautions[0] if cautions else None

    primary_reason = fallback_reason
    if primary is not None:
        # A candidate with a blank template key renders as "", which would
        # produce a voice of "." — fall back to the generic sentence instead.
        primary_reason = (
            _template_text(template_key_of(primary), primary.get("template_inputs"))
            or fallback_reason
        )

    caveat_reason = ""
    if caveat is not None:
        caveat_reason = _template_text(template_key_of(caveat), caveat.get("template_inputs"))

    voice = primary_reason
    if caveat_reason:
        # The caveat follows ", but", so its leading capital is normally
        # lowered. Venue-led sentences start with a proper noun and must
        # keep their capitalization.
        caveat_venue = str(dict(caveat.get("template_inputs") or {}).get("venue") or "").strip()
        if caveat_venue and caveat_reason.startswith(caveat_venue):
            caveat_clause = caveat_reason
        else:
            caveat_clause = caveat_reason[:1].lower() + caveat_reason[1:]
        voice = f"{primary_reason}, but {caveat_clause}"

    tags = _dedupe([key for key in map(template_key_of, candidates) if key])

    return {
        "model_voice": voice.rstrip(".") + ".",
        "model_voice_primary_reason": primary_reason,
        "model_voice_caveat": caveat_reason or None,
        "model_voice_tags": tags,
        "model_voice_for": primary_reason,
        "model_voice_against": caveat_reason or None,
    }
def build_strikeout_model_voice(result: dict[str, Any]) -> dict[str, Any]:
    """Compose the strikeout model-voice sentence and related fields.

    For an ``"under"`` selection the primary reason is derived from the
    numeric fields, with later checks overriding earlier ones: line vs.
    expected strikeouts, then projected batters faced (<= 21.5), then leash
    risk (>= 0.48). For any other selection side the primary reason is the
    first supporting reason tag, or a generic whiff-profile sentence.

    Args:
        result: Mapping read for ``selection_side``, ``line``,
            ``expected_strikeouts``, ``projected_batters_faced``,
            ``leash_risk_subscore``, ``reason_tags_for``,
            ``reason_tags_against`` and ``confidence_reasons``.

    Returns:
        A dict with ``model_voice`` (full sentence ending in a period),
        ``model_voice_primary_reason`` / ``model_voice_for``,
        ``model_voice_caveat`` / ``model_voice_against`` (``None`` when there
        is no caveat) and ``model_voice_tags``.
    """

    def finite_or_none(value: Any) -> float | None:
        """Parse *value* as a finite float; return None when that is impossible."""
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return None
        # NaN fails both comparisons and the infinities fail one, so only
        # finite numbers pass. This keeps "nan"/"inf" out of the sentence.
        return number if float("-inf") < number < float("inf") else None

    selection_side = str(result.get("selection_side") or "").strip().lower()
    # Every numeric field goes through the same tolerant parser so that a
    # missing or malformed value skips its rule instead of raising.
    line = finite_or_none(result.get("line"))
    expected_ks = finite_or_none(result.get("expected_strikeouts"))
    projected_bf = finite_or_none(result.get("projected_batters_faced"))
    leash_risk = finite_or_none(result.get("leash_risk_subscore"))

    positives = _dedupe(_listify(result.get("reason_tags_for")))
    negatives = _dedupe(
        _listify(result.get("reason_tags_against")) + _listify(result.get("confidence_reasons"))
    )

    if selection_side == "under":
        line_txt = f"{line:.1f}" if line is not None else "the number"
        primary_reason = f"The line is a little high relative to the projected strikeout total ({line_txt} Ks)"
        if expected_ks is not None:
            primary_reason = f"Projected strikeouts land below the line ({expected_ks:.1f} vs {line_txt} Ks)"
        if projected_bf is not None and projected_bf <= 21.5:
            primary_reason = "Projected batters faced are lighter than ideal for the strikeout line"
        if leash_risk is not None and leash_risk >= 0.48:
            primary_reason = "Pitch-count and leash risk keep the under live even with swing-and-miss stuff"
        # For an under, a supporting strikeout tag is the counter-argument.
        caveat = positives[0] if positives else (negatives[0] if negatives else "")
    else:
        primary_reason = positives[0] if positives else "The whiff profile supports the strikeout look"
        caveat = negatives[0] if negatives else ""

    voice = primary_reason
    if caveat:
        # The caveat follows ", but", so lower its leading capital.
        voice = f"{primary_reason}, but {caveat[:1].lower()}{caveat[1:]}"

    tags: list[str] = []
    if selection_side == "under":
        tags.extend(["strikeout_line_high", "strikeout_opportunity_cap"])
    elif positives:
        tags.append("strikeout_whiff_profile")
    if negatives:
        tags.append("strikeout_price_close")
    tags = _dedupe(tags)

    return {
        "model_voice": voice.rstrip(".") + ".",
        "model_voice_primary_reason": primary_reason,
        "model_voice_caveat": caveat or None,
        "model_voice_tags": [tag for tag in tags if tag],
        "model_voice_for": primary_reason,
        "model_voice_against": caveat or None,
    }