Spaces:

Syntrex
/

2026_MLB_Model

Running

App Files Files

2026_MLB_Model / analytics /model_voice.py

Syntrex

Promote strikeout v2 and harden telemetry models

50dc123 2 months ago

raw

history blame

7.56 kB

	from __future__ import annotations

	from typing import Any


	def _listify(value: Any) -> list[str]:
	if value is None:
	return []
	if isinstance(value, list):
	return [str(v).strip() for v in value if str(v).strip()]
	if isinstance(value, str):
	return [part.strip() for part in value.split("\|") if part.strip()]
	return [str(value).strip()] if str(value).strip() else []


	def _dedupe(items: list[str]) -> list[str]:
	seen: set[str] = set()
	ordered: list[str] = []
	for item in items:
	norm = item.casefold()
	if not item or norm in seen:
	continue
	seen.add(norm)
	ordered.append(item)
	return ordered


	def _template_text(template_key: str, template_inputs: dict[str, Any] \| None = None) -> str:
	inputs = dict(template_inputs or {})
	lineup_slot = inputs.get("lineup_slot_used")
	lineup_slot_txt = f"No. {int(lineup_slot)} slot" if lineup_slot not in {None, "", "nan", "None"} else "projected slot"
	venue = str(inputs.get("venue") or "").strip()

	templates = {
	"pitcher_attackable": "The opposing pitcher profile is giving up HR-friendly contact",
	"pitcher_suppresses_hr": "The pitcher profile suppresses home-run damage",
	"trend_up": "Recent batted-ball form is trending up",
	"trend_down": "Recent batted-ball form has cooled",
	"zone_favorable": "The zone matchup lines up with his damage areas",
	"zone_tough": "This zone profile limits his best contact lanes",
	"family_zone_favorable": "The family-zone matchup boosts his contact shape",
	"family_zone_tough": "The family-zone matchup works against his usual damage path",
	"arsenal_favorable": "The arsenal mix fits his power profile",
	"arsenal_tough": "This pitch mix is a tougher fit for his power profile",
	"platoon_advantage": "The handedness split helps the matchup",
	"platoon_disadvantage": "Same-handed conditions trim the HR ceiling",
	"pulled_contact_strength": "His pulled-air damage keeps the HR ceiling live",
	"pulled_contact_light": "The pulled-air profile is lighter than ideal for this HR spot",
	"weather_supportive": "Weather conditions add a little extra carry",
	"weather_suppressive": "Weather conditions are holding down carry",
	"park_supportive": f"{venue} plays friendlier for carry" if venue else "The park adds a small carry boost",
	"park_suppressive": f"{venue} suppresses HR carry" if venue else "The park trims carry",
	"trajectory_helpful": "Pitch shape is more hittable than usual here",
	"trajectory_tough": "Pitch shape and tunneling make clean lift harder here",
	"rolling_up": "Recent form is moving in the right direction",
	"rolling_down": "Recent form has cooled",
	"opportunity_strong": f"The {lineup_slot_txt} adds plate-appearance upside",
	"opportunity_light": "The opportunity projection is lighter than usual",
	"pitcher_unresolved": "The opposing pitcher is still unresolved",
	"lineup_unknown": "The lineup slot is still unknown",
	"lineup_projected": "The lineup slot is projected rather than confirmed",
	"strikeout_whiff_profile": "The whiff profile supports the strikeout look",
	"strikeout_price_close": "The price is keeping the strikeout edge tight",
	}
	return templates.get(template_key, template_key.replace("_", " ").capitalize())


	def build_hr_model_voice(row: dict[str, Any]) -> dict[str, Any]:
	    """Compose the home-run "model voice" sentence for one row.

	    Reads ``row["model_voice_reason_candidates"]``; each dict candidate may
	    carry ``direction``, ``template_key`` and ``template_inputs``. The first
	    "supportive" candidate (or, failing that, the first candidate) becomes the
	    primary reason, and the first "caution" candidate that is not already the
	    primary becomes the caveat.

	    Args:
	        row: Mapping holding the candidate list. A missing or non-list value is
	            treated as "no candidates"; non-dict entries are ignored.

	    Returns:
	        A dict with ``model_voice`` (one sentence ending in a period),
	        ``model_voice_primary_reason`` / ``model_voice_for`` (the primary text),
	        ``model_voice_caveat`` / ``model_voice_against`` (the caveat text or
	        ``None``) and ``model_voice_tags`` (de-duplicated template keys).
	    """
	    default_reason = "His current power baseline is keeping the matchup in range"

	    def direction_of(candidate: dict[str, Any]) -> str:
	        return str(candidate.get("direction") or "").strip().lower()

	    def key_of(candidate: dict[str, Any]) -> str:
	        return str(candidate.get("template_key") or "").strip()

	    raw_candidates = row.get("model_voice_reason_candidates")
	    # Anything that is not a list/tuple of dicts cannot be interpreted as
	    # candidates; dropping it here avoids attribute errors further down.
	    candidates = (
	        [c for c in raw_candidates if isinstance(c, dict)]
	        if isinstance(raw_candidates, (list, tuple))
	        else []
	    )
	    supportive = [c for c in candidates if direction_of(c) == "supportive"]
	    cautions = [c for c in candidates if direction_of(c) == "caution"]

	    primary = supportive[0] if supportive else (candidates[0] if candidates else None)
	    if primary and direction_of(primary) == "caution":
	        # The primary is itself the first caution, so the caveat has to be the
	        # next one or the sentence would repeat itself.
	        caveat = cautions[1] if len(cautions) > 1 else None
	    else:
	        caveat = cautions[0] if cautions else None

	    # The key is stripped for lookup exactly as it is for the tags below, so
	    # both views of a candidate agree.
	    primary_reason = _template_text(key_of(primary), primary.get("template_inputs")) if primary else ""
	    if not primary_reason:
	        # A blank template key renders as an empty string; fall back to the
	        # default sentence instead of emitting a bare ".".
	        primary_reason = default_reason
	    caveat_reason = _template_text(key_of(caveat), caveat.get("template_inputs")) if caveat else ""

	    voice = primary_reason
	    if caveat_reason:
	        caveat_inputs = caveat.get("template_inputs")
	        venue = str(caveat_inputs.get("venue") or "").strip() if isinstance(caveat_inputs, dict) else ""
	        if venue and caveat_reason.startswith(venue):
	            # Park templates open with the venue name; keep its capitalization.
	            clause = caveat_reason
	        else:
	            # The caveat continues the sentence, so lower its leading capital.
	            clause = caveat_reason[:1].lower() + caveat_reason[1:]
	        voice = f"{primary_reason}, but {clause}"

	    tags = _dedupe([key_of(candidate) for candidate in candidates if key_of(candidate)])

	    return {
	        "model_voice": voice.rstrip(".") + ".",
	        "model_voice_primary_reason": primary_reason,
	        "model_voice_caveat": caveat_reason or None,
	        "model_voice_tags": tags,
	        "model_voice_for": primary_reason,
	        "model_voice_against": caveat_reason or None,
	    }


	def build_strikeout_model_voice(result: dict[str, Any]) -> dict[str, Any]:
	    """Compose the strikeout "model voice" sentence for one result.

	    Reads ``selection_side``, ``line``, ``expected_strikeouts``,
	    ``projected_batters_faced``, ``leash_risk_subscore``, ``reason_tags_for``,
	    ``reason_tags_against`` and ``confidence_reasons`` from ``result``.

	    For an "under" selection the primary reason is derived from the numbers,
	    with later checks overriding earlier ones (line text, then projected
	    strikeouts, then batters faced, then leash risk). For any other side the
	    first positive reason tag is used.

	    Args:
	        result: Mapping with the fields listed above. Numeric fields that are
	            missing, non-numeric, NaN or infinite are ignored.

	    Returns:
	        A dict with ``model_voice`` (one sentence ending in a period),
	        ``model_voice_primary_reason`` / ``model_voice_for``,
	        ``model_voice_caveat`` / ``model_voice_against`` (text or ``None``) and
	        ``model_voice_tags``.
	    """
	    selection_side = str(result.get("selection_side") or "").strip().lower()
	    # All four numeric inputs go through the same tolerant conversion so a bad
	    # value in any one of them degrades the wording instead of raising.
	    line = _finite_float(result.get("line"))
	    expected_ks = _finite_float(result.get("expected_strikeouts"))
	    projected_bf = _finite_float(result.get("projected_batters_faced"))
	    leash_risk = _finite_float(result.get("leash_risk_subscore"))
	    positives = _dedupe(_listify(result.get("reason_tags_for")))
	    negatives = _dedupe(_listify(result.get("reason_tags_against")) + _listify(result.get("confidence_reasons")))

	    if selection_side == "under":
	        line_txt = f"{line:.1f}" if line is not None else "the number"
	        primary_reason = f"The line is a little high relative to the projected strikeout total ({line_txt} Ks)"
	        if expected_ks is not None:
	            primary_reason = f"Projected strikeouts land below the line ({expected_ks:.1f} vs {line_txt} Ks)"
	        # NOTE(review): 21.5 and 0.48 are cut-offs taken as-is from the
	        # original code; their calibration is not visible here.
	        if projected_bf is not None and projected_bf <= 21.5:
	            primary_reason = "Projected batters faced are lighter than ideal for the strikeout line"
	        if leash_risk is not None and leash_risk >= 0.48:
	            primary_reason = "Pitch-count and leash risk keep the under live even with swing-and-miss stuff"
	        caveat = positives[0] if positives else (negatives[0] if negatives else "")
	    else:
	        primary_reason = positives[0] if positives else "The whiff profile supports the strikeout look"
	        caveat = negatives[0] if negatives else ""

	    voice = primary_reason
	    if caveat:
	        # The caveat continues the sentence, so lower its leading capital.
	        voice = f"{primary_reason}, but {caveat[:1].lower()}{caveat[1:]}"

	    tags: list[str] = []
	    if selection_side == "under":
	        tags.extend(["strikeout_line_high", "strikeout_opportunity_cap"])
	    elif positives:
	        tags.append("strikeout_whiff_profile")
	    if negatives:
	        tags.append("strikeout_price_close")
	    tags = _dedupe(tags)

	    return {
	        "model_voice": voice.rstrip(".") + ".",
	        "model_voice_primary_reason": primary_reason,
	        "model_voice_caveat": caveat or None,
	        "model_voice_tags": [tag for tag in tags if tag],
	        "model_voice_for": primary_reason,
	        "model_voice_against": caveat or None,
	    }


	def _finite_float(value: Any) -> float | None:
	    """Return ``value`` as a finite float, or ``None`` if that is not possible."""
	    # Imported here so this helper does not depend on the module's import block.
	    import math

	    if value is None:
	        return None
	    try:
	        number = float(value)
	    except (TypeError, ValueError, OverflowError):
	        return None
	    # NaN and infinities would otherwise print as "nan"/"inf" in the sentence.
	    return number if math.isfinite(number) else None