Spaces:

Syntrex
/

2026_MLB_Model

Sleeping

2026_MLB_Model

File size: 7,562 Bytes

from __future__ import annotations

import math
from typing import Any


def _listify(value: Any) -> list[str]:
    if value is None:
        return []
    if isinstance(value, list):
        return [str(v).strip() for v in value if str(v).strip()]
    if isinstance(value, str):
        return [part.strip() for part in value.split("|") if part.strip()]
    return [str(value).strip()] if str(value).strip() else []


def _dedupe(items: list[str]) -> list[str]:
    seen: set[str] = set()
    ordered: list[str] = []
    for item in items:
        norm = item.casefold()
        if not item or norm in seen:
            continue
        seen.add(norm)
        ordered.append(item)
    return ordered


def _template_text(template_key: str, template_inputs: dict[str, Any] | None = None) -> str:
    inputs = dict(template_inputs or {})
    lineup_slot = inputs.get("lineup_slot_used")
    lineup_slot_txt = f"No. {int(lineup_slot)} slot" if lineup_slot not in {None, "", "nan", "None"} else "projected slot"
    venue = str(inputs.get("venue") or "").strip()

    templates = {
        "pitcher_attackable": "The opposing pitcher profile is giving up HR-friendly contact",
        "pitcher_suppresses_hr": "The pitcher profile suppresses home-run damage",
        "trend_up": "Recent batted-ball form is trending up",
        "trend_down": "Recent batted-ball form has cooled",
        "zone_favorable": "The zone matchup lines up with his damage areas",
        "zone_tough": "This zone profile limits his best contact lanes",
        "family_zone_favorable": "The family-zone matchup boosts his contact shape",
        "family_zone_tough": "The family-zone matchup works against his usual damage path",
        "arsenal_favorable": "The arsenal mix fits his power profile",
        "arsenal_tough": "This pitch mix is a tougher fit for his power profile",
        "platoon_advantage": "The handedness split helps the matchup",
        "platoon_disadvantage": "Same-handed conditions trim the HR ceiling",
        "pulled_contact_strength": "His pulled-air damage keeps the HR ceiling live",
        "pulled_contact_light": "The pulled-air profile is lighter than ideal for this HR spot",
        "weather_supportive": "Weather conditions add a little extra carry",
        "weather_suppressive": "Weather conditions are holding down carry",
        "park_supportive": f"{venue} plays friendlier for carry" if venue else "The park adds a small carry boost",
        "park_suppressive": f"{venue} suppresses HR carry" if venue else "The park trims carry",
        "trajectory_helpful": "Pitch shape is more hittable than usual here",
        "trajectory_tough": "Pitch shape and tunneling make clean lift harder here",
        "rolling_up": "Recent form is moving in the right direction",
        "rolling_down": "Recent form has cooled",
        "opportunity_strong": f"The {lineup_slot_txt} adds plate-appearance upside",
        "opportunity_light": "The opportunity projection is lighter than usual",
        "pitcher_unresolved": "The opposing pitcher is still unresolved",
        "lineup_unknown": "The lineup slot is still unknown",
        "lineup_projected": "The lineup slot is projected rather than confirmed",
        "strikeout_whiff_profile": "The whiff profile supports the strikeout look",
        "strikeout_price_close": "The price is keeping the strikeout edge tight",
    }
    return templates.get(template_key, template_key.replace("_", " ").capitalize())


def _candidate_field(candidate: dict[str, Any], field: str) -> str:
    """Return ``candidate[field]`` as a trimmed string ("" when missing or falsy)."""
    return str(candidate.get(field) or "").strip()


def _candidate_inputs(candidate: dict[str, Any]) -> dict[str, Any] | None:
    """Return the candidate's ``template_inputs`` when it is a dict, else ``None``."""
    inputs = candidate.get("template_inputs")
    return inputs if isinstance(inputs, dict) else None


def build_hr_model_voice(row: dict[str, Any]) -> dict[str, Any]:
    """Build the home-run "model voice" sentence and its parts for one row.

    Reads ``row["model_voice_reason_candidates"]``, a sequence of dicts with
    ``direction``, ``template_key`` and optional ``template_inputs``. Entries
    that are not dicts or have a blank ``template_key`` are ignored, since they
    cannot be rendered into text.

    Selection:
        * primary: the first "supportive" candidate, otherwise the first
          usable candidate of any direction;
        * caveat: the first "caution" candidate that is not the primary.

    Returns:
        A dict with ``model_voice`` (full sentence ending in a period),
        ``model_voice_primary_reason`` / ``model_voice_for``,
        ``model_voice_caveat`` / ``model_voice_against`` (``None`` when there
        is no caveat) and ``model_voice_tags`` (de-duplicated template keys).
    """
    raw_candidates = row.get("model_voice_reason_candidates") or []
    if not isinstance(raw_candidates, (list, tuple)):
        raw_candidates = []
    candidates = [
        candidate
        for candidate in raw_candidates
        if isinstance(candidate, dict) and _candidate_field(candidate, "template_key")
    ]

    supportive = [c for c in candidates if _candidate_field(c, "direction").lower() == "supportive"]
    cautions = [c for c in candidates if _candidate_field(c, "direction").lower() == "caution"]

    primary = supportive[0] if supportive else (candidates[0] if candidates else None)
    # When the primary is itself a caution, this picks the next caution instead.
    caveat = next((c for c in cautions if c is not primary), None)

    primary_reason = ""
    if primary is not None:
        primary_reason = _template_text(_candidate_field(primary, "template_key"), _candidate_inputs(primary))
    if not primary_reason.strip():
        # Never emit an empty sentence; fall back to the generic baseline text.
        primary_reason = "His current power baseline is keeping the matchup in range"

    caveat_reason = ""
    if caveat is not None:
        caveat_reason = _template_text(_candidate_field(caveat, "template_key"), _candidate_inputs(caveat))

    voice = primary_reason
    if caveat_reason:
        # Park templates start with the venue name; keep its capitalization
        # rather than lower-casing a proper noun mid-sentence.
        venue = str((_candidate_inputs(caveat) or {}).get("venue") or "").strip()
        if venue and caveat_reason.startswith(venue):
            caveat_clause = caveat_reason
        else:
            caveat_clause = caveat_reason[:1].lower() + caveat_reason[1:]
        voice = f"{primary_reason}, but {caveat_clause}"

    tags = _dedupe([_candidate_field(candidate, "template_key") for candidate in candidates])

    return {
        "model_voice": voice.rstrip(".") + ".",
        "model_voice_primary_reason": primary_reason,
        "model_voice_caveat": caveat_reason or None,
        "model_voice_tags": tags,
        "model_voice_for": primary_reason,
        "model_voice_against": caveat_reason or None,
    }


# Thresholds used when explaining an "under" selection.
_UNDER_BATTERS_FACED_CEILING = 21.5
_UNDER_LEASH_RISK_FLOOR = 0.48


def _finite_float_or_none(value: Any) -> float | None:
    """Convert *value* to a finite float, or return ``None`` if that is not possible.

    ``None``, non-numeric values, NaN and infinities all map to ``None`` so
    callers can treat them uniformly as "not available".
    """
    if value is None:
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return None
    return number if math.isfinite(number) else None


def build_strikeout_model_voice(result: dict[str, Any]) -> dict[str, Any]:
    """Build the strikeout "model voice" sentence and its parts for one result.

    Reads ``selection_side``, ``line``, ``expected_strikeouts``,
    ``projected_batters_faced``, ``leash_risk_subscore``, ``reason_tags_for``,
    ``reason_tags_against`` and ``confidence_reasons`` from *result*. Numeric
    fields that are missing, non-numeric, NaN or infinite are treated as
    unavailable rather than raising or leaking "nan" into the text.

    For an "under" selection the primary reason is chosen from the numeric
    fields, with later checks overriding earlier ones (line, then projection,
    then batters faced, then leash risk). The caveat is the first positive
    reason, or failing that the first negative one. For any other side the
    primary reason is the first positive reason (or a default) and the caveat
    is the first negative reason.

    Returns:
        A dict with ``model_voice`` (full sentence ending in a period),
        ``model_voice_primary_reason`` / ``model_voice_for``,
        ``model_voice_caveat`` / ``model_voice_against`` (``None`` when there
        is no caveat) and ``model_voice_tags``.
    """
    selection_side = str(result.get("selection_side") or "").strip().lower()
    line = _finite_float_or_none(result.get("line"))
    expected_ks = _finite_float_or_none(result.get("expected_strikeouts"))
    projected_bf = _finite_float_or_none(result.get("projected_batters_faced"))
    leash_risk = _finite_float_or_none(result.get("leash_risk_subscore"))
    positives = _dedupe(_listify(result.get("reason_tags_for")))
    negatives = _dedupe(_listify(result.get("reason_tags_against")) + _listify(result.get("confidence_reasons")))

    if selection_side == "under":
        line_txt = f"{line:.1f}" if line is not None else "the number"
        primary_reason = f"The line is a little high relative to the projected strikeout total ({line_txt} Ks)"
        if expected_ks is not None:
            primary_reason = f"Projected strikeouts land below the line ({expected_ks:.1f} vs {line_txt} Ks)"
        if projected_bf is not None and projected_bf <= _UNDER_BATTERS_FACED_CEILING:
            primary_reason = "Projected batters faced are lighter than ideal for the strikeout line"
        if leash_risk is not None and leash_risk >= _UNDER_LEASH_RISK_FLOOR:
            primary_reason = "Pitch-count and leash risk keep the under live even with swing-and-miss stuff"
        caveat = positives[0] if positives else (negatives[0] if negatives else "")
    else:
        primary_reason = positives[0] if positives else "The whiff profile supports the strikeout look"
        caveat = negatives[0] if negatives else ""

    voice = primary_reason
    if caveat:
        # Lower-case the caveat's first letter so it reads as a continuation.
        voice = f"{primary_reason}, but {caveat[:1].lower()}{caveat[1:]}"

    tags: list[str] = []
    if selection_side == "under":
        tags.extend(["strikeout_line_high", "strikeout_opportunity_cap"])
    elif positives:
        tags.append("strikeout_whiff_profile")
    if negatives:
        tags.append("strikeout_price_close")

    return {
        "model_voice": voice.rstrip(".") + ".",
        "model_voice_primary_reason": primary_reason,
        "model_voice_caveat": caveat or None,
        "model_voice_tags": _dedupe(tags),
        "model_voice_for": primary_reason,
        "model_voice_against": caveat or None,
    }