from __future__ import annotations

import logging
import math
from typing import Any

import pandas as pd

from models.opportunity_model import build_projected_strikeout_opportunity
from models.pitcher_adjustment import build_pitcher_feature_row
from models.shared_matchup_engine import compose_shared_matchup_context

_LOGGER = logging.getLogger(__name__)

# At most this many opponent batters are evaluated per lineup.
_MAX_LINEUP_SIZE = 9


def _safe_float(value: Any, default: float | None = None) -> float | None:
    """Convert *value* to ``float``, returning *default* when it is unusable.

    ``None``, blank strings, and the textual forms ``"nan"`` / ``"none"``
    (case-insensitive, surrounding whitespace ignored) map to *default*, as
    does anything ``float()`` cannot parse.
    """
    # Deliberately total: any failure while stringifying or converting an
    # arbitrary object falls back to the default instead of propagating.
    try:
        if value is None:
            return default
        text = str(value).strip().lower()
        if text in {"", "nan", "none"}:
            return default
        return float(value)
    except Exception:
        return default


def _clamp(value: float, lo: float, hi: float) -> float:
    """Restrict *value* to the closed interval ``[lo, hi]``."""
    return max(lo, min(hi, value))


def _reliability(sample_size: Any, k: float = 120.0) -> float:
    """Return a shrinkage weight in ``[0, 1]`` computed as ``n / (n + k)``.

    *sample_size* is parsed leniently (unusable or negative values count as
    zero) and *k* is floored at 1.0 so the denominator is never zero.
    """
    sample = max(0.0, float(_safe_float(sample_size, 0.0) or 0.0))
    return _clamp(sample / (sample + max(1.0, k)), 0.0, 1.0)


def _confidence_component(label: str, value: float, direction: str) -> dict[str, Any]:
    """Build one confidence bonus/penalty record with the value rounded to 1 dp."""
    return {
        "label": label,
        "value": round(float(value), 1),
        "direction": direction,
    }


def _poisson_prob_over(expected_value: float, line: float) -> float:
    """Return ``P(X > line)`` for ``X ~ Poisson(expected_value)``.

    Computed as ``1 - sum(pmf(k) for k in 0..floor(line))``. A non-positive
    mean yields 0.0.
    """
    if expected_value <= 0:
        return 0.0
    target = int(math.floor(line))
    # Loop-invariant factor hoisted; the per-term expression and summation
    # order are kept as-is so results stay bit-identical.
    decay = math.exp(-expected_value)
    cumulative = 0.0
    for k in range(0, target + 1):
        cumulative += decay * (expected_value ** k) / math.factorial(k)
    return _clamp(1.0 - cumulative, 0.0, 1.0)


def _poisson_prob_under(expected_value: float, line: float) -> float:
    """Return the complement of :func:`_poisson_prob_over` for the same inputs.

    NOTE(review): for an integer *line* this equals ``P(X <= line)``, i.e. the
    exact-hit (push) outcome is counted toward "under" -- confirm intended.
    """
    return _clamp(1.0 - _poisson_prob_over(expected_value, line), 0.0, 1.0)


def _calibrate(probability: float) -> float:
    """Shrink *probability* toward 0.5 by a factor of 0.92, clamped to [0.02, 0.98]."""
    centered = probability - 0.50
    return _clamp(0.50 + (centered * 0.92), 0.02, 0.98)


def _blank_result() -> dict[str, Any]:
    """Return a fresh result payload with every output key at its default.

    Each legacy key has a ``*_v2`` twin. Containers are created per call so
    callers never share mutable defaults.
    """
    return {
        "formula_version": "strikeout_v2_live",
        "raw_k_prob": None,
        "calibrated_k_prob": None,
        "fair_prob": None,
        "expected_strikeouts": None,
        "raw_k_prob_v2": None,
        "calibrated_k_prob_v2": None,
        "fair_prob_v2": None,
        "expected_strikeouts_v2": None,
        "projected_pitch_count": None,
        "projected_batters_faced": None,
        "projected_innings": None,
        "projected_k_rate": None,
        "pitches_per_bf": None,
        "opportunity_confidence": None,
        "opportunity_reasons": [],
        "k_rate_pitch_signal": None,
        "k_rate_anchor": None,
        "bb_rate_anchor": None,
        "command_efficiency_signal": None,
        "pitcher_swstr_rate": None,
        "pitcher_csw_rate": None,
        "pitcher_ball_rate": None,
        "swing_miss_subscore": None,
        "called_strike_subscore": None,
        "command_efficiency_subscore": None,
        "lineup_whiff_subscore": None,
        "zone_matchup_subscore": None,
        "family_zone_matchup_subscore": None,
        "arsenal_fit_subscore": None,
        "tunneling_subscore": None,
        "release_consistency_subscore": None,
        "sequencing_subscore": None,
        "count_leverage_subscore": None,
        "leash_risk_subscore": None,
        "role_certainty_score": None,
        "times_through_order_penalty": None,
        "telemetry_path_status": "baseline_only",
        "model_tier": "baseline_only_degraded",
        "variance_band_low": None,
        "variance_band_high": None,
        "matchup_coverage_confidence": None,
        "component_source_map": {},
        "predicted_whiff_regions": [],
        "predicted_attack_regions": [],
        "predicted_damage_regions": [],
        "tunnel_pair_scores": [],
        "applied_layers": "",
        "skipped_layers": "",
        "confidence_score": None,
        "confidence_score_raw": None,
        "confidence_score_display": None,
        "confidence_source": "strikeout_v2_live",
        "confidence_bucket": None,
        "confidence_reasons": [],
        "confidence_component_bonuses": [],
        "confidence_component_penalties": [],
        "confidence_primary_driver": None,
        "confidence_summary_label": None,
        "reason_tags_for": [],
        "reason_tags_against": [],
        "applied_layers_v2": "",
        "skipped_layers_v2": "",
        "confidence_score_v2": None,
        "confidence_score_raw_v2": None,
        "confidence_score_display_v2": None,
        "confidence_source_v2": "strikeout_v2_live",
        "confidence_bucket_v2": None,
        "confidence_reasons_v2": [],
        "confidence_component_bonuses_v2": [],
        "confidence_component_penalties_v2": [],
        "confidence_primary_driver_v2": None,
        "confidence_summary_label_v2": None,
    }


def _collect_lineup_matchups(
    pitcher_statcast_df: pd.DataFrame,
    pitcher_name: str,
    pitcher_row: Any,
    batter_statcast_df: pd.DataFrame | None,
    opponent_batters: list[str] | None,
    opponent_team: str | None,
    game_row: dict[str, Any] | None,
    runtime_cache: dict[str, Any] | None,
) -> list[dict[str, Any]]:
    """Build one shared matchup context per opposing batter (first nine only).

    Returns an empty list when batter data or the lineup is missing. When
    *runtime_cache* is supplied, results are memoised under its
    ``"strikeout_lineup_matchups"`` bucket; a copy of the cached list is
    returned on a hit. Batters whose context cannot be composed are skipped.
    """
    if batter_statcast_df is None or batter_statcast_df.empty or not opponent_batters:
        return []

    lineup = opponent_batters[:_MAX_LINEUP_SIZE]
    cache_bucket: dict[Any, Any] | None = None
    cache_key: tuple[Any, ...] | None = None
    if runtime_cache is not None:
        game = game_row or {}
        cache_bucket = runtime_cache.setdefault("strikeout_lineup_matchups", {})
        # NOTE(review): frame identity (id) is part of the key, so the cache
        # is only meaningful while the same DataFrame objects stay alive.
        cache_key = (
            id(batter_statcast_df),
            id(pitcher_statcast_df),
            str(pitcher_name or "").strip().lower(),
            tuple(str(name or "").strip().lower() for name in lineup),
            str(game.get("away_team") or "").strip().lower(),
            str(game.get("home_team") or "").strip().lower(),
            str(game.get("projected_starter_match_status") or "").strip().lower(),
            str(opponent_team or "").strip().lower(),
        )
        if cache_key in cache_bucket:
            return list(cache_bucket[cache_key])

    rows: list[dict[str, Any]] = []
    for batter_name in lineup:
        try:
            rows.append(
                compose_shared_matchup_context(
                    batter_name=batter_name,
                    pitcher_name=pitcher_name,
                    batter_statcast_df=batter_statcast_df,
                    pitcher_statcast_df=pitcher_statcast_df,
                    pitcher_row=pitcher_row,
                    game_row=game_row,
                    # NOTE(review): handedness is hard-coded to "L" for every
                    # batter -- confirm this placeholder is intended.
                    batter_features={"batter_stand": "L"},
                    runtime_cache=runtime_cache,
                )
            )
        except Exception:
            # Best-effort: one bad batter must not sink the whole lineup, but
            # leave a trace so failures are diagnosable.
            _LOGGER.debug(
                "Skipping matchup context for batter %r", batter_name, exc_info=True
            )
            continue

    if cache_bucket is not None and cache_key is not None:
        cache_bucket[cache_key] = list(rows)
    return rows


def _mean_nested(
    rows: list[dict[str, Any]],
    path: tuple[str, ...],
    default: float = 0.0,
) -> float:
    """Average the value found at the nested key *path* across *rows*.

    A row contributes *default* when the path runs into a non-dict before it
    is fully consumed; a row whose final lookup is ``None`` is left out.
    Returns *default* when no row contributes.
    """
    values: list[float] = []
    for row in rows:
        node: Any = row
        for key in path:
            if not isinstance(node, dict):
                node = default
                break
            node = node.get(key)
        if node is not None:
            values.append(float(_safe_float(node, default) or default))
    return sum(values) / len(values) if values else default


def _summarize_matchups(matchup_rows: list[dict[str, Any]]) -> dict[str, Any]:
    """Collapse per-batter matchup rows into one lineup-level matchup dict.

    Numeric fields are averaged over all rows; list/dict fields are taken
    from the first row only. With no rows, an all-empty structure with zero
    coverage confidence is returned.
    """
    if not matchup_rows:
        return {
            "predicted_whiff_regions": [],
            "predicted_attack_regions": [],
            "predicted_damage_regions": [],
            "tunnel_pair_scores": [],
            "matchup_coverage_confidence": 0.0,
            "component_source_map": {},
            "zone_matchup": {},
            "family_zone_matchup": {},
            "arsenal_matchup": {},
            "trajectory": {},
            "count_context_profile": {},
        }

    lead = matchup_rows[0]
    return {
        "predicted_whiff_regions": lead.get("predicted_whiff_regions") or [],
        "predicted_attack_regions": lead.get("predicted_attack_regions") or [],
        "predicted_damage_regions": lead.get("predicted_damage_regions") or [],
        "tunnel_pair_scores": lead.get("tunnel_pair_scores") or [],
        "matchup_coverage_confidence": _mean_nested(
            matchup_rows, ("matchup_coverage_confidence",), 0.0
        ),
        "component_source_map": lead.get("component_source_map") or {},
        "zone_matchup": {
            "hit_zone_boost": _mean_nested(
                matchup_rows, ("zone_matchup", "hit_zone_boost"), 0.0
            )
        },
        "family_zone_matchup": {
            "family_zone_whiff_risk": _mean_nested(
                matchup_rows, ("family_zone_matchup", "family_zone_whiff_risk"), 0.0
            )
        },
        "arsenal_matchup": {
            "arsenal_whiff_risk": _mean_nested(
                matchup_rows, ("arsenal_matchup", "arsenal_whiff_risk"), 0.0
            )
        },
        "trajectory": lead.get("trajectory") or {},
        "count_context_profile": lead.get("count_context_profile") or {},
    }


def _score_confidence(
    sample_size: int,
    coverage_confidence: Any,
    lineup_mostly_complete: bool,
    leash_risk_score: float,
    role_certainty_score: float,
) -> tuple[float, list[str], list[dict[str, Any]], list[dict[str, Any]]]:
    """Accumulate the unclamped confidence score and its explanations.

    Starts from 56.0 and applies fixed bonuses/penalties for pitcher sample
    size, matchup coverage, lineup completeness, leash risk and role
    certainty. Returns ``(score, reasons, bonuses, penalties)``.
    """
    confidence = 56.0
    reasons: list[str] = []
    bonuses: list[dict[str, Any]] = []
    penalties: list[dict[str, Any]] = []

    def _bonus(label: str, points: float) -> None:
        nonlocal confidence
        confidence += points
        bonuses.append(_confidence_component(label, points, "bonus"))

    def _penalty(label: str, points: float) -> None:
        nonlocal confidence
        confidence -= points
        reasons.append(label)
        penalties.append(_confidence_component(label, points, "penalty"))

    if sample_size >= 400:
        _bonus("Strong pitcher sample", 10.0)
    elif sample_size < 150:
        _penalty("Limited pitcher pitch sample", 10.0)

    if coverage_confidence >= 0.45:
        _bonus("Strong telemetry and zone coverage", 8.0)
    else:
        _penalty("Thin telemetry and zone-coverage sample", 6.0)

    if lineup_mostly_complete:
        _bonus("Projected lineup mostly complete", 5.0)
    else:
        _penalty("Projected opponent lineup is incomplete", 5.0)

    if leash_risk_score >= 0.55:
        _penalty("Pitch-count and leash risk remain elevated", 7.0)

    if role_certainty_score >= 0.92:
        _bonus("Public projected starter confirmed", 4.0)
    elif role_certainty_score <= 0.65:
        _penalty("Starter role and leash certainty remain soft", 6.0)

    return confidence, reasons, bonuses, penalties


def _build_reason_tags(
    swing_miss_subscore: float,
    called_strike_subscore: float,
    projected_k_rate: float,
    projected_batters_faced: float,
    leash_risk_score: float,
    lineup_mostly_complete: bool,
) -> tuple[list[str], list[str]]:
    """Return ``(tags_for, tags_against)`` from fixed thresholds on the inputs."""
    tags_for: list[str] = []
    tags_against: list[str] = []

    if swing_miss_subscore >= 0.62:
        tags_for.append("Misses bats consistently")
    if called_strike_subscore >= 0.72:
        tags_for.append("Strong called plus whiff strike mix")
    if projected_k_rate >= 0.27:
        tags_for.append("Projected strikeout rate supports the line")
    if projected_batters_faced >= 24.0:
        tags_for.append("Projected workload supports deep strikeout opportunity")

    if leash_risk_score >= 0.48:
        tags_against.append("Pitch-count and leash risk limit the strikeout path")
    if projected_batters_faced <= 21.5:
        tags_against.append("Projected batters faced are lighter than ideal")
    if not lineup_mostly_complete:
        tags_against.append("Projected opponent lineup is incomplete")

    return tags_for, tags_against


def build_strikeout_probability_result_v2(
    pitcher_statcast_df: pd.DataFrame,
    pitcher_name: str,
    batter_statcast_df: pd.DataFrame | None = None,
    opponent_batters: list[str] | None = None,
    opponent_team: str | None = None,
    line: float | None = None,
    selection_side: str | None = None,
    game_row: dict[str, Any] | None = None,
    runtime_cache: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Estimate the probability that a pitcher goes over/under a strikeout line.

    The projected strikeout rate blends a pitch-quality signal with a
    swinging-strike anchor (weighted by sample reliability), is multiplied by
    projected batters faced to get an expected strikeout count, and that mean
    is fed to a Poisson tail which is then shrunk toward 0.5.

    Args:
        pitcher_statcast_df: Pitch-level data for the pitcher; required and
            must be non-empty.
        pitcher_name: Pitcher identifier passed to the feature/matchup builders.
        batter_statcast_df: Optional batter data; enables the lineup matchup
            layer when present together with *opponent_batters*.
        opponent_batters: Projected opposing lineup; only the first nine
            names are evaluated.
        opponent_team: Used only as part of the matchup cache key.
        line: Strikeout line being priced; required.
        selection_side: ``"over"`` or ``"under"`` (exact match); required.
        game_row: Optional game metadata forwarded to downstream builders.
        runtime_cache: Optional dict used to memoise lineup matchup rows.

    Returns:
        A dict containing every key of the default payload. When required
        inputs are missing, only ``skipped_layers`` / ``skipped_layers_v2``
        are set (to ``"missing_pitcher_or_line"``) and all estimates remain
        ``None``.
    """
    result = _blank_result()

    if (
        pitcher_statcast_df is None
        or pitcher_statcast_df.empty
        or not pitcher_name
        or line is None
        or selection_side not in {"over", "under"}
    ):
        # Keep the legacy key and its _v2 mirror in agreement, as on the
        # success path.
        result["skipped_layers"] = "missing_pitcher_or_line"
        result["skipped_layers_v2"] = "missing_pitcher_or_line"
        return result

    pitcher_row = build_pitcher_feature_row(pitcher_statcast_df, pitcher_name)

    # Parse leniently: a NaN or non-numeric sample size counts as zero rather
    # than raising from int().
    sample_value = _safe_float(pitcher_row.get("sample_size"), 0.0) or 0.0
    sample_size = int(sample_value) if math.isfinite(sample_value) else 0
    reliability = _reliability(sample_size, k=180.0)

    swstr = _safe_float(pitcher_row.get("swstr_rate"))
    csw = _safe_float(pitcher_row.get("csw_rate"))
    ball = _safe_float(pitcher_row.get("ball_rate"))

    strike_anchor = None
    walk_anchor = None
    if swstr is not None:
        strike_anchor = _clamp(0.12 + ((swstr - 0.11) * 1.15), 0.12, 0.42)
    if ball is not None:
        walk_anchor = _clamp(0.05 + ((ball - 0.36) * 0.75), 0.02, 0.14)

    matchup_rows = _collect_lineup_matchups(
        pitcher_statcast_df=pitcher_statcast_df,
        pitcher_name=pitcher_name,
        pitcher_row=pitcher_row,
        batter_statcast_df=batter_statcast_df,
        opponent_batters=opponent_batters,
        opponent_team=opponent_team,
        game_row=game_row,
        runtime_cache=runtime_cache,
    )
    matchup = _summarize_matchups(matchup_rows)

    if not matchup_rows:
        telemetry_path_status = "core_baseline_plus_projected_pitcher"
    elif float(matchup.get("matchup_coverage_confidence") or 0.0) < 0.45:
        telemetry_path_status = "partial_telemetry"
    else:
        telemetry_path_status = "full_telemetry"
    # The tier label always mirrors the telemetry path status.
    model_tier = telemetry_path_status

    zone_matchup_subscore = (
        _safe_float((matchup.get("zone_matchup") or {}).get("hit_zone_boost"), 0.0) or 0.0
    )
    family_zone_matchup_subscore = (
        _safe_float(
            (matchup.get("family_zone_matchup") or {}).get("family_zone_whiff_risk"), 0.0
        )
        or 0.0
    )
    arsenal_fit_subscore = (
        _safe_float((matchup.get("arsenal_matchup") or {}).get("arsenal_whiff_risk"), 0.0)
        or 0.0
    )

    trajectory = matchup.get("trajectory") or {}
    # NOTE(review): ``or 0.5`` also replaces a genuine 0.0 score with 0.5.
    tunneling_subscore = _safe_float(trajectory.get("tunnel_score"), 0.5) or 0.5
    release_consistency_subscore = (
        _safe_float(trajectory.get("release_consistency_score"), 0.5) or 0.5
    )

    sequencing_profiles = matchup.get("count_context_profile") or {}
    # Any profile key ending in "-2" lifts the leverage score; only the
    # presence of such a key matters.
    has_putaway_state = any(str(key).endswith("-2") for key in sequencing_profiles)
    count_leverage_subscore = 0.58 if has_putaway_state else 0.50
    sequencing_subscore = _clamp(0.5 + ((count_leverage_subscore - 0.5) * 0.6), 0.0, 1.0)

    # NOTE(review): ``x or default`` treats a measured 0.0 rate like a missing
    # one, unlike the ``is not None`` checks used for the anchors above.
    swing_miss_subscore = _clamp((swstr or 0.11) / 0.18, 0.0, 1.0)
    called_strike_subscore = _clamp((csw or 0.28) / 0.36, 0.0, 1.0)
    command_efficiency_signal = _clamp(1.0 - ((ball or 0.36) - 0.30) / 0.12, 0.0, 1.0)
    command_efficiency_subscore = command_efficiency_signal

    if family_zone_matchup_subscore or arsenal_fit_subscore:
        lineup_whiff_raw = (
            family_zone_matchup_subscore * 0.55 + arsenal_fit_subscore * 0.45
        ) / 0.35
    else:
        lineup_whiff_raw = 0.5
    lineup_whiff_subscore = _clamp(lineup_whiff_raw, 0.0, 1.0)

    opportunity = build_projected_strikeout_opportunity(
        pitcher_row=pitcher_row,
        opponent_batters=opponent_batters,
        projected_starter_available=bool((game_row or {}).get("projected_starter_available")),
        projected_starter_match_status=str(
            (game_row or {}).get("projected_starter_match_status") or ""
        ),
        game_row=game_row,
    )

    # Missing (or zero) opportunity fields fall back to fixed defaults.
    pitch_count = float(opportunity.get("projected_pitch_count") or 88.0)
    projected_batters_faced = float(opportunity.get("projected_batters_faced") or 22.5)
    projected_innings = float(opportunity.get("projected_innings") or 5.2)
    pitches_per_bf = float(opportunity.get("pitches_per_bf") or 3.85)
    times_through_order_penalty = float(opportunity.get("times_through_order_penalty") or 0.0)
    leash_risk_score = float(opportunity.get("leash_risk_score") or 0.0)
    role_certainty_score = float(opportunity.get("role_certainty_score") or 0.7)
    opportunity_confidence = float(opportunity.get("opportunity_confidence") or 0.0)
    opportunity_reasons = list(opportunity.get("opportunity_reasons") or [])

    # Weighted blend of sub-scores; the weights sum to 1.0.
    pitch_signal = (
        swing_miss_subscore * 0.34
        + called_strike_subscore * 0.18
        + command_efficiency_subscore * 0.14
        + lineup_whiff_subscore * 0.10
        + _clamp(zone_matchup_subscore / 0.40, 0.0, 1.0) * 0.08
        + _clamp(family_zone_matchup_subscore / 0.40, 0.0, 1.0) * 0.06
        + _clamp(arsenal_fit_subscore / 0.35, 0.0, 1.0) * 0.05
        + tunneling_subscore * 0.03
        + release_consistency_subscore * 0.01
        + sequencing_subscore * 0.01
    )
    k_rate_pitch_signal = _clamp(0.12 + (pitch_signal * 0.26), 0.14, 0.42)
    k_rate_anchor = strike_anchor if strike_anchor is not None else k_rate_pitch_signal

    # More reliable samples lean on the anchor; thin ones on the pitch signal.
    projected_k_rate = (
        k_rate_pitch_signal * (1.0 - reliability)
        + k_rate_anchor * reliability
    )
    projected_k_rate = _clamp(projected_k_rate - times_through_order_penalty, 0.12, 0.40)
    expected_strikeouts = _clamp(projected_batters_faced * projected_k_rate, 1.5, 11.5)

    if selection_side == "over":
        raw_prob = _poisson_prob_over(expected_strikeouts, float(line))
    else:
        raw_prob = _poisson_prob_under(expected_strikeouts, float(line))
    calibrated_prob = _calibrate(raw_prob)
    variance = _clamp(0.35 + leash_risk_score * 0.9 + (1.0 - reliability) * 0.8, 0.35, 1.8)

    lineup_mostly_complete = bool(opponent_batters) and len(opponent_batters) >= 7

    confidence, reasons, bonuses, penalties = _score_confidence(
        sample_size=sample_size,
        coverage_confidence=matchup.get("matchup_coverage_confidence", 0.0),
        lineup_mostly_complete=lineup_mostly_complete,
        leash_risk_score=leash_risk_score,
        role_certainty_score=role_certainty_score,
    )
    reason_tags_for, reason_tags_against = _build_reason_tags(
        swing_miss_subscore=swing_miss_subscore,
        called_strike_subscore=called_strike_subscore,
        projected_k_rate=projected_k_rate,
        projected_batters_faced=projected_batters_faced,
        leash_risk_score=leash_risk_score,
        lineup_mostly_complete=lineup_mostly_complete,
    )

    confidence_raw = _clamp(confidence, 1.0, 100.0)
    bucket = "high" if confidence_raw >= 75 else "medium" if confidence_raw >= 55 else "low"

    def _component_value(item: dict[str, Any]) -> float:
        return float(item.get("value") or 0.0)

    primary_penalty = max(
        [item for item in penalties if _component_value(item) > 0.0],
        key=_component_value,
        default=None,
    )
    primary_bonus = max(
        [item for item in bonuses if _component_value(item) > 0.0],
        key=_component_value,
        default=None,
    )
    # The largest penalty takes precedence over any bonus as the headline driver.
    primary_driver = primary_penalty or primary_bonus
    summary_label = str((primary_driver or {}).get("label") or "").strip() or None

    result.update(
        {
            "raw_k_prob": raw_prob,
            "calibrated_k_prob": calibrated_prob,
            "fair_prob": calibrated_prob,
            "expected_strikeouts": expected_strikeouts,
            "raw_k_prob_v2": raw_prob,
            "calibrated_k_prob_v2": calibrated_prob,
            "fair_prob_v2": calibrated_prob,
            "expected_strikeouts_v2": expected_strikeouts,
            "projected_pitch_count": round(pitch_count, 2),
            "projected_batters_faced": round(projected_batters_faced, 2),
            "projected_innings": round(projected_innings, 2),
            "projected_k_rate": round(projected_k_rate, 4),
            "pitches_per_bf": round(pitches_per_bf, 3),
            "opportunity_confidence": round(opportunity_confidence, 4),
            "opportunity_reasons": opportunity_reasons,
            "k_rate_pitch_signal": round(k_rate_pitch_signal, 4),
            "k_rate_anchor": round(k_rate_anchor, 4) if k_rate_anchor is not None else None,
            "bb_rate_anchor": round(walk_anchor, 4) if walk_anchor is not None else None,
            "command_efficiency_signal": round(command_efficiency_signal, 4),
            "pitcher_swstr_rate": round(swstr, 4) if swstr is not None else None,
            "pitcher_csw_rate": round(csw, 4) if csw is not None else None,
            "pitcher_ball_rate": round(ball, 4) if ball is not None else None,
            "swing_miss_subscore": round(swing_miss_subscore, 4),
            "called_strike_subscore": round(called_strike_subscore, 4),
            "command_efficiency_subscore": round(command_efficiency_subscore, 4),
            "lineup_whiff_subscore": round(lineup_whiff_subscore, 4),
            "zone_matchup_subscore": round(zone_matchup_subscore, 4),
            "family_zone_matchup_subscore": round(family_zone_matchup_subscore, 4),
            "arsenal_fit_subscore": round(arsenal_fit_subscore, 4),
            "tunneling_subscore": round(tunneling_subscore, 4),
            "release_consistency_subscore": round(release_consistency_subscore, 4),
            "sequencing_subscore": round(sequencing_subscore, 4),
            "count_leverage_subscore": round(count_leverage_subscore, 4),
            "leash_risk_subscore": round(leash_risk_score, 4),
            "role_certainty_score": round(role_certainty_score, 4),
            "times_through_order_penalty": round(times_through_order_penalty, 4),
            "telemetry_path_status": telemetry_path_status,
            "model_tier": model_tier,
            "variance_band_low": round(_clamp(expected_strikeouts - variance, 0.5, 12.0), 2),
            "variance_band_high": round(_clamp(expected_strikeouts + variance, 0.5, 12.5), 2),
            "matchup_coverage_confidence": matchup.get("matchup_coverage_confidence"),
            "component_source_map": matchup.get("component_source_map") or {},
            "predicted_whiff_regions": matchup.get("predicted_whiff_regions") or [],
            "predicted_attack_regions": matchup.get("predicted_attack_regions") or [],
            "predicted_damage_regions": matchup.get("predicted_damage_regions") or [],
            "tunnel_pair_scores": matchup.get("tunnel_pair_scores") or [],
            "applied_layers": "opportunity|pitch_win|probability|uncertainty",
            "skipped_layers": "",
            "confidence_score": round(confidence_raw, 1),
            "confidence_score_raw": round(confidence_raw, 1),
            "confidence_score_display": round(confidence_raw, 1),
            "confidence_bucket": bucket,
            "confidence_reasons": reasons[:5],
            "confidence_component_bonuses": bonuses,
            "confidence_component_penalties": penalties,
            "confidence_primary_driver": primary_driver,
            "confidence_summary_label": summary_label,
            "reason_tags_for": reason_tags_for,
            "reason_tags_against": reason_tags_against,
            "applied_layers_v2": "opportunity|pitch_win|probability|uncertainty",
            "skipped_layers_v2": "",
            "confidence_score_v2": round(confidence_raw, 1),
            "confidence_score_raw_v2": round(confidence_raw, 1),
            "confidence_score_display_v2": round(confidence_raw, 1),
            "confidence_bucket_v2": bucket,
            "confidence_reasons_v2": reasons[:5],
            "confidence_component_bonuses_v2": bonuses,
            "confidence_component_penalties_v2": penalties,
            "confidence_primary_driver_v2": primary_driver,
            "confidence_summary_label_v2": summary_label,
        }
    )
    return result