Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import math | |
| from typing import Any | |
| import pandas as pd | |
| from models.opportunity_model import build_projected_strikeout_opportunity | |
| from models.pitcher_adjustment import build_pitcher_feature_row | |
| from models.shared_matchup_engine import compose_shared_matchup_context | |
| def _safe_float(value: Any, default: float | None = None) -> float | None: | |
| try: | |
| if value is None: | |
| return default | |
| text = str(value).strip().lower() | |
| if text in {"", "nan", "none"}: | |
| return default | |
| return float(value) | |
| except Exception: | |
| return default | |
| def _clamp(value: float, lo: float, hi: float) -> float: | |
| return max(lo, min(hi, value)) | |
def _reliability(sample_size: Any, k: float = 120.0) -> float:
    """Turn a sample size into a 0-1 weight of the form ``n / (n + k)``.

    Unparseable or negative sizes count as zero, and *k* is floored at 1.0
    so the denominator can never be zero.
    """
    parsed = _safe_float(sample_size, 0.0)
    observed = max(0.0, float(parsed or 0.0))
    prior_weight = max(1.0, k)
    weight = observed / (observed + prior_weight)
    return _clamp(weight, 0.0, 1.0)
| def _confidence_component(label: str, value: float, direction: str) -> dict[str, Any]: | |
| return { | |
| "label": label, | |
| "value": round(float(value), 1), | |
| "direction": direction, | |
| } | |
| def _poisson_prob_over(expected_value: float, line: float) -> float: | |
| if expected_value <= 0: | |
| return 0.0 | |
| target = int(math.floor(line)) | |
| cumulative = 0.0 | |
| for k in range(0, target + 1): | |
| cumulative += math.exp(-expected_value) * (expected_value ** k) / math.factorial(k) | |
| return _clamp(1.0 - cumulative, 0.0, 1.0) | |
def _poisson_prob_under(expected_value: float, line: float) -> float:
    """Return the complement of :func:`_poisson_prob_over`, clamped to ``[0, 1]``.

    Because the "over" side is ``P(X > floor(line))``, this is
    ``P(X <= floor(line))``; on a whole-number line that includes landing
    exactly on the line.
    """
    over_probability = _poisson_prob_over(expected_value, line)
    return _clamp(1.0 - over_probability, 0.0, 1.0)
def _calibrate(probability: float) -> float:
    """Shrink *probability* toward 0.50 and bound it to ``[0.02, 0.98]``.

    The distance from 0.50 is scaled by 0.92 before clamping.
    """
    distance_from_even = probability - 0.50
    shrunk = 0.50 + (distance_from_even * 0.92)
    return _clamp(shrunk, 0.02, 0.98)
def _finite_line_or_none(line: Any) -> float | None:
    """Return *line* as a finite float, or ``None`` if missing, NaN, infinite or unparseable."""
    if line is None:
        return None
    try:
        parsed = float(line)
    except (TypeError, ValueError, OverflowError):
        return None
    return parsed if math.isfinite(parsed) else None


def _blank_strikeout_result() -> dict[str, Any]:
    """Return a fresh result template containing every output key.

    Numeric outputs start as ``None``, collections start empty, and the tier
    fields start at their "baseline only" values. A new dict (with new lists)
    is built on every call so callers never share mutable state.
    """
    return {
        "formula_version": "strikeout_v2_live",
        "raw_k_prob": None,
        "calibrated_k_prob": None,
        "fair_prob": None,
        "expected_strikeouts": None,
        "raw_k_prob_v2": None,
        "calibrated_k_prob_v2": None,
        "fair_prob_v2": None,
        "expected_strikeouts_v2": None,
        "projected_pitch_count": None,
        "projected_batters_faced": None,
        "projected_innings": None,
        "projected_k_rate": None,
        "pitches_per_bf": None,
        "opportunity_confidence": None,
        "opportunity_reasons": [],
        "k_rate_pitch_signal": None,
        "k_rate_anchor": None,
        "bb_rate_anchor": None,
        "command_efficiency_signal": None,
        "pitcher_swstr_rate": None,
        "pitcher_csw_rate": None,
        "pitcher_ball_rate": None,
        "swing_miss_subscore": None,
        "called_strike_subscore": None,
        "command_efficiency_subscore": None,
        "lineup_whiff_subscore": None,
        "zone_matchup_subscore": None,
        "family_zone_matchup_subscore": None,
        "arsenal_fit_subscore": None,
        "tunneling_subscore": None,
        "release_consistency_subscore": None,
        "sequencing_subscore": None,
        "count_leverage_subscore": None,
        "leash_risk_subscore": None,
        "role_certainty_score": None,
        "times_through_order_penalty": None,
        "telemetry_path_status": "baseline_only",
        "model_tier": "baseline_only_degraded",
        "variance_band_low": None,
        "variance_band_high": None,
        "matchup_coverage_confidence": None,
        "component_source_map": {},
        "predicted_whiff_regions": [],
        "predicted_attack_regions": [],
        "predicted_damage_regions": [],
        "tunnel_pair_scores": [],
        "applied_layers": "",
        "skipped_layers": "",
        "confidence_score": None,
        "confidence_score_raw": None,
        "confidence_score_display": None,
        "confidence_source": "strikeout_v2_live",
        "confidence_bucket": None,
        "confidence_reasons": [],
        "confidence_component_bonuses": [],
        "confidence_component_penalties": [],
        "confidence_primary_driver": None,
        "confidence_summary_label": None,
        "reason_tags_for": [],
        "reason_tags_against": [],
        "applied_layers_v2": "",
        "skipped_layers_v2": "",
        "confidence_score_v2": None,
        "confidence_score_raw_v2": None,
        "confidence_score_display_v2": None,
        "confidence_source_v2": "strikeout_v2_live",
        "confidence_bucket_v2": None,
        "confidence_reasons_v2": [],
        "confidence_component_bonuses_v2": [],
        "confidence_component_penalties_v2": [],
        "confidence_primary_driver_v2": None,
        "confidence_summary_label_v2": None,
    }


def _lineup_matchup_rows(
    pitcher_statcast_df: pd.DataFrame,
    pitcher_name: str,
    pitcher_row: Any,
    batter_statcast_df: pd.DataFrame | None,
    opponent_batters: list[str] | None,
    opponent_team: str | None,
    game_row: dict[str, Any] | None,
    runtime_cache: dict[str, Any] | None,
) -> list[Any]:
    """Collect one shared-matchup context per opposing batter (first nine only).

    Returns an empty list when there is no batter data or no lineup. When a
    ``runtime_cache`` is supplied, results are stored under its
    ``"strikeout_lineup_matchups"`` bucket and reused on a matching key.
    """
    if batter_statcast_df is None or batter_statcast_df.empty or not opponent_batters:
        return []

    matchup_cache_bucket = None
    lineup_cache_key = None
    if runtime_cache is not None:
        matchup_cache_bucket = runtime_cache.setdefault("strikeout_lineup_matchups", {})
        game = game_row or {}
        # NOTE(review): the key relies on id() of the two frames, so an entry
        # is only trustworthy while those exact objects stay alive; confirm
        # the cache never outlives the frames it was built from.
        lineup_cache_key = (
            id(batter_statcast_df),
            id(pitcher_statcast_df),
            str(pitcher_name or "").strip().lower(),
            tuple(str(name or "").strip().lower() for name in (opponent_batters or [])[:9]),
            str(game.get("away_team") or "").strip().lower(),
            str(game.get("home_team") or "").strip().lower(),
            str(game.get("projected_starter_match_status") or "").strip().lower(),
            str(opponent_team or "").strip().lower(),
        )

    if matchup_cache_bucket is not None and lineup_cache_key in matchup_cache_bucket:
        return list(matchup_cache_bucket[lineup_cache_key])

    rows: list[Any] = []
    for batter_name in opponent_batters[:9]:
        try:
            rows.append(
                compose_shared_matchup_context(
                    batter_name=batter_name,
                    pitcher_name=pitcher_name,
                    batter_statcast_df=batter_statcast_df,
                    pitcher_statcast_df=pitcher_statcast_df,
                    pitcher_row=pitcher_row,
                    game_row=game_row,
                    # NOTE(review): every batter is passed as left-handed
                    # ("L"); looks like a placeholder, confirm with the
                    # matchup engine owner.
                    batter_features={"batter_stand": "L"},
                    runtime_cache=runtime_cache,
                )
            )
        except Exception:
            # Deliberate best-effort: a batter whose context cannot be built
            # is left out rather than failing the whole projection.
            continue

    if matchup_cache_bucket is not None and lineup_cache_key is not None:
        matchup_cache_bucket[lineup_cache_key] = list(rows)
    return rows


def _summarize_lineup_matchup(matchup_rows: list[Any]) -> dict[str, Any]:
    """Collapse per-batter matchup rows into a single lineup-level summary.

    Coverage confidence and the three whiff/zone signals are averaged across
    rows. Region lists, tunnel scores, the component source map, trajectory
    and count-context profile are taken from the first row only.
    """
    if not matchup_rows:
        return {
            "predicted_whiff_regions": [],
            "predicted_attack_regions": [],
            "predicted_damage_regions": [],
            "tunnel_pair_scores": [],
            "matchup_coverage_confidence": 0.0,
            "component_source_map": {},
            "zone_matchup": {},
            "family_zone_matchup": {},
            "arsenal_matchup": {},
            "trajectory": {},
            "count_context_profile": {},
        }

    def _avg(path: tuple[str, ...], default: float = 0.0) -> float:
        """Average the value found at *path* over all rows.

        A row whose path hits a non-dict contributes *default*; a row whose
        final lookup is ``None`` is skipped.
        """
        vals: list[float] = []
        for row in matchup_rows:
            cur: Any = row
            for key in path:
                if not isinstance(cur, dict):
                    cur = default
                    break
                cur = cur.get(key)
            if cur is not None:
                vals.append(float(_safe_float(cur, default) or default))
        return sum(vals) / len(vals) if vals else default

    lead = matchup_rows[0]
    return {
        "predicted_whiff_regions": lead.get("predicted_whiff_regions") or [],
        "predicted_attack_regions": lead.get("predicted_attack_regions") or [],
        "predicted_damage_regions": lead.get("predicted_damage_regions") or [],
        "tunnel_pair_scores": lead.get("tunnel_pair_scores") or [],
        "matchup_coverage_confidence": _avg(("matchup_coverage_confidence",), 0.0),
        "component_source_map": lead.get("component_source_map") or {},
        "zone_matchup": {"hit_zone_boost": _avg(("zone_matchup", "hit_zone_boost"), 0.0)},
        "family_zone_matchup": {
            "family_zone_whiff_risk": _avg(("family_zone_matchup", "family_zone_whiff_risk"), 0.0)
        },
        "arsenal_matchup": {"arsenal_whiff_risk": _avg(("arsenal_matchup", "arsenal_whiff_risk"), 0.0)},
        "trajectory": lead.get("trajectory") or {},
        "count_context_profile": lead.get("count_context_profile") or {},
    }


def _score_strikeout_confidence(
    sample_size: int,
    coverage_confidence: Any,
    opponent_batters: list[str] | None,
    leash_risk_score: float,
    role_certainty_score: float,
) -> tuple[float, list[str], list[dict[str, Any]], list[dict[str, Any]]]:
    """Score confidence from a base of 56 using fixed bonuses and penalties.

    Returns ``(unclamped_score, reasons, bonuses, penalties)``.
    """
    confidence = 56.0
    reasons: list[str] = []
    bonuses: list[dict[str, Any]] = []
    penalties: list[dict[str, Any]] = []

    def _bonus(label: str, amount: float) -> None:
        bonuses.append(_confidence_component(label, amount, "bonus"))

    def _penalty(label: str, amount: float) -> None:
        # Every penalty label doubles as a human-readable reason.
        reasons.append(label)
        penalties.append(_confidence_component(label, amount, "penalty"))

    if sample_size >= 400:
        confidence += 10.0
        _bonus("Strong pitcher sample", 10.0)
    elif sample_size < 150:
        confidence -= 10.0
        _penalty("Limited pitcher pitch sample", 10.0)

    if coverage_confidence >= 0.45:
        confidence += 8.0
        _bonus("Strong telemetry and zone coverage", 8.0)
    else:
        confidence -= 6.0
        _penalty("Thin telemetry and zone-coverage sample", 6.0)

    if opponent_batters and len(opponent_batters) >= 7:
        confidence += 5.0
        _bonus("Projected lineup mostly complete", 5.0)
    else:
        confidence -= 5.0
        _penalty("Projected opponent lineup is incomplete", 5.0)

    if leash_risk_score >= 0.55:
        confidence -= 7.0
        _penalty("Pitch-count and leash risk remain elevated", 7.0)

    if role_certainty_score >= 0.92:
        confidence += 4.0
        _bonus("Public projected starter confirmed", 4.0)
    elif role_certainty_score <= 0.65:
        confidence -= 6.0
        _penalty("Starter role and leash certainty remain soft", 6.0)

    return confidence, reasons, bonuses, penalties


def build_strikeout_probability_result_v2(
    pitcher_statcast_df: pd.DataFrame,
    pitcher_name: str,
    batter_statcast_df: pd.DataFrame | None = None,
    opponent_batters: list[str] | None = None,
    opponent_team: str | None = None,
    line: float | None = None,
    selection_side: str | None = None,
    game_row: dict[str, Any] | None = None,
    runtime_cache: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Project a pitcher's strikeouts and price an over/under on *line*.

    Pipeline: pitcher feature row -> optional per-batter matchup contexts ->
    subscores -> projected K rate blended with a swinging-strike anchor ->
    expected strikeouts (projected batters faced x K rate) -> Poisson
    probability for the chosen side -> calibration -> confidence scoring.

    Args:
        pitcher_statcast_df: Pitcher pitch data; must be non-empty.
        pitcher_name: Pitcher identifier passed to the feature builders.
        batter_statcast_df: Optional batter data used for lineup matchups.
        opponent_batters: Optional lineup; only the first nine are used.
        opponent_team: Only used as part of the matchup cache key.
        line: Strikeout line; must convert to a finite float.
        selection_side: ``"over"`` or ``"under"`` (exact match).
        game_row: Optional game metadata forwarded to the helper models.
        runtime_cache: Optional dict used to memoise lineup matchups.

    Returns:
        A dict containing every key of the result template. When the pitcher
        data, name, line or side is missing or invalid, the numeric fields
        stay ``None`` and ``skipped_layers`` / ``skipped_layers_v2`` are both
        set to ``"missing_pitcher_or_line"``. Each ``*_v2`` key mirrors its
        unsuffixed counterpart.
    """
    result = _blank_strikeout_result()

    # A NaN or infinite line would otherwise crash in math.floor() further
    # down, so non-finite and unparseable lines are treated as missing.
    line_value = _finite_line_or_none(line)
    if (
        pitcher_statcast_df is None
        or pitcher_statcast_df.empty
        or not pitcher_name
        or line_value is None
        or selection_side not in {"over", "under"}
    ):
        result["skipped_layers"] = "missing_pitcher_or_line"
        result["skipped_layers_v2"] = "missing_pitcher_or_line"
        return result

    pitcher_row = build_pitcher_feature_row(pitcher_statcast_df, pitcher_name)
    # Parse defensively: a NaN sample size is truthy and int(NaN) raises.
    sample_raw = _safe_float(pitcher_row.get("sample_size"), 0.0)
    sample_size = int(sample_raw) if sample_raw is not None and math.isfinite(sample_raw) else 0
    reliability = _reliability(sample_size, k=180.0)

    swstr = _safe_float(pitcher_row.get("swstr_rate"))
    csw = _safe_float(pitcher_row.get("csw_rate"))
    ball = _safe_float(pitcher_row.get("ball_rate"))
    strike_anchor = None if swstr is None else _clamp(0.12 + ((swstr - 0.11) * 1.15), 0.12, 0.42)
    walk_anchor = None if ball is None else _clamp(0.05 + ((ball - 0.36) * 0.75), 0.02, 0.14)

    matchup_rows = _lineup_matchup_rows(
        pitcher_statcast_df,
        pitcher_name,
        pitcher_row,
        batter_statcast_df,
        opponent_batters,
        opponent_team,
        game_row,
        runtime_cache,
    )
    matchup = _summarize_lineup_matchup(matchup_rows)

    if not matchup_rows:
        tier = "core_baseline_plus_projected_pitcher"
    elif float(matchup.get("matchup_coverage_confidence") or 0.0) < 0.45:
        tier = "partial_telemetry"
    else:
        tier = "full_telemetry"
    telemetry_path_status = tier
    model_tier = tier

    zone_matchup_subscore = _safe_float((matchup.get("zone_matchup") or {}).get("hit_zone_boost"), 0.0) or 0.0
    family_zone_matchup_subscore = (
        _safe_float((matchup.get("family_zone_matchup") or {}).get("family_zone_whiff_risk"), 0.0) or 0.0
    )
    arsenal_fit_subscore = _safe_float((matchup.get("arsenal_matchup") or {}).get("arsenal_whiff_risk"), 0.0) or 0.0
    trajectory = matchup.get("trajectory") or {}
    tunneling_subscore = _safe_float(trajectory.get("tunnel_score"), 0.5) or 0.5
    release_consistency_subscore = _safe_float(trajectory.get("release_consistency_score"), 0.5) or 0.5

    sequencing_profiles = matchup.get("count_context_profile") or {}
    # Any count-state key ending in "-2" earns the put-away leverage bump.
    has_putaway_state = any(str(state).endswith("-2") for state in sequencing_profiles)
    count_leverage_subscore = 0.58 if has_putaway_state else 0.50
    sequencing_subscore = _clamp(0.5 + ((count_leverage_subscore - 0.5) * 0.6), 0.0, 1.0)

    # NOTE(review): `or` treats a measured rate of exactly 0.0 as missing and
    # substitutes the default, whereas the anchors above keep 0.0 as real
    # data. Left unchanged; confirm which behaviour is intended.
    swing_miss_subscore = _clamp((swstr or 0.11) / 0.18, 0.0, 1.0)
    called_strike_subscore = _clamp((csw or 0.28) / 0.36, 0.0, 1.0)
    command_efficiency_signal = _clamp(1.0 - ((ball or 0.36) - 0.30) / 0.12, 0.0, 1.0)
    command_efficiency_subscore = command_efficiency_signal

    if family_zone_matchup_subscore or arsenal_fit_subscore:
        lineup_whiff_raw = (family_zone_matchup_subscore * 0.55 + arsenal_fit_subscore * 0.45) / 0.35
    else:
        lineup_whiff_raw = 0.5
    lineup_whiff_subscore = _clamp(lineup_whiff_raw, 0.0, 1.0)

    opportunity = build_projected_strikeout_opportunity(
        pitcher_row=pitcher_row,
        opponent_batters=opponent_batters,
        projected_starter_available=bool((game_row or {}).get("projected_starter_available")),
        projected_starter_match_status=str((game_row or {}).get("projected_starter_match_status") or ""),
        game_row=game_row,
    )
    pitch_count = float(opportunity.get("projected_pitch_count") or 88.0)
    projected_batters_faced = float(opportunity.get("projected_batters_faced") or 22.5)
    projected_innings = float(opportunity.get("projected_innings") or 5.2)
    pitches_per_bf = float(opportunity.get("pitches_per_bf") or 3.85)
    times_through_order_penalty = float(opportunity.get("times_through_order_penalty") or 0.0)
    leash_risk_score = float(opportunity.get("leash_risk_score") or 0.0)
    role_certainty_score = float(opportunity.get("role_certainty_score") or 0.7)
    opportunity_confidence = float(opportunity.get("opportunity_confidence") or 0.0)
    opportunity_reasons = list(opportunity.get("opportunity_reasons") or [])

    # The weights below sum to 1.0.
    pitch_signal = (
        swing_miss_subscore * 0.34
        + called_strike_subscore * 0.18
        + command_efficiency_subscore * 0.14
        + lineup_whiff_subscore * 0.10
        + _clamp(zone_matchup_subscore / 0.40, 0.0, 1.0) * 0.08
        + _clamp(family_zone_matchup_subscore / 0.40, 0.0, 1.0) * 0.06
        + _clamp(arsenal_fit_subscore / 0.35, 0.0, 1.0) * 0.05
        + tunneling_subscore * 0.03
        + release_consistency_subscore * 0.01
        + sequencing_subscore * 0.01
    )
    k_rate_pitch_signal = _clamp(0.12 + (pitch_signal * 0.26), 0.14, 0.42)
    k_rate_anchor = strike_anchor if strike_anchor is not None else k_rate_pitch_signal
    # The larger the pitch sample, the more weight the anchor gets.
    projected_k_rate = k_rate_pitch_signal * (1.0 - reliability) + k_rate_anchor * reliability
    projected_k_rate = _clamp(projected_k_rate - times_through_order_penalty, 0.12, 0.40)
    expected_strikeouts = _clamp(projected_batters_faced * projected_k_rate, 1.5, 11.5)

    if selection_side == "over":
        raw_prob = _poisson_prob_over(expected_strikeouts, line_value)
    else:
        raw_prob = _poisson_prob_under(expected_strikeouts, line_value)
    calibrated_prob = _calibrate(raw_prob)
    variance = _clamp(0.35 + leash_risk_score * 0.9 + (1.0 - reliability) * 0.8, 0.35, 1.8)

    confidence, reasons, bonuses, penalties = _score_strikeout_confidence(
        sample_size,
        matchup.get("matchup_coverage_confidence", 0.0),
        opponent_batters,
        leash_risk_score,
        role_certainty_score,
    )

    reason_tags_for: list[str] = []
    reason_tags_against: list[str] = []
    if swing_miss_subscore >= 0.62:
        reason_tags_for.append("Misses bats consistently")
    if called_strike_subscore >= 0.72:
        reason_tags_for.append("Strong called plus whiff strike mix")
    if projected_k_rate >= 0.27:
        reason_tags_for.append("Projected strikeout rate supports the line")
    if projected_batters_faced >= 24.0:
        reason_tags_for.append("Projected workload supports deep strikeout opportunity")
    if leash_risk_score >= 0.48:
        reason_tags_against.append("Pitch-count and leash risk limit the strikeout path")
    if projected_batters_faced <= 21.5:
        reason_tags_against.append("Projected batters faced are lighter than ideal")
    if not opponent_batters or len(opponent_batters) < 7:
        reason_tags_against.append("Projected opponent lineup is incomplete")

    confidence_raw = _clamp(confidence, 1.0, 100.0)
    bucket = "high" if confidence_raw >= 75 else "medium" if confidence_raw >= 55 else "low"

    def _component_value(item: dict[str, Any]) -> float:
        return float(item.get("value") or 0.0)

    primary_penalty = max(
        [item for item in penalties if _component_value(item) > 0.0],
        key=_component_value,
        default=None,
    )
    primary_bonus = max(
        [item for item in bonuses if _component_value(item) > 0.0],
        key=_component_value,
        default=None,
    )
    # The largest penalty outranks any bonus as the headline driver.
    primary_driver = primary_penalty or primary_bonus
    summary_label = str((primary_driver or {}).get("label") or "").strip() or None

    # Fields that exist both unsuffixed and as "<name>_v2". They are written
    # from one dict so the two copies cannot drift apart.
    mirrored: dict[str, Any] = {
        "raw_k_prob": raw_prob,
        "calibrated_k_prob": calibrated_prob,
        "fair_prob": calibrated_prob,
        "expected_strikeouts": expected_strikeouts,
        "applied_layers": "opportunity|pitch_win|probability|uncertainty",
        "skipped_layers": "",
        "confidence_score": round(confidence_raw, 1),
        "confidence_score_raw": round(confidence_raw, 1),
        "confidence_score_display": round(confidence_raw, 1),
        "confidence_bucket": bucket,
        "confidence_reasons": reasons[:5],
        "confidence_component_bonuses": bonuses,
        "confidence_component_penalties": penalties,
        "confidence_primary_driver": primary_driver,
        "confidence_summary_label": summary_label,
    }
    result.update(mirrored)
    result.update({f"{key}_v2": value for key, value in mirrored.items()})
    # Keep the two reason lists as separate objects.
    result["confidence_reasons_v2"] = reasons[:5]

    result.update(
        {
            "projected_pitch_count": round(pitch_count, 2),
            "projected_batters_faced": round(projected_batters_faced, 2),
            "projected_innings": round(projected_innings, 2),
            "projected_k_rate": round(projected_k_rate, 4),
            "pitches_per_bf": round(pitches_per_bf, 3),
            "opportunity_confidence": round(opportunity_confidence, 4),
            "opportunity_reasons": opportunity_reasons,
            "k_rate_pitch_signal": round(k_rate_pitch_signal, 4),
            "k_rate_anchor": round(k_rate_anchor, 4) if k_rate_anchor is not None else None,
            "bb_rate_anchor": round(walk_anchor, 4) if walk_anchor is not None else None,
            "command_efficiency_signal": round(command_efficiency_signal, 4),
            "pitcher_swstr_rate": round(swstr, 4) if swstr is not None else None,
            "pitcher_csw_rate": round(csw, 4) if csw is not None else None,
            "pitcher_ball_rate": round(ball, 4) if ball is not None else None,
            "swing_miss_subscore": round(swing_miss_subscore, 4),
            "called_strike_subscore": round(called_strike_subscore, 4),
            "command_efficiency_subscore": round(command_efficiency_subscore, 4),
            "lineup_whiff_subscore": round(lineup_whiff_subscore, 4),
            "zone_matchup_subscore": round(zone_matchup_subscore, 4),
            "family_zone_matchup_subscore": round(family_zone_matchup_subscore, 4),
            "arsenal_fit_subscore": round(arsenal_fit_subscore, 4),
            "tunneling_subscore": round(tunneling_subscore, 4),
            "release_consistency_subscore": round(release_consistency_subscore, 4),
            "sequencing_subscore": round(sequencing_subscore, 4),
            "count_leverage_subscore": round(count_leverage_subscore, 4),
            "leash_risk_subscore": round(leash_risk_score, 4),
            "role_certainty_score": round(role_certainty_score, 4),
            "times_through_order_penalty": round(times_through_order_penalty, 4),
            "telemetry_path_status": telemetry_path_status,
            "model_tier": model_tier,
            "variance_band_low": round(_clamp(expected_strikeouts - variance, 0.5, 12.0), 2),
            "variance_band_high": round(_clamp(expected_strikeouts + variance, 0.5, 12.5), 2),
            "matchup_coverage_confidence": matchup.get("matchup_coverage_confidence"),
            "component_source_map": matchup.get("component_source_map") or {},
            "predicted_whiff_regions": matchup.get("predicted_whiff_regions") or [],
            "predicted_attack_regions": matchup.get("predicted_attack_regions") or [],
            "predicted_damage_regions": matchup.get("predicted_damage_regions") or [],
            "tunnel_pair_scores": matchup.get("tunnel_pair_scores") or [],
            "reason_tags_for": reason_tags_for,
            "reason_tags_against": reason_tags_against,
        }
    )
    return result