File size: 2,924 Bytes
4c2f97d
 
 
 
 
95e7620
4c2f97d
26e1c80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09655a8
 
95e7620
09655a8
 
dc9850b
95e7620
09655a8
95e7620
 
 
 
09655a8
95e7620
 
09655a8
95e7620
 
09655a8
95e7620
 
 
09655a8
95e7620
 
 
09655a8
95e7620
 
 
09655a8
95e7620
 
 
09655a8
95e7620
 
 
09655a8
95e7620
 
 
09655a8
95e7620
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from __future__ import annotations

import math
from typing import Any

import pandas as pd

from models.batter_zone_store import load_batter_zone_store_metrics

# Lookup table from a lower-cased, whitespace-trimmed pitch name to one of the
# three pitch families used by this module. Keys must stay lower-case because
# normalize_pitch_family() lower-cases its input before the lookup.
PITCH_FAMILY_MAP = {
    # fastball family
    "4-seam fastball": "fastball",
    "four-seam fastball": "fastball",
    "fastball": "fastball",
    "sinker": "fastball",
    "cutter": "fastball",
    # breaking family
    "slider": "breaking",
    "sweeper": "breaking",
    "curveball": "breaking",
    "knuckle curve": "breaking",
    "slurve": "breaking",
    # offspeed family
    "changeup": "offspeed",
    "splitter": "offspeed",
    "forkball": "offspeed",
    "split-finger": "offspeed",
    "circle change": "offspeed",
}


def normalize_pitch_family(pitch_name: Any) -> str:
    """Map a raw pitch name to its pitch family.

    The input is converted with ``str()``, stripped of surrounding whitespace
    and lower-cased, then looked up in ``PITCH_FAMILY_MAP``.

    Args:
        pitch_name: Any value; typically a pitch-name string. Missing values
            (``None``, ``""``, float NaN, ``pandas.NA``) are accepted.

    Returns:
        ``"fastball"``, ``"breaking"`` or ``"offspeed"`` when the normalized
        name is a key of ``PITCH_FAMILY_MAP``; otherwise ``"unknown"``.
    """
    # str() is applied to the value itself instead of `pitch_name or ""`:
    # truth-testing raises TypeError for values such as pandas.NA, whereas the
    # string forms of missing/falsy values ("none", "nan", "<na>", "", "0")
    # are simply not keys of the map and therefore resolve to "unknown".
    text = str(pitch_name).strip().lower()
    return PITCH_FAMILY_MAP.get(text, "unknown")


def classify_zone_bucket(plate_x: Any, plate_z: Any) -> str:
    """Classify a pitch location into a named zone bucket.

    Both coordinates are converted with ``float()`` and tested against three
    nested, axis-aligned rectangles centred on ``x = 0`` (all bounds are
    inclusive). The first rectangle that contains the point wins:

    * ``"heart"``  -- ``|x| <= 0.45`` and ``1.90 <= z <= 3.10``
    * ``"shadow"`` -- ``|x| <= 0.83`` and ``1.50 <= z <= 3.50``
    * ``"chase"``  -- ``|x| <= 1.20`` and ``1.10 <= z <= 3.90``
    * ``"waste"``  -- any other finite location

    NOTE(review): the bounds look like plate coordinates in feet -- confirm
    against the data source.

    Args:
        plate_x: Horizontal location; anything ``float()`` accepts.
        plate_z: Vertical location; anything ``float()`` accepts.

    Returns:
        One of ``"heart"``, ``"shadow"``, ``"chase"``, ``"waste"``, or
        ``"unknown"`` when either coordinate cannot be converted to a finite
        float (``None``, non-numeric text, NaN, infinity, ...).
    """
    try:
        x = float(plate_x)
        z = float(plate_z)
    except (TypeError, ValueError, OverflowError):
        # The errors float() raises for unconvertible input.
        return "unknown"

    # NaN compares False against every bound, so without this guard a missing
    # location would silently fall through to "waste".
    if not (math.isfinite(x) and math.isfinite(z)):
        return "unknown"

    def inside(half_width: float, bottom: float, top: float) -> bool:
        """Return True when (x, z) lies in the given inclusive rectangle."""
        return -half_width <= x <= half_width and bottom <= z <= top

    # Rectangles are nested, so checking from the innermost outwards yields
    # the same bucket as checking the strike zone first and then its core.
    if inside(0.45, 1.90, 3.10):
        return "heart"
    if inside(0.83, 1.50, 3.50):
        return "shadow"
    if inside(1.20, 1.10, 3.90):
        return "chase"
    return "waste"

def build_batter_zone_feature_row(
    statcast_df: pd.DataFrame,
    player_name: str,
) -> dict[str, Any]:
    """Assemble one flat feature row of stored zone metrics for a batter.

    The metrics are fetched with ``load_batter_zone_store_metrics(player_name)``
    and every ``stored_<metric>_<family>_<zone>`` entry is copied to the key
    ``<metric>_<family>_<zone>`` for each combination of pitch family
    (fastball, breaking, offspeed), zone (heart, shadow, chase, waste) and
    metric (hr_prob, hit_prob, tb2p_prob, whiff_prob, damage_prob,
    sample_size).

    Args:
        statcast_df: Not read by this function; kept in the signature.
        player_name: Batter name, passed to the store loader and echoed back
            in the row.

    Returns:
        A dict with ``"player_name"``, ``"zone_sample_size"`` (``0`` when the
        store has no ``"stored_zone_sample_size"``) and the 72 per-bucket
        metric keys; a metric absent from the store is ``None``.
    """
    # presumably a plain dict; only its .get() method is used here
    stored = load_batter_zone_store_metrics(player_name)

    metric_names = (
        "hr_prob",
        "hit_prob",
        "tb2p_prob",
        "whiff_prob",
        "damage_prob",
        "sample_size",
    )

    features: dict[str, Any] = {
        "player_name": player_name,
        "zone_sample_size": stored.get("stored_zone_sample_size", 0),
    }

    # Iteration order (family, then zone, then metric) fixes the key order of
    # the resulting row.
    features.update(
        {
            f"{metric}_{family}_{zone}": stored.get(
                f"stored_{metric}_{family}_{zone}"
            )
            for family in ("fastball", "breaking", "offspeed")
            for zone in ("heart", "shadow", "chase", "waste")
            for metric in metric_names
        }
    )

    return features