File size: 11,407 Bytes
726b3a2
 
01d9b69
726b3a2
 
01d9b69
 
726b3a2
 
 
 
 
01d9b69
726b3a2
 
 
 
 
 
 
 
 
 
 
 
c151fc1
 
 
 
 
 
 
 
 
 
 
726b3a2
 
 
 
16ab1fe
 
c151fc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01d9b69
 
 
 
 
 
726b3a2
 
 
 
 
 
 
c151fc1
 
 
16ab1fe
726b3a2
 
16ab1fe
 
c151fc1
 
 
 
 
 
01d9b69
 
 
 
 
 
 
726b3a2
 
 
16ab1fe
 
 
 
 
 
 
 
 
 
 
726b3a2
 
15cc068
 
726b3a2
 
 
16ab1fe
 
 
 
 
 
726b3a2
15cc068
 
 
 
01d9b69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726b3a2
01d9b69
 
 
 
16ab1fe
01d9b69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16ab1fe
 
 
 
 
 
 
 
 
 
 
 
726b3a2
 
16ab1fe
 
 
 
 
 
 
01d9b69
 
16ab1fe
 
 
 
 
 
 
01d9b69
 
16ab1fe
15cc068
 
 
 
 
 
 
 
 
 
01d9b69
 
15cc068
 
 
 
 
 
 
01d9b69
 
15cc068
 
 
 
 
 
01d9b69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15cc068
16ab1fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726b3a2
 
 
 
 
16ab1fe
 
 
 
 
726b3a2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
from __future__ import annotations

import logging
import math
from typing import Any

logger = logging.getLogger(__name__)


def build_sequence_features(
    game_row: dict[str, Any],
    pitcher_row: dict[str, Any],
    batter_row: dict[str, Any],
    pitcher_family_zone_row: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Flatten game, pitcher and batter rows into one feature dict.

    Every value is coerced defensively: ``None``, blank strings and the texts
    ``"nan"`` / ``"none"`` (any case) are treated as missing, and malformed
    values fall back to a default instead of raising.

    Args:
        game_row: Source of ``balls``, ``strikes``, ``outs``, ``pitch_type``,
            ``pitch_velocity``, ``pitch_spin_rate``, ``pitch_extension`` and
            the ``runner_on_1b`` / ``runner_on_2b`` / ``runner_on_3b`` flags.
        pitcher_row: Source of ``avg_release_speed``,
            ``avg_release_spin_rate``, ``avg_release_extension``,
            ``ev_allowed`` and ``p_throws``.
        batter_row: Source of ``ev90``, ``barrel_rate``, ``hard_hit_rate``
            and ``batter_stand``.
        pitcher_family_zone_row: Optional source of ``fastball_usage_rate``,
            ``breaking_usage_rate`` and ``offspeed_usage_rate``; when omitted
            those three features are ``None``.

    Returns:
        A dict with the count state (ints), runner flags (bools), pitcher
        baselines and last-pitch telemetry (finite floats or ``None``), the
        three ``*_delta_from_baseline`` values (``None`` unless both operands
        are present), handedness codes (upper-cased, default ``"R"``) and the
        pass-through batter / usage-rate values.
    """
    missing_text = {"", "nan", "none"}
    # Textual spellings of "no runner" that a bare bool() would read as True.
    false_text = missing_text | {"false", "0", "0.0"}

    def _is_missing(value: Any) -> bool:
        return value is None or str(value).strip().lower() in missing_text

    def _safe_int(value: Any, default: int = 0) -> int:
        # Deliberately broad catch: a malformed feed value must never abort
        # feature building.
        try:
            if _is_missing(value):
                return default
            return int(float(value))
        except Exception:
            return default

    def _safe_float(value: Any, default: float | None = None) -> float | None:
        try:
            if _is_missing(value):
                return default
            number = float(value)
        except Exception:
            return default
        # inf / nan (e.g. "inf", "-nan") would poison the baseline deltas.
        return number if math.isfinite(number) else default

    def _safe_flag(value: Any) -> bool:
        # bool(float("nan")) and bool("0") are both True, so missing markers
        # and textual zeros have to be filtered before the truthiness test.
        try:
            if _is_missing(value):
                return False
            if isinstance(value, str):
                return value.strip().lower() not in false_text
            return bool(value)
        except Exception:
            return False

    def _hand(value: Any) -> str:
        # Upper-cased and stripped so that exact comparisons against "L"/"R"
        # match regardless of how the source spelled it.
        if _is_missing(value) or not value:
            return "R"
        return str(value).strip().upper()

    def _delta(observed: float | None, baseline: float | None) -> float | None:
        if observed is None or baseline is None:
            return None
        return observed - baseline

    balls = _safe_int(game_row.get("balls"), 0)
    strikes = _safe_int(game_row.get("strikes"), 0)
    outs = _safe_int(game_row.get("outs"), 0)

    last_pitch_type = str(game_row.get("pitch_type", "") or "").strip().lower()

    avg_release_speed = _safe_float(pitcher_row.get("avg_release_speed"))
    avg_release_spin_rate = _safe_float(pitcher_row.get("avg_release_spin_rate"))
    avg_release_extension = _safe_float(pitcher_row.get("avg_release_extension"))

    last_pitch_velocity = _safe_float(game_row.get("pitch_velocity"))
    last_pitch_spin_rate = _safe_float(game_row.get("pitch_spin_rate"))
    last_pitch_extension = _safe_float(game_row.get("pitch_extension"))

    # D1: Player-specific pitch usage rates from pitcher's family-zone profiles
    pf_zone = pitcher_family_zone_row or {}

    return {
        "balls": balls,
        "strikes": strikes,
        "outs": outs,
        "runner_on_1b": _safe_flag(game_row.get("runner_on_1b", False)),
        "runner_on_2b": _safe_flag(game_row.get("runner_on_2b", False)),
        "runner_on_3b": _safe_flag(game_row.get("runner_on_3b", False)),
        "pitcher_avg_release_speed": avg_release_speed,
        "pitcher_avg_release_spin_rate": avg_release_spin_rate,
        "pitcher_avg_release_extension": avg_release_extension,
        "pitcher_ev_allowed": pitcher_row.get("ev_allowed"),
        "batter_ev90": batter_row.get("ev90"),
        "batter_barrel_rate": batter_row.get("barrel_rate"),
        "batter_hard_hit_rate": batter_row.get("hard_hit_rate"),
        "last_pitch_type": last_pitch_type,
        "last_pitch_velocity": last_pitch_velocity,
        "last_pitch_spin_rate": last_pitch_spin_rate,
        "last_pitch_extension": last_pitch_extension,
        "velo_delta_from_baseline": _delta(last_pitch_velocity, avg_release_speed),
        "spin_delta_from_baseline": _delta(last_pitch_spin_rate, avg_release_spin_rate),
        "extension_delta_from_baseline": _delta(last_pitch_extension, avg_release_extension),
        # E3: Handedness
        "batter_stand": _hand(batter_row.get("batter_stand", "R")),
        "p_throws": _hand(pitcher_row.get("p_throws", "R")),
        # D1: Player-specific usage rates
        "pitcher_fastball_usage_rate": pf_zone.get("fastball_usage_rate"),
        "pitcher_breaking_usage_rate": pf_zone.get("breaking_usage_rate"),
        "pitcher_offspeed_usage_rate": pf_zone.get("offspeed_usage_rate"),
    }


def _normalize_probs(fastball_prob: float, breaking_prob: float, offspeed_prob: float) -> tuple[float, float, float]:
    total = fastball_prob + breaking_prob + offspeed_prob
    if total <= 0:
        return 0.48, 0.32, 0.20
    return (
        fastball_prob / total,
        breaking_prob / total,
        offspeed_prob / total,
    )


def predict_next_pitch_distribution(feature_row: dict[str, Any]) -> dict[str, Any]:
    """
    Pitch sequencing model v2.1
    Count-aware, sequence-aware, and lightly live-telemetry-aware.

    Starts from a count-conditioned (balls, strikes) prior over
    fastball / breaking / offspeed, optionally blends it 50/50 with the
    pitcher's own usage rates, then applies small additive adjustments for
    the previous pitch type, batter quality, live velocity / spin / extension
    deltas and the handedness matchup. Weights are clamped at zero and
    renormalized. A separate heart / shadow / chase zone distribution is
    derived from the count, batter hard-hit rate and the two-out
    runner-in-scoring-position situation.

    Malformed or missing inputs never raise: counts fall back to 0, and any
    adjustment whose input cannot be read as a number is skipped and logged
    at debug level.

    Args:
        feature_row: Feature dict; reads ``balls``, ``strikes``, ``outs``,
            ``last_pitch_type``, ``batter_ev90``, ``batter_barrel_rate``,
            ``batter_hard_hit_rate``, the three ``*_delta_from_baseline``
            values, ``pitcher_*_usage_rate``, ``batter_stand``, ``p_throws``,
            ``runner_on_2b`` and ``runner_on_3b``.

    Returns:
        ``{"fastball_prob", "breaking_prob", "offspeed_prob", "zone_probs"}``
        where ``zone_probs`` maps ``"heart"``, ``"shadow"`` and ``"chase"``
        to probabilities; each group sums to 1.
    """

    def _count(key: str) -> int:
        # int(None) and int("2.0") raise; treat anything unreadable as 0.
        try:
            return int(float(feature_row.get(key, 0)))
        except (TypeError, ValueError, OverflowError):
            return 0

    def _number(key: str, label: str) -> float | None:
        # Returns None when the value is absent or unparseable, so callers
        # simply skip the corresponding adjustment.
        raw = feature_row.get(key)
        if raw is None:
            return None
        try:
            return float(raw)
        except Exception as exc:
            logger.debug("[sequence_model] %s skipped: %s", label, exc)
            return None

    def _hand(key: str) -> str:
        # Missing/None defaults to "R"; stripped and upper-cased so "l" or
        # " R " still match the comparisons below.
        return str(feature_row.get(key, "R") or "R").strip().upper()

    balls = _count("balls")
    strikes = _count("strikes")
    outs = _count("outs")
    last_pitch_type = str(feature_row.get("last_pitch_type", "") or "").lower()

    # F1: Count-conditioned pitch distribution (full lookup table replaces crude adjustments)
    count_state_table = {
        (0, 0): (0.54, 0.28, 0.18),
        (1, 0): (0.55, 0.27, 0.18),
        (2, 0): (0.62, 0.22, 0.16),
        (3, 0): (0.75, 0.12, 0.13),
        (0, 1): (0.52, 0.30, 0.18),
        (1, 1): (0.52, 0.30, 0.18),
        (2, 1): (0.56, 0.26, 0.18),
        (3, 1): (0.58, 0.24, 0.18),
        (0, 2): (0.42, 0.40, 0.18),
        (1, 2): (0.44, 0.36, 0.20),
        (2, 2): (0.48, 0.32, 0.20),
        (3, 2): (0.52, 0.28, 0.20),
    }
    count_key = (min(balls, 3), min(strikes, 2))
    cs_fb, cs_br, cs_os = count_state_table.get(count_key, (0.54, 0.28, 0.18))

    # The count-state prior is the answer unless a usable player profile exists.
    fastball_prob, breaking_prob, offspeed_prob = cs_fb, cs_br, cs_os

    # D1: Blend with player-specific usage rates if available (50/50)
    player_fb = feature_row.get("pitcher_fastball_usage_rate")
    player_br = feature_row.get("pitcher_breaking_usage_rate")
    player_os = feature_row.get("pitcher_offspeed_usage_rate")

    if player_fb is not None and player_br is not None and player_os is not None:
        try:
            p_fb, p_br, p_os = float(player_fb), float(player_br), float(player_os)
            p_total = p_fb + p_br + p_os
        except Exception as e:
            logger.debug("[sequence_model] player usage rate blend skipped: %s", e)
        else:
            # Negative or non-finite rates are not a usable profile; blending
            # them would produce negative or NaN probabilities.
            if math.isfinite(p_total) and p_total > 0 and min(p_fb, p_br, p_os) >= 0:
                p_fb /= p_total
                p_br /= p_total
                p_os /= p_total
                fastball_prob = (cs_fb + p_fb) / 2
                breaking_prob = (cs_br + p_br) / 2
                offspeed_prob = (cs_os + p_os) / 2

    # Previous-pitch memory
    if "slider" in last_pitch_type or "curve" in last_pitch_type:
        fastball_prob += 0.04
        breaking_prob -= 0.03
        offspeed_prob -= 0.01
    elif "change" in last_pitch_type or "split" in last_pitch_type:
        fastball_prob += 0.03
        offspeed_prob -= 0.02
    elif "fastball" in last_pitch_type or "sinker" in last_pitch_type or "cutter" in last_pitch_type:
        fastball_prob -= 0.03
        breaking_prob += 0.02
        offspeed_prob += 0.01

    # Better hitters tend to see fewer heart-zone fastballs in advantage counts
    batter_ev90 = _number("batter_ev90", "EV90 adjustment")
    if batter_ev90 is not None and batter_ev90 >= 102 and balls >= 2:
        fastball_prob -= 0.03
        breaking_prob += 0.02
        offspeed_prob += 0.01

    batter_barrel_rate = _number("batter_barrel_rate", "barrel_rate adjustment")
    if batter_barrel_rate is not None and batter_barrel_rate >= 0.10 and strikes < 2:
        fastball_prob -= 0.02
        breaking_prob += 0.01
        offspeed_prob += 0.01

    # Live overlay: if velo is down, reduce fastball confidence slightly
    velo_delta = _number("velo_delta_from_baseline", "live velo overlay")
    if velo_delta is not None:
        if velo_delta <= -1.0:
            fastball_prob -= 0.03
            breaking_prob += 0.02
            offspeed_prob += 0.01
        elif velo_delta >= 1.0:
            fastball_prob += 0.02
            breaking_prob -= 0.01
            offspeed_prob -= 0.01

    # Live overlay: if spin is down, slightly reduce breaking-ball trust
    spin_delta = _number("spin_delta_from_baseline", "live spin overlay")
    if spin_delta is not None and spin_delta <= -120:
        breaking_prob -= 0.02
        fastball_prob += 0.01
        offspeed_prob += 0.01

    # Live overlay: shorter extension can imply less deception
    extension_delta = _number("extension_delta_from_baseline", "live extension overlay")
    if extension_delta is not None and extension_delta <= -0.25:
        fastball_prob -= 0.01
        offspeed_prob += 0.01

    # E3: Handedness-aware pitch tendency adjustment
    batter_stand = _hand("batter_stand")
    p_throws = _hand("p_throws")
    if p_throws == "R" and batter_stand == "L":
        # RHP vs LHB: sliders break away from LHB
        fastball_prob -= 0.02
        breaking_prob += 0.03
    elif p_throws == "L" and batter_stand == "R":
        # LHP vs RHB: changeup advantage
        fastball_prob -= 0.02
        offspeed_prob += 0.04
    elif p_throws == batter_stand and p_throws in ("L", "R"):
        # Same-hand matchup: breaking balls break away
        fastball_prob -= 0.01
        breaking_prob += 0.02

    # The additive adjustments above can drive a weight below zero (e.g. a
    # pitcher with no breaking-ball usage); clamp so no class ends up with a
    # negative probability after normalization.
    fastball_prob, breaking_prob, offspeed_prob = _normalize_probs(
        max(0.0, fastball_prob),
        max(0.0, breaking_prob),
        max(0.0, offspeed_prob),
    )

    heart = 0.22
    shadow = 0.43
    chase = 0.35

    if balls >= 2:
        heart += 0.04
        chase -= 0.03
        shadow -= 0.01

    if strikes >= 2:
        chase += 0.06
        heart -= 0.03
        shadow -= 0.03

    if balls == 3 and strikes == 2:
        heart += 0.05
        chase -= 0.03
        shadow -= 0.02

    # Logged like every other adjustment instead of being silently swallowed.
    batter_hard_hit_rate = _number("batter_hard_hit_rate", "hard_hit_rate adjustment")
    if batter_hard_hit_rate is not None and batter_hard_hit_rate >= 0.42:
        heart -= 0.02
        shadow += 0.01
        chase += 0.01

    if outs == 2 and (feature_row.get("runner_on_2b") or feature_row.get("runner_on_3b")):
        shadow += 0.02
        heart -= 0.01
        chase -= 0.01

    zone_total = heart + shadow + chase
    heart /= zone_total
    shadow /= zone_total
    chase /= zone_total

    return {
        "fastball_prob": fastball_prob,
        "breaking_prob": breaking_prob,
        "offspeed_prob": offspeed_prob,
        "zone_probs": {
            "heart": heart,
            "shadow": shadow,
            "chase": chase,
        },
    }