Syntrex committed on
Commit
907228e
·
verified ·
1 Parent(s): 91ff9d0

Create pitcher_arsenal_model.py

Browse files
Files changed (1) hide show
  1. models/pitcher_arsenal_model.py +47 -0
models/pitcher_arsenal_model.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import pandas as pd
6
+
7
+ from models.batter_zone_model import normalize_pitch_family
8
+
9
+
10
def build_pitcher_arsenal_feature_row(
    statcast_df: pd.DataFrame,
    pitcher_name: str,
) -> dict[str, Any]:
    """Summarize how often one pitcher throws each pitch family.

    Rows are selected where ``player_name`` (compared as a string) equals
    ``str(pitcher_name)``. Each selected row's pitch label is taken from the
    ``pitch_name`` column, falling back to ``pitch_type`` and then to the
    literal ``"unknown"`` when neither column exists, and is mapped through
    ``normalize_pitch_family``.

    Args:
        statcast_df: Pitch-level rows. May be ``None`` or empty.
        pitcher_name: Name to match against the ``player_name`` column.

    Returns:
        A dict that always contains ``pitcher_name`` and
        ``arsenal_sample_size``. When at least one row matches, it also
        contains ``<family>_usage_rate`` (family count divided by all of the
        pitcher's rows) and ``sample_size_<family>`` for each of
        ``fastball``, ``breaking`` and ``offspeed``. Rows whose family is
        none of those three still count toward the denominator, so the
        three rates need not sum to 1.
    """
    # Shared "no data" result: the input is missing/empty, has no
    # ``player_name`` column, or contains no rows for this pitcher.
    empty_row: dict[str, Any] = {
        "pitcher_name": pitcher_name,
        "arsenal_sample_size": 0,
    }

    if statcast_df is None or statcast_df.empty:
        return empty_row

    if "player_name" not in statcast_df.columns:
        return empty_row

    is_pitcher = statcast_df["player_name"].astype(str) == str(pitcher_name)
    pitches = statcast_df[is_pitcher]
    if pitches.empty:
        return empty_row

    if "pitch_name" in pitches.columns:
        raw_labels = pitches["pitch_name"]
    elif "pitch_type" in pitches.columns:
        raw_labels = pitches["pitch_type"]
    else:
        raw_labels = pd.Series(["unknown"] * len(pitches), index=pitches.index)

    # Kept as a standalone Series so the caller's frame is never modified and
    # no defensive copy of the filtered rows is needed.
    pitch_family = raw_labels.apply(normalize_pitch_family)

    # Guaranteed > 0 here: the empty case returned above.
    total_count = len(pitches)

    out: dict[str, Any] = {
        "pitcher_name": pitcher_name,
        "arsenal_sample_size": int(total_count),
    }

    for family in ("fastball", "breaking", "offspeed"):
        family_count = int((pitch_family == family).sum())

        out[f"{family}_usage_rate"] = float(family_count / total_count)
        out[f"sample_size_{family}"] = family_count

    return out