nesticot committed on
Commit
f003aff
·
verified ·
1 Parent(s): 235ac0d

Update stuff_model/calculate_arm_angles.py

Browse files
Files changed (1) hide show
  1. stuff_model/calculate_arm_angles.py +64 -77
stuff_model/calculate_arm_angles.py CHANGED
@@ -3,43 +3,58 @@ import numpy as np
3
  import requests
4
  from io import StringIO
5
 
6
- def calculate_arm_angles(df: pl.DataFrame,pitcher_id:int) -> pl.DataFrame:
7
-
8
- url = f"https://baseballsavant.mlb.com/leaderboard/pitcher-arm-angles?batSide=&dateStart={df['game_date'][0]}&dateEnd={df['game_date'][-1]}&gameType=R&groupBy=&min=1&minGroupPitches=1&perspective=back&pitchHand=&pitchType=&season={int(df['game_date'][0][0:4])}&size=small&sort=ascending&team=&csv=true"
9
-
10
- headers = {
11
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
12
- }
13
-
14
- response = requests.get(url, headers=headers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  old_data = False
17
- daily_check = df['game_date'][0] == df['game_date'][-1]
18
-
19
- # Assuming response.text contains the CSV formatted string
20
- csv_data = response.text
21
-
22
- # Use StringIO to convert the string into a file-like object
23
- data = StringIO(csv_data)
24
-
25
- # Read the CSV data into a DataFrame
26
- df_arm_angle = pl.read_csv(data)
27
-
28
- if pitcher_id not in df_arm_angle["pitcher"]:
29
  old_data = True
30
- df_arm_angle = pl.read_csv('stuff_model/pitcher_arm_angles_2024.csv')
31
 
32
-
33
- #pitcher_id = 489446
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  df_filter = df.filter(pl.col("pitcher_id") == pitcher_id).drop_nulls(subset=["release_pos_x", "release_pos_z"])
35
- # data = requests.get(f'https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}').json()
36
 
37
  if pitcher_id not in df_arm_angle["pitcher"]:
38
-
39
  data = requests.get(f'https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}').json()
40
  height_in = data['people'][0]['height']
41
  height = int(height_in.split("'")[0]) * 12 + int(height_in.split("'")[1].split('"')[0])
42
- df_filter = (df_filter.with_columns(
 
 
43
  (pl.col("release_pos_x") * 12).alias("release_pos_x"),
44
  (pl.col("release_pos_z") * 12).alias("release_pos_z"),
45
  (pl.lit(height * 0.70)).alias("shoulder_pos"),
@@ -50,72 +65,44 @@ def calculate_arm_angles(df: pl.DataFrame,pitcher_id:int) -> pl.DataFrame:
50
  )
51
  .with_columns(
52
  pl.struct(["Opp", "Adj"]).map_elements(lambda x: np.arctan2(x["Opp"], x["Adj"])).alias("arm_angle_rad")
53
- ))
54
-
55
- df_filter = (df_filter.with_columns(
56
-
57
  pl.col("arm_angle_rad").degrees().alias("arm_angle")
58
-
59
- #.drop(["Opp", "arm_angle_rad"])
60
- ))
61
-
62
- else:
63
- shoulder_x = df_arm_angle.filter(pl.col("pitcher") == pitcher_id)["relative_shoulder_x"][0]
64
- shoulder_z = df_arm_angle.filter(pl.col("pitcher") == pitcher_id)["shoulder_z"][0]
65
- rel_x = df_arm_angle.filter(pl.col("pitcher") == pitcher_id)["relative_release_ball_x"][0]
66
- rel_z = df_arm_angle.filter(pl.col("pitcher") == pitcher_id)["release_ball_z"][0]
67
-
68
-
69
- ball_angle = df_arm_angle.filter(pl.col("pitcher") == pitcher_id)["ball_angle"][0]
70
 
 
 
 
 
 
71
  hyp = np.sqrt((rel_x - shoulder_x)**2 + (rel_z - shoulder_z)**2)
72
 
73
- print(shoulder_x, shoulder_z)
74
-
75
- df_filter = (df_filter.with_columns(
76
-
77
- )
78
- .with_columns(
79
  (pl.col("release_pos_z") - shoulder_z).alias("Opp"),
80
- (pl.lit(hyp)).alias("Hyp"),
81
  )
82
  .with_columns(
83
- pl.struct(["Opp","Hyp"]).map_elements(lambda x: np.arcsin(x["Opp"] / x["Hyp"])).alias("arm_angle_rad")
84
  )
85
  .with_columns(
86
  pl.col("arm_angle_rad").degrees().alias("arm_angle")
87
  )
88
- #.drop(["Opp", "arm_angle_rad"])
89
- )
90
-
91
  if old_data:
92
- df_filter = df_filter.with_columns(
93
- ((pl.col("arm_angle") * 0.5) + (ball_angle * 0.5)).alias("arm_angle")
94
- )
95
-
96
  elif daily_check:
97
- df_filter = df_filter.with_columns(
98
- ((pl.col("arm_angle") * 0.25) + (ball_angle * 0.75)).alias("arm_angle")
99
- )
100
-
101
  else:
102
- df_filter = df_filter.with_columns(
103
- ((pl.col("arm_angle") * 0.0) + (ball_angle * 1)).alias("arm_angle")
104
- )
105
 
 
106
  valid_mean = df_filter["arm_angle"].fill_nan(None).drop_nulls().mean()
107
-
108
-
109
  df_filter = df_filter.with_columns(
110
- df_filter["arm_angle"]
111
- .fill_nan(None) # Convert NaN to null
112
- .fill_null(valid_mean) # Fill nulls with mean
113
  )
114
-
115
-
116
- #print([x for x in df_filter["arm_angle"]])
117
-
118
-
119
 
120
-
121
- return df_filter
 
3
  import requests
4
  from io import StringIO
5
 
6
+ def calculate_arm_angles(df: pl.DataFrame, pitcher_id: int) -> pl.DataFrame:
7
+ def fetch_arm_angle_data(url: str):
8
+ headers = {
9
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
10
+ }
11
+ response = requests.get(url, headers=headers)
12
+ if not response.ok or "<html" in response.text.lower():
13
+ return None
14
+ return pl.read_csv(StringIO(response.text), truncate_ragged_lines=True)
15
+
16
+ date_start = df['game_date'][0]
17
+ date_end = df['game_date'][-1]
18
+ season = int(date_start[:4])
19
+ daily_check = date_start == date_end
20
+
21
+ # Try fetching current-season arm angle data
22
+ url = (
23
+ f"https://baseballsavant.mlb.com/leaderboard/pitcher-arm-angles"
24
+ f"?batSide=&dateStart={date_start}&dateEnd={date_end}&gameType=R&groupBy=&min=1"
25
+ f"&minGroupPitches=1&perspective=back&pitchHand=&pitchType=&season={season}"
26
+ f"&size=small&sort=ascending&team=&csv=true"
27
+ )
28
+ df_arm_angle = fetch_arm_angle_data(url)
29
 
30
  old_data = False
31
+ if df_arm_angle is None or pitcher_id not in df_arm_angle["pitcher"]:
 
 
 
 
 
 
 
 
 
 
 
32
  old_data = True
 
33
 
34
+ # Fallback to saved CSVs if 2025 data isn't fetched or pitcher not found
35
+ try:
36
+ df_arm_angle_2025 = pl.read_csv("stuff_model/pitcher_arm_angles_2025.csv", truncate_ragged_lines=True)
37
+ except Exception as e:
38
+ raise RuntimeError("Failed to load fallback 2025 arm angle CSV.") from e
39
+
40
+ try:
41
+ df_arm_angle_2024 = pl.read_csv("stuff_model/pitcher_arm_angles_2024.csv", truncate_ragged_lines=True)
42
+ df_arm_angle_2024 = df_arm_angle_2024.cast(df_arm_angle_2025.schema)
43
+ except Exception as e:
44
+ raise RuntimeError("Failed to load or cast 2024 arm angle CSV.") from e
45
+
46
+ df_arm_angle = pl.concat([df_arm_angle_2025, df_arm_angle_2024]).unique(subset=["pitcher"], keep="first")
47
+
48
+ # Filter your tracking data
49
  df_filter = df.filter(pl.col("pitcher_id") == pitcher_id).drop_nulls(subset=["release_pos_x", "release_pos_z"])
 
50
 
51
  if pitcher_id not in df_arm_angle["pitcher"]:
 
52
  data = requests.get(f'https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}').json()
53
  height_in = data['people'][0]['height']
54
  height = int(height_in.split("'")[0]) * 12 + int(height_in.split("'")[1].split('"')[0])
55
+
56
+ df_filter = (
57
+ df_filter.with_columns(
58
  (pl.col("release_pos_x") * 12).alias("release_pos_x"),
59
  (pl.col("release_pos_z") * 12).alias("release_pos_z"),
60
  (pl.lit(height * 0.70)).alias("shoulder_pos"),
 
65
  )
66
  .with_columns(
67
  pl.struct(["Opp", "Adj"]).map_elements(lambda x: np.arctan2(x["Opp"], x["Adj"])).alias("arm_angle_rad")
68
+ )
69
+ .with_columns(
 
 
70
  pl.col("arm_angle_rad").degrees().alias("arm_angle")
71
+ )
72
+ )
 
 
 
 
 
 
 
 
 
 
73
 
74
+ else:
75
+ row = df_arm_angle.filter(pl.col("pitcher") == pitcher_id).select([
76
+ "relative_shoulder_x", "shoulder_z", "relative_release_ball_x", "release_ball_z", "ball_angle"
77
+ ]).row(0)
78
+ shoulder_x, shoulder_z, rel_x, rel_z, ball_angle = row
79
  hyp = np.sqrt((rel_x - shoulder_x)**2 + (rel_z - shoulder_z)**2)
80
 
81
+ df_filter = (
82
+ df_filter.with_columns(
 
 
 
 
83
  (pl.col("release_pos_z") - shoulder_z).alias("Opp"),
84
+ pl.lit(hyp).alias("Hyp"),
85
  )
86
  .with_columns(
87
+ pl.struct(["Opp", "Hyp"]).map_elements(lambda x: np.arcsin(x["Opp"] / x["Hyp"])).alias("arm_angle_rad")
88
  )
89
  .with_columns(
90
  pl.col("arm_angle_rad").degrees().alias("arm_angle")
91
  )
92
+ )
93
+
94
+ # Adjust based on data source freshness
95
  if old_data:
96
+ df_filter = df_filter.with_columns(((pl.col("arm_angle") * 0.5) + (ball_angle * 0.5)).alias("arm_angle"))
 
 
 
97
  elif daily_check:
98
+ df_filter = df_filter.with_columns(((pl.col("arm_angle") * 0.25) + (ball_angle * 0.75)).alias("arm_angle"))
 
 
 
99
  else:
100
+ df_filter = df_filter.with_columns(((pl.col("arm_angle") * 0.0) + (ball_angle * 1)).alias("arm_angle"))
 
 
101
 
102
+ # Fill missing arm_angle values with mean
103
  valid_mean = df_filter["arm_angle"].fill_nan(None).drop_nulls().mean()
 
 
104
  df_filter = df_filter.with_columns(
105
+ df_filter["arm_angle"].fill_nan(None).fill_null(valid_mean)
 
 
106
  )
 
 
 
 
 
107
 
108
+ return df_filter