nesticot committed on
Commit
55ee9e9
·
verified ·
1 Parent(s): 089cb42

Upload 5 files

Browse files
functions/__pycache__/df_update.cpython-39.pyc ADDED
Binary file (14 kB). View file
 
functions/__pycache__/pitch_summary_functions.cpython-39.pyc ADDED
Binary file (37.4 kB). View file
 
functions/df_update.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+ import numpy as np
3
+ import joblib
4
+
5
# Pre-trained models, deserialised once when this module is imported.
# The paths are relative, so they resolve against the current working directory.
# NOTE: joblib.load unpickles its input; only point these at trusted files.
# NOTE(review): `loaded_model` is not referenced by the code visible in this
# file -- confirm whether importers rely on it before removing.
(
    loaded_model,
    in_zone_model,
    attack_zone_model,
    xwoba_model,
    px_model,
    pz_model,
) = map(joblib.load, (
    'joblib_model/barrel_model.joblib',
    'joblib_model/in_zone_model_knn_20240410.joblib',
    'joblib_model/model_attack_zone.joblib',
    'joblib_model/xwoba_model.joblib',
    'joblib_model/linear_reg_model_x.joblib',
    'joblib_model/linear_reg_model_z.joblib',
))
11
+
12
+
13
class df_update:
    """Derive per-pitch columns from pitch-level data and aggregate them.

    ``update`` adds indicator and metric columns to a pitch-level Polars
    DataFrame. ``update_summary`` and ``update_summary_select`` group the
    output of ``update`` and compute totals and rate statistics.
    """

    def __init__(self):
        pass

    def update(self, df_clone: pl.DataFrame) -> pl.DataFrame:
        """Return a copy of ``df_clone`` with derived per-pitch columns added.

        The input is cloned and never modified. The columns read from it are:
        ``type_ab``, ``is_pitch``, ``sz_top``, ``sz_bot``, ``zone``, ``x``,
        ``y``, ``px``, ``pz``, ``event_type``, ``play_description``,
        ``play_code``, ``is_swing``, ``launch_speed``, ``launch_angle`` and
        ``trajectory``.

        The module-level models ``px_model``, ``pz_model``, ``in_zone_model``,
        ``attack_zone_model`` and ``xwoba_model`` are used to fill missing
        pitch locations and zones and to compute expected wOBA.

        Parameters:
        df_clone (pl.DataFrame): Pitch-level data, one row per pitch/event.

        Returns:
        pl.DataFrame: The input columns plus the derived columns, including
        one-hot ``trajectory_*`` columns.
        """
        df = df_clone.clone()

        hit_codes = ['single',
                     'double', 'home_run', 'triple']

        ab_codes = ['single', 'strikeout', 'field_out',
                    'grounded_into_double_play', 'fielders_choice', 'force_out',
                    'double', 'field_error', 'home_run', 'triple',
                    'double_play',
                    'fielders_choice_out', 'strikeout_double_play',
                    'other_out', 'triple_play']

        obp_true_codes = ['single', 'walk',
                          'double', 'home_run', 'triple',
                          'hit_by_pitch', 'intent_walk']

        obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                     'grounded_into_double_play', 'fielders_choice', 'force_out',
                     'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                     'hit_by_pitch', 'double_play', 'intent_walk',
                     'fielders_choice_out', 'strikeout_double_play',
                     'sac_fly_double_play',
                     'other_out', 'triple_play']

        bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

        # Events that score 0 in the wOBA numerator.
        woba_out_codes = ['strikeout', 'field_out', 'sac_fly', 'force_out',
                          'grounded_into_double_play', 'fielders_choice',
                          'field_error', 'sac_bunt', 'double_play',
                          'fielders_choice_out', 'strikeout_double_play',
                          'sac_fly_double_play', 'other_out']

        # Events that count towards the wOBA denominator.
        woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                      'double', 'sac_fly', 'force_out', 'home_run',
                      'grounded_into_double_play', 'fielders_choice',
                      'field_error', 'triple', 'sac_bunt', 'double_play',
                      'fielders_choice_out', 'strikeout_double_play',
                      'sac_fly_double_play', 'other_out']

        # Zone codes compared against `attack_zone`, with the column prefix
        # each one produces.
        attack_zones = [(0, 'heart'), (1, 'shadow'), (2, 'chase'), (3, 'waste')]

        event = pl.col('event_type')
        play_code = pl.col('play_code')
        launch_speed = pl.col('launch_speed')
        launch_angle = pl.col('launch_angle')

        def flag(condition):
            # True where the condition holds, False otherwise (null included).
            return pl.when(condition).then(True).otherwise(False)

        def indicator(condition):
            # 1 where the condition holds, 0 otherwise (null included).
            return pl.when(condition).then(1).otherwise(0)

        is_barrel = (
            (launch_speed * 1.5 - launch_angle >= 117)
            & (launch_speed + launch_angle >= 124)
            & (launch_speed >= 98)
            & (launch_angle >= 4)
            & (launch_angle <= 50)
        )

        total_bases = (
            pl.when(event == 'single').then(1)
            .when(event == 'double').then(2)
            .when(event == 'triple').then(3)
            .when(event == 'home_run').then(4)
            .otherwise(None)
        )

        woba_value = (
            pl.when(event.is_in(woba_out_codes)).then(0)
            .when(event == 'walk').then(0.689)
            .when(event == 'hit_by_pitch').then(0.720)
            .when(event == 'single').then(0.881)
            .when(event == 'double').then(1.254)
            .when(event == 'triple').then(1.589)
            .when(event == 'home_run').then(2.048)
            .otherwise(None)
        )

        # Model-based estimates, computed from the raw input with nulls as 0.
        # NOTE(review): the +3.2 offset on the pz prediction is taken from the
        # original code; its derivation is not visible here.
        px_predict = px_model.predict(df[['x']].fill_null(0).to_numpy())[:, 0]
        pz_predict = pz_model.predict(df[['y']].fill_null(0).to_numpy())[:, 0] + 3.2
        location_features = df[['px', 'pz', 'sz_top', 'sz_bot']].fill_null(0).to_numpy()
        in_zone_predict = in_zone_model.predict(location_features)
        attack_zone_predict = attack_zone_model.predict(location_features)

        df = df.with_columns([
            indicator(pl.col('type_ab').is_not_null()).alias('pa'),
            indicator(pl.col('is_pitch')).alias('pitches'),
            # A strike-zone edge of exactly 0 is treated as missing.
            pl.when(pl.col('sz_top') == 0).then(None).otherwise(pl.col('sz_top')).alias('sz_top'),
            pl.when(pl.col('sz_bot') == 0).then(None).otherwise(pl.col('sz_bot')).alias('sz_bot'),
            pl.when(pl.col('zone') > 0).then(pl.col('zone') < 10).otherwise(None).alias('in_zone'),
            pl.Series('px_predict', px_predict),
            pl.Series('pz_predict', pz_predict),
            pl.Series('in_zone_predict', in_zone_predict),
            pl.Series('attack_zone_predict', attack_zone_predict),
            flag(event.is_in(hit_codes)).alias('hits'),
            flag(event.is_in(ab_codes)).alias('ab'),
            flag(event.is_in(obp_true_codes)).alias('on_base'),
            flag(event.is_in(obp_codes)).alias('obp'),
            flag(pl.col('play_description').is_in(bip_codes)).alias('bip'),
            pl.when(launch_speed.is_null()).then(False)
            .when(is_barrel).then(True)
            .otherwise(None).alias('barrel'),
            pl.when(launch_angle.is_null()).then(False)
            .when((launch_angle >= 8) & (launch_angle <= 32)).then(True)
            .otherwise(None).alias('sweet_spot'),
            pl.when(launch_speed.is_null()).then(False)
            .when(launch_speed >= 94.5).then(True)
            .otherwise(None).alias('hard_hit'),
            total_bases.alias('tb'),
            woba_value.alias('woba'),
            indicator(play_code.is_in(['S', 'W', 'T'])).alias('whiffs'),
            indicator(play_code.is_in(['S', 'W', 'T', 'C'])).alias('csw'),
            indicator(pl.col('is_swing').cast(pl.Boolean)).alias('swings'),
            flag(event.str.contains('strikeout')).alias('k'),
            flag(event.is_in(['walk', 'intent_walk'])).alias('bb'),
            # Placeholders: these three columns are overwritten further down.
            # Creating them here keeps their position in the column order.
            pl.lit(None).alias('attack_zone'),
            pl.lit(None).alias('woba_pred'),
            pl.lit(None).alias('woba_pred_contact'),
        ])

        in_zone = pl.col('in_zone')
        swung = pl.col('swings') == 1
        made_contact = pl.col('whiffs') == 0

        # Every expression in one with_columns call reads the frame as it was
        # before the call, so the zone flags below see the `in_zone` derived
        # from `zone`, not the model-filled `in_zone` produced alongside them.
        # NOTE(review): confirm that is the intended input for the zone flags.
        df = df.with_columns([
            pl.when(event.is_in(woba_codes)).then(1).otherwise(None).alias('woba_codes'),
            pl.when(event.is_in(woba_codes)).then(1).otherwise(None).alias('xwoba_codes'),
            pl.when(pl.col('tb') >= 0).then(pl.col('woba')).otherwise(None).alias('woba_contact'),
            pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
            pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
            pl.when(in_zone.is_null()).then(pl.col('in_zone_predict')).otherwise(in_zone).alias('in_zone'),
            pl.when(launch_speed.is_null()).then(None).otherwise(pl.col('barrel')).alias('barrel'),
            pl.lit('average').alias('average'),
            flag(in_zone.not_()).alias('out_zone'),
            flag(in_zone & swung).alias('zone_swing'),
            flag(in_zone & swung & made_contact).alias('zone_contact'),
            flag(in_zone.not_() & swung).alias('ozone_swing'),
            flag(in_zone.not_() & swung & made_contact).alias('ozone_contact'),
            pl.when(pl.col('attack_zone').is_null())
            .then(pl.col('attack_zone_predict'))
            .otherwise(pl.col('attack_zone')).alias('attack_zone'),
        ])

        strikeouts = pl.col('k').cast(pl.Float32)
        walks = pl.col('bb').cast(pl.Float32)
        attack_zone = pl.col('attack_zone')

        df = df.with_columns(
            [
                (strikeouts - walks).alias('k_minus_bb'),
                (walks - strikeouts).alias('bb_minus_k'),
                (launch_speed > 0).alias('bip_div'),
            ]
            + [(attack_zone == code).alias(name) for code, name in attack_zones]
            + [((attack_zone == code) & (pl.col('swings') == 1)).alias(f'{name}_swing')
               for code, name in attack_zones]
            + [((attack_zone == code) & (pl.col('whiffs') == 1)).alias(f'{name}_whiff')
               for code, name in attack_zones]
        )

        # Expected wOBA: class probabilities weighted by the wOBA value of
        # each class.
        # NOTE(review): assumes the model's classes are ordered
        # out, single, double, triple, home run -- confirm against the model.
        xwoba_weights = np.array([0, 0.881, 1.254, 1.589, 2.048])
        batted_ball = df[['launch_angle', 'launch_speed']].fill_null(0).to_numpy()
        expected_woba = (xwoba_model.predict_proba(batted_ball) * xwoba_weights).sum(axis=1)
        df = df.with_columns(pl.Series('woba_pred_predict', expected_woba))

        # Walks, hit-by-pitches and strikeouts get their fixed wOBA value
        # instead of the model prediction.
        df = df.with_columns([
            pl.when(event.is_in(['walk'])).then(0.689)
            .when(event.is_in(['hit_by_pitch'])).then(0.720)
            .when(event.is_in(['strikeout', 'strikeout_double_play'])).then(0)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred_predict')
        ])

        df = df.with_columns([
            pl.when(pl.col('woba_codes').is_null()).then(None)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred'),
            pl.when(pl.col('bip') != 1).then(None)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred_contact'),
        ])

        # Fold bunt trajectories into the regular categories; '' becomes null.
        df = df.with_columns([
            pl.when(pl.col('trajectory').is_in(['bunt_popup'])).then(pl.lit('popup'))
            .when(pl.col('trajectory').is_in(['bunt_grounder'])).then(pl.lit('ground_ball'))
            .when(pl.col('trajectory').is_in(['bunt_line_drive'])).then(pl.lit('line_drive'))
            .when(pl.col('trajectory').is_in([''])).then(pl.lit(None))
            .otherwise(pl.col('trajectory')).alias('trajectory')
        ])

        # One-hot encode the trajectory column.
        dummy_df = df.select(pl.col('trajectory')).to_dummies()

        # A category that never occurs in the data yields no dummy column, so
        # add an all-zero column for it; the summary methods expect all four.
        # (The original identity rename that preceded this step raised when a
        # category was absent, before these columns could be added.)
        expected_dummies = ['trajectory_fly_ball', 'trajectory_ground_ball',
                            'trajectory_line_drive', 'trajectory_popup']
        missing = [name for name in expected_dummies if name not in dummy_df.columns]
        if missing:
            dummy_df = dummy_df.with_columns([pl.lit(0).alias(name) for name in missing])

        df = df.hstack(dummy_df)

        # Drop the dummy column generated for null trajectories, if present.
        if 'trajectory_null' in df.columns:
            df = df.drop('trajectory_null')

        return df

    def update_summary(self, df: pl.DataFrame, pitcher: bool = True) -> pl.DataFrame:
        """
        Update summary statistics for pitchers or batters.

        Groups by ``pitcher_id``/``pitcher_name`` or ``batter_id``/``batter_name``
        and delegates to ``update_summary_select``.

        Parameters:
        df (pl.DataFrame): Pitch-level data as returned by ``update``.
        pitcher (bool): Group by pitcher when True, by batter when False.

        Returns:
        pl.DataFrame: One row per player with aggregated and calculated summary statistics.
        """
        position = 'pitcher' if pitcher else 'batter'
        return self.update_summary_select(df, [f'{position}_id', f'{position}_name'])

    def update_summary_select(self, df: pl.DataFrame, selection: list) -> pl.DataFrame:
        """
        Update summary statistics for an arbitrary grouping.

        Parameters:
        df (pl.DataFrame): Pitch-level data as returned by ``update``.
        selection (list): Column names to group by.

        Returns:
        pl.DataFrame: One row per group with aggregated and calculated summary statistics.
        """
        df_summ = df.group_by(selection).agg(self._summary_aggregations())
        return df_summ.with_columns(self._summary_rates())

    @staticmethod
    def _summary_aggregations() -> list:
        """Build the per-group aggregation expressions, in output column order."""
        def total(column, alias=None):
            return pl.col(column).sum().alias(alias or column)

        zones = ('heart', 'shadow', 'chase', 'waste')

        return [
            total('pa'),
            total('ab'),
            total('obp', 'obp_pa'),
            total('hits'),
            total('on_base'),
            total('k'),
            total('bb'),
            total('bb_minus_k'),
            total('csw'),
            total('bip'),
            total('bip_div'),
            total('tb'),
            total('woba'),
            total('woba_contact'),
            total('woba_pred', 'xwoba'),
            total('woba_pred_contact', 'xwoba_contact'),
            total('woba_codes'),
            total('xwoba_codes'),
            total('hard_hit'),
            total('barrel'),
            total('sweet_spot'),
            pl.col('launch_speed').max().alias('max_launch_speed'),
            pl.col('launch_speed').quantile(0.90).alias('launch_speed_90'),
            pl.col('launch_speed').mean().alias('launch_speed'),
            pl.col('launch_angle').mean().alias('launch_angle'),
            total('is_pitch', 'pitches'),
            total('swings'),
            total('in_zone'),
            total('out_zone'),
            total('whiffs'),
            total('zone_swing'),
            total('zone_contact'),
            total('ozone_swing'),
            total('ozone_contact'),
            total('trajectory_ground_ball', 'ground_ball'),
            total('trajectory_line_drive', 'line_drive'),
            total('trajectory_fly_ball', 'fly_ball'),
            total('trajectory_popup', 'pop_up'),
            # Count of non-null attack-zone values (not a sum).
            pl.col('attack_zone').count().alias('attack_zone'),
            *[total(zone) for zone in zones],
            *[total(f'{zone}_swing') for zone in zones],
            *[total(f'{zone}_whiff') for zone in zones],
        ]

    @staticmethod
    def _summary_rates() -> list:
        """Build the rate-statistic expressions computed from the aggregated totals."""
        def ratio(alias, numerator, denominator):
            return (pl.col(numerator) / pl.col(denominator)).alias(alias)

        zones = ('heart', 'shadow', 'chase', 'waste')

        return [
            ratio('avg', 'hits', 'ab'),
            ratio('obp', 'on_base', 'obp_pa'),
            ratio('slg', 'tb', 'ab'),
            (pl.col('on_base') / pl.col('obp_pa') + pl.col('tb') / pl.col('ab')).alias('ops'),
            ratio('k_percent', 'k', 'pa'),
            ratio('bb_percent', 'bb', 'pa'),
            ratio('bb_minus_k_percent', 'bb_minus_k', 'pa'),
            ratio('bb_over_k_percent', 'bb', 'k'),
            ratio('csw_percent', 'csw', 'pitches'),
            ratio('sweet_spot_percent', 'sweet_spot', 'bip_div'),
            ratio('woba_percent', 'woba', 'woba_codes'),
            ratio('woba_percent_contact', 'woba_contact', 'bip'),
            ratio('hard_hit_percent', 'hard_hit', 'bip_div'),
            ratio('barrel_percent', 'barrel', 'bip_div'),
            ratio('zone_contact_percent', 'zone_contact', 'zone_swing'),
            ratio('zone_swing_percent', 'zone_swing', 'in_zone'),
            ratio('zone_percent', 'in_zone', 'pitches'),
            # Denominator is every pitch not counted as in-zone.
            (pl.col('ozone_swing') / (pl.col('pitches') - pl.col('in_zone'))).alias('chase_percent'),
            ratio('chase_contact', 'ozone_contact', 'ozone_swing'),
            ratio('swing_percent', 'swings', 'pitches'),
            ratio('whiff_rate', 'whiffs', 'swings'),
            ratio('swstr_rate', 'whiffs', 'pitches'),
            ratio('ground_ball_percent', 'ground_ball', 'bip'),
            ratio('line_drive_percent', 'line_drive', 'bip'),
            ratio('fly_ball_percent', 'fly_ball', 'bip'),
            ratio('pop_up_percent', 'pop_up', 'bip'),
            *[ratio(f'{zone}_zone_percent', zone, 'attack_zone') for zone in zones],
            *[ratio(f'{zone}_zone_swing_percent', f'{zone}_swing', zone) for zone in zones],
            *[ratio(f'{zone}_zone_whiff_percent', f'{zone}_whiff', f'{zone}_swing')
              for zone in zones],
            ratio('xwoba_percent', 'xwoba', 'xwoba_codes'),
            ratio('xwoba_percent_contact', 'xwoba_contact', 'bip'),
        ]
functions/pitch_summary_functions.py ADDED
@@ -0,0 +1,1152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import json
4
+ from matplotlib.ticker import FuncFormatter
5
+ from matplotlib.ticker import MaxNLocator
6
+ import math
7
+ from matplotlib.patches import Ellipse
8
+ import matplotlib.transforms as transforms
9
+ import matplotlib.colors
10
+ import matplotlib.colors as mcolors
11
+ import seaborn as sns
12
+ import matplotlib.pyplot as plt
13
+ import requests
14
+ import polars as pl
15
+ from PIL import Image
16
+ import requests
17
+ from io import BytesIO
18
+ from matplotlib.offsetbox import OffsetImage, AnnotationBbox
19
+ import matplotlib.pyplot as plt
20
+ import matplotlib.gridspec as gridspec
21
+ import PIL
22
+ from stuff_model import calculate_arm_angles as caa
23
+
24
+
25
+ ### PITCH COLOURS ###
26
+
27
+ # Dictionary to map pitch types to their corresponding colors and names
28
# Maps each pitch-type code to its display colour and display name.
pitch_colours = {
    ## Fastballs ##
    'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
    'FA': {'colour': '#FF007D', 'name': 'Fastball'},
    'SI': {'colour': '#98165D', 'name': 'Sinker'},
    'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},

    ## Offspeed ##
    'CH': {'colour': '#F79E70', 'name': 'Changeup'},
    'FS': {'colour': '#FE6100', 'name': 'Splitter'},
    'SC': {'colour': '#F08223', 'name': 'Screwball'},
    'FO': {'colour': '#FFB000', 'name': 'Forkball'},

    ## Sliders ##
    'SL': {'colour': '#67E18D', 'name': 'Slider'},
    'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
    'SV': {'colour': '#376748', 'name': 'Slurve'},

    ## Curveballs ##
    'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
    'CU': {'colour': '#3025CE', 'name': 'Curveball'},
    'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
    'EP': {'colour': '#648FFF', 'name': 'Eephus'},

    ## Others ##
    # 'KN' was previously listed twice ('Knuckleball', then 'Knuckle Ball').
    # A dict literal keeps only the last duplicate, so 'Knuckle Ball' was the
    # effective name; 'Knuckleball' is registered as an alias below instead.
    'KN': {'colour': '#867A08', 'name': 'Knuckle Ball'},
    'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
    'UN': {'colour': '#9C8975', 'name': 'Unknown'},
}

# Lookup tables derived from pitch_colours.
# code -> colour
dict_colour = {code: info['colour'] for code, info in pitch_colours.items()}
# code -> display name
dict_pitch = {code: info['name'] for code, info in pitch_colours.items()}
# display name -> code, plus alternate spellings and the 'All' pseudo-entry
dict_pitch_desc_type = {info['name']: code for code, info in pitch_colours.items()}
dict_pitch_desc_type.update({'Four-Seam Fastball': 'FF'})
dict_pitch_desc_type.update({'All': 'All'})
dict_pitch_desc_type.update({'Knuckleball': 'KN'})
# display name -> colour, plus alternate spellings
dict_pitch_name = {info['name']: info['colour'] for info in pitch_colours.values()}
dict_pitch_name.update({'Four-Seam Fastball': '#FF007D'})
dict_pitch_name.update({'Knuckleball': '#867A08'})
67
+
68
# Font settings for body text, titles and axis labels.
# NOTE(review): 'calibi' looks like a misspelling of 'calibri' -- confirm
# before changing, since it is the family name passed on at runtime.
font_properties = dict(family='calibi', size=12)
font_properties_titles = dict(family='calibi', size=20)
font_properties_axes = dict(family='calibi', size=16)
71
+
72
# Three-stop colour map: '#648FFF' -> white -> '#FFB000'.
cmap_sum = mcolors.LinearSegmentedColormap.from_list(
    name="",
    colors=['#648FFF', '#FFFFFF', '#FFB000'],
)
73
+
74
+ ### FANGRAPHS STATS DICT ###
75
# Maps a stat key to its table header (mathtext, bold) and the format spec
# used to render its value.
# Headers are raw strings so that `\/` and `\%` are not parsed as (invalid)
# Python escape sequences; the runtime text is unchanged.
# OBP and SLG are rate stats, so they use three decimals like AVG and wOBA
# ('.0f' rendered every value as 0 or 1).
fangraphs_stats_dict = {
    'IP': {'table_header': r'$\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': r'$\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': r'$\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': r'$\bf{K\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': r'$\bf{BB\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': r'$\bf{K\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': r'$\bf{HR\/9}$', 'format': '.2f'},
    'K%': {'table_header': r'$\bf{K\%}$', 'format': '.1%'},
    'BB%': {'table_header': r'$\bf{BB\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': r'$\bf{K-BB\%}$', 'format': '.1%'},
    'WHIP': {'table_header': r'$\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': r'$\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': r'$\bf{LOB\%}$', 'format': '.1%'},
    'xFIP': {'table_header': r'$\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': r'$\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': r'$\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': r'$\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': r'$\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': r'$\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': r'$\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': r'$\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': r'$\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': r'$\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': r'$\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': r'$\bf{SO}$', 'format': '.0f'},
    'OBP': {'table_header': r'$\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': r'$\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': r'$\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': r'$\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': r'$\bf{G}$', 'format': '.0f'},
    'strikePercentage': {'table_header': r'$\bf{Strike\%}$', 'format': '.1%'},
}
106
+
107
# General-purpose list of hex colours; indexed by position elsewhere in the file.
colour_palette = [
    '#FFB000',
    '#648FFF',
    '#785EF0',
    '#DC267F',
    '#FE6100',
    '#3D1EB2',
    '#894D80',
    '#16AA02',
    '#B5592B',
    '#A3C1ED',
]
109
+
110
+ ### GET COLOURS ###
111
def get_color(value, normalize, cmap_sum):
    """
    Map a numeric value to a hexadecimal colour string.

    Parameters
    ----------
    value : float
        The value to colour.
    normalize : matplotlib.colors.Normalize
        Callable that scales ``value`` before it is handed to the colormap.
    cmap_sum : matplotlib.colors.Colormap
        Callable colormap applied to the normalised value.

    Returns
    -------
    str
        Hex colour code produced by ``mcolors.to_hex``.
    """
    scaled_value = normalize(value)
    rgba = cmap_sum(scaled_value)
    return mcolors.to_hex(rgba)
131
+
132
+ ### PITCH ELLIPSE ###
133
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Anything ``numpy.asarray`` can turn into a float array
        (lists, NumPy arrays, Series objects exposing ``__array__``).

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.

    facecolor : colour, optional
        Fill colour passed to the ellipse patch (default ``'none'``).

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to ``ax``. ``None`` when no ellipse can be drawn:
        the variance of either input is zero or not finite, or a
        ``ValueError`` is raised while building the patch.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same length.
    """
    if len(x) != len(y):
        raise ValueError("x and y must be the same size")

    try:
        x_values = np.asarray(x, dtype=float)
        y_values = np.asarray(y, dtype=float)
        cov = np.cov(x_values, y_values)

        var_x = cov[0, 0]
        var_y = cov[1, 1]
        # Zero or NaN variance means the correlation is undefined (0/0) and
        # the ellipse would have NaN dimensions, so there is nothing to draw.
        if not (var_x > 0 and var_y > 0):
            return None

        # Rounding can push |pearson| marginally above 1, which would make
        # one of the square roots below NaN; clamp it to the valid range.
        pearson = float(np.clip(cov[0, 1] / np.sqrt(var_x * var_y), -1.0, 1.0))

        # Using a special case to obtain the eigenvalues of this
        # two-dimensional dataset.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, linewidth=2, linestyle='--', **kwargs)

        # Standard deviation of each axis (square root of its variance)
        # multiplied by the requested number of standard deviations.
        scale_x = np.sqrt(var_x) * n_std
        scale_y = np.sqrt(var_y) * n_std
        mean_x = x_values.mean()
        mean_y = y_values.mean()

        transf = (
            transforms.Affine2D()
            .rotate_deg(45)
            .scale(scale_x, scale_y)
            .translate(mean_x, mean_y)
        )
        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        # Best effort: a value that cannot be converted or drawn simply
        # yields no ellipse instead of aborting the surrounding figure.
        return None

    return ax.add_patch(ellipse)
193
+ ### VELOCITY KDES ###
194
def velocity_kdes(df: pl.DataFrame, ax: plt.Axes, gs: gridspec.GridSpec, gs_x: list, gs_y: list, fig: plt.Figure):
    """
    Plot the velocity KDEs for different pitch types.

    One small axis is stacked per pitch type inside the region of ``gs``
    described by ``gs_x`` / ``gs_y``. Each axis shows the distribution of
    ``start_speed``, a dashed line at the mean of the supplied data and a
    dotted line at the mean ``release_speed`` read from
    ``functions/statcast_2024_grouped.csv`` for the same pitch type.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The columns read here are
        ``pitch_type``, ``pitch_count`` and ``start_speed``.
    ax : plt.Axes
        The outer axis; it is switched off and only carries the title.
    gs : GridSpec
        The GridSpec for the subplot layout.
    gs_x : list
        Row bounds into ``gs`` (first and last elements are used as a slice).
    gs_y : list
        Column bounds into ``gs`` (first and last elements are used as a slice).
    fig : plt.Figure
        The figure to plot on.
    """
    # Get unique pitch types sorted by pitch count
    items_in_order = df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy()

    # The outer axis only hosts the title; the curves live on inner axes.
    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})

    # Nothing to draw: a zero-row inner grid cannot be built.
    if len(items_in_order) == 0:
        return

    inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1, subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
    ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]

    # Loop-invariant work: read the league reference file once and compute
    # the shared x-range (rounded outwards to multiples of 5) once.
    df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')
    x_lower = math.floor(df['start_speed'].min() / 5) * 5
    x_upper = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = list(range(x_lower, x_upper, 5))

    last_idx = len(items_in_order) - 1
    for idx, i in enumerate(items_in_order):
        axis = ax_top[idx]
        colour = dict_colour[i]
        pitch_data = df.filter(pl.col('pitch_type') == i)['start_speed']

        distinct_speeds = np.unique(pitch_data)
        if distinct_speeds.size == 1:
            # A KDE is undefined for a single distinct value; draw a bar instead.
            axis.plot([distinct_speeds, distinct_speeds], [0, 1], linewidth=4, color=colour, zorder=20)
        else:
            sns.kdeplot(pitch_data, ax=axis, fill=True, clip=(pitch_data.min(), pitch_data.max()), color=colour)

        # Plot the mean release speed for the current data
        pitcher_mean = pitch_data.mean()
        axis.plot([pitcher_mean, pitcher_mean], [axis.get_ylim()[0], axis.get_ylim()[1]], color=colour, linestyle='--')

        # Plot the mean release speed for the statcast group data
        league_mean = df_statcast_group.filter(df_statcast_group['pitch_type'] == i)['release_speed'].mean()
        if league_mean is not None:
            # The mean is None when the reference file has no rows for this type.
            axis.plot([league_mean, league_mean], [axis.get_ylim()[0], axis.get_ylim()[1]], color=colour, linestyle=':')

        axis.set_xlim(x_lower, x_upper)
        axis.set_xlabel('')
        axis.set_ylabel('')
        if idx < last_idx:
            # Only the bottom axis keeps visible x tick labels.
            axis.spines['top'].set_visible(False)
            axis.spines['right'].set_visible(False)
            axis.spines['left'].set_visible(False)
            axis.tick_params(axis='x', colors='none')

        axis.set_xticks(x_ticks)
        axis.set_yticks([])
        axis.grid(axis='x', linestyle='--')
        axis.text(-0.01, 0.5, i, transform=axis.transAxes, fontsize=14, va='center', ha='right')

    bottom_axis = ax_top[-1]
    bottom_axis.spines['top'].set_visible(False)
    bottom_axis.spines['right'].set_visible(False)
    bottom_axis.spines['left'].set_visible(False)
    bottom_axis.set_xticks(x_ticks)
    bottom_axis.set_xlabel('Velocity (mph)')
257
+
258
+ ### TJ STUFF+ ROLLING ###
259
def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Draw one rolling-mean tjStuff+ line per pitch type.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; the ``pitch_type``, ``pitch_count`` and
        ``tj_stuff_plus`` columns are read.
    window : int
        Number of pitches in the rolling window.
    ax : plt.Axes
        Axis that receives the lines.
    """
    # Pitch types in order of first appearance after sorting by pitch count
    ordered_types = (
        df.sort("pitch_count", descending=True)['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    for pitch_type in ordered_types:
        type_rows = df.filter(pl.col('pitch_type') == pitch_type)
        largest_count = type_rows['pitch_count'].max()
        # A type is only drawn once it has at least one full window.
        if not largest_count >= window:
            continue
        rolling_stuff = type_rows['tj_stuff_plus'].rolling_mean(window)
        sns.lineplot(
            x=range(1, largest_count + 1),
            y=rolling_stuff,
            color=dict_colour[pitch_type],
            ax=ax,
            linewidth=3,
        )

    # The first complete window ends at pitch number ``window``
    ax.set_xlim(window, df['pitch_count'].max())
    ax.set_ylim(70, 130)

    # Labels, title and integer-only x ticks
    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
294
+
295
+ ### TJ STUFF+ ROLLING ###
296
def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Plot the rolling average of tjStuff+ for different pitch types over games.

    Games are numbered 1..N in order of first appearance of ``game_id``.
    For each pitch type the per-game mean ``tj_stuff_plus`` is computed,
    games in which the pitch was not thrown are filled from neighbouring
    games, and a rolling mean over ``window`` games is drawn. Filled-in
    games are marked with a hollow point.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The columns read here are
        ``game_id``, ``game_date``, ``pitch_type``, ``pitch_description``
        and ``tj_stuff_plus``.
    window : int
        The window size for calculating the rolling average.
    ax : plt.Axes
        The axis to plot on.
    """
    # Map game_id to sequential numbers
    game_ids = df['game_id'].unique(maintain_order=True)
    date_to_number = {game_id: number for number, game_id in enumerate(game_ids, start=1)}
    n_games = len(date_to_number)

    # Add a column with the sequential game numbers
    df_plot = df.with_columns(
        pl.col("game_id").map_elements(lambda x: date_to_number.get(x, x)).alias("start_number")
    )

    # Group by relevant columns and calculate mean tj_stuff_plus
    plot_game_roll = df_plot.group_by(['start_number', 'game_id', 'game_date', 'pitch_type', 'pitch_description']).agg(
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus')
    ).sort('start_number', descending=False)

    # Get the list of pitch types ordered by frequency
    sorted_value_counts = df['pitch_type'].value_counts().sort('count', descending=True)
    items_in_order = sorted_value_counts['pitch_type'].to_list()

    # Every game number, used to expose games in which a pitch type is absent.
    all_game_numbers = pl.DataFrame({"start_number": list(date_to_number.values())})

    # Plot the rolling average for each pitch type
    for i in items_in_order:
        df_item = plot_game_roll.filter(pl.col('pitch_type') == i)
        df_item = df_item.with_columns(
            pl.col("start_number").cast(pl.Int64)
        ).join(
            all_game_numbers,
            on="start_number",
            how="outer"
        ).sort("start_number_right").with_columns([
            pl.col("start_number").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("tj_stuff_plus").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_type").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_description").fill_null(strategy="forward").fill_null(strategy="backward")
        ])

        sns.lineplot(x=range(1, max(df_item['start_number_right']) + 1),
                     y=df_item.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window, min_periods=1),
                     color=dict_colour[i],
                     ax=ax, linewidth=3)

        # Highlight missing game data points. The rolling mean and the null
        # mask are computed once per pitch type instead of once per row.
        rolled = df_item['tj_stuff_plus'].rolling_mean(window, min_periods=1)
        game_missing = df_item['game_id'].is_null()
        for n in range(len(df_item)):
            if game_missing[n]:
                sns.scatterplot(x=[df_item['start_number_right'][n]],
                                y=[rolled[n]],
                                color='white',
                                ec=dict_colour[i],
                                ax=ax,
                                zorder=100)

    # Span every game. The limit used to come from the last pitch type's
    # forward-filled game numbers, which cut the axis short whenever that
    # pitch was not thrown in the final game(s) and failed outright when no
    # pitch type had been plotted.
    if n_games > 0:
        ax.set_xlim(1, n_games)
    ax.set_ylim(70, 130)
    ax.set_xlabel('Games', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
364
+
365
def break_plot(df: pl.DataFrame, ax: plt.Axes):
    """
    Plot the pitch breaks for different pitch types.

    Draws a confidence ellipse per pitch type (types with more than four
    pitches), a scatter of horizontal break against induced vertical break,
    a dashed line at the mean arm angle, and glove-side / arm-side labels
    that depend on the pitcher's throwing hand.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The columns read here are
        ``pitch_type``, ``pitch_count``, ``hb``, ``ivb``, ``pitcher_hand``
        and ``pitcher_id``; the first row supplies the hand and the id.
    ax : plt.Axes
        The axis to plot on.
    """
    # Unique pitch types in the order produced by the sort below
    label_labels = df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type'].unique(maintain_order=True).to_numpy()

    # Plot confidence ellipses for each pitch type
    for label in label_labels:
        subset = df.filter(pl.col('pitch_type') == label)
        if len(subset) > 4:
            try:
                confidence_ellipse(subset['hb'], subset['ivb'], ax=ax, edgecolor=dict_colour[label], n_std=2, facecolor=dict_colour[label], alpha=0.2)
            except ValueError:
                # One pitch type that cannot be outlined should not abort
                # the rest of the plot; skip just that ellipse.
                continue

    pitcher_hand = df['pitcher_hand'][0]

    # Plot scatter plot for pitch breaks. The call is the same for either
    # hand; handedness only affects the axis direction and labels below.
    sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)

    # Set axis limits
    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    df_aa = caa.calculate_arm_angles(df, df['pitcher_id'][0])['arm_angle']

    # Plot average arm angle as a ray from the origin
    mean_arm_angle = df_aa.mean()
    x_end = 30
    y_end = x_end * np.tan(np.radians(mean_arm_angle))
    ax.plot([0, x_end], [0, y_end], color='grey', linestyle='--', linewidth=2, zorder=0)

    # Add horizontal and vertical lines
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)

    # Set axis labels and title
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title(f"Pitch Breaks - Arm Angle: {mean_arm_angle:.0f}°", fontdict=font_properties_titles)

    # Remove legend; there is none when seaborn had nothing to label.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    # Set tick labels
    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    # Add text annotations for glove side and arm side
    if pitcher_hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
    elif pitcher_hand == 'L':
        # The x-axis is flipped for left-handers, so the label anchors swap.
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)

    # Set aspect ratio and format axis ticks
    ax.set_aspect('equal', adjustable='box')
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
442
+
443
+ # DEFINE STRIKE ZONE
444
# Corner points of the strike-zone rectangle; the first point is repeated at
# the end so the outline closes when drawn as a single line.
strike_zone = pl.DataFrame(
    dict(
        PlateLocSide=[-0.9, -0.9, 0.9, 0.9, -0.9],
        PlateLocHeight=[1.5, 3.5, 3.5, 1.5, 1.5],
    )
)
448
+
449
+ ### STRIKE ZONE ###
450
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """
    Outline the strike zone and home plate on ``axis``.

    Parameters
    ----------
    axis : matplotlib.axes.Axes
        Axis that receives the lines.
    alpha_spot : float, optional
        Transparency applied to every line (default is 1).
    catcher_p : bool, optional
        ``True`` draws home plate from the catcher's perspective,
        ``False`` from the pitcher's perspective (default is True).
    """
    # Strike-zone rectangle
    zone_x = strike_zone['PlateLocSide'].to_list()
    zone_y = strike_zone['PlateLocHeight'].to_list()
    axis.plot(zone_x, zone_y, color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Home plate as five straight segments, each an (xs, ys) pair
    if catcher_p:
        plate_segments = (
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        )
    else:
        plate_segments = (
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-0.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        )

    for segment_x, segment_y in plate_segments:
        axis.plot(segment_x, segment_y, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
481
+
482
def location_plot(df: pl.DataFrame, ax: plt.Axes, hand: str):
    """
    Draw average pitch locations per pitch type against one batter hand.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; the ``pitch_type``, ``pitch_count``, ``batter_hand``,
        ``px``, ``pz`` and ``start_speed`` columns are read.
    ax : plt.Axes
        Axis that receives the plot.
    hand : str
        Batter hand to keep ('L' for left-handed, 'R' for right-handed).
    """
    # Pitch types in the order produced by the sort
    ordered_types = (
        df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    # One ellipse per pitch type that has at least five pitches vs this hand
    for pitch_type in ordered_types:
        same_type_and_hand = (pl.col('pitch_type') == pitch_type) & (pl.col('batter_hand') == hand)
        type_rows = df.filter(same_type_and_hand)
        if len(type_rows) < 5:
            continue
        confidence_ellipse(type_rows['px'], type_rows['pz'], ax=ax, edgecolor=dict_colour[pitch_type], n_std=1.5, facecolor=dict_colour[pitch_type], alpha=0.3)

    # Pitch count and mean location per pitch type against this hand
    versus_hand = df.filter(pl.col("batter_hand") == hand)
    pitch_location_group = versus_hand.group_by("pitch_type").agg([
        pl.col("start_speed").count().alias("pitches"),
        pl.col("px").mean().alias("px"),
        pl.col("pz").mean().alias("pz"),
    ])

    # Share of pitches per type; it drives the marker size below
    total_pitches = pitch_location_group['pitches'].sum()
    pitch_location_group = pitch_location_group.with_columns(
        (pl.col("pitches") / total_pitches).alias("pitch_percent")
    )

    # One marker per pitch type at its mean location
    marker_sizes = pitch_location_group['pitch_percent'] * 750
    sns.scatterplot(ax=ax, x=pitch_location_group['px'], y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_type'], palette=dict_colour, ec='black',
                    s=marker_sizes, linewidth=2, zorder=2)

    # Frame: square axes, strike zone from the pitcher's view, no axis decoration
    ax.axis('square')
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))
    if len(pitch_location_group['px']) > 0:
        ax.get_legend().remove()
    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches", fontdict=font_properties_titles)
536
+
537
+
538
def summary_table(df: pl.DataFrame, ax: plt.Axes):
    """
    Create a summary table of pitch data.

    Aggregates ``df`` per ``pitch_description`` plus an "All" row, renders
    the result as a matplotlib table on ``ax``, colours selected cells
    relative to the values in ``functions/statcast_2024_grouped.csv`` (or
    fixed ranges for tjStuff+ and pitch grade), formats the numbers and
    adds a pitch-type legend above the table.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data.
    ax : plt.Axes
        The axis to plot the table on.
    """
    # Denominator of the usage share. A plain scalar is used so the
    # aggregation does not have to divide an expression by a DataFrame.
    total_pitches = df['is_pitch'].sum()

    # Aggregate pitch data by pitch description
    df_agg = df.group_by("pitch_description").agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / total_pitches).alias('count_percent'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('spin_rate').mean().alias('spin_rate'),
        pl.col('vaa').mean().alias('vaa'),
        pl.col('haa').mean().alias('haa'),
        pl.col('release_pos_z').mean().alias('z0'),
        pl.col('release_pos_x').mean().alias('x0'),
        pl.col('extension').mean().alias('extension'),
        # Mean spin direction rendered as an "H:MM" clock-face string.
        (((pl.col('spin_direction').mean() + 180) % 360 // 30) +
         (((pl.col('spin_direction').mean() + 180) % 360 % 30 / 30 / 100 * 60).round(2) * 10).round(0) // 1.5 / 4)
        .cast(pl.Float64).map_elements(lambda x: f"{int(x)}:{int((x % 1) * 60):02d}", return_dtype=pl.Utf8).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.col('pitch_grade').mean().alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    ).sort("count", descending=True)

    # Aggregate all pitch data; per-pitch shape metrics are left empty
    df_agg_all = df.group_by(pl.lit("All").alias("pitch_description")).agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / total_pitches).alias('count_percent'),
        pl.lit(None).alias('start_speed'),
        pl.lit(None).alias('ivb'),
        pl.lit(None).alias('hb'),
        pl.lit(None).alias('spin_rate'),
        pl.lit(None).alias('vaa'),
        pl.lit(None).alias('haa'),
        pl.lit(None).alias('z0'),
        pl.lit(None).alias('x0'),
        pl.col('extension').mean().alias('extension'),
        pl.lit(None).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.lit(None).alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    )

    # Concatenate aggregated data
    df_agg = pl.concat([df_agg, df_agg_all]).fill_nan(None)
    n_rows = len(df_agg)
    n_cols = len(df_agg.columns)

    # Load statcast pitch summary data
    statcast_pitch_summary = pl.read_csv('functions/statcast_2024_grouped.csv')

    # Create table
    table = ax.table(cellText=df_agg.fill_nan('—').fill_null('—').to_numpy(), colLabels=df_agg.columns, cellLoc='center',
                     colWidths=[2.3] + [1] * 18, bbox=[0.0, 0, 1, 0.8])
    cells = table.get_celld()

    # Set table properties
    table.auto_set_font_size(False)
    table.set_fontsize(14)
    table.scale(1, 0.5)

    # Data rows (row 0 is the header) use a larger font than the header
    for row in range(1, n_rows + 1):
        for col in range(n_cols):
            cells[row, col].set_fontsize(18)

    # Define color maps; the reversed map is for the column where lower is better
    cmap_sum = mcolors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])
    cmap_sum_r = mcolors.LinearSegmentedColormap.from_list("", ['#FFB000', '#FFFFFF', '#648FFF'])

    # (column index, reference column or None, lower bound, upper bound).
    # With a reference column the bounds are factors applied to its mean;
    # with None they are absolute limits.
    columns_to_color = [(3, 'release_speed', 0.95, 1.05), (11, 'release_extension', 0.9, 1.1), (13, None, 80, 120),
                        (14, None, 30, 70), (15, 'in_zone_rate', 0.7, 1.3), (16, 'chase_rate', 0.7, 1.3),
                        (17, 'whiff_rate', 0.7, 1.3), (18, 'xwobacon', 0.7, 1.3)]

    # Update table cells with colors and text properties
    for i in range(n_rows):
        pitch_check = dict_pitch_desc_type[df_agg['pitch_description'][i]]
        name_cell = cells[(i + 1, 0)]
        cell_text = name_cell.get_text().get_text()

        if cell_text != 'All':
            name_cell.set_facecolor(dict_pitch_name[cell_text])
            # Black text for the pitch names listed here, white for the rest
            text_props = {'color': '#000000', 'fontweight': 'bold'} if cell_text in ['Split-Finger', 'Slider', 'Changeup'] else {'color': '#ffffff', 'fontweight': 'bold'}
            name_cell.set_text_props(**text_props)
            if cell_text == 'Four-Seam Fastball':
                name_cell.get_text().set_text('4-Seam')

        select_df = statcast_pitch_summary.filter(statcast_pitch_summary['pitch_type'] == pitch_check)

        # Apply color to specific columns based on normalized values
        for col, stat, vmin_factor, vmax_factor in columns_to_color:
            cell_value = cells[(i + 1, col)].get_text().get_text()
            if cell_value == '—':
                continue
            if stat:
                baseline = select_df[stat].mean()
                if baseline is None:
                    # No reference rows for this pitch type: leave the cell
                    # uncoloured instead of failing on ``None * factor``.
                    continue
                vmin = baseline * vmin_factor
                vmax = baseline * vmax_factor
            else:
                vmin = vmin_factor
                vmax = vmax_factor
            normalize = mcolors.Normalize(vmin=vmin, vmax=vmax)
            cmap = cmap_sum if col != 18 else cmap_sum_r
            cells[(i + 1, col)].set_facecolor(get_color(float(cell_value.strip('%')), normalize, cmap))

    # Bold black text for the first cell of the last row (the "All" row)
    cells[(n_rows, 0)].set_text_props(color='#000000', fontweight='bold')

    # Update column names
    new_column_names = ['$\\bf{Pitch\\ Name}$', '$\\bf{Count}$', '$\\bf{Pitch\\%}$', '$\\bf{Velocity}$', '$\\bf{iVB}$',
                        '$\\bf{HB}$', '$\\bf{Spin}$', '$\\bf{VAA}$', '$\\bf{HAA}$', '$\\bf{vRel}$', '$\\bf{hRel}$',
                        '$\\bf{Ext.}$', '$\\bf{Axis}$', '$\\bf{tjStuff+}$', '$\\bf{Grade}$', '$\\bf{Zone\\%}$',
                        '$\\bf{Chase\\%}$', '$\\bf{Whiff\\%}$', '$\\bf{xwOBA}$\n$\\bf{Contact}$']

    for i, col_name in enumerate(new_column_names):
        cells[(0, i)].get_text().set_text(col_name)

    # Format cell values
    def format_cells(columns, fmt):
        """Re-render the text of every non-empty cell in ``columns`` with ``fmt``."""
        for col in columns:
            col_idx = df_agg.columns.index(col)
            for row in range(1, n_rows + 1):
                cell_label = cells[(row, col_idx)].get_text()
                cell_value = cell_label.get_text()
                if cell_value != '—':
                    cell_label.set_text(fmt.format(float(cell_value.strip('%'))))

    format_cells(['start_speed', 'ivb', 'hb', 'vaa', 'haa', 'z0', 'x0', 'extension'], '{:,.1f}')
    format_cells(['xwobacon'], '{:,.3f}')
    format_cells(['count_percent', 'zone_percent', 'chase_percent', 'whiff_percent'], '{:,.1%}')
    format_cells(['tj_stuff_plus', 'pitch_grade', 'spin_rate'], '{:,.0f}')

    # Create legend for pitch types
    items_in_order = (df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy())
    colour_pitches = [dict_colour[x] for x in items_in_order]
    label = [dict_pitch[x] for x in items_in_order]
    handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]

    # More than five entries wrap onto a second row, so they are drawn smaller
    if len(label) > 5:
        legend_size, legend_markerscale = 16, 1.7
    else:
        legend_size, legend_markerscale = 20, 2
    ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
              fancybox=True, loc='lower center', fontsize=legend_size, framealpha=1.0,
              markerscale=legend_markerscale, prop={'family': 'calibi', 'size': legend_size})
    ax.axis('off')
689
+
690
def plot_footer(ax: plt.Axes):
    """
    Write the credit, methodology and data-source notes onto ``ax``.

    Parameters
    ----------
    ax : plt.Axes
        Axis that receives the footer text; it is switched off afterwards.
    """
    # Explanation block shown in the middle of the footer
    methodology_note = '''
Colour Coding Compares to League Average By Pitch
tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type
'''
    data_credit = 'Data: MLB, Fangraphs\nImages: MLB, ESPN'

    # Left: author credit, centre: methodology, right: data sources
    ax.text(0, 1, 'By: @TJStats', ha='left', va='top', fontsize=24)
    ax.text(0.5, 0.25, methodology_note, ha='center', va='bottom', fontsize=12)
    ax.text(1, 1, data_credit, ha='right', va='top', fontsize=24)
    ax.axis('off')
711
+
712
+ # Function to get an image from a URL and display it on the given axis
713
def player_headshot(player_input: str, ax: plt.Axes, sport_id: int, season: int):
    """
    Display the player's headshot image on the given axis.

    The image is downloaded from ``img.mlbstatic.com``. If the download
    fails or the response is not a readable image, the axis is switched off
    and left empty.

    Parameters
    ----------
    player_input : str
        The player's ID.
    ax : plt.Axes
        The axis to display the image on.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues). Anything
        ``int()`` accepts is allowed.
    season : int
        The season year. Not used by this function.
    """
    # Decide once: the URL and the image extent must agree even when the id
    # arrives as a string such as "1".
    is_mlb = int(sport_id) == 1

    # Construct the URL for the player's headshot image based on sport ID
    if is_mlb:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png'
    else:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'

    try:
        # Send a GET request to the URL and open the image from the response
        # content. The timeout keeps a stalled connection from blocking the
        # whole figure indefinitely.
        response = requests.get(url, timeout=10)
        img = Image.open(BytesIO(response.content))
    except (PIL.UnidentifiedImageError, requests.RequestException):
        # Best effort: no usable image means an empty panel.
        ax.axis('off')
        return

    # Display the image on the axis
    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    ax.imshow(img, extent=[0, 1, 0, 1] if is_mlb else [1/6, 5/6, 0, 1], origin='upper')

    # Turn off the axis
    ax.axis('off')
749
+
750
def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
    """
    Display the player's bio information on the given axis.

    Fetches the player record and the list of sports from
    ``statsapi.mlb.com`` and writes the name, throwing hand, age,
    height/weight and the season/level line onto ``ax``.

    Parameters
    ----------
    pitcher_id : str
        The player's ID.
    ax : plt.Axes
        The axis to display the bio information on.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues). Anything
        ``int()`` accepts is allowed.
    year_input : int
        The season year.

    Raises
    ------
    requests.RequestException
        If either API request fails or times out.
    IndexError
        If ``sport_id`` is not present in the sports list returned by the API.
    """
    # Construct the URL to fetch player data
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"

    # Send a GET request to the URL and parse the JSON response
    data = requests.get(url, timeout=10).json()

    # Extract player information from the JSON data
    person = data['people'][0]
    player_name = person['fullName']
    pitcher_hand = person['pitchHand']['code']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    # Display the player's name, handedness, age, height, and weight on the axis
    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=56)
    ax.text(0.5, 0.7, f'{pitcher_hand}HP, Age: {age}, {height}/{weight}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.45, 'Season Pitching Summary', va='top', ha='center', fontsize=40)

    # Make API call to retrieve sports information
    response = requests.get(url='https://statsapi.mlb.com/api/v1/sports', timeout=10).json()

    # Convert the JSON response into a Polars DataFrame
    df_sport_id = pl.DataFrame(response['sports'])
    # Normalise the id first so a string such as "1" still matches the
    # integer ``id`` column (``player_headshot`` accepts the same inputs).
    abb = df_sport_id.filter(pl.col('id') == int(sport_id))['abbreviation'][0]

    # Display the season and sport abbreviation
    ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')

    # Turn off the axis
    ax.axis('off')
795
+
796
def plot_logo(pitcher_id: str, ax: plt.Axes, df_team: pl.DataFrame, df_players: pl.DataFrame):
    """
    Draw the logo of the pitcher's (parent) MLB club on ``ax``.

    If any lookup raises ``KeyError`` or ``IndexError`` (for example the
    player is missing from ``df_players`` or the abbreviation has no logo
    entry), the axis is simply switched off and left empty.

    Parameters
    ----------
    pitcher_id : str
        Value matched against the ``player_id`` column of ``df_players``.
    ax : plt.Axes
        Axis to draw the logo on.
    df_team : pl.DataFrame
        Team table; the columns ``team_id``, ``parent_org_id`` and
        ``parent_org_abbreviation`` are read.
    df_players : pl.DataFrame
        Player table; the columns ``player_id`` and ``team`` are read.
    """
    # Team abbreviation -> file slug used by the ESPN logo service.
    # 'ATH' deliberately reuses the 'oak' artwork, as does 'OAK'.
    espn_slugs = {
        "AZ": "ari", "ATL": "atl", "BAL": "bal", "BOS": "bos", "CHC": "chc",
        "CWS": "chw", "CIN": "cin", "CLE": "cle", "COL": "col", "DET": "det",
        "HOU": "hou", "KC": "kc", "LAA": "laa", "LAD": "lad", "MIA": "mia",
        "MIL": "mil", "MIN": "min", "NYM": "nym", "NYY": "nyy", "OAK": "oak",
        "PHI": "phi", "PIT": "pit", "SD": "sd", "SF": "sf", "SEA": "sea",
        "STL": "stl", "TB": "tb", "TEX": "tex", "TOR": "tor", "WSH": "wsh",
        "ATH": "oak",
    }
    image_dict = {
        abbreviation: f"https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/{slug}.png&h=500&w=500"
        for abbreviation, slug in espn_slugs.items()
    }

    try:
        # Team the pitcher is listed with in the player table.
        pitcher_rows = df_players.filter(pl.col('player_id') == pitcher_id)
        team_id = pitcher_rows['team'][0]

        # Fetch that team's record from the MLB Stats API.
        team_record = requests.get(f'https://statsapi.mlb.com/api/v1/teams/{team_id}').json()['teams'][0]
        api_team_id = team_record['id']

        # A team that is itself a parent organisation is looked up by its own
        # ID; otherwise the lookup goes through the parent organisation's ID.
        if api_team_id in df_team['parent_org_id']:
            org_rows = df_team.filter(pl.col('team_id') == api_team_id)
        else:
            org_rows = df_team.filter(pl.col('parent_org_id') == team_record['parentOrgId'])
        team_abb = org_rows['parent_org_abbreviation'][0]

        # Download and decode the logo image.
        logo_url = image_dict[team_abb]
        logo_response = requests.get(logo_url)
        img = Image.open(BytesIO(logo_response.content))

        # The image occupies x in [0.3, 1.3] of a 1.3-wide axis.
        ax.set_xlim(0, 1.3)
        ax.set_ylim(0, 1)
        ax.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')

        ax.axis('off')
    except (KeyError, IndexError):
        # Best effort: leave an empty, frameless axis when a lookup fails.
        ax.axis('off')
        return
885
+
886
# Split name -> numeric code.
# NOTE(review): the meaning of 0/13/14 is not visible in this part of the
# file, and `splits` is not referenced by the functions below — confirm usage.
splits = dict(all=0, left=13, right=14)

# Split name -> suffix appended to table titles ('' for the unsplit view).
splits_title = dict(all='', left=' vs LHH', right=' vs RHH')
899
+
900
+
901
+ def fangraphs_pitching_leaderboards(season: int,
902
+ split: str,
903
+ start_date: str = '2024-01-01',
904
+ end_date: str = '2024-12-31'):
905
+ """
906
+ Fetch pitching leaderboards data from Fangraphs.
907
+
908
+ Parameters
909
+ ----------
910
+ season : int
911
+ The season year.
912
+ split : str
913
+ The split type (e.g., 'All', 'LHH', 'RHH').
914
+ start_date : str, optional
915
+ The start date for the data (default is '2024-01-01').
916
+ end_date : str, optional
917
+ The end date for the data (default is '2024-12-31').
918
+
919
+ Returns
920
+ -------
921
+ pl.DataFrame
922
+ The DataFrame containing the pitching leaderboards data.
923
+ """
924
+ url = f"""
925
+ https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&season={season}&season1={season}
926
+ &startdate={start_date}&enddate={end_date}&ind=0&qual=0&type=8&month=1000&pageitems=500000
927
+ """
928
+
929
+ data = requests.get(url).json()
930
+ df = pl.DataFrame(data=data['data'], infer_schema_length=1000)
931
+ return df
932
+
933
def fangraphs_splits_scrape(player_input: str, year_input: int, start_date: str, end_date: str, split: str) -> pl.DataFrame:
    """
    Pull one pitcher's Fangraphs splits row (two stat groups merged).

    The pitcher's Fangraphs ID is resolved through the season leaderboard,
    then the splits endpoint is queried twice (``strType`` "2" and "1") and
    the first row of each response is merged into a single record.

    Parameters
    ----------
    player_input : str
        Value matched against the leaderboard's ``xMLBAMID`` column.
    year_input : int
        Season year used for the leaderboard lookup.
    start_date : str
        Start of the date range; normalised to ``YYYY-MM-DD``.
    end_date : str
        End of the date range; normalised to ``YYYY-MM-DD``.
    split : str
        One of 'all', 'left', 'right'.

    Returns
    -------
    pl.DataFrame
        Frame built from the merged record.
    """
    # Split name -> list of split codes sent to the endpoint.
    split_codes = {
        'all': [],
        'left': ['5'],
        'right': ['6'],
    }

    endpoint = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"

    # Translate the MLBAM ID into Fangraphs' own player ID.
    season_board = fangraphs_pitching_leaderboards(
        year_input,
        split='All',
        start_date=f'{year_input}-01-01',
        end_date=f'{year_input}-12-31',
    )
    fg_id = str(season_board.filter(pl.col('xMLBAMID') == player_input)['playerid'][0])

    # Request body for the first query (strType "2").
    payload = {
        "strPlayerId": fg_id,
        "strSplitArr": split_codes[split],
        "strGroup": "season",
        "strPosition": "P",
        "strType": "2",
        "strStartDate": pd.to_datetime(start_date).strftime('%Y-%m-%d'),
        "strEndDate": pd.to_datetime(end_date).strftime('%Y-%m-%d'),
        "strSplitTeams": False,
        "dctFilters": [],
        "strStatType": "player",
        "strAutoPt": False,
        "arrPlayerId": [],
        "strSplitArrPitch": [],
        "arrWxTemperature": None,
        "arrWxPressure": None,
        "arrWxAirDensity": None,
        "arrWxElevation": None,
        "arrWxWindSpeed": None
    }

    def _first_row(body):
        # POST the JSON body and return the first record of the 'data' list.
        reply = requests.post(endpoint, data=json.dumps(body), headers={'Content-Type': 'application/json'})
        return reply.json()['data'][0]

    basic_row = _first_row(payload)

    # Same request with strType "1"; overriding an existing key keeps its
    # position, so the serialised body differs only in that value.
    advanced_row = _first_row(dict(payload, strType="1"))

    # Values from the second response win on overlapping keys.
    merged_row = {**basic_row, **advanced_row}
    return pl.DataFrame(merged_row)
1012
+
1013
+
1014
def fangraphs_table(df: pl.DataFrame,
                    ax: plt.Axes,
                    player_input: str,
                    season: int,
                    split: str):
    """
    Render a one-row Fangraphs stat table for a pitcher on ``ax``.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; the first and last ``game_date`` values define the
        date range that is requested and shown in the title.
    ax : plt.Axes
        The axis to plot the table on.
    player_input : str
        The player's ID, forwarded to ``fangraphs_splits_scrape``.
    season : int
        The season year.
    split : str
        The split type; must be a key of ``splits_title``
        ('all', 'left' or 'right').
    """
    first_day = df['game_date'][0]
    last_day = df['game_date'][-1]

    # Pull the pitcher's splits row for the covered date range.
    fg_row = fangraphs_splits_scrape(player_input=player_input,
                                     year_input=season,
                                     start_date=first_day,
                                     end_date=last_day,
                                     split=split)

    shown_stats = ['IP', 'WHIP', 'ERA', 'TBF', 'FIP', 'K%', 'BB%', 'K-BB%']
    plot_table = fg_row.select(shown_stats)

    # Format each value with its configured spec; the '---' placeholder is
    # passed through untouched.
    plot_table_values = []
    for stat in plot_table.columns:
        raw_value = plot_table[stat][0]
        if raw_value != '---':
            plot_table_values.append(format(raw_value, fangraphs_stats_dict[stat]['format']))
        else:
            plot_table_values.append('---')

    table_fg = ax.table(cellText=[plot_table_values],
                        colLabels=plot_table.columns,
                        cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])
    table_fg.set_fontsize(20)

    # Swap the raw column names in the header row for their display labels.
    header_labels = [fangraphs_stats_dict[stat]['table_header'] for stat in plot_table.columns]
    cells = table_fg.get_celld()
    for col_idx, label in enumerate(header_labels):
        cells[(0, col_idx)].get_text().set_text(label)

    # Title: covered date range plus the split suffix.
    ax.text(0.5, 0.9, f'{first_day} to {last_day}{splits_title[split]}',
            va='bottom', ha='center', fontsize=36, fontstyle='italic')
    ax.axis('off')
1065
+
1066
+
1067
def stat_summary_table(df: pl.DataFrame,
                       player_input: int,
                       sport_id: int,
                       ax: plt.Axes,
                       split: str = 'All',
                       game_type: list = ['R']):
    """
    Create a summary table of player statistics.

    Three layouts are produced:

    * a single-game box line when the first and last ``game_id`` in ``df``
      are equal;
    * a rate-stat line from the MLB Stats API for non-MLB sports or for
      spring/playoff games;
    * otherwise the table is delegated to ``fangraphs_table``.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The columns ``game_date``,
        ``game_id``, ``batter_team`` and ``is_whiff`` are read.
    player_input : int
        The player's ID.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues).
    ax : plt.Axes
        The axis to plot the table on.
    split : str, optional
        The split type: 'all', 'left' or 'right', matched case-insensitively
        (default is 'All').
    game_type : list, optional
        Game type codes sent to the API (default is ['R']). The first entry
        selects the label in the title and must be 'R', 'S' or 'P'. The list
        is only read, never modified.
    """

    type_dict = {'R': 'Regular Season',
                 'S': 'Spring',
                 'P': 'Playoffs'}

    split_title = {
        'all': '',
        'right': ' vs RHH',
        'left': ' vs LHH'
    }

    # The lookup tables here and in fangraphs_table are keyed by lowercase
    # names, but the default is 'All'; normalise so the default does not
    # raise KeyError.
    split_key = split.lower()

    # Format start and end dates
    start_date_format = str(pd.to_datetime(df['game_date'][0]).strftime('%m/%d/%Y'))
    end_date_format = str(pd.to_datetime(df['game_date'][-1]).strftime('%m/%d/%Y'))

    # Determine app context based on sport ID
    appContext = 'majorLeague' if sport_id == 1 else 'minorLeague'

    game_type_str = ','.join([str(x) for x in game_type])

    # Fetch player stats from MLB API
    stats_url = (
        f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}'
        f'&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},'
        f'startDate={start_date_format},endDate={end_date_format},gameType=[{game_type_str}])'
    )
    pitcher_stats_call = requests.get(stats_url).json()

    # The last split of the first stats group holds the stat line used here.
    stat_line = pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']
    pitcher_stats_call_df = pl.DataFrame(data=dict(stat_line))

    def _percent(expr):
        # `str.concat('%')` joins a column's rows using '%' as delimiter, so
        # on this one-row frame it never appended the sign; append it
        # explicitly instead.
        return (expr * 100).round(1).cast(pl.Utf8) + pl.lit('%')

    # Add additional calculated columns
    pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
        pl.lit(df['is_whiff'].sum()).alias('whiffs'),
        _percent(pl.col('strikeOuts') / pl.col('battersFaced')).alias('k_percent'),
        _percent(pl.col('baseOnBalls') / pl.col('battersFaced')).alias('bb_percent'),
        _percent((pl.col('strikeOuts') - pl.col('baseOnBalls')) / pl.col('battersFaced')).alias('k_bb_percent'),
        # (13*HR + 3*(BB+HBP) - 2*K) / IP + 3.15, with IP derived from outs.
        (((pl.col('homeRuns') * 13 + 3 * ((pl.col('baseOnBalls')) + (pl.col('hitByPitch'))) - 2 * (pl.col('strikeOuts')))) / ((pl.col('outs')) / 3) + 3.15).round(2).map_elements(lambda x: f"{x:.2f}").alias('fip'),
        _percent(pl.col('strikes') / pl.col('numberOfPitches')).alias('strikePercentage'),
    )

    # Determine columns and title based on game count and sport ID.
    # Header labels are raw strings: '\%' is not a valid escape sequence in
    # a normal literal and only worked by accident.
    if df['game_id'][0] == df['game_id'][-1]:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'earnedRuns', 'hits', 'strikeOuts', 'baseOnBalls', 'hitByPitch', 'homeRuns', 'strikePercentage', 'whiffs'])
        new_column_names = [r'$\bf{IP}$', r'$\bf{PA}$', r'$\bf{ER}$', r'$\bf{H}$', r'$\bf{K}$', r'$\bf{BB}$', r'$\bf{HBP}$', r'$\bf{HR}$', r'$\bf{Strike\%}$', r'$\bf{Whiffs}$']
        title = f'{df["game_date"][0]} vs {df["batter_team"][0]} ({type_dict[game_type[0]]}){split_title[split_key]}'
    elif sport_id != 1 or game_type[0] in ['S', 'P']:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'whip', 'era', 'fip', 'k_percent', 'bb_percent', 'k_bb_percent', 'strikePercentage'])
        new_column_names = [r'$\bf{IP}$', r'$\bf{PA}$', r'$\bf{WHIP}$', r'$\bf{ERA}$', r'$\bf{FIP}$', r'$\bf{K\%}$', r'$\bf{BB\%}$', r'$\bf{K-BB\%}$', r'$\bf{Strike\%}$']
        title = f'{df["game_date"][0]} to {df["game_date"][-1]} ({type_dict[game_type[0]]}{split_title[split_key]})'
    else:
        # MLB regular season over several games: use the Fangraphs table.
        # The season is taken from the first four characters of game_date.
        fangraphs_table(df=df, ax=ax, player_input=player_input, season=int(df['game_date'][0][0:4]), split=split_key)
        return

    # Create and format the table
    table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(), colLabels=pitcher_stats_call_df_small.columns, cellLoc='center', bbox=[0.0, 0.1, 1, 0.7])
    table_fg.set_fontsize(20)
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    # Add title to the plot
    ax.text(0.5, 0.9, title, va='bottom', ha='center', fontsize=36, fontstyle='italic')
    ax.axis('off')
functions/statcast_2024_grouped.csv ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pitch_type,pitch,release_speed,pfx_z,pfx_x,release_spin_rate,release_pos_x,release_pos_z,release_extension,delta_run_exp,swing,whiff,in_zone,out_zone,chase,xwoba,xwobacon,pitch_usage,whiff_rate,in_zone_rate,chase_rate,delta_run_exp_per_100,all
2
+ CH,74155,85.46226726,5.247514143,-3.974501168,1803.342541,-0.507762986,5.740925968,6.449406057,204.631,37385,11538,28912,45151,15250,0.289735649,0.341580895,0.102188463,0.308626454,0.389886049,0.337755531,-0.275950374,
3
+ CS,22,66.38181818,-7.232727273,5.176363636,2039.272727,-1.798181818,6.517727273,6.063636364,-0.629,9,2,10,12,2,0.134666667,0.1945,3.03E-05,0.222222222,0.454545455,0.166666667,2.859090909,
4
+ CU,47579,79.40938533,-9.345106446,4.516206279,2568.859105,-0.676571206,5.943843838,6.401792909,93.572,19910,6150,20751,26738,7749,0.280497676,0.36671832,0.065565706,0.308890005,0.436137792,0.289812252,-0.196666597,
5
+ EP,576,50.51909722,16.35729167,-3.82875,1256.715278,-0.966875,6.647100694,4.442013889,23.643,252,7,207,369,106,0.39714307,0.361505495,0.00079375,0.027777778,0.359375,0.287262873,-4.1046875,
6
+ FA,635,67.81354331,15.86551181,-3.722645669,1674.014469,-1.116377953,6.317716535,4.92488189,15.495,284,29,296,339,73,0.43393491,0.388761905,0.000875055,0.102112676,0.466141732,0.215339233,-2.44015748,
7
+ FC,58379,89.56435814,8.088953962,1.55092437,2389.231716,-0.974536268,5.8461769,6.403954997,-20.39,28753,6674,30002,28189,7757,0.340778229,0.370073593,0.080448524,0.23211491,0.513917676,0.275178261,0.034926943,
8
+ FF,230412,94.27369496,15.72027483,-3.107441897,2296.59179,-0.768543293,5.821400777,6.524392111,-80.284,113157,24741,127386,102722,24808,0.340125691,0.388167436,0.317516664,0.218643124,0.55286183,0.241506201,0.034843671,
9
+ FO,168,82.07916667,1.735714286,0.137857143,946.8154762,-0.533333333,5.891428571,6.666666667,2.539,89,29,60,108,43,0.277987474,0.3952,0.000231511,0.325842697,0.357142857,0.398148148,-1.511309524,
10
+ FS,21727,86.31228886,2.979608782,-8.765506513,1302.399298,-1.464082478,5.742066553,6.508958525,-16.641,11333,3906,7982,13745,4946,0.254878506,0.344396607,0.029940648,0.344657196,0.367376996,0.359839942,0.076591338,
11
+ KC,11916,81.79965592,-9.370896274,4.89529708,2444.16428,-0.878808325,5.940037764,6.434007554,-12.997,5312,1860,4858,7058,2316,0.258451373,0.364636161,0.01642071,0.350150602,0.407687143,0.328138283,0.109071836,
12
+ KN,971,76.94819773,-2.945375901,-5.356498455,263.5632699,-1.230339856,5.542131823,6.45653965,12.681,426,113,428,543,130,0.287038918,0.369510345,0.001338076,0.265258216,0.440782698,0.239410681,-1.305973223,
13
+ PO,55,91.24909091,13.11709091,-6.399272727,2195.381818,-1.494181818,5.861272727,6.305454545,0,0,0,1,54,0,,,7.58E-05,,0.018181818,0,0,
14
+ SC,159,81.02264151,-3.105660377,-8.001509434,2050.597484,-1.053584906,6.110377358,6.064150943,4.623,58,13,63,96,20,0.353494636,0.413142857,0.000219108,0.224137931,0.396226415,0.208333333,-2.90754717,
15
+ SI,116002,93.34805382,7.567078832,-6.14847607,2147.36315,-0.767198351,5.622119363,6.435364206,-32.837,53318,7390,65492,50222,12474,0.350196742,0.364144629,0.159855251,0.138602348,0.564576473,0.248377205,0.028307271,
16
+ SL,116390,85.60138786,1.57598588,2.732511063,2435.570552,-0.981103401,5.761407576,6.433055359,-167.415,56606,19101,52478,63672,20396,0.281860701,0.357665208,0.16038993,0.337437727,0.45088066,0.320329187,0.143839677,
17
+ ST,43821,81.85801556,1.479693298,7.821825152,2575.366192,-1.080187125,5.460724082,6.403526748,-52.968,20035,6276,19349,24472,7531,0.259780708,0.337221732,0.060387036,0.313251809,0.441546291,0.307739457,0.120873554,
18
+ SV,2702,81.67483346,-4.788941525,7.356861584,2470.624859,-0.577957069,5.420762398,6.227296393,0.193,1117,339,1138,1564,479,0.290768371,0.374640553,0.003723461,0.303491495,0.421169504,0.306265985,-0.007142857,
19
+ All,725669,89.15210527,7.058379139,-1.214008754,2255.676825,-0.828252978,5.758824349,6.456550519,-20.178,352163,89742,359413,365054,104080,0.314703752,0.366398,1,0.25483086,0.49528504,0.285108504,0.002780607,all