nesticot committed on
Commit
23f1568
·
verified ·
1 Parent(s): 5c7e1d9

Upload 4 files

Browse files
functions/__pycache__/df_update.cpython-39.pyc ADDED
Binary file (14 kB). View file
 
functions/__pycache__/rolling_batter_functions.cpython-39.pyc ADDED
Binary file (9.32 kB). View file
 
functions/df_update.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+ import numpy as np
3
+ import joblib
4
+
5
# Pre-trained estimators, deserialised once when this module is imported.
# The paths are relative, so they resolve against the process's working directory.
# NOTE: joblib artefacts are pickle-based; only load files from a trusted source.
(
    loaded_model,
    in_zone_model,
    attack_zone_model,
    xwoba_model,
    px_model,
    pz_model,
) = (
    joblib.load(f'joblib_model/{model_file}')
    for model_file in (
        'barrel_model.joblib',
        'in_zone_model_knn_20240410.joblib',
        'model_attack_zone.joblib',
        'xwoba_model.joblib',
        'linear_reg_model_x.joblib',
        'linear_reg_model_z.joblib',
    )
)
11
+
12
+
13
class df_update:
    """Enrich a pitch-level Polars frame with derived columns and summarise it.

    ``update`` adds counting flags, model-backed fills and expected-wOBA
    columns; ``update_summary`` and ``update_summary_select`` aggregate the
    enriched frame into counting totals and rate statistics.
    """

    def __init__(self):
        """Nothing to initialise; the methods work only on their arguments."""
        pass

    def update(self, df_clone: pl.DataFrame) -> pl.DataFrame:
        """Return a copy of ``df_clone`` with the derived per-pitch columns added.

        Parameters:
        df_clone (pl.DataFrame): Pitch-level data. The code reads the columns
            ``type_ab``, ``is_pitch``, ``is_swing``, ``sz_top``, ``sz_bot``,
            ``zone``, ``x``, ``y``, ``px``, ``pz``, ``event_type``,
            ``play_description``, ``play_code``, ``launch_speed``,
            ``launch_angle`` and ``trajectory``.

        Returns:
        pl.DataFrame: A new frame; the input is not modified.

        The module-level models ``px_model``, ``pz_model``, ``in_zone_model``,
        ``attack_zone_model`` and ``xwoba_model`` are used for the predictions.
        """
        df = df_clone.clone()

        # ---- event / description code groups --------------------------------
        hit_codes = ['single', 'double', 'home_run', 'triple']

        ab_codes = [
            'single', 'strikeout', 'field_out',
            'grounded_into_double_play', 'fielders_choice', 'force_out',
            'double', 'field_error', 'home_run', 'triple',
            'double_play',
            'fielders_choice_out', 'strikeout_double_play',
            'other_out', 'triple_play',
        ]

        obp_true_codes = [
            'single', 'walk',
            'double', 'home_run', 'triple',
            'hit_by_pitch', 'intent_walk',
        ]

        obp_codes = [
            'single', 'strikeout', 'walk', 'field_out',
            'grounded_into_double_play', 'fielders_choice', 'force_out',
            'double', 'sac_fly', 'field_error', 'home_run', 'triple',
            'hit_by_pitch', 'double_play', 'intent_walk',
            'fielders_choice_out', 'strikeout_double_play',
            'sac_fly_double_play',
            'other_out', 'triple_play',
        ]

        bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

        # Events worth 0 wOBA, followed by the weights of the events that score.
        woba_out_codes = [
            'strikeout', 'field_out', 'sac_fly', 'force_out',
            'grounded_into_double_play', 'fielders_choice', 'field_error',
            'sac_bunt', 'double_play', 'fielders_choice_out',
            'strikeout_double_play', 'sac_fly_double_play', 'other_out',
        ]
        woba_weights = {
            'walk': 0.689,
            'hit_by_pitch': 0.720,
            'single': 0.881,
            'double': 1.254,
            'triple': 1.589,
            'home_run': 2.048,
        }
        # Every event that counts towards the wOBA denominator.
        woba_codes = woba_out_codes + list(woba_weights)

        # ---- reusable expressions -------------------------------------------
        event = pl.col('event_type')
        play_code = pl.col('play_code')
        exit_velocity = pl.col('launch_speed')
        launch_angle = pl.col('launch_angle')

        def flag(condition):
            """True where ``condition`` holds, False elsewhere (null -> False)."""
            return pl.when(condition).then(True).otherwise(False)

        is_barrel = (
            (exit_velocity * 1.5 - launch_angle >= 117)
            & (exit_velocity + launch_angle >= 124)
            & (exit_velocity >= 98)
            & (launch_angle >= 4)
            & (launch_angle <= 50)
        )

        total_bases = (
            pl.when(event == 'single').then(1)
            .when(event == 'double').then(2)
            .when(event == 'triple').then(3)
            .when(event == 'home_run').then(4)
            .otherwise(None)
        )

        woba_value = (
            pl.when(event.is_in(woba_out_codes)).then(0)
            .when(event == 'walk').then(woba_weights['walk'])
            .when(event == 'hit_by_pitch').then(woba_weights['hit_by_pitch'])
            .when(event == 'single').then(woba_weights['single'])
            .when(event == 'double').then(woba_weights['double'])
            .when(event == 'triple').then(woba_weights['triple'])
            .when(event == 'home_run').then(woba_weights['home_run'])
            .otherwise(None)
        )

        # ---- model predictions ----------------------------------------------
        # NOTE(review): the zone models are fed the raw px/pz with nulls set to
        # 0, not the px/pz predicted just below -- confirm this is intended.
        pitch_location = df[['px', 'pz', 'sz_top', 'sz_bot']].fill_null(0).to_numpy()
        px_predicted = px_model.predict(df[['x']].fill_null(0).to_numpy())[:, 0]
        # 3.2 is a constant offset added to the raw model output.
        pz_predicted = pz_model.predict(df[['y']].fill_null(0).to_numpy())[:, 0] + 3.2
        in_zone_predicted = in_zone_model.predict(pitch_location)[:]
        attack_zone_predicted = attack_zone_model.predict(pitch_location)[:]

        # ---- pass 1: flags that depend only on the raw columns --------------
        df = df.with_columns([
            pl.when(pl.col('type_ab').is_not_null()).then(1).otherwise(0).alias('pa'),
            pl.when(pl.col('is_pitch')).then(1).otherwise(0).alias('pitches'),
            # A strike-zone edge of exactly 0 is treated as missing.
            pl.when(pl.col('sz_top') == 0).then(None).otherwise(pl.col('sz_top')).alias('sz_top'),
            pl.when(pl.col('sz_bot') == 0).then(None).otherwise(pl.col('sz_bot')).alias('sz_bot'),
            pl.when(pl.col('zone') > 0).then(pl.col('zone') < 10).otherwise(None).alias('in_zone'),
            pl.Series('px_predict', px_predicted),
            pl.Series('pz_predict', pz_predicted),
            pl.Series('in_zone_predict', in_zone_predicted),
            pl.Series('attack_zone_predict', attack_zone_predicted),
            flag(event.is_in(hit_codes)).alias('hits'),
            flag(event.is_in(ab_codes)).alias('ab'),
            flag(event.is_in(obp_true_codes)).alias('on_base'),
            flag(event.is_in(obp_codes)).alias('obp'),
            flag(pl.col('play_description').is_in(bip_codes)).alias('bip'),
            # True for barrels, null for everything else (never False); the
            # summaries only ever sum this column.
            pl.when(is_barrel).then(True).otherwise(None).alias('barrel'),
            pl.when(launch_angle.is_null()).then(False)
            .when((launch_angle >= 8) & (launch_angle <= 32)).then(True)
            .otherwise(None).alias('sweet_spot'),
            pl.when(exit_velocity.is_null()).then(False)
            .when(exit_velocity >= 94.5).then(True)
            .otherwise(None).alias('hard_hit'),
            total_bases.alias('tb'),
            woba_value.alias('woba'),
            pl.when(play_code.is_in(['S', 'W', 'T'])).then(1).otherwise(0).alias('whiffs'),
            pl.when(play_code.is_in(['S', 'W', 'T', 'C'])).then(1).otherwise(0).alias('csw'),
            pl.when(pl.col('is_swing').cast(pl.Boolean)).then(1).otherwise(0).alias('swings'),
            flag(event.str.contains('strikeout')).alias('k'),
            flag(event.is_in(['walk', 'intent_walk'])).alias('bb'),
            # The attack zone always comes from the model.
            pl.Series('attack_zone', attack_zone_predicted),
            # Placeholders that keep the column order stable; filled in below.
            pl.lit(None).alias('woba_pred'),
            pl.lit(None).alias('woba_pred_contact'),
        ])

        # ---- pass 2a: fill missing location / zone from the models ----------
        # This must be its own pass: expressions inside one with_columns call
        # all see the frame as it was before the call, so flags built next to
        # the fill would read the unfilled in_zone.
        df = df.with_columns([
            pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
            pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
            pl.when(pl.col('in_zone').is_null()).then(pl.col('in_zone_predict')).otherwise(pl.col('in_zone')).alias('in_zone'),
        ])

        # ---- pass 2b: flags built on the filled zone ------------------------
        # The cast guards against the model returning 0/1 rather than booleans.
        in_zone = pl.col('in_zone').cast(pl.Boolean)
        swung = pl.col('swings') == 1
        made_contact = pl.col('whiffs') == 0

        df = df.with_columns([
            pl.when(event.is_in(woba_codes)).then(1).otherwise(None).alias('woba_codes'),
            pl.when(event.is_in(woba_codes)).then(1).otherwise(None).alias('xwoba_codes'),
            # tb is only set for hits, so this keeps wOBA on hits only; outs
            # would add 0 to the sum anyway.
            pl.when(pl.col('tb') >= 0).then(pl.col('woba')).otherwise(None).alias('woba_contact'),
            pl.lit('average').alias('average'),
            flag(~in_zone).alias('out_zone'),
            flag(in_zone & swung).alias('zone_swing'),
            flag(in_zone & swung & made_contact).alias('zone_contact'),
            flag(~in_zone & swung).alias('ozone_swing'),
            flag(~in_zone & swung & made_contact).alias('ozone_contact'),
        ])

        # ---- pass 3: K/BB differentials and attack-zone splits --------------
        # Attack-zone codes 0..3 map onto these names, in this order.
        zone_names = ('heart', 'shadow', 'chase', 'waste')
        attack_zone = pl.col('attack_zone')
        strikeouts = pl.col('k').cast(pl.Float32)
        walks = pl.col('bb').cast(pl.Float32)

        df = df.with_columns([
            (strikeouts - walks).alias('k_minus_bb'),
            (walks - strikeouts).alias('bb_minus_k'),
            (exit_velocity > 0).alias('bip_div'),
            *(
                (attack_zone == code).alias(name)
                for code, name in enumerate(zone_names)
            ),
            *(
                ((attack_zone == code) & (pl.col('swings') == 1)).alias(f'{name}_swing')
                for code, name in enumerate(zone_names)
            ),
            *(
                ((attack_zone == code) & (pl.col('whiffs') == 1)).alias(f'{name}_whiff')
                for code, name in enumerate(zone_names)
            ),
        ])

        # ---- expected wOBA --------------------------------------------------
        # Weight the five class probabilities and sum them per row.
        # NOTE(review): assumes the model's classes are ordered out, single,
        # double, triple, home run -- confirm against the training labels.
        outcome_weights = np.array([
            0,
            woba_weights['single'],
            woba_weights['double'],
            woba_weights['triple'],
            woba_weights['home_run'],
        ])
        batted_ball = df[['launch_angle', 'launch_speed']].fill_null(0).to_numpy()
        expected_woba = (xwoba_model.predict_proba(batted_ball) * outcome_weights).sum(axis=1)

        df = df.with_columns([
            pl.Series('woba_pred_predict', expected_woba),
        ])

        # Events with no batted ball get their fixed wOBA value instead.
        df = df.with_columns([
            pl.when(event == 'walk').then(woba_weights['walk'])
            .when(event == 'hit_by_pitch').then(woba_weights['hit_by_pitch'])
            .when(event.is_in(['strikeout', 'strikeout_double_play'])).then(0)
            .otherwise(pl.col('woba_pred_predict'))
            .alias('woba_pred_predict'),
        ])

        df = df.with_columns([
            pl.when(pl.col('woba_codes').is_null()).then(None)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred'),
            pl.when(pl.col('bip')).then(pl.col('woba_pred_predict'))
            .otherwise(None).alias('woba_pred_contact'),
        ])

        # ---- trajectory one-hot columns -------------------------------------
        trajectory = pl.col('trajectory')
        df = df.with_columns([
            pl.when(trajectory == 'bunt_popup').then(pl.lit('popup'))
            .when(trajectory == 'bunt_grounder').then(pl.lit('ground_ball'))
            .when(trajectory == 'bunt_line_drive').then(pl.lit('line_drive'))
            .when(trajectory == '').then(pl.lit(None))
            .otherwise(trajectory).alias('trajectory'),
        ])

        dummy_df = df.select(pl.col('trajectory')).to_dummies()

        # A frame without, say, any popups has no 'trajectory_popup' dummy; add
        # the missing ones as zeros so the summaries can always sum them.
        for col in ['trajectory_fly_ball', 'trajectory_ground_ball',
                    'trajectory_line_drive', 'trajectory_popup']:
            if col not in dummy_df.columns:
                dummy_df = dummy_df.with_columns(pl.lit(0).alias(col))

        df = df.hstack(dummy_df)

        # to_dummies creates this column for null trajectories; it is not used.
        if 'trajectory_null' in df.columns:
            df = df.drop('trajectory_null')

        return df

    def update_summary(self, df: pl.DataFrame, pitcher: bool = True) -> pl.DataFrame:
        """
        Summarise an updated frame per pitcher or per batter.

        Parameters:
        df (pl.DataFrame): A frame produced by ``update``.
        pitcher (bool): Group by ``pitcher_id``/``pitcher_name`` when True,
            otherwise by ``batter_id``/``batter_name``.

        Returns:
        pl.DataFrame: One row per player with counting totals and rate statistics.
        """
        position = 'pitcher' if pitcher else 'batter'
        return self.update_summary_select(df, [f'{position}_id', f'{position}_name'])

    def update_summary_select(self, df: pl.DataFrame, selection: list) -> pl.DataFrame:
        """
        Summarise an updated frame over arbitrary grouping columns.

        Parameters:
        df (pl.DataFrame): A frame produced by ``update``.
        selection (list): Column names to group by.

        Returns:
        pl.DataFrame: One row per group with counting totals and rate statistics.
        """
        zone_names = ('heart', 'shadow', 'chase', 'waste')

        def total(source, name=None):
            """Sum ``source`` within each group, optionally under a new name."""
            return pl.col(source).sum().alias(name or source)

        def ratio(name, numerator, denominator):
            """Divide one aggregated column by another."""
            return (pl.col(numerator) / pl.col(denominator)).alias(name)

        df_summ = df.group_by(selection).agg([
            total('pa'),
            total('ab'),
            total('obp', 'obp_pa'),
            *(total(col) for col in (
                'hits', 'on_base', 'k', 'bb', 'bb_minus_k', 'csw', 'bip',
                'bip_div', 'tb', 'woba', 'woba_contact',
            )),
            total('woba_pred', 'xwoba'),
            total('woba_pred_contact', 'xwoba_contact'),
            *(total(col) for col in (
                'woba_codes', 'xwoba_codes', 'hard_hit', 'barrel', 'sweet_spot',
            )),
            pl.col('launch_speed').max().alias('max_launch_speed'),
            pl.col('launch_speed').quantile(0.90).alias('launch_speed_90'),
            pl.col('launch_speed').mean().alias('launch_speed'),
            pl.col('launch_angle').mean().alias('launch_angle'),
            total('is_pitch', 'pitches'),
            *(total(col) for col in (
                'swings', 'in_zone', 'out_zone', 'whiffs', 'zone_swing',
                'zone_contact', 'ozone_swing', 'ozone_contact',
            )),
            total('trajectory_ground_ball', 'ground_ball'),
            total('trajectory_line_drive', 'line_drive'),
            total('trajectory_fly_ball', 'fly_ball'),
            total('trajectory_popup', 'pop_up'),
            pl.col('attack_zone').count().alias('attack_zone'),
            *(total(name) for name in zone_names),
            *(total(f'{name}_swing') for name in zone_names),
            *(total(f'{name}_whiff') for name in zone_names),
        ])

        # Rate statistics derived from the totals above.
        df_summ = df_summ.with_columns([
            ratio('avg', 'hits', 'ab'),
            ratio('obp', 'on_base', 'obp_pa'),
            ratio('slg', 'tb', 'ab'),
            (pl.col('on_base') / pl.col('obp_pa') + pl.col('tb') / pl.col('ab')).alias('ops'),
            ratio('k_percent', 'k', 'pa'),
            ratio('bb_percent', 'bb', 'pa'),
            ratio('bb_minus_k_percent', 'bb_minus_k', 'pa'),
            ratio('bb_over_k_percent', 'bb', 'k'),
            ratio('csw_percent', 'csw', 'pitches'),
            ratio('sweet_spot_percent', 'sweet_spot', 'bip_div'),
            ratio('woba_percent', 'woba', 'woba_codes'),
            ratio('woba_percent_contact', 'woba_contact', 'bip'),
            ratio('hard_hit_percent', 'hard_hit', 'bip_div'),
            ratio('barrel_percent', 'barrel', 'bip_div'),
            ratio('zone_contact_percent', 'zone_contact', 'zone_swing'),
            ratio('zone_swing_percent', 'zone_swing', 'in_zone'),
            ratio('zone_percent', 'in_zone', 'pitches'),
            (pl.col('ozone_swing') / (pl.col('pitches') - pl.col('in_zone'))).alias('chase_percent'),
            ratio('chase_contact', 'ozone_contact', 'ozone_swing'),
            ratio('swing_percent', 'swings', 'pitches'),
            ratio('whiff_rate', 'whiffs', 'swings'),
            ratio('swstr_rate', 'whiffs', 'pitches'),
            ratio('ground_ball_percent', 'ground_ball', 'bip'),
            ratio('line_drive_percent', 'line_drive', 'bip'),
            ratio('fly_ball_percent', 'fly_ball', 'bip'),
            ratio('pop_up_percent', 'pop_up', 'bip'),
            *(ratio(f'{name}_zone_percent', name, 'attack_zone') for name in zone_names),
            *(ratio(f'{name}_zone_swing_percent', f'{name}_swing', name) for name in zone_names),
            *(ratio(f'{name}_zone_whiff_percent', f'{name}_whiff', f'{name}_swing') for name in zone_names),
            ratio('xwoba_percent', 'xwoba', 'xwoba_codes'),
            ratio('xwoba_percent_contact', 'xwoba_contact', 'bip'),
        ])

        return df_summ
functions/rolling_batter_functions.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ import numpy as np
6
+ from scipy.stats import gaussian_kde
7
+ import matplotlib
8
+ from matplotlib.ticker import MaxNLocator
9
+ from matplotlib.gridspec import GridSpec
10
+ from scipy.stats import zscore
11
+ import math
12
+ import matplotlib
13
+ from adjustText import adjust_text
14
+ import matplotlib.ticker as mtick
15
+ import pandas as pd
16
+ from matplotlib.pyplot import text
17
+ import inflect
18
+
19
+
20
+ colour_palette = ['#FFB000','#648FFF','#785EF0',
21
+ '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
22
+
23
+ plot_dict = {
24
+ 'k':{'x_axis':'Plate Appearances','y_axis':'K%','title':'K%','x_value':'k','x_range':[0.0,0.1,0.2,0.3,0.4],'percent':True,'percentile_label':'k_percent','flip_p':True,'percentile':False,'avg_adjust':False},
25
+ 'bb':{'x_axis':'Plate Appearances','y_axis':'BB%','title':'BB%','x_value':'bb','x_range':[0.0,0.1,0.2,0.3],'percent':True,'percentile_label':'bb_percent','flip_p':False,'percentile':False,'avg_adjust':False},
26
+ 'bb_minus_k':{'x_axis':'Plate Appearances','y_axis':'BB-K%','title':'BB-K%','x_value':'bb_minus_k','x_range':[-0.3,-0.2,-0.1,0,0.1,0.2],'percent':True,'percentile_label':'bb_minus_k_percent','flip_p':False,'percentile':False,'avg_adjust':False},
27
+ 'csw':{'x_axis':'Pitches','y_axis':'CSW%','title':'CSW%','x_value':'csw','x_range':[.2,.25,.3,.35,.4],'percent':True,'percentile_label':'csw_percent','flip_p':True,'percentile':False,'avg_adjust':False},
28
+ 'woba':{'x_axis':'wOBA PA','y_axis':'wOBA','title':'wOBA','x_value':'woba','x_range':[.20,.30,.40,.50],'percent':False,'percentile_label':'woba_percent','flip_p':False,'percentile':False,'avg_adjust':True},
29
+ 'launch_speed':{'x_axis':'Balls In Play','y_axis':'Exit Velocity','title':'Exit Velocity','x_value':'launch_speed','x_range':[85,90,95,100],'percent':False,'percentile_label':'launch_speed','flip_p':False,'percentile':False,'avg_adjust':False},
30
+ 'launch_speed_90':{'x_axis':'Balls In Play','y_axis':'90th Percentile Exit Velocity','title':'90th Percentile Exit Velocity','x_value':'launch_speed','x_range':[95,100,105,110,115],'percent':False,'percentile_label':'launch_speed_90','flip_p':False,'percentile':True,'avg_adjust':False},
31
+ 'hard_hit':{'x_axis':'Balls In Play','y_axis':'HardHit%','title':'HardHit%','x_value':'hard_hit','x_range':[0.2,0.3,0.4,0.5,0.6,0.7],'percent':True,'percentile_label':'hard_hit_percent','flip_p':False,'percentile':False,'avg_adjust':False},
32
+ 'sweet_spot':{'x_axis':'Balls In Play','y_axis':'SweetSpot%','title':'SweetSpot%','x_value':'sweet_spot','x_range':[0.2,0.3,0.4,0.5],'percent':True,'percentile_label':'sweet_spot_percent','flip_p':False,'percentile':False,'avg_adjust':False},
33
+ 'launch_angle':{'x_axis':'Balls In Play','y_axis':'Launch Angle','title':'Launch Angle','x_value':'launch_angle','x_range':[-20,-10,0,10,20],'percent':False,'percentile_label':'launch_angle','flip_p':False,'percentile':False,'avg_adjust':False},
34
+ 'barrel':{'x_axis':'Balls In Play','y_axis':'Barrel%','title':'Barrel%','x_value':'barrel','x_range':[0,0.05,0.10,.15,.20],'percent':True,'percentile_label':'barrel_percent','flip_p':False,'percentile':False,'avg_adjust':False},
35
+ 'zone_percent':{'x_axis':'Pitches','y_axis':'Zone%','title':'Zone%','x_value':'in_zone','x_range':[0.3,0.4,0.5,0.6,0.7],'percent':True,'percentile_label':'zone_percent','flip_p':False,'percentile':False,'avg_adjust':False},
36
+ 'swing_percent':{'x_axis':'Pitches','y_axis':'Swing%','title':'Swing%','x_value':'swings','x_range':[0.2,0.3,0.4,0.5,0.6,0.7,0.8],'percent':True,'percentile_label':'swing_percent','flip_p':False,'percentile':False,'avg_adjust':False},
37
+ 'whiff_percent':{'x_axis':'Swings','y_axis':'Whiff%','title':'Whiff%','x_value':'whiffs','x_range':[0.0,0.1,0.2,0.3,0.4,0.5],'percent':True,'percentile_label':'whiff_rate','flip_p':True,'percentile':False,'avg_adjust':False},
38
+ 'sw_str':{'x_axis':'Pitches','y_axis':'SwStr%','title':'SwStr%','x_value':'whiffs','x_range':[0.0,0.05,0.1,0.15,0.2,0.25],'percent':True,'percentile_label':'swstr_rate','flip_p':True,'percentile':False,'avg_adjust':False},
39
+ 'zone_swing':{'x_axis':'In-Zone Pitches','y_axis':'Z-Swing%','title':'Z-Swing%','x_value':'zone_swing','x_range':[0.3,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1],'percent':True,'percentile_label':'zone_swing_percent','flip_p':False,'percentile':False,'avg_adjust':False},
40
+ 'zone_contact':{'x_axis':'In-Zone Swings','y_axis':'Z-Contact%','title':'Z-Contact%','x_value':'zone_contact','x_range':[0.5,0.6,0.7,0.8,0.9,1],'percent':True,'percentile_label':'zone_contact_percent','flip_p':False,'percentile':False,'avg_adjust':False},
41
+ 'chase_percent':{'x_axis':'Out-of-Zone Pitches','y_axis':'O-Swing%','title':'O-Swing%','x_value':'ozone_swing','x_range':[0.0,0.1,0.2,0.3,0.4,0.5],'percent':True,'percentile_label':'chase_percent','flip_p':True,'percentile':False,'avg_adjust':False},
42
+ 'chase_contact':{'x_axis':'Out-of-Zone Swings','y_axis':'O-Contact%','title':'O-Contact%','x_value':'ozone_contact','x_range':[0.2,0.3,0.4,0.5,0.6,0.7,0.8],'percent':True,'percentile_label':'chase_contact','flip_p':False,'percentile':False,'avg_adjust':False},}
43
+
44
+
45
# Competition levels offered by the app. Every level maps to itself, so the
# key and the display label are the same string; insertion order is preserved.
level_dict = {level: level for level in ('MLB', 'AAA', 'AA', 'A+', 'A')}
46
+
47
+
48
# Stat ids grouped by the denominator population that rolling_plot uses for
# them. Each stat id belongs to exactly one group.

# Denominator: rows where woba_codes == 1.
woba_list = ['woba']

# Denominator: plate appearances (pa == 1).
pa_list = ['k', 'bb', 'bb_minus_k']

# Denominator: balls in play (bip).
balls_in_play_list = [
    'hard_hit',
    'launch_speed',
    'launch_speed_90',
    'launch_angle',
    'barrel',
    'sweet_spot',
]

# Denominator: all pitches (pitches == 1).
pitches_list = ['zone_percent', 'swing_percent', 'sw_str', 'csw']

# Denominator: swings (swings == 1).
swings_list = ['whiff_percent']

# Denominator: in-zone pitches (in_zone).
in_zone_pitches_list = ['zone_swing']

# Denominator: in-zone swings (zone_swing).
in_zone_swings_list = ['zone_contact']

# Denominator: out-of-zone pitches (in_zone == False).
out_zone_pitches_list = ['chase_percent']

# Denominator: out-of-zone swings (ozone_swing).
out_zone_swings_list = ['chase_contact']
57
+
58
# Stat id -> short display label. Insertion order is preserved from the
# original literal in case callers iterate it to build a selection list.
plot_dict_small = {
    'k': 'K%',
    'bb': 'BB%',
    'bb_minus_k': 'BB-K%',
    'csw': 'CSW%',
    'woba': 'wOBA',
    'launch_speed': 'Exit Velocity',
    'launch_speed_90': '90th Percentile Exit Velocity',
    'hard_hit': 'HardHit%',
    'sweet_spot': 'SweetSpot%',
    'launch_angle': 'Launch Angle',
    'zone_percent': 'Zone%',
    'barrel': 'Barrel%',
    'swing_percent': 'Swing%',
    'whiff_percent': 'Whiff%',
    'sw_str': 'SwStr%',
    'zone_swing': 'Z-Swing%',
    'zone_contact': 'Z-Contact%',
    'chase_percent': 'O-Swing%',
    'chase_contact': 'O-Contact%',
}
78
+
79
+
80
+
81
def _stat_population(df, stat_id):
    """Return ``(row_mask, divisor_column)`` for the denominator of *stat_id*.

    ``row_mask`` selects the rows of *df* that count towards the stat's
    denominator and ``divisor_column`` names the column summed to obtain
    that denominator. Masks are built lazily so only the column the stat
    actually needs is touched.

    Raises:
        ValueError: if *stat_id* is not listed in any stat group.
    """
    groups = (
        (pa_list, 'pa', lambda d: d.pa == 1),
        (balls_in_play_list, 'bip', lambda d: d.bip),
        (pitches_list, 'pitches', lambda d: d.pitches == 1),
        (swings_list, 'swings', lambda d: d.swings == 1),
        (in_zone_pitches_list, 'in_zone', lambda d: d.in_zone),
        (in_zone_swings_list, 'zone_swing', lambda d: d.zone_swing),
        (out_zone_pitches_list, 'out_zone', lambda d: d.in_zone.eq(False)),
        (out_zone_swings_list, 'ozone_swing', lambda d: d.ozone_swing),
        (woba_list, 'woba_codes', lambda d: d.woba_codes == 1),
    )
    for members, divisor, build_mask in groups:
        if stat_id in members:
            return build_mask(df), divisor
    raise ValueError(f'Unsupported stat_id: {stat_id!r}')


def _percentile_rank(pct, flip):
    """Turn a 0-1 percentile rank into a whole-number percentile.

    When *flip* is true the scale is inverted (a low raw value ranks high).
    The result is rounded rather than truncated: ``int(0.57 * 100)`` is 56
    because of binary floating point, which under-reported the percentile.
    """
    share = round(float(pct), 2)
    if flip:
        share = 1 - share
    return int(round(share * 100))


def rolling_plot(df,df_summ,player_id,stat_id,batter_dict,window_select,level_id):
    """Draw a rolling-window chart of one stat for one batter at one level.

    The chart shows the batter's rolling value, the batter's overall value,
    the level-wide value, and dotted 90th/75th/25th/10th percentile lines
    computed over the qualified batters in *df_summ*.

    Relies on module-level names not defined in this function: ``plot_dict``,
    ``colour_palette``, the ``*_list`` stat groups, and the ``sns``, ``plt``,
    ``mtick`` and ``inflect`` imports.

    Args:
        df: Row-level pandas DataFrame. Must provide ``game_date`` (string),
            ``batter_id``, ``level``, the stat's ``x_value`` column and the
            denominator column for the stat's group.
        df_summ: Per-batter summary DataFrame. Must provide ``batter_id``,
            ``level``, the denominator column and the stat's
            ``percentile_label`` column.
        player_id: Batter id convertible with ``int()``, or ``""`` when no
            player is selected.
        stat_id: Key of ``plot_dict`` naming the stat to plot.
        batter_dict: Mapping from integer batter id to the name shown in the
            title and legend.
        window_select: Rolling window size, convertible with ``int()``. It is
            also the minimum denominator a batter needs to be ranked.
        level_id: Value matched against the ``level`` columns.

    Returns:
        None. The figure is created through pyplot as a side effect.

    Raises:
        ValueError: if *stat_id* belongs to no stat group.
    """
    season_title = df['game_date'].str[0:4].values[0]
    sns.set_theme(style="whitegrid", palette="pastel")
    if player_id == "":
        fig = plt.figure(figsize=(12, 12))
        fig.text(s='Please Select a Pitcher',x=0.5,y=0.5)
        return

    swing_min = int(window_select)
    batter_id = int(player_id)
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    fig.set_facecolor('white')

    spec = plot_dict[stat_id]
    x_value = spec['x_value']
    label_col = spec['percentile_label']

    # Rows that make up the stat's denominator, for the player and the level.
    population, divisor_x = _stat_population(df, stat_id)
    level_rows = df.level == level_id
    player_df = df[population & (df.batter_id == batter_id) & level_rows]
    league_df = df[level_rows]
    n_events = len(player_df)

    # Batters at this level with at least one full window of events. The
    # percentile is ranked on the single column that is needed; the previous
    # set_index('batter_id','batter_name','level') call passed the extra names
    # as the `drop`/`append` arguments, which pandas >= 2.0 rejects.
    qualified = df_summ[(df_summ[divisor_x] >= swing_min) & (df_summ.level == level_id)].copy()
    qualified['batter_id'] = qualified['batter_id'].astype(int)
    player_rows = qualified['batter_id'] == batter_id
    if not player_rows.any():
        ax.text(x=0.5,y=0.5,s='Please Select Different Parameters to Produce a plot',fontsize=18,ha='center')
        return
    player_pct = qualified[label_col].rank(pct=True)[player_rows].iloc[0]

    p = inflect.engine()
    percentile_text = f'({p.ordinal(_percentile_rank(player_pct, spec["flip_p"]))} Percentile)'

    # Overall values: ratio of summed numerator to summed denominator.
    league_ratio = league_df[x_value].sum()/league_df[divisor_x].sum()
    player_ratio = player_df[x_value].sum()/player_df[divisor_x].sum()

    # Legend values and number format. Precedence matches the original
    # override order: avg_adjust, then percentile, then percent.
    league_label, player_label = league_ratio, player_ratio
    if spec['avg_adjust']:
        number_format = '.3f'
    elif spec['percentile']:
        league_label = league_df[x_value].quantile(0.9)
        player_label = player_df[x_value].quantile(0.9)
        number_format = '.1f'
    elif spec['percent']:
        number_format = '.1%'
    else:
        number_format = '.1f'
    if spec['percent']:
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1))

    label_1 = f'{level_id} Average {format(league_label, number_format)}'
    label_2 = f'{batter_dict[batter_id]} Average {format(player_label, number_format)} {percentile_text}'

    # Dotted percentile reference lines. For flipped stats a low raw value is
    # the "90th percentile", so the quantile order is reversed while labels
    # and colours stay in 90/75/25/10 order. Each label box shares the colour
    # of the line it annotates.
    if spec['flip_p']:
        band_quantiles = (0.1, 0.25, 0.75, 0.9)
        label_x = min(swing_min + swing_min/100, swing_min + swing_min + 3)
    else:
        band_quantiles = (0.9, 0.75, 0.25, 0.1)
        label_x = min(swing_min + swing_min/100, swing_min + 1)
    band_names = ('90th %', '75th %', '25th %', '10th %')
    for offset, (quantile, band_name) in enumerate(zip(band_quantiles, band_names)):
        band_value = qualified[label_col].quantile(quantile)
        band_colour = colour_palette[2 + offset]
        ax.hlines(y=band_value,xmin=swing_min,xmax=n_events,color=band_colour,linestyle='dotted',alpha=0.5)
        ax.text(label_x, band_value, band_name, rotation=0, ha='left',
                bbox=dict(facecolor='white',alpha=0.5, edgecolor=band_colour, pad=2))

    # League line, player line and the rolling series itself.
    event_numbers = range(1, n_events + 1)
    filled = player_df[x_value].fillna(0)
    if spec['percentile']:
        league_line = league_df[x_value].quantile(0.9)
        # NOTE(review): this line treats missing values as 0 while the legend
        # value above ignores them, so the two can disagree - confirm intent.
        player_line = filled.quantile(0.9)
        rolling_series = filled.rolling(window=swing_min).quantile(0.9)
    else:
        league_line = league_ratio
        player_line = player_ratio
        rolling_series = filled.rolling(window=swing_min).sum()/swing_min

    ax.hlines(y=league_line,xmin=swing_min,xmax=n_events,color=colour_palette[1],linestyle='-.',label=label_1)
    ax.hlines(y=player_line,xmin=swing_min,xmax=n_events,color=colour_palette[0],linestyle='--',label=label_2)
    sns.lineplot(x=event_numbers,y=rolling_series,color=colour_palette[0],linewidth=3,ax=ax)

    # Axes, title, legend and credits.
    ax.set_xlim(swing_min, n_events)
    ax.set_title(f'{batter_dict[batter_id]} - {season_title} - {level_id} - {swing_min} {spec["x_axis"]} Rolling {spec["title"]}', fontsize=16,fontname='Century Gothic',)
    ax.set_xlabel(spec['x_axis'], fontsize=16,fontname='Century Gothic')
    ax.set_ylabel(spec['y_axis'], fontsize=16,fontname='Century Gothic')
    ax.set_yticks(spec["x_range"])
    ax.legend(fontsize=16)
    fig.text(x=0.03,y=0.02,s='By: @TJStats',fontname='Century Gothic')
    fig.text(x=1-0.03,y=0.02,s='Data: MLB',ha='right',fontname='Century Gothic')
    fig.tight_layout()
    return