nesticot committed on
Commit
83079fe
·
verified ·
1 Parent(s): f97ff11

Upload batting_update.py

Browse files
Files changed (1) hide show
  1. batting_update.py +613 -0
batting_update.py ADDED
@@ -0,0 +1,613 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+ import math
5
+ import pickle
6
+
7
# Pre-trained models, deserialized once at import time from paths relative to
# the current working directory.
# NOTE(review): `loaded_model` and `barrel_model` are read from the same file;
# in the visible code `loaded_model` is only referenced in a commented-out line.
loaded_model = joblib.load('joblib_model/barrel_model.joblib')
# Used by df_update to fill in a missing `in_zone` flag from px/pz/sz_top/sz_bot.
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
# Used by df_update to label each pitch with an attack zone (0-3).
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
# Used by df_update (predict_proba) on launch_angle/launch_speed for xwOBA.
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
# Used by df_update to fill missing `px` from `x` (and also `pz` from `y`).
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
# NOTE(review): `pz_model` is not referenced anywhere in the visible code.
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
# Used by df_update to flag barrels from launch_speed/launch_angle.
barrel_model = joblib.load('joblib_model/barrel_model.joblib')
14
+
15
+
16
def percentile(n):
    """Build an aggregation callable that returns the NaN-ignoring n-th percentile.

    The returned function is renamed to ``percentile_<n>`` so that it can be
    told apart from other percentile aggregators.
    """
    def percentile_(values):
        # nanpercentile skips NaN entries instead of propagating them.
        return np.nanpercentile(values, q=n)

    label = 'percentile_%s' % n
    percentile_.__name__ = label
    return percentile_
21
+
22
+
23
def df_update(df=pd.DataFrame()):
    """Add per-pitch batting indicator and model-derived columns to ``df``.

    The frame is modified in place and also returned.  The columns read are
    ``sz_top``, ``sz_bot``, ``zone``, ``x``, ``px``, ``y``, ``pz``,
    ``event_type``, ``play_description``, ``launch_speed``, ``launch_angle``,
    ``play_code``, ``is_swing``, ``is_pitch``, ``pitch_type`` and
    ``trajectory``.  The module-level models ``px_model``, ``in_zone_model``,
    ``barrel_model``, ``attack_zone_model`` and ``xwoba_model`` are used for
    the predicted columns.

    Every model call is guarded by the exact row mask it predicts on, so a
    frame with no eligible rows never sends an empty array to a model.

    Parameters:
        df: pitch-level DataFrame containing the columns listed above.

    Returns:
        The same DataFrame object with the derived columns added.
    """
    # A strike-zone edge of exactly 0 is treated as missing.
    df.loc[df['sz_top'] == 0, 'sz_top'] = np.nan
    df.loc[df['sz_bot'] == 0, 'sz_bot'] = np.nan

    # Zones below 10 count as in-zone; a non-positive or missing zone is NaN.
    df['in_zone'] = [x < 10 if x > 0 else np.nan for x in df['zone']]

    # Fill missing plate coordinates from the x / y columns.
    missing_px = df['x'].notna() & df['px'].isna()
    if missing_px.any():
        df.loc[missing_px, 'px'] = px_model.predict(df.loc[missing_px, ['x']])

    # NOTE(review): pz is filled with px_model plus a 3.2 offset, exactly as
    # in the original code; pz_model is loaded but unused -- confirm intent.
    missing_pz = df['y'].notna() & df['pz'].isna()
    if missing_pz.any():
        df.loc[missing_pz, 'pz'] = px_model.predict(df.loc[missing_pz, ['y']]) + 3.2

    # Predict in_zone where it is unknown but all location features exist.
    zone_features = ['px', 'pz', 'sz_top', 'sz_bot']
    has_location = df[zone_features].notna().all(axis=1)
    missing_zone = df['in_zone'].isna() & has_location
    if missing_zone.any():
        print('We found missing data')
        df.loc[missing_zone, 'in_zone'] = in_zone_model.predict(
            df.loc[missing_zone, zone_features].values)

    hit_codes = ['single',
                 'double', 'home_run', 'triple']

    ab_codes = ['single', 'strikeout', 'field_out',
                'grounded_into_double_play', 'fielders_choice', 'force_out',
                'double', 'field_error', 'home_run', 'triple',
                'double_play',
                'fielders_choice_out', 'strikeout_double_play',
                'other_out', 'triple_play']

    obp_true_codes = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']

    obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                 'hit_by_pitch', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play',
                 'other_out', 'triple_play']

    bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

    # Boolean event flags.
    df['hits'] = df['event_type'].isin(hit_codes)
    df['ab'] = df['event_type'].isin(ab_codes)
    df['on_base'] = df['event_type'].isin(obp_true_codes)
    df['obp'] = df['event_type'].isin(obp_codes)
    df['bip'] = df['play_description'].isin(bip_codes)

    # Denominator for batted-ball rates: rows that have a launch speed.
    df['bip_div'] = df['launch_speed'].notna()

    # Barrel flag only where both launch metrics exist; NaN elsewhere.
    batted = df['launch_speed'].notna() & df['launch_angle'].notna()
    df['barrel'] = np.nan
    if batted.any():
        df.loc[batted, 'barrel'] = barrel_model.predict(
            df.loc[batted, ['launch_speed', 'launch_angle']])

    # np.select takes the first matching condition: a missing launch angle
    # gives 0, an angle in [8, 32] gives 1, anything else stays NaN.
    conditions_ss = [
        df['launch_angle'].isna(),
        (df['launch_angle'] >= 8) & (df['launch_angle'] <= 32),
    ]
    df['sweet_spot'] = np.select(conditions_ss, [False, True], default=np.nan)

    # Same pattern: missing speed gives 0, speed >= 94.5 gives 1, else NaN.
    conditions_hh = [
        df['launch_speed'].isna(),
        df['launch_speed'] >= 94.5,
    ]
    df['hard_hit'] = np.select(conditions_hh, [False, True], default=np.nan)

    # Total bases for hits; NaN for every other event.
    conditions_tb = [
        df['event_type'] == 'single',
        df['event_type'] == 'double',
        df['event_type'] == 'triple',
        df['event_type'] == 'home_run',
    ]
    df['tb'] = np.select(conditions_tb, [1, 2, 3, 4], default=np.nan)

    # wOBA value per event; events not listed stay NaN.
    conditions_woba = [
        df['event_type'].isin(['strikeout', 'field_out', 'sac_fly', 'force_out',
                               'grounded_into_double_play', 'fielders_choice', 'field_error',
                               'sac_bunt', 'double_play', 'fielders_choice_out', 'strikeout_double_play',
                               'sac_fly_double_play', 'other_out']),
        df['event_type'] == 'walk',
        df['event_type'] == 'hit_by_pitch',
        df['event_type'] == 'single',
        df['event_type'] == 'double',
        df['event_type'] == 'triple',
        df['event_type'] == 'home_run',
    ]
    choices_woba = [0,
                    0.696,
                    0.726,
                    0.883,
                    1.244,
                    1.569,
                    2.004]
    df['woba'] = np.select(conditions_woba, choices_woba, default=np.nan)

    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']

    # 1 for events that count toward the wOBA denominator, NaN otherwise.
    df['woba_codes'] = np.select([df['event_type'].isin(woba_codes)], [1], default=np.nan)

    # wOBA restricted to balls in play.
    df['woba_contact'] = df['woba'].where(df['bip'] == 1)

    # Swing / whiff / called-strike indicators from the pitch result code.
    df['whiffs'] = df['play_code'].isin(['S', 'W', 'T']).astype(int)
    df['csw'] = df['play_code'].isin(['S', 'W', 'T', 'C']).astype(int)
    df['swings'] = (df['is_swing'] == True).astype(int)

    in_zone = df['in_zone'] == True
    out_zone = df['in_zone'] == False
    swung = df['swings'] == 1
    made_contact = df['whiffs'] == 0
    df['out_zone'] = out_zone
    df['zone_swing'] = in_zone & swung
    df['zone_contact'] = in_zone & swung & made_contact
    df['ozone_swing'] = out_zone & swung
    df['ozone_contact'] = out_zone & swung & made_contact

    # Any event type whose name contains 'strikeout' counts as a strikeout.
    strikeout_events = [e for e in df['event_type'].dropna().unique() if 'strikeout' in e]
    df['k'] = df['event_type'].isin(strikeout_events)
    df['bb'] = df['event_type'].isin(['walk', 'intent_walk'])

    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)
    df['bb_minus_k'] = df['bb'].astype(np.float32) - df['k'].astype(np.float32)

    # A plate appearance is any row whose event_type is a string.
    df['pa'] = [1 if isinstance(x, str) else 0 for x in df['event_type']]
    # Plain truthiness is kept on purpose (NaN is truthy, as in the original).
    df['pitches'] = [1 if x else 0 for x in df['is_pitch']]

    pitch_cat = {'FA': 'Fastball',
                 'FF': 'Fastball',
                 'FT': 'Fastball',
                 'FC': 'Fastball',
                 'FS': 'Off-Speed',
                 'FO': 'Off-Speed',
                 'SI': 'Fastball',
                 'ST': 'Breaking',
                 'SL': 'Breaking',
                 'CU': 'Breaking',
                 'KC': 'Breaking',
                 'SC': 'Off-Speed',
                 'GY': 'Off-Speed',
                 'SV': 'Breaking',
                 'CS': 'Breaking',
                 'CH': 'Off-Speed',
                 'KN': 'Off-Speed',
                 'EP': 'Breaking',
                 'UN': np.nan,
                 'IN': np.nan,
                 'PO': np.nan,
                 'AB': np.nan,
                 'AS': np.nan,
                 'NP': np.nan}
    df['pitch_category'] = df['pitch_type'].map(pitch_cat).fillna('Unknown')
    # Constant key used by df_update_summ_avg to aggregate all rows together.
    df['average'] = 'average'

    # Fold bunt trajectories into the four base categories; '' means missing.
    trajectory_fixes = {'bunt_popup': 'popup',
                        'bunt_grounder': 'ground_ball',
                        '': np.nan,
                        'bunt_line_drive': 'line_drive'}
    for raw_value, canonical in trajectory_fixes.items():
        df.loc[df['trajectory'] == raw_value, 'trajectory'] = canonical

    # Reindex so all four indicator columns exist even when a category is
    # absent from this frame (the bare assignment raised in that case).
    trajectory_cols = ['trajectory_fly_ball', 'trajectory_ground_ball',
                       'trajectory_line_drive', 'trajectory_popup']
    trajectory_dummies = pd.get_dummies(df['trajectory'], prefix='trajectory')
    df[trajectory_cols] = trajectory_dummies.reindex(columns=trajectory_cols, fill_value=0)

    # Attack zone is predicted only for rows with complete location data.
    df['attack_zone'] = np.nan
    if has_location.any():
        df.loc[has_location, 'attack_zone'] = attack_zone_model.predict(
            df.loc[has_location, zone_features])

    df['heart'] = df['attack_zone'] == 0
    df['shadow'] = df['attack_zone'] == 1
    df['chase'] = df['attack_zone'] == 2
    df['waste'] = df['attack_zone'] == 3

    df['heart_swing'] = df['heart'] & swung
    df['shadow_swing'] = df['shadow'] & swung
    df['chase_swing'] = df['chase'] & swung
    df['waste_swing'] = df['waste'] & swung

    df['xwoba'] = np.nan
    df['xwoba_contact'] = np.nan
    if batted.any():
        # Weight each predicted class probability by its wOBA value.
        # NOTE(review): assumes the model's class order matches these weights
        # (they equal the 0/single/double/triple/home-run values used above).
        outcome_weights = np.array([0, 0.883, 1.244, 1.569, 2.004])
        probabilities = xwoba_model.predict_proba(
            df.loc[batted, ['launch_angle', 'launch_speed']])
        expected_woba = (probabilities * outcome_weights).sum(axis=1)
        df.loc[batted, 'xwoba'] = expected_woba
        df.loc[batted, 'xwoba_contact'] = expected_woba

    # NOTE(review): applied whether or not any batted-ball rows exist; the
    # original nesting under the `if` above could not be determined.
    ## Assign a value of 0.696 to every walk in the dataset
    df.loc[df['event_type'].isin(['walk']), 'xwoba'] = 0.696

    ## Assign a value of 0.726 to every hit by pitch in the dataset
    df.loc[df['event_type'].isin(['hit_by_pitch']), 'xwoba'] = 0.726

    ## Assign a value of 0 to every Strikeout in the dataset
    df.loc[df['event_type'].isin(['strikeout', 'strikeout_double_play']), 'xwoba'] = 0

    return df
295
+
296
def df_update_summ(df=pd.DataFrame()):
    """Aggregate the pitch-level frame to one row per batter.

    Rows are grouped by ``batter_id`` and ``batter_name``.  Most outputs are
    sums; launch speed also gets a max, a 90th percentile and a mean, launch
    angle a mean, and ``attack_zone`` a non-null count.  The group keys are
    returned as ordinary columns.
    """
    # Output column -> (source column, aggregation); order sets column order.
    named_aggregations = {
        'pa': ('pa', 'sum'),
        'ab': ('ab', 'sum'),
        'obp_pa': ('obp', 'sum'),
        'hits': ('hits', 'sum'),
        'on_base': ('on_base', 'sum'),
        'k': ('k', 'sum'),
        'bb': ('bb', 'sum'),
        'bb_minus_k': ('bb_minus_k', 'sum'),
        'csw': ('csw', 'sum'),
        'bip': ('bip', 'sum'),
        'bip_div': ('bip_div', 'sum'),
        'tb': ('tb', 'sum'),
        'woba': ('woba', 'sum'),
        'woba_contact': ('woba_contact', 'sum'),
        'xwoba': ('xwoba', 'sum'),
        'xwoba_contact': ('xwoba_contact', 'sum'),
        'woba_codes': ('woba_codes', 'sum'),
        'hard_hit': ('hard_hit', 'sum'),
        'barrel': ('barrel', 'sum'),
        'sweet_spot': ('sweet_spot', 'sum'),
        'max_launch_speed': ('launch_speed', 'max'),
        'launch_speed_90': ('launch_speed', percentile(90)),
        'launch_speed': ('launch_speed', 'mean'),
        'launch_angle': ('launch_angle', 'mean'),
        'pitches': ('is_pitch', 'sum'),
        'swings': ('swings', 'sum'),
        'in_zone': ('in_zone', 'sum'),
        'out_zone': ('out_zone', 'sum'),
        'whiffs': ('whiffs', 'sum'),
        'zone_swing': ('zone_swing', 'sum'),
        'zone_contact': ('zone_contact', 'sum'),
        'ozone_swing': ('ozone_swing', 'sum'),
        'ozone_contact': ('ozone_contact', 'sum'),
        'ground_ball': ('trajectory_ground_ball', 'sum'),
        'line_drive': ('trajectory_line_drive', 'sum'),
        'fly_ball': ('trajectory_fly_ball', 'sum'),
        'pop_up': ('trajectory_popup', 'sum'),
        'attack_zone': ('attack_zone', 'count'),
        'heart': ('heart', 'sum'),
        'shadow': ('shadow', 'sum'),
        'chase': ('chase', 'sum'),
        'waste': ('waste', 'sum'),
        'heart_swing': ('heart_swing', 'sum'),
        'shadow_swing': ('shadow_swing', 'sum'),
        'chase_swing': ('chase_swing', 'sum'),
        'waste_swing': ('waste_swing', 'sum'),
    }
    per_batter = df.groupby(['batter_id', 'batter_name'])
    df_summ = per_batter.agg(**named_aggregations).reset_index()
    return df_summ
346
+
347
def df_update_summ_avg(df=pd.DataFrame()):
    """Aggregate the pitch-level frame by its ``average`` column.

    The statistics are the same ones df_update_summ produces per batter, but
    grouped on ``average`` (df_update sets that column to the constant
    ``'average'``, which yields a single overall row).  The group key is
    returned as an ordinary column.
    """
    # Output column -> (source column, aggregation); order sets column order.
    named_aggregations = {
        'pa': ('pa', 'sum'),
        'ab': ('ab', 'sum'),
        'obp_pa': ('obp', 'sum'),
        'hits': ('hits', 'sum'),
        'on_base': ('on_base', 'sum'),
        'k': ('k', 'sum'),
        'bb': ('bb', 'sum'),
        'bb_minus_k': ('bb_minus_k', 'sum'),
        'csw': ('csw', 'sum'),
        'bip': ('bip', 'sum'),
        'bip_div': ('bip_div', 'sum'),
        'tb': ('tb', 'sum'),
        'woba': ('woba', 'sum'),
        'woba_contact': ('woba_contact', 'sum'),
        'xwoba': ('xwoba', 'sum'),
        'xwoba_contact': ('xwoba_contact', 'sum'),
        'woba_codes': ('woba_codes', 'sum'),
        'hard_hit': ('hard_hit', 'sum'),
        'barrel': ('barrel', 'sum'),
        'sweet_spot': ('sweet_spot', 'sum'),
        'max_launch_speed': ('launch_speed', 'max'),
        'launch_speed_90': ('launch_speed', percentile(90)),
        'launch_speed': ('launch_speed', 'mean'),
        'launch_angle': ('launch_angle', 'mean'),
        'pitches': ('is_pitch', 'sum'),
        'swings': ('swings', 'sum'),
        'in_zone': ('in_zone', 'sum'),
        'out_zone': ('out_zone', 'sum'),
        'whiffs': ('whiffs', 'sum'),
        'zone_swing': ('zone_swing', 'sum'),
        'zone_contact': ('zone_contact', 'sum'),
        'ozone_swing': ('ozone_swing', 'sum'),
        'ozone_contact': ('ozone_contact', 'sum'),
        'ground_ball': ('trajectory_ground_ball', 'sum'),
        'line_drive': ('trajectory_line_drive', 'sum'),
        'fly_ball': ('trajectory_fly_ball', 'sum'),
        'pop_up': ('trajectory_popup', 'sum'),
        'attack_zone': ('attack_zone', 'count'),
        'heart': ('heart', 'sum'),
        'shadow': ('shadow', 'sum'),
        'chase': ('chase', 'sum'),
        'waste': ('waste', 'sum'),
        'heart_swing': ('heart_swing', 'sum'),
        'shadow_swing': ('shadow_swing', 'sum'),
        'chase_swing': ('chase_swing', 'sum'),
        'waste_swing': ('waste_swing', 'sum'),
    }
    overall = df.groupby(['average'])
    df_summ_avg = overall.agg(**named_aggregations).reset_index()
    return df_summ_avg
401
+
402
def df_summ_changes(df_summ=pd.DataFrame()):
    """Add rate statistics to an aggregated summary frame.

    Every rate is ``numerator / denominator``, set to NaN where the
    denominator fails its guard (``!= 0`` for most rates, ``> 0`` for
    ``zone_percent``, ``swing_percent`` and ``swstr_rate``).  The rates are
    computed column-wise, so the result does not depend on ``df_summ`` having
    a default 0..n-1 index.

    The new columns are written onto ``df_summ`` itself; the returned frame
    is that data with rows whose ``bip`` is NaN dropped.

    Parameters:
        df_summ: summary frame holding the count columns produced by the
            aggregation functions in this module.

    Returns:
        The summary with the rate columns added and NaN-``bip`` rows removed.
    """
    def as_float(column):
        # Accept either a column name or an already-computed Series.
        series = column if isinstance(column, pd.Series) else df_summ[column]
        return series.astype(float)

    def ratio(numerator, denominator, positive_only=False):
        # Denominators that fail the guard become NaN, so the quotient is NaN.
        den = as_float(denominator)
        usable = (den > 0) if positive_only else (den != 0)
        return as_float(numerator) / den.where(usable)

    df_summ['avg'] = ratio('hits', 'ab')
    df_summ['obp'] = ratio('on_base', 'obp_pa')
    df_summ['slg'] = ratio('tb', 'ab')

    df_summ['ops'] = df_summ['obp'] + df_summ['slg']

    df_summ['k_percent'] = ratio('k', 'pa')
    df_summ['bb_percent'] = ratio('bb', 'pa')
    df_summ['bb_minus_k_percent'] = ratio('bb_minus_k', 'pa')

    df_summ['bb_over_k_percent'] = ratio('bb', 'k')

    df_summ['csw_percent'] = ratio('csw', 'pitches')

    df_summ['sweet_spot_percent'] = ratio('sweet_spot', 'bip_div')

    df_summ['woba_percent'] = ratio('woba', 'woba_codes')
    df_summ['woba_percent_contact'] = ratio('woba_contact', 'bip')
    df_summ['hard_hit_percent'] = ratio('hard_hit', 'bip_div')

    df_summ['barrel_percent'] = ratio('barrel', 'bip_div')

    df_summ['zone_contact_percent'] = ratio('zone_contact', 'zone_swing')
    df_summ['zone_swing_percent'] = ratio('zone_swing', 'in_zone')
    df_summ['zone_percent'] = ratio('in_zone', 'pitches', positive_only=True)

    # Chase rate uses pitches minus in-zone pitches as its denominator.
    pitches_not_in_zone = as_float('pitches') - as_float('in_zone')
    df_summ['chase_percent'] = ratio('ozone_swing', pitches_not_in_zone)
    df_summ['chase_contact'] = ratio('ozone_contact', 'ozone_swing')

    df_summ['swing_percent'] = ratio('swings', 'pitches', positive_only=True)
    df_summ['whiff_rate'] = ratio('whiffs', 'swings')
    df_summ['swstr_rate'] = ratio('whiffs', 'pitches', positive_only=True)

    df_summ['ground_ball_percent'] = ratio('ground_ball', 'bip')
    df_summ['line_drive_percent'] = ratio('line_drive', 'bip')
    df_summ['fly_ball_percent'] = ratio('fly_ball', 'bip')
    df_summ['pop_up_percent'] = ratio('pop_up', 'bip')

    df_summ['heart_zone_percent'] = ratio('heart', 'attack_zone')
    df_summ['shadow_zone_percent'] = ratio('shadow', 'attack_zone')
    df_summ['chase_zone_percent'] = ratio('chase', 'attack_zone')
    df_summ['waste_zone_percent'] = ratio('waste', 'attack_zone')

    df_summ['heart_zone_swing_percent'] = ratio('heart_swing', 'heart')
    df_summ['shadow_zone_swing_percent'] = ratio('shadow_swing', 'shadow')
    df_summ['chase_zone_swing_percent'] = ratio('chase_swing', 'chase')
    df_summ['waste_zone_swing_percent'] = ratio('waste_swing', 'waste')

    df_summ['xwoba_percent'] = ratio('xwoba', 'woba_codes')
    df_summ['xwoba_percent_contact'] = ratio('xwoba_contact', 'bip')

    df_summ = df_summ.dropna(subset=['bip'])
    return df_summ
482
+
483
def df_summ_filter_out(df_summ=pd.DataFrame(), batter_select=0):
    """Compare one batter against peers with a similar number of plate appearances.

    The minimum ``pa`` for the peer group is the selected batter's ``pa``
    rounded down to a multiple of 10, capped at 500.

    Parameters:
        df_summ: summary frame with a MultiIndex whose level 0 identifies the
            batter, and a ``pa`` column.
        batter_select: level-0 index value of the batter to look up.

    Returns:
        A 4-tuple ``(df_summ_filter, df_summ_filter_pct, df_summ_player,
        df_summ_player_pct)``: the peer rows, their percentile ranks, the
        selected batter's rows, and the selected batter's percentile ranks.

    Raises:
        KeyError: if ``batter_select`` is not present in level 0 of the index.
        ValueError: if the selection does not resolve to exactly one ``pa``.
    """
    df_summ_player = df_summ.xs(batter_select, level=0)
    # Pull the scalar out explicitly; math.floor on a one-element Series
    # depends on an implicit float() conversion that pandas has deprecated.
    player_pa = df_summ_player['pa'].item()
    minimum_pa = min(math.floor(player_pa / 10) * 10, 500)

    df_summ_filter = df_summ[df_summ['pa'] >= minimum_pa]
    df_summ_filter_pct = df_summ_filter.rank(pct=True, ascending=True)
    df_summ_player_pct = df_summ_filter_pct.xs(batter_select, level=0)
    return df_summ_filter, df_summ_filter_pct, df_summ_player, df_summ_player_pct
489
+
490
def df_summ_batter_pitch_up(df=pd.DataFrame()):
    """Summarise the pitch-level frame per batter and pitch category.

    Rows with a missing ``pitch_category`` are dropped, the rest are grouped
    by ``batter_id``, ``batter_name`` and ``pitch_category`` and aggregated,
    and rate statistics are then added.  Each rate is
    ``numerator / denominator`` and is NaN where the denominator is 0.

    ``woba_contact`` and ``xwoba_contact`` are summed from the columns of the
    same name, matching the other summary functions in this module (the
    earlier version summed them from ``xwoba_contact`` and ``xwoba``).

    Parameters:
        df: pitch-level frame carrying the columns added by ``df_update``.

    Returns:
        One row per (batter_id, batter_name, pitch_category) holding the
        counts and the derived rates.
    """
    # Output column -> (source column, aggregation); order sets column order.
    named_aggregations = {
        'pa': ('pa', 'sum'),
        'ab': ('ab', 'sum'),
        'obp_pa': ('obp', 'sum'),
        'hits': ('hits', 'sum'),
        'on_base': ('on_base', 'sum'),
        'k': ('k', 'sum'),
        'bb': ('bb', 'sum'),
        'bb_minus_k': ('bb_minus_k', 'sum'),
        'csw': ('csw', 'sum'),
        'bip': ('bip', 'sum'),
        'bip_div': ('bip_div', 'sum'),
        'tb': ('tb', 'sum'),
        'woba': ('woba', 'sum'),
        'woba_contact': ('woba_contact', 'sum'),
        'xwoba': ('xwoba', 'sum'),
        'xwoba_contact': ('xwoba_contact', 'sum'),
        'woba_codes': ('woba_codes', 'sum'),
        'hard_hit': ('hard_hit', 'sum'),
        'barrel': ('barrel', 'sum'),
        'sweet_spot': ('sweet_spot', 'sum'),
        'max_launch_speed': ('launch_speed', 'max'),
        'launch_speed_90': ('launch_speed', percentile(90)),
        'launch_speed': ('launch_speed', 'mean'),
        'launch_angle': ('launch_angle', 'mean'),
        'pitches': ('is_pitch', 'sum'),
        'swings': ('swings', 'sum'),
        'in_zone': ('in_zone', 'sum'),
        'out_zone': ('out_zone', 'sum'),
        'whiffs': ('whiffs', 'sum'),
        'zone_swing': ('zone_swing', 'sum'),
        'zone_contact': ('zone_contact', 'sum'),
        'ozone_swing': ('ozone_swing', 'sum'),
        'ozone_contact': ('ozone_contact', 'sum'),
        'ground_ball': ('trajectory_ground_ball', 'sum'),
        'line_drive': ('trajectory_line_drive', 'sum'),
        'fly_ball': ('trajectory_fly_ball', 'sum'),
        'pop_up': ('trajectory_popup', 'sum'),
        'attack_zone': ('attack_zone', 'count'),
        'heart': ('heart', 'sum'),
        'shadow': ('shadow', 'sum'),
        'chase': ('chase', 'sum'),
        'waste': ('waste', 'sum'),
        'heart_swing': ('heart_swing', 'sum'),
        'shadow_swing': ('shadow_swing', 'sum'),
        'chase_swing': ('chase_swing', 'sum'),
        'waste_swing': ('waste_swing', 'sum'),
    }
    group_keys = ['batter_id', 'batter_name', 'pitch_category']
    df_summ_batter_pitch = (
        df.dropna(subset=['pitch_category'])
        .groupby(group_keys)
        .agg(**named_aggregations)
        .reset_index()
    )

    def as_float(column):
        # Accept either a column name or an already-computed Series.
        if isinstance(column, pd.Series):
            return column.astype(float)
        return df_summ_batter_pitch[column].astype(float)

    def ratio(numerator, denominator):
        # Zero denominators become NaN, so the quotient is NaN there.
        den = as_float(denominator)
        return as_float(numerator) / den.where(den != 0)

    df_summ_batter_pitch['avg'] = ratio('hits', 'ab')
    df_summ_batter_pitch['obp'] = ratio('on_base', 'obp_pa')
    df_summ_batter_pitch['slg'] = ratio('tb', 'ab')

    df_summ_batter_pitch['ops'] = df_summ_batter_pitch['obp'] + df_summ_batter_pitch['slg']

    df_summ_batter_pitch['k_percent'] = ratio('k', 'pa')
    df_summ_batter_pitch['bb_percent'] = ratio('bb', 'pa')
    df_summ_batter_pitch['bb_minus_k_percent'] = ratio('bb_minus_k', 'pa')

    df_summ_batter_pitch['bb_over_k_percent'] = ratio('bb', 'k')

    df_summ_batter_pitch['csw_percent'] = ratio('csw', 'pitches')

    df_summ_batter_pitch['sweet_spot_percent'] = ratio('sweet_spot', 'bip_div')

    df_summ_batter_pitch['woba_percent'] = ratio('woba', 'woba_codes')
    df_summ_batter_pitch['woba_percent_contact'] = ratio('woba_contact', 'bip')
    df_summ_batter_pitch['hard_hit_percent'] = ratio('hard_hit', 'bip_div')

    df_summ_batter_pitch['barrel_percent'] = ratio('barrel', 'bip_div')

    df_summ_batter_pitch['zone_contact_percent'] = ratio('zone_contact', 'zone_swing')
    df_summ_batter_pitch['zone_swing_percent'] = ratio('zone_swing', 'in_zone')
    df_summ_batter_pitch['zone_percent'] = ratio('in_zone', 'pitches')

    # Chase rate uses pitches minus in-zone pitches as its denominator.
    pitches_not_in_zone = as_float('pitches') - as_float('in_zone')
    df_summ_batter_pitch['chase_percent'] = ratio('ozone_swing', pitches_not_in_zone)
    df_summ_batter_pitch['chase_contact'] = ratio('ozone_contact', 'ozone_swing')

    df_summ_batter_pitch['swing_percent'] = ratio('swings', 'pitches')
    df_summ_batter_pitch['whiff_rate'] = ratio('whiffs', 'swings')
    df_summ_batter_pitch['swstr_rate'] = ratio('whiffs', 'pitches')

    df_summ_batter_pitch['heart_zone_percent'] = ratio('heart', 'attack_zone')
    df_summ_batter_pitch['shadow_zone_percent'] = ratio('shadow', 'attack_zone')
    df_summ_batter_pitch['chase_zone_percent'] = ratio('chase', 'attack_zone')
    df_summ_batter_pitch['waste_zone_percent'] = ratio('waste', 'attack_zone')

    df_summ_batter_pitch['heart_zone_swing_percent'] = ratio('heart_swing', 'heart')
    df_summ_batter_pitch['shadow_zone_swing_percent'] = ratio('shadow_swing', 'shadow')
    df_summ_batter_pitch['chase_zone_swing_percent'] = ratio('chase_swing', 'chase')
    df_summ_batter_pitch['waste_zone_swing_percent'] = ratio('waste_swing', 'waste')

    df_summ_batter_pitch['xwoba_percent'] = ratio('xwoba', 'woba_codes')
    df_summ_batter_pitch['xwoba_percent_contact'] = ratio('xwoba_contact', 'bip')

    df_summ_batter_pitch['bip'] = df_summ_batter_pitch['bip'].fillna(0)

    return df_summ_batter_pitch