Upload 22 files
Browse files- functions/__pycache__/df_update.cpython-39.pyc +0 -0
- functions/__pycache__/pitch_summary_functions.cpython-39.pyc +0 -0
- functions/df_update.py +472 -0
- functions/pitch_summary_functions.py +1029 -0
- functions/statcast_2024_grouped.csv +19 -0
- joblib_model/__pycache__/feature_engineering.cpython-39.pyc +0 -0
- joblib_model/barrel_model.joblib +3 -0
- joblib_model/in_zone.joblib +3 -0
- joblib_model/in_zone_model_knn_20240410.joblib +3 -0
- joblib_model/linear_reg_model_x.joblib +3 -0
- joblib_model/linear_reg_model_z.joblib +3 -0
- joblib_model/model_attack_zone.joblib +3 -0
- joblib_model/no_swing.joblib +3 -0
- joblib_model/swing.joblib +3 -0
- joblib_model/xwoba_model.joblib +3 -0
- stuff_model/__pycache__/feature_engineering.cpython-39.pyc +0 -0
- stuff_model/__pycache__/stuff_apply.cpython-39.pyc +0 -0
- stuff_model/feature_engineering.py +118 -0
- stuff_model/lgbm_model_2020_2023.joblib +3 -0
- stuff_model/stuff_apply.py +57 -0
- stuff_model/target_stats.txt +2 -0
- stuff_model/tj_stuff_plus_pitch.csv +16 -0
functions/__pycache__/df_update.cpython-39.pyc
ADDED
|
Binary file (14.1 kB). View file
|
|
|
functions/__pycache__/pitch_summary_functions.cpython-39.pyc
ADDED
|
Binary file (33.8 kB). View file
|
|
|
functions/df_update.py
ADDED
|
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import polars as pl
|
| 2 |
+
import numpy as np
|
| 3 |
+
import joblib
|
| 4 |
+
|
| 5 |
+
# Pre-fitted estimators, deserialized once when this module is imported.
# The paths are relative, so they resolve against the current working
# directory of the process.
# NOTE(review): joblib.load unpickles its input; only load trusted files.
(
    loaded_model,
    in_zone_model,
    attack_zone_model,
    xwoba_model,
    px_model,
    pz_model,
) = map(
    joblib.load,
    (
        'joblib_model/barrel_model.joblib',
        'joblib_model/in_zone_model_knn_20240410.joblib',
        'joblib_model/model_attack_zone.joblib',
        'joblib_model/xwoba_model.joblib',
        'joblib_model/linear_reg_model_x.joblib',
        'joblib_model/linear_reg_model_z.joblib',
    ),
)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class df_update:
    """Derive per-pitch stat columns and aggregate them into summaries.

    ``update`` adds counting flags, model-based fill-ins and wOBA/xwOBA
    values to a pitch-level Polars frame. ``update_summary`` and
    ``update_summary_select`` group such a frame and compute totals and
    rate statistics from the columns that ``update`` produces.
    """

    # event_type values that count as a hit.
    _HIT_CODES = ['single', 'double', 'home_run', 'triple']

    # event_type values that count as an at-bat.
    _AB_CODES = ['single', 'strikeout', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'field_error', 'home_run', 'triple',
                 'double_play',
                 'fielders_choice_out', 'strikeout_double_play',
                 'other_out', 'triple_play']

    # event_type values where the batter reached base (OBP numerator).
    _ON_BASE_CODES = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']

    # event_type values that count toward the OBP denominator.
    _OBP_CODES = ['single', 'strikeout', 'walk', 'field_out',
                  'grounded_into_double_play', 'fielders_choice', 'force_out',
                  'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                  'hit_by_pitch', 'double_play', 'intent_walk',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play',
                  'other_out', 'triple_play']

    # play_description values that mark a ball put in play.
    _BIP_CODES = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

    # event_type values that are assigned a wOBA value of 0.
    _WOBA_OUT_CODES = ['strikeout', 'field_out', 'sac_fly', 'force_out',
                       'grounded_into_double_play', 'fielders_choice',
                       'field_error', 'sac_bunt', 'double_play',
                       'fielders_choice_out', 'strikeout_double_play',
                       'sac_fly_double_play', 'other_out']

    # event_type values that count toward the wOBA/xwOBA denominators.
    _WOBA_CODES = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                   'double', 'sac_fly', 'force_out', 'home_run',
                   'grounded_into_double_play', 'fielders_choice',
                   'field_error', 'triple', 'sac_bunt', 'double_play',
                   'fielders_choice_out', 'strikeout_double_play',
                   'sac_fly_double_play', 'other_out']

    # Weights multiplied against the xwoba_model class probabilities.
    # NOTE(review): assumes the model's classes are ordered
    # out/single/double/triple/home run -- confirm against the training code.
    _XWOBA_CLASS_WEIGHTS = [0, 0.881, 1.254, 1.589, 2.048]

    # attack_zone codes and the column-name prefix used for each of them.
    _ATTACK_ZONES = {'heart': 0, 'shadow': 1, 'chase': 2, 'waste': 3}

    # One-hot trajectory columns that the summary methods aggregate.
    _TRAJECTORY_COLUMNS = ['trajectory_fly_ball', 'trajectory_ground_ball',
                           'trajectory_line_drive', 'trajectory_popup']

    def __init__(self):
        pass

    def update(self, df_clone: pl.DataFrame):
        """Return a copy of ``df_clone`` with derived stat columns added.

        Parameters:
        df_clone (pl.DataFrame): Pitch-level data. The method reads the
            columns type_ab, is_pitch, is_swing, sz_top, sz_bot, zone, x, y,
            px, pz, event_type, play_description, play_code, launch_speed,
            launch_angle and trajectory.

        Returns:
        pl.DataFrame: The cloned frame with the added/overwritten columns,
            including one-hot ``trajectory_*`` columns.
        """
        df = df_clone.clone()

        event = pl.col('event_type')
        speed = pl.col('launch_speed')
        angle = pl.col('launch_angle')

        def flag(condition, name):
            # Boolean column that is False (never null) when condition is null.
            return pl.when(condition).then(True).otherwise(False).alias(name)

        is_barrel = (
            (speed * 1.5 - angle >= 117)
            & (speed + angle >= 124)
            & (speed >= 98)
            & (angle >= 4)
            & (angle <= 50)
        )

        # Model inputs are taken from the frame as it is on entry: nulls become
        # 0 and none of the replacements made below have been applied yet.
        # NOTE(review): the zone models therefore see px/pz = 0 for missing
        # locations rather than the px_predict/pz_predict values -- confirm
        # this is intended.
        zone_features = df[['px', 'pz', 'sz_top', 'sz_bot']].fill_null(0).to_numpy()

        df = df.with_columns([
            pl.when(pl.col('type_ab').is_not_null()).then(1).otherwise(0).alias('pa'),
            pl.when(pl.col('is_pitch')).then(1).otherwise(0).alias('pitches'),
            # A strike-zone edge of 0 is treated as missing.
            pl.when(pl.col('sz_top') == 0).then(None).otherwise(pl.col('sz_top')).alias('sz_top'),
            pl.when(pl.col('sz_bot') == 0).then(None).otherwise(pl.col('sz_bot')).alias('sz_bot'),
            # Zones 1-9 are in the zone; zone <= 0 or null is left null.
            pl.when(pl.col('zone') > 0).then(pl.col('zone') < 10).otherwise(None).alias('in_zone'),
            pl.Series(px_model.predict(df[['x']].fill_null(0).to_numpy())[:, 0]).alias('px_predict'),
            pl.Series(pz_model.predict(df[['y']].fill_null(0).to_numpy())[:, 0] + 3.2).alias('pz_predict'),
            pl.Series(in_zone_model.predict(zone_features)[:]).alias('in_zone_predict'),
            pl.Series(attack_zone_model.predict(zone_features)[:]).alias('attack_zone_predict'),
            flag(event.is_in(self._HIT_CODES), 'hits'),
            flag(event.is_in(self._AB_CODES), 'ab'),
            flag(event.is_in(self._ON_BASE_CODES), 'on_base'),
            flag(event.is_in(self._OBP_CODES), 'obp'),
            flag(pl.col('play_description').is_in(self._BIP_CODES), 'bip'),
            # The three contact-quality flags are True, null when the
            # threshold is not met, and False when the measurement is missing.
            pl.when(speed.is_null()).then(False)
            .when(is_barrel).then(True)
            .otherwise(None).alias('barrel'),
            pl.when(angle.is_null()).then(False)
            .when((angle >= 8) & (angle <= 32)).then(True)
            .otherwise(None).alias('sweet_spot'),
            pl.when(speed.is_null()).then(False)
            .when(speed >= 94.5).then(True)
            .otherwise(None).alias('hard_hit'),
            # Total bases; null for anything that is not a hit.
            pl.when(event == 'single').then(1)
            .when(event == 'double').then(2)
            .when(event == 'triple').then(3)
            .when(event == 'home_run').then(4)
            .otherwise(None).alias('tb'),
            # Linear-weight wOBA value of the event; null for uncounted events.
            pl.when(event.is_in(self._WOBA_OUT_CODES)).then(0)
            .when(event == 'walk').then(0.689)
            .when(event == 'hit_by_pitch').then(0.720)
            .when(event == 'single').then(0.881)
            .when(event == 'double').then(1.254)
            .when(event == 'triple').then(1.589)
            .when(event == 'home_run').then(2.048)
            .otherwise(None).alias('woba'),
            pl.when(pl.col('play_code').is_in(['S', 'W', 'T'])).then(1).otherwise(0).alias('whiffs'),
            pl.when(pl.col('play_code').is_in(['S', 'W', 'T', 'C'])).then(1).otherwise(0).alias('csw'),
            pl.when(pl.col('is_swing').cast(pl.Boolean)).then(1).otherwise(0).alias('swings'),
            flag(event.str.contains('strikeout'), 'k'),
            flag(event.is_in(['walk', 'intent_walk']), 'bb'),
            # Placeholders that fix the column position; filled in below.
            pl.lit(None).alias('attack_zone'),
            pl.lit(None).alias('woba_pred'),
            pl.lit(None).alias('woba_pred_contact'),
        ])

        in_zone = pl.col('in_zone')
        swung = pl.col('swings') == 1
        made_contact = pl.col('whiffs') == 0

        df = df.with_columns([
            pl.when(event.is_in(self._WOBA_CODES)).then(1).otherwise(None).alias('woba_codes'),
            pl.when(event.is_in(self._WOBA_CODES)).then(1).otherwise(None).alias('xwoba_codes'),
            # tb is only non-null for hits, so this keeps woba for hits only.
            pl.when(pl.col('tb') >= 0).then(pl.col('woba')).otherwise(None).alias('woba_contact'),
            # Fall back to the model predictions where tracking data is missing.
            pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
            pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
            pl.when(in_zone.is_null()).then(pl.col('in_zone_predict')).otherwise(in_zone).alias('in_zone'),
            # Without a launch speed the barrel flag becomes null, not False.
            pl.when(speed.is_null()).then(None).otherwise(pl.col('barrel')).alias('barrel'),
            pl.lit('average').alias('average'),
            # The zone flags use the in_zone value from before the fill above,
            # because all expressions in one with_columns see the same input.
            flag(in_zone == False, 'out_zone'),  # noqa: E712
            flag((in_zone == True) & swung, 'zone_swing'),  # noqa: E712
            flag((in_zone == True) & swung & made_contact, 'zone_contact'),  # noqa: E712
            flag((in_zone == False) & swung, 'ozone_swing'),  # noqa: E712
            flag((in_zone == False) & swung & made_contact, 'ozone_contact'),  # noqa: E712
            pl.when(pl.col('attack_zone').is_null())
            .then(pl.col('attack_zone_predict'))
            .otherwise(pl.col('attack_zone')).alias('attack_zone'),
        ])

        attack_zone = pl.col('attack_zone')
        strikeouts = pl.col('k').cast(pl.Float32)
        walks = pl.col('bb').cast(pl.Float32)

        df = df.with_columns(
            [
                (strikeouts - walks).alias('k_minus_bb'),
                (walks - strikeouts).alias('bb_minus_k'),
                (speed > 0).alias('bip_div'),
            ]
            + [(attack_zone == code).alias(name)
               for name, code in self._ATTACK_ZONES.items()]
            + [((attack_zone == code) & (pl.col('swings') == 1)).alias(f'{name}_swing')
               for name, code in self._ATTACK_ZONES.items()]
            + [((attack_zone == code) & (pl.col('whiffs') == 1)).alias(f'{name}_whiff')
               for name, code in self._ATTACK_ZONES.items()]
        )

        # Expected wOBA per row: class probabilities weighted by the wOBA
        # value of each class, summed across classes.
        batted_ball = df[['launch_angle', 'launch_speed']].fill_null(0).to_numpy()
        class_probabilities = xwoba_model.predict_proba(batted_ball)
        expected_woba = (class_probabilities * np.array(self._XWOBA_CLASS_WEIGHTS)).sum(axis=1)
        df = df.with_columns([
            pl.Series(expected_woba).alias('woba_pred_predict')
        ])

        # Walks, hit-by-pitches and strikeouts get fixed values instead of the
        # batted-ball prediction.
        df = df.with_columns([
            pl.when(event.is_in(['walk'])).then(0.689)
            .when(event.is_in(['hit_by_pitch'])).then(0.720)
            .when(event.is_in(['strikeout', 'strikeout_double_play'])).then(0)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred_predict')
        ])

        df = df.with_columns([
            pl.when(pl.col('woba_codes').is_null()).then(None)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred'),
            # bip is a non-null Boolean, so this equals the former `bip != 1`
            # test without comparing a Boolean column to an integer.
            pl.when(pl.col('bip')).then(pl.col('woba_pred_predict'))
            .otherwise(None).alias('woba_pred_contact'),
        ])

        # Fold bunt trajectories into the regular categories; '' becomes null.
        df = df.with_columns([
            pl.when(pl.col('trajectory').is_in(['bunt_popup'])).then(pl.lit('popup'))
            .when(pl.col('trajectory').is_in(['bunt_grounder'])).then(pl.lit('ground_ball'))
            .when(pl.col('trajectory').is_in(['bunt_line_drive'])).then(pl.lit('line_drive'))
            .when(pl.col('trajectory').is_in([''])).then(pl.lit(None))
            .otherwise(pl.col('trajectory')).alias('trajectory')
        ])

        # Create one-hot encoded columns for the trajectory column.
        dummy_df = df.select(pl.col('trajectory')).to_dummies()

        # Add an all-zero column for every category absent from this data so
        # the summary methods can always aggregate it. (An identity rename
        # used to run first and raised when one of these columns was missing.)
        for col in self._TRAJECTORY_COLUMNS:
            if col not in dummy_df.columns:
                dummy_df = dummy_df.with_columns(pl.lit(0).alias(col))

        # Join the one-hot encoded columns back to the original DataFrame.
        df = df.hstack(dummy_df)

        # The indicator column for null trajectories is not wanted.
        if 'trajectory_null' in df.columns:
            df = df.drop('trajectory_null')

        return df

    def update_summary(self, df: pl.DataFrame, pitcher: bool = True) -> pl.DataFrame:
        """
        Update summary statistics for pitchers or batters.

        Parameters:
        df (pl.DataFrame): The input Polars DataFrame containing the columns produced by ``update``.
        pitcher (bool): A flag indicating whether to group by pitcher (True) or batter (False).

        Returns:
        pl.DataFrame: A Polars DataFrame with aggregated and calculated summary statistics.
        """
        position = 'pitcher' if pitcher else 'batter'
        return self._summarize(df, [f'{position}_id', f'{position}_name'])

    def update_summary_select(self, df: pl.DataFrame, selection: list) -> pl.DataFrame:
        """
        Update summary statistics grouped by an arbitrary set of columns.

        Parameters:
        df (pl.DataFrame): The input Polars DataFrame containing the columns produced by ``update``.
        selection (list): The column names to group by.

        Returns:
        pl.DataFrame: A Polars DataFrame with aggregated and calculated summary statistics.
        """
        return self._summarize(df, selection)

    @staticmethod
    def _summarize(df: pl.DataFrame, keys: list) -> pl.DataFrame:
        """Group ``df`` by ``keys`` and return totals plus rate statistics."""

        def total(source, name=None):
            # Sum of one column, optionally stored under a different name.
            return pl.col(source).sum().alias(name or source)

        def ratio(numerator, denominator, name):
            # Quotient of two aggregated columns.
            return (pl.col(numerator) / pl.col(denominator)).alias(name)

        zones = ['heart', 'shadow', 'chase', 'waste']

        df_summ = df.group_by(keys).agg(
            [
                total('pa'),
                total('ab'),
                total('obp', 'obp_pa'),
                total('hits'),
                total('on_base'),
                total('k'),
                total('bb'),
                total('bb_minus_k'),
                total('csw'),
                total('bip'),
                total('bip_div'),
                total('tb'),
                total('woba'),
                total('woba_contact'),
                total('woba_pred', 'xwoba'),
                total('woba_pred_contact', 'xwoba_contact'),
                total('woba_codes'),
                total('xwoba_codes'),
                total('hard_hit'),
                total('barrel'),
                total('sweet_spot'),
                pl.col('launch_speed').max().alias('max_launch_speed'),
                pl.col('launch_speed').quantile(0.90).alias('launch_speed_90'),
                pl.col('launch_speed').mean().alias('launch_speed'),
                pl.col('launch_angle').mean().alias('launch_angle'),
                total('is_pitch', 'pitches'),
                total('swings'),
                total('in_zone'),
                total('out_zone'),
                total('whiffs'),
                total('zone_swing'),
                total('zone_contact'),
                total('ozone_swing'),
                total('ozone_contact'),
                total('trajectory_ground_ball', 'ground_ball'),
                total('trajectory_line_drive', 'line_drive'),
                total('trajectory_fly_ball', 'fly_ball'),
                total('trajectory_popup', 'pop_up'),
                # Number of non-null attack_zone values, not a sum of codes.
                pl.col('attack_zone').count().alias('attack_zone'),
            ]
            + [total(zone) for zone in zones]
            + [total(f'{zone}_swing') for zone in zones]
            + [total(f'{zone}_whiff') for zone in zones]
        )

        # Add calculated columns to the summary DataFrame.
        df_summ = df_summ.with_columns(
            [
                ratio('hits', 'ab', 'avg'),
                ratio('on_base', 'obp_pa', 'obp'),
                ratio('tb', 'ab', 'slg'),
                (pl.col('on_base') / pl.col('obp_pa') + pl.col('tb') / pl.col('ab')).alias('ops'),
                ratio('k', 'pa', 'k_percent'),
                ratio('bb', 'pa', 'bb_percent'),
                ratio('bb_minus_k', 'pa', 'bb_minus_k_percent'),
                ratio('bb', 'k', 'bb_over_k_percent'),
                ratio('csw', 'pitches', 'csw_percent'),
                ratio('sweet_spot', 'bip_div', 'sweet_spot_percent'),
                ratio('woba', 'woba_codes', 'woba_percent'),
                ratio('woba_contact', 'bip', 'woba_percent_contact'),
                ratio('hard_hit', 'bip_div', 'hard_hit_percent'),
                ratio('barrel', 'bip_div', 'barrel_percent'),
                ratio('zone_contact', 'zone_swing', 'zone_contact_percent'),
                ratio('zone_swing', 'in_zone', 'zone_swing_percent'),
                ratio('in_zone', 'pitches', 'zone_percent'),
                # NOTE(review): the denominator is pitches minus in_zone, not
                # the out_zone total -- confirm the two agree for the data.
                (pl.col('ozone_swing') / (pl.col('pitches') - pl.col('in_zone'))).alias('chase_percent'),
                ratio('ozone_contact', 'ozone_swing', 'chase_contact'),
                ratio('swings', 'pitches', 'swing_percent'),
                ratio('whiffs', 'swings', 'whiff_rate'),
                ratio('whiffs', 'pitches', 'swstr_rate'),
                ratio('ground_ball', 'bip', 'ground_ball_percent'),
                ratio('line_drive', 'bip', 'line_drive_percent'),
                ratio('fly_ball', 'bip', 'fly_ball_percent'),
                ratio('pop_up', 'bip', 'pop_up_percent'),
            ]
            + [ratio(zone, 'attack_zone', f'{zone}_zone_percent') for zone in zones]
            + [ratio(f'{zone}_swing', zone, f'{zone}_zone_swing_percent') for zone in zones]
            + [ratio(f'{zone}_whiff', f'{zone}_swing', f'{zone}_zone_whiff_percent') for zone in zones]
            + [
                ratio('xwoba', 'xwoba_codes', 'xwoba_percent'),
                ratio('xwoba_contact', 'bip', 'xwoba_percent_contact'),
            ]
        )

        return df_summ
|
functions/pitch_summary_functions.py
ADDED
|
@@ -0,0 +1,1029 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import json
|
| 4 |
+
from matplotlib.ticker import FuncFormatter
|
| 5 |
+
from matplotlib.ticker import MaxNLocator
|
| 6 |
+
import math
|
| 7 |
+
from matplotlib.patches import Ellipse
|
| 8 |
+
import matplotlib.transforms as transforms
|
| 9 |
+
import matplotlib.colors
|
| 10 |
+
import matplotlib.colors as mcolors
|
| 11 |
+
import seaborn as sns
|
| 12 |
+
import matplotlib.pyplot as plt
|
| 13 |
+
import requests
|
| 14 |
+
import polars as pl
|
| 15 |
+
from PIL import Image
|
| 16 |
+
import requests
|
| 17 |
+
from io import BytesIO
|
| 18 |
+
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
|
| 19 |
+
import matplotlib.pyplot as plt
|
| 20 |
+
import matplotlib.gridspec as gridspec
|
| 21 |
+
import PIL
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
### PITCH COLOURS ###

# Master table: pitch-type code -> {'colour': hex string, 'name': display name}.
# Every other lookup below is derived from this single dictionary.
pitch_colours = {
    ## Fastballs ##
    'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
    'FA': {'colour': '#FF007D', 'name': 'Fastball'},
    'SI': {'colour': '#98165D', 'name': 'Sinker'},
    'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},

    ## Offspeed ##
    'CH': {'colour': '#F79E70', 'name': 'Changeup'},
    'FS': {'colour': '#FE6100', 'name': 'Splitter'},
    'SC': {'colour': '#F08223', 'name': 'Screwball'},
    'FO': {'colour': '#FFB000', 'name': 'Forkball'},

    ## Sliders ##
    'SL': {'colour': '#67E18D', 'name': 'Slider'},
    'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
    'SV': {'colour': '#376748', 'name': 'Slurve'},

    ## Curveballs ##
    'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
    'CU': {'colour': '#3025CE', 'name': 'Curveball'},
    'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
    'EP': {'colour': '#648FFF', 'name': 'Eephus'},

    ## Others ##
    'KN': {'colour': '#867A08', 'name': 'Knuckleball'},
    'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
    'UN': {'colour': '#9C8975', 'name': 'Unknown'},
}

# Derived lookups.
# code -> colour
dict_colour = {code: info['colour'] for code, info in pitch_colours.items()}
# code -> display name
dict_pitch = {code: info['name'] for code, info in pitch_colours.items()}
# display name -> code, plus an alternate spelling of the four-seamer and
# an 'All' entry that maps to itself
dict_pitch_desc_type = {info['name']: code for code, info in pitch_colours.items()}
dict_pitch_desc_type['Four-Seam Fastball'] = 'FF'
dict_pitch_desc_type['All'] = 'All'
# display name -> colour, again with the alternate four-seamer spelling
dict_pitch_name = {info['name']: info['colour'] for info in pitch_colours.values()}
dict_pitch_name['Four-Seam Fastball'] = '#FF007D'
|
| 65 |
+
|
| 66 |
+
# Font dictionaries passed as `fontdict=` to the matplotlib text calls in this
# module (body text, titles and axis labels respectively).
# NOTE(review): 'calibi' looks like a typo for 'calibri' - confirm before
# changing, since correcting it would alter how figures render.
font_properties = dict(family='calibi', size=12)
font_properties_titles = dict(family='calibi', size=20)
font_properties_axes = dict(family='calibi', size=16)
|
| 69 |
+
|
| 70 |
+
# Three-stop colormap ('#648FFF' -> white -> '#FFB000'); get_color() below
# takes a colormap like this one to shade values.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list(
    name="",
    colors=['#648FFF', '#FFFFFF', '#FFB000'],
)
|
| 71 |
+
|
| 72 |
+
### FANGRAPHS STATS DICT ###
# Stat key -> how to present it in a table:
#   'table_header' : mathtext string for a bold column header
#   'format'       : format spec applied to the stat value
# Raw strings are used so that the mathtext escapes (\bf, \/, \%) are not
# treated as (invalid) Python escape sequences; the runtime values are the
# same as before.
fangraphs_stats_dict = {
    'IP': {'table_header': r'$\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': r'$\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': r'$\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': r'$\bf{K\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': r'$\bf{BB\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': r'$\bf{K\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': r'$\bf{HR\/9}$', 'format': '.2f'},
    'K%': {'table_header': r'$\bf{K\%}$', 'format': '.1%'},
    'BB%': {'table_header': r'$\bf{BB\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': r'$\bf{K-BB\%}$', 'format': '.1%'},
    'WHIP': {'table_header': r'$\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': r'$\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': r'$\bf{LOB\%}$', 'format': '.1%'},
    'xFIP': {'table_header': r'$\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': r'$\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': r'$\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': r'$\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': r'$\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': r'$\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': r'$\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': r'$\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': r'$\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': r'$\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': r'$\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': r'$\bf{SO}$', 'format': '.0f'},
    # OBP and SLG are rate stats like AVG/wOBA; '.0f' collapsed them to 0 or 1.
    'OBP': {'table_header': r'$\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': r'$\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': r'$\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': r'$\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': r'$\bf{G}$', 'format': '.0f'},
    'strikePercentage': {'table_header': r'$\bf{Strike\%}$', 'format': '.1%'},
}
|
| 104 |
+
|
| 105 |
+
# General-purpose accent colours, referenced by index elsewhere in this module
# (e.g. colour_palette[8] for the zero-break guide lines in break_plot).
colour_palette = [
    '#FFB000',
    '#648FFF',
    '#785EF0',
    '#DC267F',
    '#FE6100',
    '#3D1EB2',
    '#894D80',
    '#16AA02',
    '#B5592B',
    '#A3C1ED',
]
|
| 107 |
+
|
| 108 |
+
### GET COLOURS ###
|
| 109 |
+
def get_color(value, normalize, cmap_sum):
    """
    Map a numeric value to a hex colour string.

    Parameters
    ----------
    value : float
        The value to colour.
    normalize : matplotlib.colors.Normalize
        Callable applied to ``value`` before the colormap lookup.
    cmap_sum : matplotlib.colors.Colormap
        Colormap called with the normalised value.

    Returns
    -------
    str
        Hexadecimal colour code produced by ``mcolors.to_hex``.
    """
    # normalise -> colormap lookup -> hex string
    return mcolors.to_hex(cmap_sum(normalize(value)))
|
| 129 |
+
|
| 130 |
+
### PITCH ELLIPSE ###
|
| 131 |
+
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Draw the covariance confidence ellipse of *x* and *y* on *ax*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Anything ``numpy.asarray`` can convert to floats is
        accepted (lists, NumPy arrays, polars/pandas Series).

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.

    facecolor : str
        Fill colour of the ellipse (default ``'none'``).

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to *ax*, or ``None`` when no meaningful ellipse can
        be drawn (fewer than two points, zero/non-finite variance, or a
        ``ValueError`` raised while building the patch).

    Raises
    ------
    ValueError
        If *x* and *y* have different lengths.
    """
    if len(x) != len(y):
        raise ValueError("x and y must be the same size")

    try:
        x_vals = np.asarray(x, dtype=float)
        y_vals = np.asarray(y, dtype=float)

        # A covariance needs at least two observations.
        if x_vals.size < 2:
            return None

        cov = np.cov(x_vals, y_vals)
        var_x, var_y = cov[0, 0], cov[1, 1]

        # Degenerate data (constant axis, NaNs) would give a NaN/zero-size
        # ellipse; skip drawing instead, matching the best-effort contract.
        if not np.all(np.isfinite(cov)) or var_x <= 0 or var_y <= 0:
            return None

        # Clip guards against |pearson| creeping past 1 through rounding,
        # which would make one of the square roots below NaN.
        pearson = np.clip(cov[0, 1] / np.sqrt(var_x * var_y), -1.0, 1.0)

        # Using a special case to obtain the eigenvalues of this
        # two-dimensional dataset.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, linewidth=2, linestyle='--', **kwargs)

        # Scale the unit ellipse by n_std standard deviations on each axis
        # and move it to the centre of the data.
        scale_x = np.sqrt(var_x) * n_std
        mean_x = x_vals.mean()

        scale_y = np.sqrt(var_y) * n_std
        mean_y = y_vals.mean()

        transf = transforms.Affine2D() \
            .rotate_deg(45) \
            .scale(scale_x, scale_y) \
            .translate(mean_x, mean_y)

        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        # Best-effort: an ellipse that cannot be built is simply not drawn.
        return None

    return ax.add_patch(ellipse)
|
| 191 |
+
### VELOCITY KDES ###
|
| 192 |
+
def velocity_kdes(df: pl.DataFrame,
                  ax: plt.Axes,
                  gs: gridspec.GridSpec,
                  gs_x: list,
                  gs_y: list,
                  fig: plt.Figure):
    """
    Plot one velocity distribution per pitch type, stacked vertically.

    Each pitch type gets its own small axis containing a KDE of
    ``start_speed`` (or a single vertical bar when every pitch has the same
    speed), a dashed line at the pitcher's mean and a dotted line at the mean
    ``release_speed`` read from ``functions/statcast_2024_grouped.csv``.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the ``pitch_count``,
        ``pitch_type`` and ``start_speed`` columns.
    ax : plt.Axes
        The outer axis; it is hidden and only carries the title.
    gs : GridSpec
        The GridSpec for the subplot layout.
    gs_x : list
        Row bounds into ``gs``; the first and last entries are used.
    gs_y : list
        Column bounds into ``gs``; the first and last entries are used.
    fig : plt.Figure
        The figure the inner axes are added to.
    """
    # Pitch types ordered by descending pitch_count.
    items_in_order = (
        df.sort("pitch_count", descending=True)['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    # The outer axis is only a title holder; the plots live in an inner grid.
    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})

    inner_grid_1 = gridspec.GridSpecFromSubplotSpec(
        len(items_in_order), 1,
        subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
    ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]

    # Loop-invariant work: shared x-range (rounded out to multiples of 5),
    # tick positions, and the reference table (previously re-read from disk
    # for every pitch type).
    x_low = math.floor(df['start_speed'].min() / 5) * 5
    x_high = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = list(range(x_low, x_high, 5))
    df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')

    last_idx = len(items_in_order) - 1
    for idx, pitch in enumerate(items_in_order):
        axis = ax_top[idx]
        colour = dict_colour[pitch]
        pitch_data = df.filter(pl.col('pitch_type') == pitch)['start_speed']

        distinct_speeds = np.unique(pitch_data)
        if distinct_speeds.size == 1:
            # A KDE is undefined for a single distinct value; draw a bar.
            axis.plot([distinct_speeds[0], distinct_speeds[0]], [0, 1], linewidth=4,
                      color=colour, zorder=20)
        else:
            sns.kdeplot(pitch_data, ax=axis, fill=True,
                        clip=(pitch_data.min(), pitch_data.max()),
                        color=colour)

        # Dashed line: this pitcher's mean speed for the pitch type.
        pitcher_mean = pitch_data.mean()
        y_low, y_high = axis.get_ylim()
        axis.plot([pitcher_mean, pitcher_mean], [y_low, y_high],
                  color=colour, linestyle='--')

        # Dotted line: mean release_speed for this pitch type in the CSV.
        # Skipped when the CSV has no row for the pitch type.
        league_mean = (
            df_statcast_group
            .filter(pl.col('pitch_type') == pitch)['release_speed']
            .mean()
        )
        if league_mean is not None:
            # Re-read the limits: the previous line may have rescaled the axis.
            y_low, y_high = axis.get_ylim()
            axis.plot([league_mean, league_mean], [y_low, y_high],
                      color=colour, linestyle=':')

        axis.set_xlim(x_low, x_high)
        axis.set_xlabel('')
        axis.set_ylabel('')
        if idx < last_idx:
            # Only the bottom axis keeps visible x tick labels.
            axis.spines['top'].set_visible(False)
            axis.spines['right'].set_visible(False)
            axis.spines['left'].set_visible(False)
            axis.tick_params(axis='x', colors='none')

        axis.set_xticks(x_ticks)
        axis.set_yticks([])
        axis.grid(axis='x', linestyle='--')
        # Pitch-type code as a label to the left of each row.
        axis.text(-0.01, 0.5, pitch, transform=axis.transAxes,
                  fontsize=14, va='center', ha='right')

    bottom = ax_top[-1]
    bottom.spines['top'].set_visible(False)
    bottom.spines['right'].set_visible(False)
    bottom.spines['left'].set_visible(False)
    bottom.set_xticks(x_ticks)
    bottom.set_xlabel('Velocity (mph)')
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
### TJ STUFF+ ROLLING ###
|
| 280 |
+
def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Plot the rolling average of tjStuff+ for different pitch types.

    Pitch types whose largest ``pitch_count`` is below ``window`` are not
    drawn.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the ``pitch_count``,
        ``pitch_type`` and ``tj_stuff_plus`` columns.
    window : int
        The window size for calculating the rolling average.
    ax : plt.Axes
        The axis to plot on.
    """
    # Pitch types ordered by descending pitch_count.
    items_in_order = (
        df.sort("pitch_count", descending=True)['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    # Plot the rolling average for each pitch type
    for pitch in items_in_order:
        # Filter once per pitch type and reuse the result.
        pitch_rows = df.filter(pl.col('pitch_type') == pitch)
        max_count = pitch_rows['pitch_count'].max()
        if max_count is None or max_count < window:
            continue

        sns.lineplot(
            x=range(1, max_count + 1),
            y=pitch_rows['tj_stuff_plus'].rolling_mean(window),
            color=dict_colour[pitch],
            ax=ax,
            linewidth=3
        )

    # Start the x-axis at `window`: the rolling mean is undefined before that.
    ax.set_xlim(window, df['pitch_count'].max())
    ax.set_ylim(70, 130)
    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
### TJ STUFF+ ROLLING ###
|
| 324 |
+
def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Plot the rolling average of tjStuff+ for different pitch types over games.

    Games are numbered 1..N in order of first appearance of ``game_id``.
    For each pitch type the per-game mean ``tj_stuff_plus`` is computed,
    games in which the pitch type does not appear are filled from the
    neighbouring games and marked with a white dot, and the rolling mean
    over ``window`` games is drawn.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads ``game_id``,
        ``game_date``, ``pitch_type``, ``pitch_description`` and
        ``tj_stuff_plus``.
    window : int
        The window size for calculating the rolling average.
    ax : plt.Axes
        The axis to plot on.
    """
    # Map game_id to sequential numbers (1-based, order of first appearance)
    date_to_number = {
        game: number
        for number, game in enumerate(df['game_id'].unique(maintain_order=True), start=1)
    }

    # Add a column with the sequential game numbers
    df_plot = df.with_columns(
        pl.col("game_id")
        .map_elements(lambda x: date_to_number.get(x, x), return_dtype=pl.Int64)
        .alias("start_number")
    )

    # Group by relevant columns and calculate mean tj_stuff_plus
    plot_game_roll = df_plot.group_by(
        ['start_number', 'game_id', 'game_date', 'pitch_type', 'pitch_description']
    ).agg(
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus')
    ).sort('start_number', descending=False)

    # Get the list of pitch types ordered by frequency
    sorted_value_counts = df['pitch_type'].value_counts().sort('count', descending=True)
    items_in_order = sorted_value_counts['pitch_type'].to_list()

    # Frame holding every game number; identical for each pitch type, so it
    # is built once instead of inside the loop.
    all_games = pl.DataFrame({"start_number": list(date_to_number.values())})

    # Stays None when there is no pitch type to plot, so the x-limit code
    # after the loop does not hit an unbound name.
    df_item = None

    # Plot the rolling average for each pitch type
    for i in items_in_order:
        # Outer-join against all games so missing games appear as null rows
        # (their number lands in `start_number_right`), then fill the gaps
        # from the nearest game that has data.
        df_item = plot_game_roll.filter(pl.col('pitch_type') == i)
        df_item = df_item.with_columns(
            pl.col("start_number").cast(pl.Int64)
        ).join(
            all_games,
            on="start_number",
            how="outer"
        ).sort("start_number_right").with_columns([
            pl.col("start_number").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("tj_stuff_plus").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_type").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_description").fill_null(strategy="forward").fill_null(strategy="backward")
        ])

        sns.lineplot(x=range(1, df_item['start_number_right'].max() + 1),
                     y=df_item.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window),
                     color=dict_colour[i],
                     ax=ax, linewidth=3)

        # Highlight missing game data points: rows whose game_id is still
        # null came from the join, i.e. the pitch type was absent that game.
        # Selected in one pass rather than re-evaluating is_null() per row.
        missing = df_item.filter(pl.col('game_id').is_null())
        if len(missing) > 0:
            sns.scatterplot(x=missing['start_number_right'].to_list(),
                            y=missing['tj_stuff_plus'].to_list(),
                            color='white',
                            ec='black',
                            ax=ax,
                            zorder=100)

    # Start the x-axis at `window`; the upper limit comes from the last
    # pitch type processed (unchanged from the original behaviour).
    if df_item is not None:
        ax.set_xlim(window, df_item['start_number'].max())
    ax.set_ylim(70, 130)
    ax.set_xlabel('Games', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def break_plot(df: pl.DataFrame, ax: plt.Axes):
    """
    Plot the pitch breaks for different pitch types.

    Draws a shaded confidence ellipse per pitch type (when it has more than
    four pitches), a scatter of every pitch's ``hb`` against ``ivb``, zero
    guide lines, and arm-side / glove-side labels. For a left-handed pitcher
    the x-axis is inverted.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads ``pitch_count``,
        ``pitch_type``, ``hb``, ``ivb`` and ``pitcher_hand`` (first row only).
    ax : plt.Axes
        The axis to plot on.
    """
    # Get unique pitch types sorted by pitch count
    label_labels = (
        df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    # Plot confidence ellipses for each pitch type
    for label in label_labels:
        subset = df.filter(pl.col('pitch_type') == label)
        if len(subset) <= 4:
            continue
        try:
            confidence_ellipse(subset['hb'], subset['ivb'], ax=ax,
                               edgecolor=dict_colour[label], n_std=2,
                               facecolor=dict_colour[label], alpha=0.2)
        except ValueError:
            # One bad ellipse should not abort the whole plot (the original
            # `return` here left the axes half-drawn).
            continue

    hand = df['pitcher_hand'][0]

    # Plot scatter plot for pitch breaks (the R and L branches were identical)
    if hand in ('R', 'L'):
        sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'],
                        palette=dict_colour, ec='black', alpha=1, zorder=2)

    # Set axis limits
    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    # Add horizontal and vertical lines
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)

    # Set axis labels and title
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    # Remove legend (absent when nothing was scattered)
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    # Set tick labels
    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    # Add text annotations for glove side and arm side
    label_box = dict(facecolor='white', edgecolor='black')
    if hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
    elif hand == 'L':
        # Flip the x-axis for left-handers; the label anchors are swapped to match.
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)

    # Set aspect ratio and format axis ticks
    ax.set_aspect('equal', adjustable='box')
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
|
| 460 |
+
|
| 461 |
+
# DEFINE STRIKE ZONE
# Closed rectangle outline: five vertices, the last repeating the first so
# that a single line plot draws all four sides (used by draw_line below).
strike_zone = pl.DataFrame(
    dict(
        PlateLocSide=[-0.9, -0.9, 0.9, 0.9, -0.9],
        PlateLocHeight=[1.5, 3.5, 3.5, 1.5, 1.5],
    )
)
|
| 466 |
+
|
| 467 |
+
### STRIKE ZONE ###
|
| 468 |
+
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """
    Draw the strike zone outline and a home-plate outline on ``axis``.

    Parameters
    ----------
    axis : matplotlib.axes.Axes
        The axis to draw on.
    alpha_spot : float, optional
        Transparency applied to every line (default is 1).
    catcher_p : bool, optional
        Selects which of the two home-plate outlines is drawn: the
        catcher's-perspective one when True (default), the
        pitcher's-perspective one otherwise.
    """
    # Strike zone rectangle, taken from the module-level `strike_zone` frame.
    zone_x = strike_zone['PlateLocSide'].to_list()
    zone_y = strike_zone['PlateLocHeight'].to_list()
    axis.plot(zone_x, zone_y, color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Home plate as five straight segments, each given as (xs, ys).
    if catcher_p:
        # Catcher's perspective
        plate_segments = (
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        )
    else:
        # Pitcher's perspective
        plate_segments = (
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-0.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        )

    for seg_x, seg_y in plate_segments:
        axis.plot(seg_x, seg_y, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
|
| 499 |
+
|
| 500 |
+
def location_plot(df: pl.DataFrame, ax: plt.Axes, hand: str):
    """
    Plot average pitch locations per pitch type against one batter hand.

    For every pitch type with at least five pitches to ``hand`` a shaded
    confidence ellipse of (``px``, ``pz``) is drawn; each pitch type's mean
    location is then scattered with a marker sized by its share of pitches,
    on top of the strike zone and home plate outline.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads ``pitch_count``,
        ``pitch_type``, ``batter_hand``, ``start_speed``, ``px`` and ``pz``.
    ax : plt.Axes
        The axis to plot on.
    hand : str
        The batter hand ('L' for left-handed, 'R' for right-handed).
    """
    # Pitch types in the order produced by sorting on pitch_count / pitch_type
    ordered_types = (
        df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    # Confidence ellipse per pitch type, only with five or more pitches
    for pitch in ordered_types:
        versus = df.filter((pl.col('pitch_type') == pitch) & (pl.col('batter_hand') == hand))
        if len(versus) < 5:
            continue
        colour = dict_colour[pitch]
        confidence_ellipse(versus['px'], versus['pz'], ax=ax, edgecolor=colour,
                           n_std=1.5, facecolor=colour, alpha=0.3)

    # Mean location and pitch count per pitch type against this hand
    hand_rows = df.filter(pl.col("batter_hand") == hand)
    pitch_location_group = hand_rows.group_by("pitch_type").agg([
        pl.col("start_speed").count().alias("pitches"),
        pl.col("px").mean().alias("px"),
        pl.col("pz").mean().alias("pz")
    ])

    # Share of all pitches to this hand, used for the marker size
    total_pitches = pitch_location_group['pitches'].sum()
    pitch_location_group = pitch_location_group.with_columns(
        (pl.col("pitches") / total_pitches).alias("pitch_percent")
    )

    # One marker per pitch type at its mean location
    marker_sizes = pitch_location_group['pitch_percent'] * 750
    sns.scatterplot(ax=ax,
                    x=pitch_location_group['px'],
                    y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_type'],
                    palette=dict_colour,
                    ec='black',
                    s=marker_sizes,
                    linewidth=2,
                    zorder=2)

    # Axis cosmetics; the order of these calls is kept as in the original
    ax.axis('square')
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))
    # A legend only exists when something was scattered
    if len(pitch_location_group['px']) > 0:
        ax.get_legend().remove()
    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches", fontdict=font_properties_titles)
|
| 554 |
+
|
| 555 |
+
|
| 556 |
+
def summary_table(df: pl.DataFrame, ax: plt.Axes):
    """
    Create a summary table of pitch data.

    The pitches in ``df`` are aggregated per ``pitch_description`` (plus one
    "All" row), drawn as a matplotlib table on ``ax``, and selected cells are
    shaded against the reference values read from
    ``functions/statcast_2024_grouped.csv``. A pitch-type legend is placed
    above the table.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data.
    ax : plt.Axes
        The axis to plot the table on.
    """
    # Total number of pitches as a plain scalar, so the share below is an
    # ordinary expression / number division.
    total_pitches = df['is_pitch'].sum()

    # Aggregate pitch data by pitch description
    df_agg = df.group_by("pitch_description").agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / total_pitches).alias('count_percent'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('spin_rate').mean().alias('spin_rate'),
        pl.col('vaa').mean().alias('vaa'),
        pl.col('haa').mean().alias('haa'),
        pl.col('z0').mean().alias('z0'),
        pl.col('x0').mean().alias('x0'),
        pl.col('extension').mean().alias('extension'),
        # Mean spin direction rendered as an "H:MM" clock-face string.
        (((pl.col('spin_direction').mean() + 180) % 360 // 30) +
         (((pl.col('spin_direction').mean() + 180) % 360 % 30 / 30 / 100 * 60).round(2) * 10).round(0) // 1.5 / 4)
        .cast(pl.Float64).map_elements(lambda x: f"{int(x)}:{int((x % 1) * 60):02d}", return_dtype=pl.Utf8).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.col('pitch_grade').mean().alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    ).sort("count", descending=True)

    # Aggregate all pitch data; per-pitch-type metrics are left empty here
    df_agg_all = df.group_by(pl.lit("All").alias("pitch_description")).agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / total_pitches).alias('count_percent'),
        pl.lit(None).alias('start_speed'),
        pl.lit(None).alias('ivb'),
        pl.lit(None).alias('hb'),
        pl.lit(None).alias('spin_rate'),
        pl.lit(None).alias('vaa'),
        pl.lit(None).alias('haa'),
        pl.lit(None).alias('z0'),
        pl.lit(None).alias('x0'),
        pl.col('extension').mean().alias('extension'),
        pl.lit(None).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.lit(None).alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    )

    # Concatenate aggregated data
    df_agg = pl.concat([df_agg, df_agg_all]).fill_nan(None)
    n_rows = len(df_agg)
    n_cols = len(df_agg.columns)

    # Load statcast pitch summary data
    statcast_pitch_summary = pl.read_csv('functions/statcast_2024_grouped.csv')

    # Create table; the first (pitch name) column is wider than the rest
    table = ax.table(cellText=df_agg.fill_nan('—').fill_null('—').to_numpy(), colLabels=df_agg.columns, cellLoc='center',
                     colWidths=[2.3] + [1] * (n_cols - 1), bbox=[0.0, 0, 1, 0.8])
    cells = table.get_celld()

    # Set table properties
    table.auto_set_font_size(False)
    table.set_fontsize(14)
    table.scale(1, 0.5)

    # Set a larger font size for the value rows (row 0 is the header)
    for i in range(1, n_rows + 1):
        for j in range(n_cols):
            cells[i, j].set_fontsize(18)

    # Define color maps
    cmap_sum = mcolors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])
    cmap_sum_r = mcolors.LinearSegmentedColormap.from_list("", ['#FFB000', '#FFFFFF', '#648FFF'])

    # (table column index, reference column in the statcast CSV or None, low, high).
    # With a reference column, low/high are multipliers of that column's mean
    # for the matching pitch type; with None they are absolute bounds.
    columns_to_color = [(3, 'release_speed', 0.95, 1.05), (11, 'release_extension', 0.9, 1.1), (13, None, 80, 120),
                        (14, None, 30, 70), (15, 'in_zone_rate', 0.7, 1.3), (16, 'chase_rate', 0.7, 1.3),
                        (17, 'whiff_rate', 0.7, 1.3), (18, 'xwoba', 0.7, 1.3)]

    # Update table cells with colors and text properties
    for i in range(n_rows):
        pitch_check = dict_pitch_desc_type[df_agg['pitch_description'][i]]
        name_cell = cells[(i + 1, 0)]
        cell_text = name_cell.get_text().get_text()

        if cell_text != 'All':
            name_cell.set_facecolor(dict_pitch_name[cell_text])
            # Black text for these three pitch names, white for the others
            text_props = {'color': '#000000', 'fontweight': 'bold'} if cell_text in ['Split-Finger', 'Slider', 'Changeup'] else {'color': '#ffffff', 'fontweight': 'bold'}
            name_cell.set_text_props(**text_props)
            if cell_text == 'Four-Seam Fastball':
                name_cell.get_text().set_text('4-Seam')

        select_df = statcast_pitch_summary.filter(statcast_pitch_summary['pitch_type'] == pitch_check)

        # Apply color to specific columns based on normalized values
        for col, stat, vmin_factor, vmax_factor in columns_to_color:
            cell_value = cells[(i + 1, col)].get_text().get_text()
            if cell_value != '—':
                if stat:
                    reference_mean = select_df[stat].mean()
                    vmin = reference_mean * vmin_factor
                    vmax = reference_mean * vmax_factor
                else:
                    vmin, vmax = vmin_factor, vmax_factor
                normalize = mcolors.Normalize(vmin=vmin, vmax=vmax)
                # Column 18 (xwOBA on contact) uses the reversed colour map
                cmap = cmap_sum if col != 18 else cmap_sum_r
                cells[(i + 1, col)].set_facecolor(get_color(float(cell_value.strip('%')), normalize, cmap))

    # Style the label of the last table row (the "All" row)
    cells[(n_rows, 0)].set_text_props(color='#000000', fontweight='bold')

    # Update column names
    new_column_names = ['$\\bf{Pitch\\ Name}$', '$\\bf{Count}$', '$\\bf{Pitch\\%}$', '$\\bf{Velocity}$', '$\\bf{iVB}$',
                        '$\\bf{HB}$', '$\\bf{Spin}$', '$\\bf{VAA}$', '$\\bf{HAA}$', '$\\bf{vRel}$', '$\\bf{hRel}$',
                        '$\\bf{Ext.}$', '$\\bf{Axis}$', '$\\bf{tjStuff+}$', '$\\bf{Grade}$', '$\\bf{Zone\\%}$',
                        '$\\bf{Chase\\%}$', '$\\bf{Whiff\\%}$', '$\\bf{xwOBA}$\n$\\bf{Contact}$']

    for i, col_name in enumerate(new_column_names):
        cells[(0, i)].get_text().set_text(col_name)

    # Format cell values (done after colouring, which reads the raw numbers)
    def format_cells(columns, fmt):
        for col in columns:
            col_idx = df_agg.columns.index(col)
            for row in range(1, n_rows + 1):
                cell_value = cells[(row, col_idx)].get_text().get_text()
                if cell_value != '—':
                    cells[(row, col_idx)].get_text().set_text(fmt.format(float(cell_value.strip('%'))))

    format_cells(['start_speed', 'ivb', 'hb', 'vaa', 'haa', 'z0', 'x0', 'extension'], '{:,.1f}')
    format_cells(['xwobacon'], '{:,.3f}')
    format_cells(['count_percent', 'zone_percent', 'chase_percent', 'whiff_percent'], '{:,.1%}')
    format_cells(['tj_stuff_plus', 'pitch_grade', 'spin_rate'], '{:,.0f}')

    # Create legend for pitch types
    items_in_order = (df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy())
    colour_pitches = [dict_colour[x] for x in items_in_order]
    label = [dict_pitch[x] for x in items_in_order]
    handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]

    # More than five entries: use a smaller font and markers
    crowded = len(label) > 5
    legend_font_size = 16 if crowded else 20
    legend_marker_scale = 1.7 if crowded else 2
    # NOTE(review): the font family 'calibi' is kept exactly as written;
    # it may have been meant to be 'calibri' - confirm before changing.
    ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
              fancybox=True, loc='lower center', fontsize=legend_font_size, framealpha=1.0,
              markerscale=legend_marker_scale, prop={'family': 'calibi', 'size': legend_font_size})
    ax.axis('off')
|
| 707 |
+
|
| 708 |
+
def plot_footer(ax:plt.Axes):
    """
    Write the footer of the figure onto ``ax``.

    Places the author credit top-left, the data/image credits top-right and
    a short explanation of the colour coding and tjStuff+ in the centre,
    then hides the axis.

    Parameters
    ----------
    ax : plt.Axes
        The axis to write the footer text on.
    """
    explanation_lines = (
        'Colour Coding Compares to League Average By Pitch',
        'tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type',
        'tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
        'Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type',
    )
    # The block starts and ends with a newline, like the original literal.
    explanation = '\n' + '\n'.join(explanation_lines) + '\n'

    credit_style = {'va': 'top', 'fontsize': 24}

    ax.text(0, 1, 'By: @TJStats', ha='left', **credit_style)
    ax.text(0.5, 0.25, explanation, ha='center', va='bottom', fontsize=16)
    ax.text(1, 1, 'Data: MLB, Fangraphs\nImages: MLB, ESPN', ha='right', **credit_style)
    ax.axis('off')
|
| 721 |
+
|
| 722 |
+
|
| 723 |
+
# Function to get an image from a URL and display it on the given axis
|
| 724 |
+
def player_headshot(player_input: str, ax: plt.Axes, sport_id: int,season: int):
    """
    Download a player's headshot from img.mlbstatic.com and draw it on ``ax``.

    If the image cannot be fetched or decoded, the axis is simply hidden and
    nothing is drawn.

    Parameters
    ----------
    player_input : str
        The player id interpolated into the image URL.
    ax : plt.Axes
        The axis to draw the headshot on.
    sport_id : int
        Sport id; ``1`` selects the "silo" headshot, any other value the
        "milb" headshot.
    season : int
        Not used by this function; kept so existing callers keep working.
    """
    base_url = 'https://img.mlbstatic.com/mlb-photos/image/upload/'
    if int(sport_id) == 1:
        url = (f'{base_url}d_people:generic:headshot:67:current.png'
               f'/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png')
        extent = [0, 1, 0, 1]
    else:
        url = f'{base_url}c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'
        # The milb image is drawn narrower and centred in the 0-1 range
        extent = [1/6, 5/6, 0, 1]

    try:
        # A timeout keeps a stalled image server from hanging the whole figure
        response = requests.get(url, timeout=10)
        img = Image.open(BytesIO(response.content))
    except (requests.RequestException, PIL.UnidentifiedImageError):
        # The headshot is decorative: leave the axis empty instead of failing
        ax.axis('off')
        return

    # Display the image on the axis
    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    ax.imshow(img, extent=extent, origin='upper')

    # Turn off the axis
    ax.axis('off')
|
| 757 |
+
|
| 758 |
+
|
| 759 |
+
def player_bio(pitcher_id: str, ax: plt.Axes,sport_id: int,year_input: int):
    """
    Write the player's name, bio line and season heading onto ``ax``.

    The player record is requested from the MLB Stats API ``people``
    endpoint, and the league abbreviation from its ``sports`` endpoint.

    Parameters
    ----------
    pitcher_id : str
        The player id sent as ``personIds``.
    ax : plt.Axes
        The axis to write the text on.
    sport_id : int
        Sport id used to look up the league abbreviation.
    year_input : int
        The season year shown in the heading.
    """
    # Construct the URL to fetch player data
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"

    # Send a GET request to the URL and parse the JSON response
    person = requests.get(url, timeout=10).json()['people'][0]

    # Extract player information from the JSON data
    player_name = person['fullName']
    pitcher_hand = person['pitchHand']['code']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    # Display the player's name, handedness, age, height, and weight on the axis
    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=56)
    ax.text(0.5, 0.7, f'{pitcher_hand}HP, Age:{age}, {height}/{weight}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.45, 'Season Pitching Summary', va='top', ha='center', fontsize=40)

    # Make API call to retrieve sports information
    response = requests.get(url='https://statsapi.mlb.com/api/v1/sports', timeout=10).json()

    # Convert the JSON response into a Polars DataFrame
    df_sport_id = pl.DataFrame(response['sports'])
    # Cast so the comparison is integer against integer, matching the
    # int(sport_id) handling in player_headshot.
    abb = df_sport_id.filter(pl.col('id') == int(sport_id))['abbreviation'][0]

    ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')

    # Turn off the axis
    ax.axis('off')
|
| 789 |
+
|
| 790 |
+
|
| 791 |
+
def plot_logo(pitcher_id: str, ax: plt.Axes,df_team: pl.DataFrame,df_players : pl.DataFrame):
    """
    Draw the logo of the pitcher's MLB parent organisation on ``ax``.

    The pitcher's team is looked up in ``df_players``, resolved to a parent
    organisation abbreviation through the MLB Stats API and ``df_team``, and
    the matching ESPN logo is downloaded and drawn. If any lookup fails, the
    axis is hidden and nothing is drawn.

    Parameters
    ----------
    pitcher_id : str
        The player id matched against ``df_players['player_id']``.
    ax : plt.Axes
        The axis to draw the logo on.
    df_team : pl.DataFrame
        Team table; the columns ``team_id``, ``parent_org_id`` and
        ``parent_org_abbreviation`` are read.
    df_players : pl.DataFrame
        Player table; the columns ``player_id`` and ``team`` are read.
    """
    # Team abbreviations that have an ESPN logo
    team_abbreviations = [
        "AZ", "ATL", "BAL", "BOS", "CHC", "CWS", "CIN", "CLE", "COL", "DET",
        "HOU", "KC", "LAA", "LAD", "MIA", "MIL", "MIN", "NYM", "NYY", "OAK",
        "PHI", "PIT", "SD", "SF", "SEA", "STL", "TB", "TEX", "TOR", "WSH",
        "ATH",
    ]
    # ESPN's file name is the lower-cased abbreviation except for these
    espn_slug_overrides = {"AZ": "ari", "CWS": "chw", "ATH": "oak"}
    image_dict = {
        team: "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/"
              f"{espn_slug_overrides.get(team, team.lower())}.png&h=500&w=500"
        for team in team_abbreviations
    }

    try:
        team_id = df_players.filter(pl.col('player_id') == pitcher_id)['team'][0]

        # Construct the URL to fetch team data
        url_team = f'https://statsapi.mlb.com/api/v1/teams/{team_id}'

        # Send a GET request to the team URL and parse the JSON response
        team = requests.get(url_team, timeout=10).json()['teams'][0]

        # Extract the team abbreviation: use the team itself when its id is a
        # parent organisation id, otherwise go through its parentOrgId
        if team['id'] in df_team['parent_org_id']:
            team_abb = df_team.filter(pl.col('team_id') == team['id'])['parent_org_abbreviation'][0]
        else:
            team_abb = df_team.filter(pl.col('parent_org_id') == team['parentOrgId'])['parent_org_abbreviation'][0]

        # Get the logo URL from the image dictionary using the team abbreviation
        logo_url = image_dict[team_abb]

        # Send a GET request to the logo URL
        response = requests.get(logo_url, timeout=10)

        # Open the image from the response content
        img = Image.open(BytesIO(response.content))

        # Display the image on the axis
        ax.set_xlim(0, 1.3)
        ax.set_ylim(0, 1)
        ax.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')

        # Turn off the axis
        ax.axis('off')
    except (KeyError, IndexError):
        # KeyError: missing JSON key or no logo for the abbreviation.
        # IndexError: a filter above matched no rows, so [0] had nothing to return.
        ax.axis('off')
        return
|
| 865 |
+
|
| 866 |
+
# Value substituted into the `month` query parameter of the Fangraphs
# leaderboard URL for each supported split.
splits = dict(All=0, LHH=13, RHH=14)

# Suffix appended to the date-range title of the Fangraphs table per split.
splits_title = dict(All='', LHH=' vs LHH', RHH=' vs RHH')
|
| 879 |
+
|
| 880 |
+
|
| 881 |
+
def fangraphs_pitching_leaderboards(season: int,
                                    split: str,
                                    start_date: str = '2024-01-01',
                                    end_date: str = '2024-12-31'):
    """
    Fetch pitching leaderboards data from Fangraphs.

    Parameters
    ----------
    season : int
        The season year.
    split : str
        The split type (e.g., 'All', 'LHH', 'RHH'); must be a key of the
        module-level ``splits`` mapping.
    start_date : str, optional
        The start date for the data (default is '2024-01-01').
    end_date : str, optional
        The end date for the data (default is '2024-12-31').

    Returns
    -------
    pl.DataFrame
        The DataFrame containing the pitching leaderboards data.

    Raises
    ------
    KeyError
        If ``split`` is not a key of ``splits``.
    """
    # Built as one unbroken string: a triple-quoted literal would embed its
    # line breaks in the query string.
    url = (
        "https://www.fangraphs.com/api/leaders/major-league/data"
        f"?age=&pos=all&stats=pit&lg=all&season={season}&season1={season}"
        f"&startdate={start_date}&enddate={end_date}&ind=0&qual=0&type=8"
        f"&month={splits[split]}&pageitems=500000"
    )

    # The timeout keeps a stalled connection from blocking indefinitely
    data = requests.get(url, timeout=30).json()
    df = pl.DataFrame(data=data['data'], infer_schema_length=1000)
    return df
|
| 912 |
+
|
| 913 |
+
|
| 914 |
+
def fangraphs_table(df: pl.DataFrame,
                    ax: plt.Axes,
                    player_input: str,
                    season: int,
                    split: str):
    """
    Draw a one-row table of Fangraphs pitching stats for a single player.

    The date range is taken from the first and last ``game_date`` in ``df``;
    the leaderboard for that range is fetched and reduced to the row whose
    ``xMLBAMID`` equals ``player_input``.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; only the ``game_date`` column is read.
    ax : plt.Axes
        The axis to plot the table on.
    player_input : str
        The player id matched against the ``xMLBAMID`` column.
    season : int
        The season year.
    split : str
        The split type (e.g., 'All', 'LHH', 'RHH').
    """
    game_dates = df['game_date']
    start_date, end_date = game_dates[0], game_dates[-1]

    # Leaderboard for the date range, narrowed to the requested player
    leaderboard = fangraphs_pitching_leaderboards(season=season,
                                                  split=split,
                                                  start_date=start_date,
                                                  end_date=end_date)
    player_row = leaderboard.filter(pl.col('xMLBAMID') == player_input)
    player_row = player_row.with_columns(
        (pl.col('Strikes') / pl.col('Pitches')).alias('strikePercentage'),
    )

    # Columns shown in the table, in display order
    plot_table = player_row.select(['IP', 'WHIP', 'ERA', 'TBF', 'FIP', 'K%', 'BB%', 'K-BB%','strikePercentage'])
    stat_names = plot_table.columns

    # Format each value with its configured format; '---' passes through
    plot_table_values = []
    for stat in stat_names:
        raw_value = plot_table[stat][0]
        if raw_value != '---':
            plot_table_values.append(format(raw_value, fangraphs_stats_dict[stat]['format']))
        else:
            plot_table_values.append('---')

    # Create the table
    table_fg = ax.table(cellText=[plot_table_values], colLabels=stat_names, cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])
    table_fg.set_fontsize(20)

    # Replace the raw column names with their display headers
    headers = [fangraphs_stats_dict[stat]['table_header'] for stat in stat_names]
    header_cells = table_fg.get_celld()
    for position, header in enumerate(headers):
        header_cells[(0, position)].get_text().set_text(header)

    # Title above the table: date range plus the split suffix
    ax.text(0.5, 0.9, f'{start_date} to {end_date}{splits_title[split]}', va='bottom', ha='center',
            fontsize=36, fontstyle='italic')
    ax.axis('off')
|
| 969 |
+
|
| 970 |
+
|
| 971 |
+
def stat_summary_table(df: pl.DataFrame,
                       player_input: int,
                       sport_id: int,
                       ax: plt.Axes,
                       split: str = 'All'):
    """
    Draw a one-row table of the pitcher's stat line on ``ax``.

    Three layouts are produced, depending on the data:

    * all pitches come from one game (first and last ``game_id`` equal):
      a box-score style line for that game;
    * several games and ``sport_id`` is not 1: rate stats computed from the
      MLB Stats API date-range split;
    * several games and ``sport_id`` is 1: delegated to ``fangraphs_table``.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; ``game_date``, ``game_id``, ``is_whiff`` and
        ``batter_team`` are read.
    player_input : int
        The player id.
    sport_id : int
        Sport id; ``1`` selects the major-league context.
    ax : plt.Axes
        The axis to plot the table on.
    split : str, optional
        Split passed through to ``fangraphs_table`` (default is 'All').
    """
    first_game_date = df['game_date'][0]
    last_game_date = df['game_date'][-1]
    single_game = df['game_id'][0] == df['game_id'][-1]

    if not single_game and sport_id == 1:
        # Decided before the Stats API request below, whose result this
        # branch never uses. The season is taken from the data so that
        # non-2024 date ranges query the matching Fangraphs season.
        fangraphs_table(df=df,
                        ax=ax,
                        player_input=player_input,
                        season=pd.to_datetime(first_game_date).year,
                        split=split)
        return

    start_date_format = str(pd.to_datetime(first_game_date).strftime('%m/%d/%Y'))
    end_date_format = str(pd.to_datetime(last_game_date).strftime('%m/%d/%Y'))

    appContext = 'majorLeague' if sport_id == 1 else 'minorLeague'

    pitcher_stats_call = requests.get(f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},startDate={start_date_format},endDate={end_date_format})', timeout=10).json()
    # Only the last split of the date-range stats is used
    stat_line = pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']
    pitcher_stats_call_df = pl.DataFrame(data=dict(stat_line))

    def as_percent(ratio):
        # Element-wise "<value>%" string. Expr.str.concat joins the values of
        # a column with the delimiter instead of appending it to each value,
        # so it is not used here.
        return (ratio * 100).round(1).cast(pl.Utf8) + pl.lit('%')

    pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
        pl.lit(df['is_whiff'].sum()).alias('whiffs'),
        as_percent(pl.col('strikeOuts') / pl.col('battersFaced')).alias('k_percent'),
        as_percent(pl.col('baseOnBalls') / pl.col('battersFaced')).alias('bb_percent'),
        as_percent((pl.col('strikeOuts') - pl.col('baseOnBalls')) / pl.col('battersFaced')).alias('k_bb_percent'),
        # FIP = (13*HR + 3*(BB + HBP) - 2*K) / IP + 3.15, with IP = outs / 3
        (((pl.col('homeRuns')*13 + 3*((pl.col('baseOnBalls'))+(pl.col('hitByPitch')))-2*(pl.col('strikeOuts'))))/((pl.col('outs'))/3)+3.15)
        .round(2).map_elements(lambda x: f"{x:.2f}", return_dtype=pl.Utf8).alias('fip'),
        as_percent(pl.col('strikes') / pl.col('numberOfPitches')).alias('strikePercentage'),
    )

    if single_game:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched','battersFaced','earnedRuns','hits','strikeOuts','baseOnBalls','hitByPitch','homeRuns','strikePercentage','whiffs'])
        new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{ER}$','$\\bf{H}$','$\\bf{K}$','$\\bf{BB}$','$\\bf{HBP}$','$\\bf{HR}$','$\\bf{Strike\\%}$','$\\bf{Whiffs}$']
        title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'
    else:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched','battersFaced','whip','era','fip','k_percent','bb_percent','k_bb_percent','strikePercentage'])
        new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{WHIP}$','$\\bf{ERA}$','$\\bf{FIP}$','$\\bf{K\\%}$','$\\bf{BB\\%}$','$\\bf{K-BB\\%}$','$\\bf{Strike\\%}$']
        title = f'{df["game_date"][0]} to {df["game_date"][-1]}'

    table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(), colLabels=pitcher_stats_call_df_small.columns, cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])

    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    # Replace the raw column names with their display headers
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    ax.text(0.5, 0.9, title, va='bottom', ha='center',
            fontsize=36, fontstyle='italic')

    ax.axis('off')
|
functions/statcast_2024_grouped.csv
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pitch_type,pitch,release_speed,pfx_z,pfx_x,release_spin_rate,release_pos_x,release_pos_z,release_extension,delta_run_exp,swing,whiff,in_zone,out_zone,chase,xwoba,pitch_usage,whiff_rate,in_zone_rate,chase_rate,delta_run_exp_per_100,all
|
| 2 |
+
CH,74155,85.46226725895522,5.247514143364433,-3.9745011679246045,1803.342540762527,-0.5077629855663421,5.740925968432281,6.449406057002311,204.631,37385,11538,28912,45151,15250,0.28973564881286695,0.10218846333521206,0.30862645446034503,0.38988604949093114,0.3377555314389493,-0.27595037421616886,
|
| 3 |
+
CS,22,66.38181818181819,-7.232727272727273,5.176363636363637,2039.2727272727273,-1.7981818181818183,6.5177272727272735,6.0636363636363635,-0.6290000000000001,9,2,10,12,2,0.13466666666666668,3.0316852449257168e-05,0.2222222222222222,0.45454545454545453,0.16666666666666666,2.85909090909091,
|
| 4 |
+
CU,47579,79.40938533133989,-9.345106445703216,4.516206279348902,2568.8591051473077,-0.6765712059634863,5.9438438375202685,6.401792908519479,93.57199999999999,19910,6150,20751,26738,7749,0.28049767649520974,0.0655657055765094,0.3088900050226017,0.4361377918829736,0.28981225222529733,-0.1966665966077471,
|
| 5 |
+
EP,576,50.51909722222222,16.357291666666665,-3.8287500000000003,1256.7152777777778,-0.9668749999999999,6.647100694444444,4.442013888888889,23.643,252,7,207,369,106,0.3971430703517588,0.0007937503186714604,0.027777777777777776,0.359375,0.2872628726287263,-4.104687500000001,
|
| 6 |
+
FA,635,67.81354330708662,15.865511811023623,-3.7226456692913388,1674.0144694533763,-1.1163779527559055,6.317716535433071,4.92488188976378,15.495,284,29,296,339,73,0.43393490999999995,0.0008750546047853774,0.10211267605633803,0.46614173228346456,0.2153392330383481,-2.4401574803149604,
|
| 7 |
+
FC,58379,89.56435813713696,8.08895396195288,1.5509243697478992,2389.231715947733,-0.9745362684951281,5.8461769002079365,6.403954996645393,-20.390000000000015,28753,6674,30002,28189,7757,0.34077822947428493,0.08044852405159929,0.23211490974854798,0.5139176758765994,0.2751782610238036,0.034926942907552404,
|
| 8 |
+
FF,230412,94.27369496062718,15.720274827472318,-3.1074418968484365,2296.591789895323,-0.7685432927147252,5.821400777026439,6.524392110813926,-80.28400000000002,113157,24741,127386,102722,24808,0.3401256910065045,0.3175166639335565,0.21864312415493517,0.5528618301130149,0.2415062012032476,0.03484367133656234,
|
| 9 |
+
FO,168,82.07916666666667,1.7357142857142858,0.1378571428571428,946.8154761904761,-0.5333333333333333,5.8914285714285715,6.666666666666667,2.539,89,29,60,108,43,0.27798747368421056,0.0002315105096125093,0.3258426966292135,0.35714285714285715,0.39814814814814814,-1.511309523809524,
|
| 10 |
+
FS,21727,86.31228885718231,2.979608781700189,-8.76550651263405,1302.3992981808108,-1.4640824780227366,5.742066553136651,6.508958525345622,-16.641000000000005,11333,3906,7982,13745,4946,0.2548785060302361,0.02994064787113684,0.34465719579987647,0.3673769963639711,0.3598399417970171,0.07659133796658538,
|
| 11 |
+
KC,11916,81.79965592480698,-9.370896273917422,4.895297079556898,2444.1642796967144,-0.8788083249412554,5.940037764350453,6.434007553503986,-12.997000000000003,5312,1860,4858,7058,2316,0.25845137325418993,0.016420709717515837,0.3501506024096386,0.40768714333669015,0.32813828279965995,0.10907183618663985,
|
| 12 |
+
KN,971,76.94819773429454,-2.9453759011328526,-5.356498455200824,263.56326987681973,-1.2303398558187437,5.542131822863028,6.45653964984552,12.681,426,113,428,543,130,0.2870389181034483,0.0013380756240103959,0.2652582159624413,0.4407826982492276,0.23941068139963168,-1.3059732234809474,
|
| 13 |
+
PO,55,91.24909090909091,13.11709090909091,-6.399272727272727,2195.3818181818183,-1.494181818181818,5.861272727272727,6.305454545454546,0.0,0,0,1,54,0,,7.579213112314292e-05,,0.01818181818181818,0.0,-0.0,
|
| 14 |
+
SC,159,81.02264150943397,-3.1056603773584905,-8.001509433962264,2050.5974842767296,-1.0535849056603774,6.110377358490566,6.064150943396227,4.623,58,13,63,96,20,0.35349463636363637,0.0002191081608832677,0.22413793103448276,0.39622641509433965,0.20833333333333334,-2.9075471698113207,
|
| 15 |
+
SI,116002,93.34805382235511,7.567078832293412,-6.148476070311284,2147.3631502060834,-0.7671983511070397,5.622119363257688,6.435364206296976,-32.837000000000025,53318,7390,65492,50222,12474,0.3501967420378125,0.15985525080994228,0.13860234817510034,0.5645764728194341,0.2483772052088726,0.028307270564300636,
|
| 16 |
+
SL,116390,85.60138786052518,1.5759858803271631,2.7325110632802407,2435.5705519351436,-0.9811034007748601,5.761407576409815,6.433055359327349,-167.41500000000002,56606,19101,52478,63672,20396,0.2818607008786495,0.16038992984404735,0.337437727449387,0.45088065985050263,0.3203291870838045,0.14383967694819144,
|
| 17 |
+
ST,43821,81.8580155633144,1.4796932977339632,7.821825152324228,2575.3661920073496,-1.080187124894457,5.4607240820611125,6.40352674793587,-52.96800000000001,20035,6276,19349,24472,7531,0.25978070794500324,0.0603870359626772,0.3132518093336661,0.44154629059126904,0.30773945733899966,0.12087355377558708,
|
| 18 |
+
SV,2702,81.67483345669874,-4.788941524796447,7.356861584011844,2470.624858757062,-0.5779570688378979,5.420762398223538,6.227296392711045,0.19299999999999926,1117,339,1138,1564,479,0.2907683709923664,0.0037234606962678577,0.3034914950760967,0.42116950407105846,0.3062659846547315,-0.007142857142857115,
|
| 19 |
+
All,725669,89.1521052747817,7.058379139422499,-1.2140087540219224,2255.6768252515376,-0.8282529777063689,5.758824349487279,6.456550518555369,-20.178000000000118,352163,89742,359413,365054,104080,0.3147037524825,1.0,0.25483085957354973,0.4952850404247667,0.28510850449522535,0.002780606585095976,all
|
joblib_model/__pycache__/feature_engineering.cpython-39.pyc
ADDED
|
Binary file (2.14 kB). View file
|
|
|
joblib_model/barrel_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9428e89f2a408148377efb3cd169dc8790bcc89df9495cb895b9db5a955e8fb7
|
| 3 |
+
size 11447
|
joblib_model/in_zone.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5300b15a6ccfb1dd1e79c85bd9ea478a1945c454845e6be31cd8815e4063a3e
|
| 3 |
+
size 54459064
|
joblib_model/in_zone_model_knn_20240410.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82d6d95be88b006bea7efd4bbf0464a0a50f261f6f65f060bf022114300721ed
|
| 3 |
+
size 46782024
|
joblib_model/linear_reg_model_x.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:179663ae0fa65c626b9a941b6934bda1ce58bdf02a69c0daefc28abd28154201
|
| 3 |
+
size 579
|
joblib_model/linear_reg_model_z.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ceabc302949cdbe5515b428f900bce98d6f6bedf99153c8d8a645cb0240ef8b
|
| 3 |
+
size 579
|
joblib_model/model_attack_zone.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2671d4db2606cfee299dcffba2a94138fce77c1b7ef6ad14695a972a38dda3c8
|
| 3 |
+
size 50570139
|
joblib_model/no_swing.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3da3e7ab2b513b87d05e90ae30c788ac819dfcaa7cc1cd9943fc13d2958a00f
|
| 3 |
+
size 279409
|
joblib_model/swing.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fef4a66363e5f3fdc70ae45c5382bd986c800ff8bf9296a1f9b334461e70fd4
|
| 3 |
+
size 262137
|
joblib_model/xwoba_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05bade9c0420657d3f0dfe35f0b1adbd2d5ae25c87a07bdf6629987f29926438
|
| 3 |
+
size 10684246
|
stuff_model/__pycache__/feature_engineering.cpython-39.pyc
ADDED
|
Binary file (2.17 kB). View file
|
|
|
stuff_model/__pycache__/stuff_apply.cpython-39.pyc
ADDED
|
Binary file (1.33 kB). View file
|
|
|
stuff_model/feature_engineering.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import polars as pl
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
def feature_engineering(df: pl.DataFrame) -> pl.DataFrame:
    """Add the derived pitch features consumed by the stuff model.

    The input frame must provide the columns read below: ``game_date``
    (string), ``vx0``/``vy0``/``vz0``, ``ax``/``ay``/``az``, ``x0``/``y0``,
    ``hb``, ``pitcher_hand``, ``pitcher_id``, ``pitch_type`` and
    ``start_speed``.

    Columns added or overwritten:

    * ``year`` - first four characters of ``game_date``, cast to Int64.
    * ``vy_f``, ``t``, ``vz_f``, ``vx_f``, ``vaa``, ``haa`` - kinematics at
      y = 17/12 and the two angles (degrees) derived from them.
    * ``ax``, ``hb``, ``x0`` - sign-adjusted by ``pitcher_hand`` (see below).
    * ``avg_fastball_speed``/``_az``/``_ax`` and ``count`` - averages of the
      pitcher's most-thrown pitch type among SI/FF/FC for that year (the
      joined aggregate also brings its own, suffixed, pitch type column).
    * ``speed_diff``, ``az_diff``, ``ax_diff`` - per-pitch differences from
      those averages (``ax_diff`` is an absolute value).
    * ``all`` - the literal string ``'All'`` on every row.

    Returns:
        A new DataFrame. Because the aggregate is attached with polars'
        default (inner) join, rows whose pitcher/year has no SI/FF/FC pitch
        are not present in the result.
    """
    # Season label taken from the leading "YYYY" of the date string.
    df = df.with_columns(
        pl.col('game_date').str.slice(0, 4).alias('year')
    )

    # y-velocity at y = 17/12 from v_f^2 = v_0^2 - 2*a*(y0 - y_f), negative root.
    # NOTE(review): 17/12 is presumably the front of home plate in feet - confirm.
    df = df.with_columns(
        (-(pl.col('vy0')**2 - (2 * pl.col('ay') * (pl.col('y0') - 17/12)))**0.5).alias('vy_f'),
    )

    # Time taken to go from vy0 to vy_f under constant acceleration ay.
    df = df.with_columns(
        ((pl.col('vy_f') - pl.col('vy0')) / pl.col('ay')).alias('t'),
    )

    # x/z velocities at that time. These use the raw (not yet mirrored) ax.
    df = df.with_columns(
        (pl.col('vz0') + (pl.col('az') * pl.col('t'))).alias('vz_f'),
        (pl.col('vx0') + (pl.col('ax') * pl.col('t'))).alias('vx_f'),
    )

    # Angles, in degrees, of the final velocity relative to the y axis.
    df = df.with_columns(
        (-np.arctan(pl.col('vz_f') / pl.col('vy_f')) * (180 / np.pi)).alias('vaa'),
        (-np.arctan(pl.col('vx_f') / pl.col('vy_f')) * (180 / np.pi)).alias('haa'),
    )

    # Put both hands on a common horizontal sign convention:
    #   - ax and hb are negated for left-handed pitchers;
    #   - x0 is kept for left-handed pitchers and negated for everyone else.
    # The three expressions are independent, so they share one with_columns.
    is_left = pl.col('pitcher_hand') == 'L'
    df = df.with_columns(
        pl.when(is_left).then(-pl.col('ax')).otherwise(pl.col('ax')).alias('ax'),
        pl.when(is_left).then(-pl.col('hb')).otherwise(pl.col('hb')).alias('hb'),
        pl.when(is_left).then(pl.col('x0')).otherwise(-pl.col('x0')).alias('x0'),
    )

    # Pitch types that count as a "fastball" for the reference averages.
    pitch_types = ['SI', 'FF', 'FC']
    df_filtered = df.filter(pl.col('pitch_type').is_in(pitch_types))

    # Per pitcher / year / fastball type: mean speed, mean az, mean (mirrored)
    # ax, and how many were thrown.
    df_agg = df_filtered.group_by(['pitcher_id', 'year', 'pitch_type']).agg([
        pl.col('start_speed').mean().alias('avg_fastball_speed'),
        pl.col('az').mean().alias('avg_fastball_az'),
        pl.col('ax').mean().alias('avg_fastball_ax'),
        pl.len().alias('count'),
    ])

    # Keep one row per pitcher/year: the most-thrown fastball type, with ties
    # broken by the higher average speed.
    df_agg = df_agg.sort(['count', 'avg_fastball_speed'], descending=[True, True])
    df_agg = df_agg.unique(subset=['pitcher_id', 'year'], keep='first')

    # Attach the reference averages to every pitch.
    # NOTE(review): this is polars' default inner join, so pitcher/years with
    # no SI/FF/FC rows are dropped here and the null fallbacks below can only
    # trigger when an aggregated mean is itself null. If those pitchers are
    # meant to be kept, this likely needs how='left' - confirm with callers.
    df = df.join(df_agg, on=['pitcher_id', 'year'])

    # Fallbacks when a reference average is null: use the pitcher's maximum
    # start_speed / az / ax instead. All three are partitioned by pitcher_id;
    # the ax fallback previously used .over('ax'), which partitions by the
    # value itself and so just returned each row's own ax.
    df = df.with_columns(
        pl.when(pl.col('avg_fastball_speed').is_null())
        .then(pl.col('start_speed').max().over('pitcher_id'))
        .otherwise(pl.col('avg_fastball_speed'))
        .alias('avg_fastball_speed'),
        pl.when(pl.col('avg_fastball_az').is_null())
        .then(pl.col('az').max().over('pitcher_id'))
        .otherwise(pl.col('avg_fastball_az'))
        .alias('avg_fastball_az'),
        pl.when(pl.col('avg_fastball_ax').is_null())
        .then(pl.col('ax').max().over('pitcher_id'))
        .otherwise(pl.col('avg_fastball_ax'))
        .alias('avg_fastball_ax'),
    )

    # Differences between each pitch and the pitcher's reference fastball.
    df = df.with_columns(
        (pl.col('start_speed') - pl.col('avg_fastball_speed')).alias('speed_diff'),
        (pl.col('az') - pl.col('avg_fastball_az')).alias('az_diff'),
        (pl.col('ax') - pl.col('avg_fastball_ax')).abs().alias('ax_diff'),
    )

    # year was built as a string slice; store it as an integer.
    df = df.with_columns(
        pl.col('year').cast(pl.Int64)
    )

    # Constant column holding the literal 'All' on every row.
    df = df.with_columns(
        pl.lit('All').alias('all')
    )

    return df
|
stuff_model/lgbm_model_2020_2023.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41001a1acf6ce7dbe247f1b8b7e68a1bb1b112f39d080b7e95a83479e56cb7c1
|
| 3 |
+
size 3092328
|
stuff_model/stuff_apply.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import polars as pl
|
| 2 |
+
import joblib
|
| 3 |
+
|
| 4 |
+
# Trained model, loaded once at import time (path is relative to the
# process working directory).
model = joblib.load('stuff_model/lgbm_model_2020_2023.joblib')

# target_stats.txt holds two numbers, one per line: the mean and the standard
# deviation used to z-score the model's raw predictions.
with open('stuff_model/target_stats.txt', 'r') as file:
    lines = file.readlines()
target_mean, target_std = (float(lines[idx].strip()) for idx in (0, 1))

# Columns handed to model.predict, in this exact order.
features = [
    'start_speed',
    'spin_rate',
    'extension',
    'az',
    'ax',
    'x0',
    'z0',
    'speed_diff',
    'az_diff',
    'ax_diff',
]
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def stuff_apply(df:pl.DataFrame) -> pl.DataFrame:
    """Score every pitch in ``df`` with the stuff model and grade it.

    ``df`` must contain every column named in ``features`` plus
    ``pitch_type``. No rows are filtered and nulls are not dropped before
    prediction.

    Columns added:

    * ``target`` - raw model prediction.
    * ``target_zscore`` - ``target`` standardised with ``target_mean`` /
      ``target_std``.
    * ``tj_stuff_plus`` - ``100 - 10 * target_zscore``, so a lower predicted
      target gives a higher value.
    * the per-pitch-type statistics read from ``tj_stuff_plus_pitch.csv``.
    * ``pitch_grade`` - ``tj_stuff_plus`` standardised within its pitch type,
      rescaled to ``50 + 10 * z`` and clipped to [20, 80].

    Returns:
        The scored frame joined to the pitch-type statistics on
        ``pitch_type``.
    """
    # Raw predictions for every row, using the feature columns in model order.
    predictions = model.predict(df[features].to_numpy())

    scored = (
        df.clone()
        .with_columns(pl.Series(name="target", values=predictions))
        # z-score against the stored target mean / std
        .with_columns(
            ((pl.col('target') - target_mean) / target_std).alias('target_zscore')
        )
        # centre on 100, 10 points per standard deviation, sign flipped
        .with_columns(
            (100 - (pl.col('target_zscore') * 10)).alias('tj_stuff_plus')
        )
    )

    # Per-pitch-type mean / std (and other summary columns) of tj_stuff_plus.
    pitch_type_stats = pl.read_csv('stuff_model/tj_stuff_plus_pitch.csv')
    graded = scored.join(pitch_type_stats, left_on='pitch_type', right_on='pitch_type')

    # Standardise within pitch type.
    # NOTE(review): the bundled CSV lists std = 0.0 for CS and KN, so this
    # division produces inf/NaN for those pitch types before the clip below -
    # confirm that is acceptable.
    graded = graded.with_columns(
        ((pl.col('tj_stuff_plus') - pl.col('mean')) / pl.col('std')).alias('pitch_grade')
    )

    # Rescale to 50 +/- 10 per standard deviation and clamp to [20, 80]; the
    # expression keeps the name pitch_grade, so the column is overwritten.
    return graded.with_columns(
        (pl.col('pitch_grade') * 10 + 50).clip(20, 80)
    )
|
stuff_model/target_stats.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0.0034732498406374636
|
| 2 |
+
0.006846752748626548
|
stuff_model/tj_stuff_plus_pitch.csv
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pitch_type,mean,std,median,min,max,percentile_1,percentile_99
|
| 2 |
+
ST,106.44784631565936,5.593943599731136,106.24878922952112,91.18894850636659,125.29541262167034,91.69322149368426,125.25688309207108
|
| 3 |
+
SV,103.73183202363764,3.001226780758946,103.50047554089315,93.3173875900245,111.34757479687066,93.32953434698274,111.33689503153641
|
| 4 |
+
SL,103.49296290610897,5.265572779780409,103.19144262214559,88.84957017284297,121.88798777026031,89.76670287371176,121.36013955239422
|
| 5 |
+
KC,101.8993919341341,4.271694896723436,100.79211889194949,93.69754063161618,119.4933202093256,93.75149298057133,119.38166236091195
|
| 6 |
+
All,99.9275100894791,5.01699442232884,99.65265124489378,84.73033633038408,116.94934527087541,86.65905811630736,116.7610246502804
|
| 7 |
+
CU,99.88832068607897,4.615228571103906,99.08993373693156,89.84495168337246,119.90089262632986,90.20429983334718,117.89567125997061
|
| 8 |
+
FC,98.83449547008738,5.811964883678063,98.54483029899575,83.20928731685326,119.78700324933075,83.34007602984008,118.21186533190846
|
| 9 |
+
FS,98.25541635267653,6.898952096824192,98.46204303842217,72.25450024197754,114.88400714657823,73.39595959354874,114.78967217449389
|
| 10 |
+
FO,98.15224613640243,1.081819065809178,99.94816563615653,94.0023252668585,100.50624750619224,94.0142169475971,100.50513134245217
|
| 11 |
+
FF,97.29024735737988,6.078459125845886,97.09670890504734,81.2230917971995,118.10419744965911,81.32311771953398,117.7938724746093
|
| 12 |
+
SC,97.27958020025409,1.2452898498180456,97.27958020025409,93.536223938276,101.02293646223218,93.54371065079995,101.01544974970822
|
| 13 |
+
CH,96.35866365133434,6.178939251378385,95.80884625564597,81.28802319264824,121.14136334013493,82.02275793969746,119.09639344796777
|
| 14 |
+
SI,95.14161603816645,4.9734372581529955,95.11657827702109,82.5850956341191,112.99618112461533,82.8856383780296,112.72626192694757
|
| 15 |
+
CS,93.97853627048322,0.0,93.97853627048322,93.97853627048322,93.97853627048322,93.97853627048322,93.97853627048322
|
| 16 |
+
KN,93.41890096234394,0.0,93.41890096234394,93.41890096234394,93.41890096234394,93.41890096234394,93.41890096234394
|