Spaces:
Running
Running
| import polars as pl | |
| from grade_utils import ( | |
| season_to_kwera_constant, | |
| pitch_season_to_avg_velo_mean, | |
| pitch_season_to_avg_velo_std, | |
| pitch_season_to_lg_avg_velo, | |
| pitch_season_to_lg_swstr, | |
| pitch_season_to_lg_ball, | |
| pitch_season_to_lg_gb, | |
| pitch_season_to_lg_iffb, | |
| pitch_season_to_ypera_mean, | |
| pitch_season_to_ypera_std, | |
| map_columns | |
| ) | |
# Pitch labels that are kept as-is: each one maps to itself.
_YPERA_BASE_PITCHES = (
    'Fastball (4-seam)',
    'Slider',
    'Curve',
    'Splitter',
    'Sinker',
    'Cutter',
    'Changeup',
)

# Remaining pitch labels, each folded into one of the base labels above.
_YPERA_PITCH_ALIASES = {
    'Vertical Slider': 'Slider',
    'Palmball': 'Changeup',
    'Screwball': 'Curve',
    'Slurve': 'Curve',
    'Eephus': 'Curve',
    'Knuckleball': 'Curve',
    'Sweeper': 'Slider',
}

# Lookup from a raw pitch label to the pitch label used for the YpERA model.
# Base labels come first, then the aliases, in the order listed above.
pera_ball_kind = {
    **{pitch: pitch for pitch in _YPERA_BASE_PITCHES},
    **_YPERA_PITCH_ALIASES,
}
def _ypera_pitch_from(source_col):
    """Map the pitch labels in `source_col` through `pera_ball_kind`, named 'YpERA Pitch'.

    `replace_strict` is called without a default, so a label that is missing
    from `pera_ball_kind` is an error rather than being passed through.
    """
    return pl.col(source_col).replace_strict(pera_ball_kind).alias('YpERA Pitch')


pera_ball_kind_col = _ypera_pitch_from('general_ballKind')
# For when pitch data renames general_ballKind to ballKind (see `stats.py`).
pera_ball_kind_col_alt = _ypera_pitch_from('ballKind')

# Velocity z-score. The mean and std come from the grade_utils tables via
# `map_columns` (presumably keyed by pitch type and season -- confirm in
# grade_utils); an earlier version computed them from the data with
# pl.mean / pl.std of 'Avg Velo' over 'YpERA Pitch'.
_velo_mean = map_columns(pitch_season_to_avg_velo_mean)
_velo_std = map_columns(pitch_season_to_avg_velo_std)
p_velo_z = ((pl.col('Avg Velo') - _velo_mean) / _velo_std).alias('Velo Z')

# Expected rates, each clipped to [0, 1].
p_swstr = (
    pl.col('SwStr%') * 0.7 + p_velo_z * 0.004 + 0.03
).clip(0, 1).alias('xSwStr%')
p_k = (p_swstr * 1.7 + 0.02).clip(0, 1).alias('xK%')
p_bb = (pl.col('Ball%') * 0.8 - 0.2).clip(0, 1).alias('xBB%')

# Per-season constant minus 10 * (xK% - xBB%).
# NOTE(review): `Expr.replace` is documented as replacing values with others
# of the same data type and leaving unmatched values unchanged. Confirm that
# every season is present in `season_to_kwera_constant` and that the constants
# are not altered by the dtype of 'season'; `replace_strict` would make both
# explicit.
_season_constant = pl.col('season').replace(season_to_kwera_constant)
p_kwera = (_season_constant - 10 * (p_k - p_bb)).alias('kwERA')

# Linear adjustment for ground balls and infield fly balls.
p_gb_pu_adj = (
    pl.col('GB%') * -3 + pl.col('IFFB%') * -5 + 2
).alias('GB% IFFB% Adj')

# Pitch-level estimate: kwERA plus the batted-ball adjustment.
pera = (p_kwera + p_gb_pu_adj).alias('YpERA')
def average(col):
    """Return an expression for the mean of `col` weighted by the 'Count' column.

    Args:
        col: Name of the column to average.

    Returns:
        sum('Count' * col) / sum('Count') as a polars expression.
    """
    weighted_total = (pl.col('Count') * pl.col(col)).sum()
    total_count = pl.sum('Count')
    return weighted_total / total_count
# League-average baseline: the same formulas as the pitch-level p_* expressions,
# evaluated on league-average inputs.
#
# The inputs are read from the grade_utils tables through `map_columns`
# (presumably keyed by pitch type and season -- confirm in grade_utils).
# An earlier version derived them from the data itself as
# `average(<stat>).over('YpERA Pitch')` for Avg Velo, SwStr%, Ball%, GB%, IFFB%.
lg_velo = map_columns(pitch_season_to_lg_avg_velo).alias('lg_velo')
lg_swstr_ = map_columns(pitch_season_to_lg_swstr).alias('lg_swstr')
lg_ball = map_columns(pitch_season_to_lg_ball).alias('lg_ball')
lg_gb = map_columns(pitch_season_to_lg_gb).alias('lg_gb')
lg_iffb = map_columns(pitch_season_to_lg_iffb).alias('lg_iffb')

# Z-score of the league velocity against the same mean/std tables used for
# 'Velo Z'. This reads the 'lg_velo' column by name, so `lg_velo` has to be
# added to the frame in an earlier with_columns call.
lg_velo_z = (
    (pl.col('lg_velo') - map_columns(pitch_season_to_avg_velo_mean))
    / map_columns(pitch_season_to_avg_velo_std)
).alias('lg_velo_z')

# Same coefficients and [0, 1] clipping as xSwStr%, xK% and xBB%.
lg_swstr = (lg_swstr_ * 0.7 + lg_velo_z * 0.004 + 0.03).clip(0, 1).alias('lg_xSwStr%')
lg_k = (lg_swstr * 1.7 + 0.02).clip(0, 1).alias('lg_xK%')
lg_bb = (lg_ball * 0.8 - 0.2).clip(0, 1).alias('lg_xBB%')
lg_kwera = (
    pl.col('season').replace(season_to_kwera_constant) - 10 * (lg_k - lg_bb)
).alias('lg_kwERA')

# Fix: the GB% weight was -4 here while `p_gb_pu_adj` uses -3, so a pitch with
# exactly league-average inputs did not get lg_YpERA == YpERA (and therefore
# not YpERA- == 100). The weights now match the pitch-level formula.
# NOTE(review): if -4 is the intended GB% weight, change it in `p_gb_pu_adj`
# as well so that both formulas stay identical.
lg_gb_pu_adj = (lg_gb * -3 + lg_iffb * -5 + 2).alias('lg_gb_pu_adj')

# League baseline YpERA: league kwERA plus the league batted-ball adjustment.
lg_pera = (lg_kwera + lg_gb_pu_adj).alias('lg_YpERA')
# Pitch count from which a pitch's own YpERA is used without shrinkage.
_FULL_WEIGHT_COUNT = 100

# Below the threshold, blend toward the league baseline: weight Count / 100 on
# the pitch's own YpERA and the remaining weight on lg_YpERA. Reads the
# 'Count', 'YpERA' and 'lg_YpERA' columns, so those must already exist.
shrunk_pera = (
    pl.when(pl.col('Count') < _FULL_WEIGHT_COUNT)
    .then(
        pl.col('YpERA') * pl.col('Count') / _FULL_WEIGHT_COUNT
        + pl.col('lg_YpERA') * (1 - pl.col('Count') / _FULL_WEIGHT_COUNT)
    )
    # A bare 'YpERA' string is parsed by polars as a column name; spelled out
    # here for clarity.
    .otherwise(pl.col('YpERA'))
    .alias('shrunk_YpERA')
)

# Mean and standard deviation used to scale the grade, read from the
# grade_utils tables through `map_columns`. An earlier version computed them
# from the data as the mean / std of 'YpERA' over rows with Count >= 100.
mean_pera = map_columns(pitch_season_to_ypera_mean).alias('mean_YpERA')
std_pera = map_columns(pitch_season_to_ypera_std).alias('std_YpERA')

# Grade: 50 at the mean, 10 points per standard deviation, with a lower
# (better) shrunk YpERA giving a higher grade. Rounded, then stored as Int32.
_ypera_z = (pl.col('shrunk_YpERA') - mean_pera) / std_pera
pera_grade = (_ypera_z * -10 + 50).round().cast(pl.Int32).alias('YpERA Grade')
# YpERA relative to the league baseline, scaled so that 100 means equal to
# lg_YpERA and lower is better.
# Fix: round before the integer cast. Casting a float straight to Int32 drops
# the fractional part (e.g. 99.9 became 99); `pera_grade` already rounds
# before its cast, so this makes the two integer outputs consistent.
pera_minus = (
    (pl.col('YpERA') / pl.col('lg_YpERA') * 100)
    .round()
    .cast(pl.Int32())
    .alias('YpERA-')
)

# Grade mapped linearly from the 20-80 range onto [0, 1]; grades outside that
# range are clipped to the ends first.
pera_grade_pctl = ((pera_grade.clip(20, 80) - 20) / 60).alias('YpERA Grade_pctl')

# YpERA- mapped onto [0, 1]: 0 -> 1.0, 100 -> 0.5, 200 or more -> 0.0.
pera_minus_pctl = ((200 - pera_minus) / 200).clip(0, 1).alias('YpERA-_pctl')
def compute_pera(pitch_stats):
    """Add the YpERA columns to a pitch-stats frame.

    NaN values are replaced with 0 first. The module-level expressions are then
    applied in successive `with_columns` calls, because later expressions read
    columns created by earlier ones by name (for example `lg_velo_z` reads
    'lg_velo', `shrunk_pera` reads 'YpERA' and 'lg_YpERA', and `pera_grade`
    reads 'shrunk_YpERA').

    Args:
        pitch_stats: Frame of pitch statistics. The pitch label is taken from
            'general_ballKind' when that column is present, otherwise from
            'ballKind'.

    Returns:
        The frame with the YpERA, league-baseline, shrinkage, grade, YpERA- and
        percentile columns added.
    """
    # Checked on the incoming frame, before any columns are added.
    if 'general_ballKind' in pitch_stats.columns:
        pitch_kind_expr = pera_ball_kind_col
    else:
        pitch_kind_expr = pera_ball_kind_col_alt

    # Each tuple is one with_columns call; the order of the stages matters.
    stages = (
        # Pitch label used by the YpERA model ('YpERA Pitch').
        (pitch_kind_expr,),
        # Pitch-level model.
        (p_velo_z, p_swstr, p_k, p_bb, p_kwera, p_gb_pu_adj, pera),
        # League-average inputs.
        (lg_velo, lg_swstr_, lg_ball, lg_gb, lg_iffb),
        # League-baseline model.
        (lg_velo_z, lg_swstr, lg_k, lg_bb, lg_kwera, lg_gb_pu_adj, lg_pera),
        # Shrinkage toward the league baseline for low pitch counts.
        (shrunk_pera,),
        # Grade scaling parameters.
        (mean_pera, std_pera),
        # Grade and its percentile.
        (pera_grade, pera_grade_pctl),
        # YpERA- and its percentile. These read only 'YpERA' and 'lg_YpERA',
        # so nothing forces them into their own step; kept separate to
        # preserve the existing order of operations.
        (pera_minus, pera_minus_pctl),
    )

    result = pitch_stats.fill_nan(0)
    for stage in stages:
        result = result.with_columns(*stage)
    return result
if __name__ == '__main__':
    # Manual check: compute YpERA for regular-season pitcher data and print one
    # pitcher's rows, then drop into the debugger for interactive inspection.
    from datetime import date

    from data import data_df
    from stats import compute_pitch_stats, filter_data_by_date_and_game_kind

    _data = filter_data_by_date_and_game_kind(
        data=data_df,
        start_date=date(2021, 1, 1),
        end_date=date(2025, 12, 31),
        game_kind='Regular Season',
    )
    pitch_stats = compute_pitch_stats(
        data=_data,
        player_type='pitcher',
        pitch_class_type='specific',
        min_pitches=1,
        group_by_season=True,
    )

    # Not wrapped in try/except, so any failure surfaces with its traceback.
    pera_data = compute_pera(pitch_stats)

    cols = [
        'season',
        'pitcher_name',
        'general_ballKind',
        'ballKind',
        'YpERA Pitch',
        'YpERA Grade',
        'YpERA-',
        'YpERA',
    ]

    # Other filters used while debugging:
    #   pl.col('general_ballKind') == 'Fastball (4-seam)'
    #   pl.col('season').is_between(2022, 2024)
    selected_rows = pera_data.filter(
        pl.col('pitcher_name') == 'Miyagi Hiroya',
        pl.col('season') == 2025,
    )
    print(selected_rows.sort('season')[cols])

    # Leaves the session open in the debugger after printing.
    breakpoint()