|
|
import polars as pl |
|
|
import joblib |
|
|
|
|
|
# Pre-trained model used by stuff_apply to produce the raw ``target`` value.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only point this at a trusted artifact.
model = joblib.load('stuff_model/lgbm_model_2020_2024.joblib')

# target_stats.txt holds two numbers, one per line: the mean on the first
# line and the standard deviation on the second. stuff_apply uses them to
# turn raw predictions into z-scores.
with open('stuff_model/target_stats.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

target_mean = float(lines[0].strip())
target_std = float(lines[1].strip())
|
|
|
|
|
|
|
|
# Input columns handed to the model, in the order they are laid out in the
# prediction matrix (stuff_apply selects df[features] and converts it to
# NumPy, so position - not name - is what the model sees).
# NOTE(review): presumably this must match the column order used when the
# model was trained - confirm before reordering or adding entries.
features = [
    'start_speed',
    'spin_rate',
    'extension',
    'az',
    'ax',
    'x0',
    'z0',
    'speed_diff',
    'az_diff',
    'ax_diff',
]
|
|
|
|
|
|
|
|
def stuff_apply(df: pl.DataFrame) -> pl.DataFrame:
    """Score pitches with the stuff model and attach per-pitch-type grades.

    Args:
        df: Frame containing every column listed in ``features`` plus a
            ``pitch_type`` column used as the join key.

    Returns:
        A new frame (``df`` itself is not modified) with these columns added:

        * ``target`` - raw output of ``model.predict`` on the feature matrix.
        * ``target_zscore`` - ``target`` standardised with the module-level
          ``target_mean`` and ``target_std``.
        * ``tj_stuff_plus`` - ``100 - 10 * target_zscore``, so a lower
          ``target`` gives a higher score.
        * ``pitch_grade`` - ``tj_stuff_plus`` standardised with the ``mean``
          and ``std`` columns, rescaled to ``50 + 10 * z`` and clipped to
          the range 20 to 80.

        Any other columns of ``stuff_model/tj_stuff_plus_pitch.csv`` are
        included as well. The join is an inner join, so rows whose
        ``pitch_type`` has no match in that CSV are dropped from the result.
    """
    # Column order of the matrix follows ``features``.
    predictions = model.predict(df[features].to_numpy())

    # with_columns returns a new frame each time, so the caller's frame is
    # left untouched without an explicit copy. The steps stay separate
    # because each one reads the column created by the previous step.
    scored = (
        df.with_columns(pl.Series(name="target", values=predictions))
        .with_columns(
            ((pl.col('target') - target_mean) / target_std).alias('target_zscore')
        )
        .with_columns(
            (100 - (pl.col('target_zscore') * 10)).alias('tj_stuff_plus')
        )
    )

    # Per-pitch-type reference table; it is re-read from disk on every call.
    # NOTE(review): the 'mean' and 'std' columns used below presumably come
    # from this CSV - confirm against the file.
    pitch_stats = pl.read_csv('stuff_model/tj_stuff_plus_pitch.csv')
    graded = scored.join(pitch_stats, on='pitch_type', how='inner')

    # First a z-score relative to the pitch type's own distribution...
    graded = graded.with_columns(
        ((pl.col('tj_stuff_plus') - pl.col('mean')) / pl.col('std')).alias('pitch_grade')
    )
    # ...then overwrite it with the clipped, rescaled grade. The alias is
    # spelled out so the overwrite of 'pitch_grade' is explicit.
    return graded.with_columns(
        (pl.col('pitch_grade') * 10 + 50).clip(20, 80).alias('pitch_grade')
    )