nesticot committed on
Commit
5c7e1d9
·
verified ·
1 Parent(s): 4022a42

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -120
app.py CHANGED
@@ -21,14 +21,11 @@ import pandas as pd
21
  import shinyswatch
22
  import inflect
23
  from matplotlib.pyplot import text
24
- import rolling_batter_functions as rbf
25
  import joblib
26
  import polars as pl
27
-
28
- in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
29
- px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
30
- pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
31
- xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
32
 
33
  def percentile(n):
34
  def percentile_(x):
@@ -42,133 +39,42 @@ colour_palette = ['#FFB000','#648FFF','#785EF0',
42
 
43
 
44
  print('Starting Everything:')
45
- # exit_velo_df = milb_a_ev_df.append([triple_a_ev_df,double_a_ev_df,a_high_a_ev_df,single_a_ev_df]).reset_index(drop=True)
46
- # player_df_all = mlb_a_player_df.append([triple_a_player_df,double_a_player_df,a_high_a_player_df,single_a_player_df]).reset_index(drop=True)
47
- # exit_velo_df = pd.read_csv('exit_velo_df_all.csv',index_col=[0])
48
- # player_df_all = pd.read_csv('player_df_all.csv',index_col=[0])
49
-
50
- # pa_df = pd.read_csv('pa_df_all.csv',index_col=[0])
51
- # pa_df_full_na = pa_df.dropna()
52
 
53
 
54
- exit_velo_df_mlb = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025.parquet").to_pandas()
55
  #print(df_2023)
56
- exit_velo_df_mlb['level'] = 'MLB'
57
 
58
 
59
- exit_velo_df_aaa = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/aaa_pitch_data_2025.parquet").to_pandas()
60
  #print(df_2023)
61
- exit_velo_df_aaa['level'] = 'AAA'
62
 
63
 
64
- exit_velo_df_aa = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/aa_pitch_data_2025.parquet").to_pandas()
65
  #print(df_2023)
66
- exit_velo_df_aa['level'] = 'AA'
67
 
68
 
69
- exit_velo_df_ha = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/hi_a_pitch_data_2025.parquet").to_pandas()
70
  #print(df_2023)
71
- exit_velo_df_ha['level'] = 'A+'
72
 
73
  ### Import Datasets
74
- exit_velo_df_a = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/lo_a_pitch_data_2025.parquet").to_pandas()
75
  #print(df_2023)
76
- exit_velo_df_a['level'] = 'A'
77
-
78
- exit_velo_df = pd.concat([exit_velo_df_mlb,exit_velo_df_aaa,exit_velo_df_aa,exit_velo_df_ha,exit_velo_df_a])
79
- # exit_velo_df = pd.concat([exit_velo_df_mlb,exit_velo_df_aaa])
80
- # exit_velo_df_copy = exit_velo_df.copy()
81
-
82
- # exit_velo_df = exit_velo_df_copy.copy()
83
-
84
-
85
- from batting_update import df_update,df_update_summ_avg,df_update_summ,df_summ_batter_pitch_up,df_summ_changes,df_summ_filter_out
86
-
87
- def percentile(n):
88
- def percentile_(x):
89
- return np.nanpercentile(x, n)
90
- percentile_.__name__ = 'percentile_%s' % n
91
- return percentile_
92
-
93
- exit_velo_df_codes = df_update(exit_velo_df)
94
-
95
-
96
- exit_velo_df_codes_summ = exit_velo_df_codes.groupby(['batter_id','batter_name','level']).agg(
97
- pa = ('pa','sum'),
98
- k = ('k','sum'),
99
- bb = ('bb','sum'),
100
- bb_minus_k = ('bb_minus_k','sum'),
101
- csw = ('csw','sum'),
102
- bip = ('bip','sum'),
103
- tb = ('tb','sum'),
104
- woba = ('woba','sum'),
105
- woba_codes = ('woba_codes','sum'),
106
- xwoba = ('xwoba','sum'),
107
- xwoba_contact = ('xwoba_contact','sum'),
108
- #woba_codes = ('woba_codes','sum'),
109
- hard_hit = ('hard_hit','sum'),
110
- barrel = ('barrel','sum'),
111
- sweet_spot = ('sweet_spot','sum'),
112
- max_launch_speed = ('launch_speed','max'),
113
- launch_speed_90 = ('launch_speed',percentile(90)),
114
- launch_speed = ('launch_speed','mean'),
115
- launch_angle = ('launch_angle','mean'),
116
- pitches = ('pitches','sum'),
117
- swings = ('swings','sum'),
118
- in_zone = ('in_zone','sum'),
119
- out_zone = ('out_zone','sum'),
120
- whiffs = ('whiffs','sum'),
121
- zone_swing = ('zone_swing','sum'),
122
- zone_contact = ('zone_contact','sum'),
123
- ozone_swing = ('ozone_swing','sum'),
124
- ozone_contact = ('ozone_contact','sum'),
125
- ).reset_index()
126
-
127
- #exit_velo_df_codes_summ['out_zone'] = ~exit_velo_df_codes_summ.in_zone
128
- #bip_min_input = int(input())
129
- #bip_min = min(bip_min_input,50)
130
- #exit_velo_df_codes_summ = exit_velo_df_codes_summ[exit_velo_df_codes_summ.balls_in_play>=bip_min]
131
-
132
-
133
- exit_velo_df_codes_summ['k_percent'] = [exit_velo_df_codes_summ.k[x]/exit_velo_df_codes_summ.pa[x] if exit_velo_df_codes_summ.pa[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
134
- exit_velo_df_codes_summ['bb_percent'] =[exit_velo_df_codes_summ.bb[x]/exit_velo_df_codes_summ.pa[x] if exit_velo_df_codes_summ.pa[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
135
- exit_velo_df_codes_summ['bb_minus_k_percent'] =[exit_velo_df_codes_summ.bb_minus_k[x]/exit_velo_df_codes_summ.pa[x] if exit_velo_df_codes_summ.pa[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
136
-
137
- exit_velo_df_codes_summ['csw_percent'] =[exit_velo_df_codes_summ.csw[x]/exit_velo_df_codes_summ.pitches[x] if exit_velo_df_codes_summ.pitches[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
138
-
139
-
140
- exit_velo_df_codes_summ['sweet_spot_percent'] = [exit_velo_df_codes_summ.sweet_spot[x]/exit_velo_df_codes_summ.bip[x] if exit_velo_df_codes_summ.bip[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
141
-
142
- exit_velo_df_codes_summ['woba_percent'] = [exit_velo_df_codes_summ.woba[x]/exit_velo_df_codes_summ.woba_codes[x] if exit_velo_df_codes_summ.woba_codes[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
143
- #exit_velo_df_codes_summ['hard_hit_percent'] = [exit_velo_df_codes_summ.sweet_spot[x]/exit_velo_df_codes_summ.bip[x] if exit_velo_df_codes_summ.bip[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
144
- exit_velo_df_codes_summ['hard_hit_percent'] = [exit_velo_df_codes_summ.hard_hit[x]/exit_velo_df_codes_summ.bip[x] if exit_velo_df_codes_summ.bip[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
145
-
146
-
147
- exit_velo_df_codes_summ['barrel_percent'] = [exit_velo_df_codes_summ.barrel[x]/exit_velo_df_codes_summ.bip[x] if exit_velo_df_codes_summ.bip[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
148
-
149
- exit_velo_df_codes_summ['zone_contact_percent'] = [exit_velo_df_codes_summ.zone_contact[x]/exit_velo_df_codes_summ.zone_swing[x] if exit_velo_df_codes_summ.zone_swing[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
150
-
151
- exit_velo_df_codes_summ['zone_swing_percent'] = [exit_velo_df_codes_summ.zone_swing[x]/exit_velo_df_codes_summ.in_zone[x] if exit_velo_df_codes_summ.pitches[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
152
-
153
- exit_velo_df_codes_summ['zone_percent'] = [exit_velo_df_codes_summ.in_zone[x]/exit_velo_df_codes_summ.pitches[x] if exit_velo_df_codes_summ.pitches[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
154
-
155
- exit_velo_df_codes_summ['chase_percent'] = [exit_velo_df_codes_summ.ozone_swing[x]/(exit_velo_df_codes_summ.pitches[x] - exit_velo_df_codes_summ.in_zone[x]) if (exit_velo_df_codes_summ.pitches[x]- exit_velo_df_codes_summ.in_zone[x]) != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
156
-
157
- exit_velo_df_codes_summ['chase_contact'] = [exit_velo_df_codes_summ.ozone_contact[x]/exit_velo_df_codes_summ.ozone_swing[x] if exit_velo_df_codes_summ.ozone_swing[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
158
-
159
- exit_velo_df_codes_summ['swing_percent'] = [exit_velo_df_codes_summ.swings[x]/exit_velo_df_codes_summ.pitches[x] if exit_velo_df_codes_summ.pitches[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
160
-
161
- exit_velo_df_codes_summ['whiff_rate'] = [exit_velo_df_codes_summ.whiffs[x]/exit_velo_df_codes_summ.swings[x] if exit_velo_df_codes_summ.swings[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
162
 
163
- exit_velo_df_codes_summ['swstr_rate'] = [exit_velo_df_codes_summ.whiffs[x]/exit_velo_df_codes_summ.pitches[x] if exit_velo_df_codes_summ.pitches[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
164
 
165
- exit_velo_df_codes_summ['xwoba_percent'] = [exit_velo_df_codes_summ.xwoba[x]/exit_velo_df_codes_summ.woba_codes[x] if exit_velo_df_codes_summ.woba_codes[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
166
- exit_velo_df_codes_summ['xwoba_percent_contact'] = [exit_velo_df_codes_summ.xwoba_contact[x]/exit_velo_df_codes_summ.bip[x] if exit_velo_df_codes_summ.bip[x] != 0 else np.nan for x in range(len(exit_velo_df_codes_summ))]
167
 
 
 
168
 
169
- exit_velo_df_codes_summ = exit_velo_df_codes_summ.dropna(subset=['bip'])
 
 
170
 
171
- woba_list = ['woba','xwoba']
172
  pa_list = ['k','bb','bb_minus_k']
173
  balls_in_play_list = ['hard_hit','launch_speed','launch_speed_90','launch_angle','barrel','sweet_spot']
174
  pitches_list = ['zone_percent','swing_percent','sw_str','csw']
@@ -184,7 +90,6 @@ plot_dict = {
184
  'bb_minus_k':{'x_axis':'Plate Appearances','y_axis':'BB-K%','title':'BB-K%','x_value':'bb_minus_k','x_range':[-0.3,-0.2,-0.1,0,0.1,0.2],'percent':True,'percentile_label':'bb_minus_k_percent','flip_p':False,'percentile':False,'avg_adjust':False},
185
  'csw':{'x_axis':'Pitches','y_axis':'CSW%','title':'CSW%','x_value':'csw','x_range':[.2,.25,.3,.35,.4],'percent':True,'percentile_label':'csw_percent','flip_p':True,'percentile':False,'avg_adjust':False},
186
  'woba':{'x_axis':'wOBA PA','y_axis':'wOBA','title':'wOBA','x_value':'woba','x_range':[.20,.30,.40,.50],'percent':False,'percentile_label':'woba_percent','flip_p':False,'percentile':False,'avg_adjust':True},
187
- 'xwoba':{'x_axis':'xwOBA PA','y_axis':'xwOBA','title':'xwOBA','x_value':'xwoba','x_range':[.20,.30,.40,.50],'percent':False,'percentile_label':'xwoba_percent','flip_p':False,'percentile':False,'avg_adjust':True},
188
  'launch_speed':{'x_axis':'Balls In Play','y_axis':'Exit Velocity','title':'Exit Velocity','x_value':'launch_speed','x_range':[85,90,95,100],'percent':False,'percentile_label':'launch_speed','flip_p':False,'percentile':False,'avg_adjust':False},
189
  'launch_speed_90':{'x_axis':'Balls In Play','y_axis':'90th Percentile Exit Velocity','title':'90th Percentile Exit Velocity','x_value':'launch_speed','x_range':[95,100,105,110,115],'percent':False,'percentile_label':'launch_speed_90','flip_p':False,'percentile':True,'avg_adjust':False},
190
  'hard_hit':{'x_axis':'Balls In Play','y_axis':'HardHit%','title':'HardHit%','x_value':'hard_hit','x_range':[0.2,0.3,0.4,0.5,0.6,0.7],'percent':True,'percentile_label':'hard_hit_percent','flip_p':False,'percentile':False,'avg_adjust':False},
@@ -209,11 +114,12 @@ plot_dict = {
209
  # test_df = test_df.set_index('batter_id')
210
  # #test_df = test_df[test_df.pitcher == 'Chris Bassitt'].append(test_df[test_df.pitcher != 'Chris Bassitt'])
211
 
212
- batter_dict_mlb = exit_velo_df_mlb.sort_values(['batter_name']).set_index('batter_id')['batter_name'].to_dict()
213
- batter_dict_aaa = exit_velo_df_aaa.sort_values(['batter_name']).set_index('batter_id')['batter_name'].to_dict()
214
- batter_dict_aa = exit_velo_df_aa.sort_values(['batter_name']).set_index('batter_id')['batter_name'].to_dict()
215
- batter_dict_ha = exit_velo_df_ha.sort_values(['batter_name']).set_index('batter_id')['batter_name'].to_dict()
216
- batter_dict_a = exit_velo_df_a.sort_values(['batter_name']).set_index('batter_id')['batter_name'].to_dict()
 
217
 
218
  level_dict = {'MLB':'MLB','AAA':'AAA','AA':'AA','A+':'A+','A':'A'}
219
 
@@ -223,7 +129,6 @@ plot_dict_small = {
223
  'bb_minus_k':'BB-K%',
224
  'csw':'CSW%',
225
  'woba':'wOBA',
226
- 'xwoba':'xwOBA',
227
  'launch_speed':'Exit Velocity',
228
  'launch_speed_90':'90th Percentile Exit Velocity',
229
  'hard_hit':'HardHit%',
 
21
  import shinyswatch
22
  import inflect
23
  from matplotlib.pyplot import text
24
+ from functions import rolling_batter_functions as rbf
25
  import joblib
26
  import polars as pl
27
+ from functions import df_update
28
+ update = df_update.df_update()
 
 
 
29
 
30
  def percentile(n):
31
  def percentile_(x):
 
39
 
40
 
41
  print('Starting Everything:')
 
 
 
 
 
 
 
42
 
43
 
44
+ exit_velo_df_mlb = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025.parquet")
45
  #print(df_2023)
46
+ exit_velo_df_mlb = exit_velo_df_mlb.with_columns(pl.lit('MLB').alias('level'))
47
 
48
 
49
+ exit_velo_df_aaa = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/aaa_pitch_data_2025.parquet")
50
  #print(df_2023)
51
+ exit_velo_df_aaa = exit_velo_df_aaa.with_columns(pl.lit('AAA').alias('level'))
52
 
53
 
54
+ exit_velo_df_aa = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/aa_pitch_data_2025.parquet")
55
  #print(df_2023)
56
+ exit_velo_df_aa = exit_velo_df_aa.with_columns(pl.lit('AA').alias('level'))
57
 
58
 
59
+ exit_velo_df_ha = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/hi_a_pitch_data_2025.parquet")
60
  #print(df_2023)
61
+ exit_velo_df_ha = exit_velo_df_ha.with_columns(pl.lit('A+').alias('level'))
62
 
63
  ### Import Datasets
64
+ exit_velo_df_a = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/lo_a_pitch_data_2025.parquet")
65
  #print(df_2023)
66
+ exit_velo_df_a = exit_velo_df_a.with_columns(pl.lit('A').alias('level'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
 
68
 
 
 
69
 
70
+ exit_velo_df = pl.concat([exit_velo_df_mlb,exit_velo_df_aaa,exit_velo_df_aa,exit_velo_df_ha,exit_velo_df_a])
71
+ # exit_velo_df = pl.concat([exit_velo_df_mlb])#,exit_velo_df_aaa,exit_velo_df_aa,exit_velo_df_ha,exit_velo_df_a])
72
 
73
+ exit_velo_df_codes = update.update(exit_velo_df)
74
+ exit_velo_df_codes_summ = update.update_summary_select(df=exit_velo_df_codes,selection=['batter_id','batter_name','level']).to_pandas()
75
+ exit_velo_df_codes = exit_velo_df_codes.to_pandas()
76
 
77
+ woba_list = ['woba']
78
  pa_list = ['k','bb','bb_minus_k']
79
  balls_in_play_list = ['hard_hit','launch_speed','launch_speed_90','launch_angle','barrel','sweet_spot']
80
  pitches_list = ['zone_percent','swing_percent','sw_str','csw']
 
90
  'bb_minus_k':{'x_axis':'Plate Appearances','y_axis':'BB-K%','title':'BB-K%','x_value':'bb_minus_k','x_range':[-0.3,-0.2,-0.1,0,0.1,0.2],'percent':True,'percentile_label':'bb_minus_k_percent','flip_p':False,'percentile':False,'avg_adjust':False},
91
  'csw':{'x_axis':'Pitches','y_axis':'CSW%','title':'CSW%','x_value':'csw','x_range':[.2,.25,.3,.35,.4],'percent':True,'percentile_label':'csw_percent','flip_p':True,'percentile':False,'avg_adjust':False},
92
  'woba':{'x_axis':'wOBA PA','y_axis':'wOBA','title':'wOBA','x_value':'woba','x_range':[.20,.30,.40,.50],'percent':False,'percentile_label':'woba_percent','flip_p':False,'percentile':False,'avg_adjust':True},
 
93
  'launch_speed':{'x_axis':'Balls In Play','y_axis':'Exit Velocity','title':'Exit Velocity','x_value':'launch_speed','x_range':[85,90,95,100],'percent':False,'percentile_label':'launch_speed','flip_p':False,'percentile':False,'avg_adjust':False},
94
  'launch_speed_90':{'x_axis':'Balls In Play','y_axis':'90th Percentile Exit Velocity','title':'90th Percentile Exit Velocity','x_value':'launch_speed','x_range':[95,100,105,110,115],'percent':False,'percentile_label':'launch_speed_90','flip_p':False,'percentile':True,'avg_adjust':False},
95
  'hard_hit':{'x_axis':'Balls In Play','y_axis':'HardHit%','title':'HardHit%','x_value':'hard_hit','x_range':[0.2,0.3,0.4,0.5,0.6,0.7],'percent':True,'percentile_label':'hard_hit_percent','flip_p':False,'percentile':False,'avg_adjust':False},
 
114
  # test_df = test_df.set_index('batter_id')
115
  # #test_df = test_df[test_df.pitcher == 'Chris Bassitt'].append(test_df[test_df.pitcher != 'Chris Bassitt'])
116
 
117
+ batter_dict_mlb = dict(zip(exit_velo_df_mlb.sort(['batter_name'])['batter_id'],exit_velo_df_mlb.sort(['batter_name'])['batter_name']))
118
+ # batter_dict_mlb = exit_velo_df_mlb.sort(['batter_name']).set_index('batter_id')['batter_name'].to_dict()
119
+ batter_dict_aaa = dict(zip(exit_velo_df_aaa.sort(['batter_name'])['batter_id'],exit_velo_df_aaa.sort(['batter_name'])['batter_name']))
120
+ batter_dict_aa = dict(zip(exit_velo_df_aa.sort(['batter_name'])['batter_id'],exit_velo_df_aa.sort(['batter_name'])['batter_name']))
121
+ batter_dict_ha = dict(zip(exit_velo_df_ha.sort(['batter_name'])['batter_id'],exit_velo_df_ha.sort(['batter_name'])['batter_name']))
122
+ batter_dict_a =dict(zip(exit_velo_df_a.sort(['batter_name'])['batter_id'],exit_velo_df_a.sort(['batter_name'])['batter_name']))
123
 
124
  level_dict = {'MLB':'MLB','AAA':'AAA','AA':'AA','A+':'A+','A':'A'}
125
 
 
129
  'bb_minus_k':'BB-K%',
130
  'csw':'CSW%',
131
  'woba':'wOBA',
 
132
  'launch_speed':'Exit Velocity',
133
  'launch_speed_90':'90th Percentile Exit Velocity',
134
  'hard_hit':'HardHit%',