nesticot committed on
Commit
8c4b82c
·
verified ·
1 Parent(s): a3f9da4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -210
app.py CHANGED
@@ -49,47 +49,32 @@ print('Starting Everything:')
49
  # pa_df = pd.read_csv('pa_df_all.csv',index_col=[0])
50
  # pa_df_full_na = pa_df.dropna()
51
 
52
- ### Import Datasets
53
- dataset = load_dataset('nesticot/mlb_data', data_files=['mlb_pitch_data_2024.csv',
54
- ])
55
- dataset_train = dataset['train']
56
- exit_velo_df_mlb = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
57
  #print(df_2023)
58
  exit_velo_df_mlb['level'] = 'MLB'
59
 
60
- ### Import Datasets
61
- dataset = load_dataset('nesticot/mlb_data', data_files=['aaa_pitch_data_2024.csv',
62
- ])
63
- dataset_train = dataset['train']
64
- exit_velo_df_aaa = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
65
  #print(df_2023)
66
  exit_velo_df_aaa['level'] = 'AAA'
67
 
68
- ### Import Datasets
69
- dataset = load_dataset('nesticot/mlb_data', data_files=['aa_pitch_data_2024.csv',
70
- ])
71
- dataset_train = dataset['train']
72
- exit_velo_df_aa = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
73
  #print(df_2023)
74
  exit_velo_df_aa['level'] = 'AA'
75
 
76
- ### Import Datasets
77
- dataset = load_dataset('nesticot/mlb_data', data_files=['high_a_pitch_data_2024.csv',
78
- ])
79
- dataset_train = dataset['train']
80
- exit_velo_df_ha = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
81
  #print(df_2023)
82
  exit_velo_df_ha['level'] = 'A+'
83
 
84
  ### Import Datasets
85
- dataset = load_dataset('nesticot/mlb_data', data_files=['a_pitch_data_2024.csv',
86
- ])
87
- dataset_train = dataset['train']
88
- exit_velo_df_a = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
89
  #print(df_2023)
90
  exit_velo_df_a['level'] = 'A'
91
 
92
- exit_velo_df = pd.concat([exit_velo_df_mlb,exit_velo_df_aaa,exit_velo_df_aa,exit_velo_df_ha,exit_velo_df_a])
93
  # exit_velo_df = pd.concat([exit_velo_df_mlb,exit_velo_df_aaa])
94
  # exit_velo_df_copy = exit_velo_df.copy()
95
 
@@ -107,191 +92,6 @@ def percentile(n):
107
  exit_velo_df_codes = df_update(exit_velo_df)
108
 
109
 
110
- # end_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
111
- # 'double', 'sac_fly', 'force_out', 'home_run',
112
- # 'grounded_into_double_play', 'fielders_choice', 'field_error',
113
- # 'triple', 'sac_bunt', 'double_play', 'intent_walk',
114
- # 'fielders_choice_out', 'strikeout_double_play',
115
- # 'sac_fly_double_play', 'catcher_interf', 'other_out']
116
-
117
-
118
-
119
- # exit_velo_df['pa'] = [1 if isinstance(x, str) else 0 for x in exit_velo_df.event_type]
120
- # #exit_velo_df['pa'] = 1
121
- # exit_velo_df['k'] = exit_velo_df.event_type.isin(list(filter(None, [x if 'strikeout' in x else '' for x in exit_velo_df.event_type.fillna('None').unique()])))
122
- # #exit_velo_df['bb'] = exit_velo_df.event_type.isin(list(filter(None, [x if 'walk' in x else '' for x in exit_velo_df.event_type.fillna('None').unique()])))
123
- # exit_velo_df['bb'] = exit_velo_df.event_type.isin(['walk','intent_walk'])
124
-
125
- # #exit_velo_df['k_minus_bb'] = exit_velo_df['k'].astype(np.float32)-exit_velo_df['bb'].astype(np.float32)
126
- # exit_velo_df['bb_minus_k'] = exit_velo_df['bb'].astype(np.float32)-exit_velo_df['k'].astype(np.float32)
127
-
128
-
129
-
130
- # exit_velo_df = exit_velo_df.drop_duplicates(subset=['play_id'])
131
-
132
-
133
-
134
- # swing_codes = ['Swinging Strike', 'In play, no out',
135
- # 'Foul', 'In play, out(s)',
136
- # 'In play, run(s)', 'Swinging Strike (Blocked)',
137
- # 'Foul Bunt','Foul Tip', 'Missed Bunt','Foul Pitchout','Swinging Pitchout']
138
-
139
- # swings_in = ['Swinging Strike', 'In play, no out',
140
- # 'Foul', 'In play, out(s)',
141
- # 'In play, run(s)', 'Swinging Strike (Blocked)',
142
- # 'Foul Bunt','Foul Tip', 'Missed Bunt','Foul Pitchout','Swinging Pitchout']
143
-
144
- # swing_strike_codes = ['Swinging Strike',
145
- # 'Swinging Strike (Blocked)','Missed Bunt','Foul Tip','Swinging Pitchout']
146
-
147
-
148
- # contact_codes = ['In play, no out',
149
- # 'Foul', 'In play, out(s)',
150
- # 'In play, run(s)',
151
- # 'Foul Bunt']
152
-
153
- # codes_in = ['In play, out(s)',
154
- # 'Swinging Strike',
155
- # 'Ball',
156
- # 'Foul',
157
- # 'In play, no out',
158
- # 'Called Strike',
159
- # 'Foul Tip',
160
- # 'In play, run(s)',
161
- # 'Hit By Pitch',
162
- # 'Ball In Dirt',
163
- # 'Pitchout',
164
- # 'Swinging Strike (Blocked)',
165
- # 'Foul Bunt',
166
- # 'Missed Bunt',
167
- # 'Foul Pitchout',
168
- # 'Intent Ball',
169
- # 'Swinging Pitchout']
170
-
171
-
172
-
173
-
174
- # exit_velo_df['in_zone'] = [x < 10 if x > 0 else np.nan for x in exit_velo_df['zone']]
175
-
176
-
177
- # if len(exit_velo_df.loc[(~exit_velo_df['x'].isnull())&(exit_velo_df['px'].isnull()),'px']) > 0:
178
- # exit_velo_df.loc[(~exit_velo_df['x'].isnull())&(exit_velo_df['px'].isnull()),'px'] = px_model.predict(exit_velo_df.loc[(~exit_velo_df['x'].isnull())&(exit_velo_df['px'].isnull())][['x']])
179
- # exit_velo_df.loc[(~exit_velo_df['y'].isnull())&(exit_velo_df['pz'].isnull()),'pz'] = px_model.predict(exit_velo_df.loc[(~exit_velo_df['y'].isnull())&(exit_velo_df['pz'].isnull())][['y']]) + 3.2
180
-
181
-
182
- # # exit_velo_df['in_zone'] = [x < 10 if x > 0 else np.nan for x in exit_velo_df['zone']]
183
- # exit_velo_df.loc[(~exit_velo_df['px'].isna())&
184
- # (exit_velo_df['in_zone'].isna())&
185
- # (~exit_velo_df['sz_top'].isna())&
186
- # (~exit_velo_df['pz'].isna())&
187
- # (~exit_velo_df['sz_bot'].isna())
188
- # ,'in_zone'] = in_zone_model.predict(exit_velo_df.loc[(~exit_velo_df['px'].isna())&
189
- # (exit_velo_df['in_zone'].isna())&
190
- # (~exit_velo_df['sz_top'].isna())&
191
- # (~exit_velo_df['pz'].isna())&
192
- # (~exit_velo_df['sz_bot'].isna())][['px','pz','sz_top','sz_bot']].values)
193
-
194
- # exit_velo_df = exit_velo_df.drop_duplicates(subset=['play_id'])
195
-
196
- # exit_velo_df_codes = exit_velo_df[exit_velo_df.play_description.isin(codes_in)].dropna(subset=['in_zone'])
197
-
198
- # exit_velo_df_codes['bip'] = ~exit_velo_df_codes.launch_speed.isna()
199
- # conditions = [
200
- # (exit_velo_df_codes['launch_speed'].isna()),
201
- # (exit_velo_df_codes['launch_speed']*1.5 - exit_velo_df_codes['launch_angle'] >= 117 ) & (exit_velo_df_codes['launch_speed'] + exit_velo_df_codes['launch_angle'] >= 124) & (exit_velo_df_codes['launch_speed'] > 98) & (exit_velo_df_codes['launch_angle'] >= 8) & (exit_velo_df_codes['launch_angle'] <= 50)
202
- # ]
203
-
204
- # choices = [False,True]
205
- # exit_velo_df_codes['barrel'] = np.select(conditions, choices, default=np.nan)
206
-
207
- # conditions_ss = [
208
- # (exit_velo_df_codes['launch_angle'].isna()),
209
- # (exit_velo_df_codes['launch_angle'] >= 8 ) * (exit_velo_df_codes['launch_angle'] <= 32 )
210
- # ]
211
-
212
- # choices_ss = [False,True]
213
- # exit_velo_df_codes['sweet_spot'] = np.select(conditions_ss, choices_ss, default=np.nan)
214
-
215
-
216
- # conditions_hh = [
217
- # (exit_velo_df_codes['launch_speed'].isna()),
218
- # (exit_velo_df_codes['launch_speed'] >= 94.5 )
219
- # ]
220
-
221
- # choices_hh = [False,True]
222
- # exit_velo_df_codes['hard_hit'] = np.select(conditions_hh, choices_hh, default=np.nan)
223
-
224
-
225
- # conditions_tb = [
226
- # (exit_velo_df_codes['event_type']=='single'),
227
- # (exit_velo_df_codes['event_type']=='double'),
228
- # (exit_velo_df_codes['event_type']=='triple'),
229
- # (exit_velo_df_codes['event_type']=='home_run'),
230
- # ]
231
-
232
- # choices_tb = [1,2,3,4]
233
-
234
- # exit_velo_df_codes['tb'] = np.select(conditions_tb, choices_tb, default=np.nan)
235
-
236
- # conditions_woba = [
237
- # (exit_velo_df_codes['event_type']=='walk'),
238
- # (exit_velo_df_codes['event_type']=='hit_by_pitch'),
239
- # (exit_velo_df_codes['event_type']=='single'),
240
- # (exit_velo_df_codes['event_type']=='double'),
241
- # (exit_velo_df_codes['event_type']=='triple'),
242
- # (exit_velo_df_codes['event_type']=='home_run'),
243
- # ]
244
-
245
- # choices_woba = [0.705,
246
- # 0.688,
247
- # 0.897,
248
- # 1.233,
249
- # 1.612,
250
- # 2.013]
251
-
252
- # exit_velo_df_codes['woba'] = np.select(conditions_woba, choices_woba, default=np.nan)
253
-
254
-
255
- # woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
256
- # 'double', 'sac_fly', 'force_out', 'home_run',
257
- # 'grounded_into_double_play', 'fielders_choice', 'field_error',
258
- # 'triple', 'sac_bunt', 'double_play',
259
- # 'fielders_choice_out', 'strikeout_double_play',
260
- # 'sac_fly_double_play', 'other_out']
261
-
262
-
263
-
264
-
265
-
266
- # conditions_woba_code = [
267
- # (exit_velo_df_codes['event_type'].isin(woba_codes))
268
- # ]
269
-
270
- # choices_woba_code = [1]
271
-
272
- # exit_velo_df_codes['woba_codes'] = np.select(conditions_woba_code, choices_woba_code, default=np.nan)
273
-
274
-
275
- # #exit_velo_df_codes['barrel'] = (exit_velo_df_codes.launch_speed >= 98) & (exit_velo_df_codes.launch_angle >= (26 - (-98 + exit_velo_df_codes.launch_speed))) & (exit_velo_df_codes.launch_angle <= 30 + (-98 + exit_velo_df_codes.launch_speed)) & (exit_velo_df_codes.launch_angle >= 8) & (exit_velo_df_codes.launch_angle <= 50)
276
-
277
-
278
-
279
-
280
-
281
- # #exit_velo_df_codes['barrel'] = (exit_velo_df_codes.launch_speed >= 98) & (exit_velo_df_codes.launch_angle >= (26 - (-98 + exit_velo_df_codes.launch_speed))) & (exit_velo_df_codes.launch_angle <= 30 + (-98 + exit_velo_df_codes.launch_speed)) & (exit_velo_df_codes.launch_angle >= 8) & (exit_velo_df_codes.launch_angle <= 50)
282
- # exit_velo_df_codes['pitches'] = 1
283
- # exit_velo_df_codes['whiffs'] = [1 if ((x == 'S')|(x == 'W')|(x =='T')) else 0 for x in exit_velo_df_codes.play_code]
284
- # exit_velo_df_codes['csw'] = [1 if ((x == 'S')|(x == 'W')|(x =='T')|(x == 'C')) else 0 for x in exit_velo_df_codes.play_code]
285
- # exit_velo_df_codes['swings'] = [1 if x in swings_in else 0 for x in exit_velo_df_codes.play_description]
286
-
287
- # exit_velo_df_codes['out_zone'] = exit_velo_df_codes.in_zone == False
288
- # exit_velo_df_codes['zone_swing'] = (exit_velo_df_codes.in_zone == True)&(exit_velo_df_codes.swings == 1)
289
- # exit_velo_df_codes['zone_contact'] = (exit_velo_df_codes.in_zone == True)&(exit_velo_df_codes.swings == 1)&(exit_velo_df_codes.whiffs == 0)
290
- # exit_velo_df_codes['ozone_swing'] = (exit_velo_df_codes.in_zone==False)&(exit_velo_df_codes.swings == 1)
291
- # exit_velo_df_codes['ozone_contact'] = (exit_velo_df_codes.in_zone==False)&(exit_velo_df_codes.swings == 1)&(exit_velo_df_codes.whiffs == 0)
292
-
293
-
294
-
295
  exit_velo_df_codes_summ = exit_velo_df_codes.groupby(['batter_id','batter_name','level']).agg(
296
  pa = ('pa','sum'),
297
  k = ('k','sum'),
 
49
  # pa_df = pd.read_csv('pa_df_all.csv',index_col=[0])
50
  # pa_df_full_na = pa_df.dropna()
51
 
52
+
53
+ exit_velo_df_mlb = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025.parquet").to_pandas()
 
 
 
54
  #print(df_2023)
55
  exit_velo_df_mlb['level'] = 'MLB'
56
 
57
+
58
+ exit_velo_df_aaa = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/aaa_pitch_data_2025.parquet").to_pandas()
 
 
 
59
  #print(df_2023)
60
  exit_velo_df_aaa['level'] = 'AAA'
61
 
62
+
63
+ exit_velo_df_aa = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/aa_pitch_data_2025.parquet").to_pandas()
 
 
 
64
  #print(df_2023)
65
  exit_velo_df_aa['level'] = 'AA'
66
 
67
+
68
+ exit_velo_df_ha = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/hi_a_pitch_data_2025.parquet").to_pandas()
 
 
 
69
  #print(df_2023)
70
  exit_velo_df_ha['level'] = 'A+'
71
 
72
  ### Import Datasets
73
+ exit_velo_df_a = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/lo_a_pitch_data_2025.parquet").to_pandas()
 
 
 
74
  #print(df_2023)
75
  exit_velo_df_a['level'] = 'A'
76
 
77
+ exit_velo_df = pl.concat([exit_velo_df_mlb,exit_velo_df_aaa,exit_velo_df_aa,exit_velo_df_ha,exit_velo_df_a])
78
  # exit_velo_df = pd.concat([exit_velo_df_mlb,exit_velo_df_aaa])
79
  # exit_velo_df_copy = exit_velo_df.copy()
80
 
 
92
  exit_velo_df_codes = df_update(exit_velo_df)
93
 
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  exit_velo_df_codes_summ = exit_velo_df_codes.groupby(['batter_id','batter_name','level']).agg(
96
  pa = ('pa','sum'),
97
  k = ('k','sum'),