nesticot committed on
Commit
fa175ae
·
verified ·
1 Parent(s): 9e0735d

Upload 27 files

Browse files
api_scraper.py CHANGED
@@ -106,7 +106,9 @@ class MLB_Scrape:
106
  time_list = [item for sublist in [[y['gameDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
107
  date_list = [item for sublist in [[y['officialDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
108
  away_team_list = [item for sublist in [[y['teams']['away']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
 
109
  home_team_list = [item for sublist in [[y['teams']['home']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
 
110
  state_list = [item for sublist in [[y['status']['codedGameState'] for y in x['games']] for x in game_call['dates']] for item in sublist]
111
  venue_id = [item for sublist in [[y['venue']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
112
  venue_name = [item for sublist in [[y['venue']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
@@ -116,7 +118,9 @@ class MLB_Scrape:
116
  'time': time_list,
117
  'date': date_list,
118
  'away': away_team_list,
 
119
  'home': home_team_list,
 
120
  'state': state_list,
121
  'venue_id': venue_id,
122
  'venue_name': venue_name})
@@ -161,6 +165,27 @@ class MLB_Scrape:
161
 
162
  return data_total
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
  return data_total
166
 
 
106
  time_list = [item for sublist in [[y['gameDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
107
  date_list = [item for sublist in [[y['officialDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
108
  away_team_list = [item for sublist in [[y['teams']['away']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
109
+ away_team_id_list = [item for sublist in [[y['teams']['away']['team']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
110
  home_team_list = [item for sublist in [[y['teams']['home']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
111
+ home_team_id_list = [item for sublist in [[y['teams']['home']['team']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
112
  state_list = [item for sublist in [[y['status']['codedGameState'] for y in x['games']] for x in game_call['dates']] for item in sublist]
113
  venue_id = [item for sublist in [[y['venue']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
114
  venue_name = [item for sublist in [[y['venue']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
 
118
  'time': time_list,
119
  'date': date_list,
120
  'away': away_team_list,
121
+ 'away_id': away_team_id_list,
122
  'home': home_team_list,
123
+ 'home_id': home_team_id_list,
124
  'state': state_list,
125
  'venue_id': venue_id,
126
  'venue_name': venue_name})
 
165
 
166
  return data_total
167
 
168
def get_data_new(self, game_list_input: list, timeout: float = 30):
    """
    Retrieves live game data for a list of game IDs in parallel.

    Each game ID is fetched from the MLB Stats API live-feed endpoint on a
    worker thread. A progress bar advances as individual requests finish.

    Parameters:
    - game_list_input (list): A list of game IDs for which to retrieve live data.
    - timeout (float): Seconds to wait on each HTTP request before it is
      abandoned with an exception. Defaults to 30.

    Returns:
    - data_total (list): A list of JSON responses containing live game data,
      one per game ID, in the same order as game_list_input.
    """
    print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')

    def fetch_data(game_id):
        # Without a timeout a single stalled connection would block the
        # executor (and therefore this call) indefinitely.
        r = requests.get(
            f'https://statsapi.mlb.com/api/v1.1/game/{game_id}/feed/live',
            timeout=timeout,
        )
        return r.json()

    game_ids = list(game_list_input)
    # Pre-size the output so each response can be written to the slot of the
    # game ID that produced it, regardless of which request finishes first.
    data_total = [None] * len(game_ids)

    with ThreadPoolExecutor() as executor:
        # Map each future to the position of its game ID in the input.
        futures = {
            executor.submit(fetch_data, game_id): position
            for position, game_id in enumerate(game_ids)
        }
        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing", unit="iteration"):
            data_total[futures[future]] = future.result()

    return data_total
191
 
app.py CHANGED
@@ -1,400 +1,488 @@
1
- import polars as pl
2
- import numpy as np
3
- import pandas as pd
4
- import api_scraper
5
- scrape = api_scraper.MLB_Scrape()
6
- from functions import df_update
7
- from functions import pitch_summary_functions
8
- update = df_update.df_update()
9
- from stuff_model import feature_engineering as fe
10
- from stuff_model import stuff_apply
11
- import requests
12
- import joblib
13
- from matplotlib.gridspec import GridSpec
14
- from shiny import App, reactive, ui, render
15
- from shiny.ui import h2, tags
16
- import matplotlib.pyplot as plt
17
- import matplotlib.gridspec as gridspec
18
- import seaborn as sns
19
- from functions.pitch_summary_functions import *
20
- from shiny import App, reactive, ui, render
21
- from shiny.ui import h2, tags
22
-
23
- colour_palette = ['#FFB000','#648FFF','#785EF0',
24
- '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
25
-
26
-
27
- year_list = [2017,2018,2019,2020,2021,2022,2023,2024]
28
-
29
-
30
-
31
- level_dict = {'1':'MLB',
32
- '11':'AAA',
33
- # '12':'AA',
34
- #'13':'A+',
35
- '14':'A',
36
- '17':'AFL',
37
- '22':'College',
38
- '21':'Prospects',
39
- '51':'International' }
40
-
41
- function_dict={
42
- 'velocity_kdes':'Velocity Distributions',
43
- 'break_plot':'Pitch Movement',
44
- 'tj_stuff_roling':'Rolling tjStuff+ by Pitch',
45
- 'tj_stuff_roling_game':'Rolling tjStuff+ by Game',
46
- 'location_plot_lhb':'Locations vs LHB',
47
- 'location_plot_rhb':'Locations vs RHB',
48
- }
49
-
50
-
51
- split_dict = {'all':'All',
52
- 'left':'LHH',
53
- 'right':'RHH'}
54
-
55
- split_dict_hand = {'all':['L','R'],
56
- 'left':['L'],
57
- 'right':['R']}
58
-
59
-
60
- type_dict = {'R':'Regular Season',
61
- 'S':'Spring',
62
- 'P':'Playoffs' }
63
-
64
-
65
-
66
- # List of MLB teams and their corresponding ESPN logo URLs
67
- mlb_teams = [
68
- {"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"},
69
- {"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"},
70
- {"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"},
71
- {"team": "BAL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bal.png&h=500&w=500"},
72
- {"team": "BOS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bos.png&h=500&w=500"},
73
- {"team": "CHC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chc.png&h=500&w=500"},
74
- {"team": "CWS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chw.png&h=500&w=500"},
75
- {"team": "CIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cin.png&h=500&w=500"},
76
- {"team": "CLE", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cle.png&h=500&w=500"},
77
- {"team": "COL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/col.png&h=500&w=500"},
78
- {"team": "DET", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/det.png&h=500&w=500"},
79
- {"team": "HOU", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/hou.png&h=500&w=500"},
80
- {"team": "KC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/kc.png&h=500&w=500"},
81
- {"team": "LAA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/laa.png&h=500&w=500"},
82
- {"team": "LAD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/lad.png&h=500&w=500"},
83
- {"team": "MIA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mia.png&h=500&w=500"},
84
- {"team": "MIL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mil.png&h=500&w=500"},
85
- {"team": "MIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/min.png&h=500&w=500"},
86
- {"team": "NYM", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nym.png&h=500&w=500"},
87
- {"team": "NYY", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nyy.png&h=500&w=500"},
88
- {"team": "PHI", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/phi.png&h=500&w=500"},
89
- {"team": "PIT", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/pit.png&h=500&w=500"},
90
- {"team": "SD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sd.png&h=500&w=500"},
91
- {"team": "SF", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sf.png&h=500&w=500"},
92
- {"team": "SEA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sea.png&h=500&w=500"},
93
- {"team": "STL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/stl.png&h=500&w=500"},
94
- {"team": "TB", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tb.png&h=500&w=500"},
95
- {"team": "TEX", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tex.png&h=500&w=500"},
96
- {"team": "TOR", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tor.png&h=500&w=500"},
97
- {"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"},
98
- {"team": "ZZZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"}
99
- ]
100
-
101
-
102
- df_image = pd.DataFrame(mlb_teams)
103
- image_dict = df_image.set_index('team')['logo_url'].to_dict()
104
- image_dict_flip = df_image.set_index('logo_url')['team'].to_dict()
105
-
106
-
107
-
108
- # # Define the features to be used for training
109
- # features_table = ['start_speed',
110
- # 'spin_rate',
111
- # 'extension',
112
- # 'ivb',
113
- # 'hb',
114
- # 'x0',
115
- # 'z0',
116
- # 'tj_stuff_plus']
117
-
118
- from shiny import App, reactive, ui, render
119
- from shiny.ui import h2, tags
120
-
121
- # Define the UI layout for the app
122
- app_ui = ui.page_sidebar(
123
- ui.sidebar(
124
- # Row for selecting season and level
125
- ui.row(
126
- ui.column(4, ui.input_select('year_input', 'Select Season', year_list, selected=2024)),
127
- ui.column(4, ui.input_select('level_input', 'Select Level', level_dict)),
128
- ui.column(4, ui.input_select('type_input', 'Select Type', type_dict,selected='R'))
129
- ),
130
- # Row for the action button to get player list
131
- ui.row(ui.input_action_button("player_button", "Get Player List", class_="btn-primary")),
132
- # Row for selecting the player
133
- ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))),
134
- # Row for selecting the date range
135
- ui.row(ui.column(12, ui.output_ui('date_id', 'Select Date'))),
136
-
137
- # Rows for selecting plots and split options
138
- ui.row(
139
- ui.column(4, ui.input_select('plot_id_1', 'Plot Left', function_dict, multiple=False, selected='velocity_kdes')),
140
- ui.column(4, ui.input_select('plot_id_2', 'Plot Middle', function_dict, multiple=False, selected='tj_stuff_roling')),
141
- ui.column(4, ui.input_select('plot_id_3', 'Plot Right', function_dict, multiple=False, selected='break_plot'))
142
- ),
143
- ui.row(
144
- ui.column(6, ui.input_select('split_id', 'Select Split', split_dict, multiple=False)),
145
- ui.column(6, ui.input_numeric('rolling_window', 'Rolling Window (for tjStuff+ Plot)', min=1, value=50))
146
- ),
147
- ui.row(
148
- ui.column(6, ui.input_switch("switch", "Custom Team?", False)),
149
- ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False))
150
- ),
151
-
152
- # Row for the action button to generate plot
153
- ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")),
154
- width="400px" # Added this parameter to control sidebar width
155
- ),
156
-
157
- # Main content area with tabs (placed directly in page_sidebar)
158
- ui.navset_tab(
159
- ui.nav_panel("Pitching Summary",
160
- ui.output_text("status"),
161
- ui.output_plot('plot', width='2100px', height='2100px')
162
- ),
163
- ui.nav_panel("Summary Table",
164
- ui.output_data_frame("grid"))
165
- )
166
- )
167
-
168
-
169
- def server(input, output, session):
170
-
171
- @reactive.calc
172
- @reactive.event(input.pitcher_id, input.date_id,input.split_id)
173
- def cached_data():
174
-
175
- year_input = int(input.year_input())
176
- sport_id = int(input.level_input())
177
- player_input = int(input.pitcher_id())
178
- start_date = str(input.date_id()[0])
179
- end_date = str(input.date_id()[1])
180
- # Simulate an expensive data operation
181
- game_list = scrape.get_player_games_list(sport_id = sport_id,
182
- season = year_input,
183
- player_id = player_input,
184
- start_date = start_date,
185
- end_date = end_date,
186
- game_type = [input.type_input()])
187
-
188
- data_list = scrape.get_data(game_list_input = game_list[:])
189
- df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter(
190
- (pl.col("pitcher_id") == player_input)&
191
- (pl.col("is_pitch") == True)&
192
- (pl.col("start_speed") >= 50)&
193
- (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
194
-
195
- )))).with_columns(
196
- pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
197
- ))
198
- return df
199
-
200
- @render.ui
201
- @reactive.event(input.player_button, input.year_input, input.level_input, input.type_input,ignore_none=False)
202
- def player_select_ui():
203
- # Get the list of pitchers for the selected level and season
204
- df_pitcher_info = scrape.get_players(sport_id=int(input.level_input()), season=int(input.year_input()), game_type = [input.type_input()]).filter(
205
- pl.col("position").is_in(['P','TWP'])).sort("name")
206
-
207
- # Create a dictionary of pitcher IDs and names
208
- pitcher_dict = dict(zip(df_pitcher_info['player_id'], df_pitcher_info['name']))
209
-
210
- # Return a select input for choosing a pitcher
211
- return ui.input_select("pitcher_id", "Select Pitcher", pitcher_dict, selectize=True)
212
-
213
- @render.ui
214
- @reactive.event(input.player_button, input.year_input, input.level_input, input.type_input,ignore_none=False)
215
- def date_id():
216
- # Create a date range input for selecting the date range within the selected year
217
- return ui.input_date_range("date_id", "Select Date Range",
218
- start=f"{int(input.year_input())}-01-01",
219
- end=f"{int(input.year_input())}-12-31",
220
- min=f"{int(input.year_input())}-01-01",
221
- max=f"{int(input.year_input())}-12-31")
222
- @output
223
- @render.text
224
- def status():
225
- # Only show status when generating
226
- if input.generate == 0:
227
- return ""
228
- return ""
229
-
230
- @output
231
- @render.plot
232
- @reactive.event(input.generate_plot, ignore_none=False)
233
- def plot():
234
- # Show progress/loading notification
235
- with ui.Progress(min=0, max=1) as p:
236
- p.set(message="Generating plot", detail="This may take a while...")
237
-
238
-
239
- p.set(0.3, "Gathering data...")
240
- year_input = int(input.year_input())
241
- sport_id = int(input.level_input())
242
- player_input = int(input.pitcher_id())
243
- start_date = str(input.date_id()[0])
244
- end_date = str(input.date_id()[1])
245
-
246
- print(year_input, sport_id, player_input, start_date, end_date)
247
-
248
- df = cached_data()
249
- df = df.clone()
250
-
251
- p.set(0.6, "Creating plot...")
252
-
253
-
254
- #plt.rcParams["figure.figsize"] = [10,10]
255
- fig = plt.figure(figsize=(26,26))
256
- plt.rcParams.update({'figure.autolayout': True})
257
- fig.set_facecolor('white')
258
- sns.set_theme(style="whitegrid", palette=colour_palette)
259
- print('this is the one plot')
260
-
261
- gs = gridspec.GridSpec(6, 8,
262
- height_ratios=[5,20,12,36,36,7],
263
- width_ratios=[4,18,18,18,18,18,18,4])
264
-
265
-
266
- gs.update(hspace=0.2, wspace=0.5)
267
-
268
- # Define the positions of each subplot in the grid
269
- ax_headshot = fig.add_subplot(gs[1,1:3])
270
- ax_bio = fig.add_subplot(gs[1,3:5])
271
- ax_logo = fig.add_subplot(gs[1,5:7])
272
-
273
- ax_season_table = fig.add_subplot(gs[2,1:7])
274
-
275
- ax_plot_1 = fig.add_subplot(gs[3,1:3])
276
- ax_plot_2 = fig.add_subplot(gs[3,3:5])
277
- ax_plot_3 = fig.add_subplot(gs[3,5:7])
278
-
279
- ax_table = fig.add_subplot(gs[4,1:7])
280
-
281
- ax_footer = fig.add_subplot(gs[-1,1:7])
282
- ax_header = fig.add_subplot(gs[0,1:7])
283
- ax_left = fig.add_subplot(gs[:,0])
284
- ax_right = fig.add_subplot(gs[:,-1])
285
-
286
- # Hide axes for footer, header, left, and right
287
- ax_footer.axis('off')
288
- ax_header.axis('off')
289
- ax_left.axis('off')
290
- ax_right.axis('off')
291
-
292
- sns.set_theme(style="whitegrid", palette=colour_palette)
293
- fig.set_facecolor('white')
294
-
295
- df_teams = scrape.get_teams()
296
-
297
- player_headshot(player_input=player_input, ax=ax_headshot,sport_id=sport_id,season=year_input)
298
- player_bio(pitcher_id=player_input, ax=ax_bio,sport_id=sport_id,year_input=year_input)
299
-
300
- if input.switch():
301
-
302
- # Get the logo URL from the image dictionary using the team abbreviation
303
- logo_url = input.logo_select()
304
-
305
- # Send a GET request to the logo URL
306
- response = requests.get(logo_url)
307
-
308
- # Open the image from the response content
309
- img = Image.open(BytesIO(response.content))
310
-
311
- # Display the image on the axis
312
- ax_logo.set_xlim(0, 1.3)
313
- ax_logo.set_ylim(0, 1)
314
- ax_logo.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')
315
-
316
- # Turn off the axis
317
- ax_logo.axis('off')
318
-
319
- else:
320
- plot_logo(pitcher_id=player_input, ax=ax_logo, df_team=df_teams,df_players=scrape.get_players(sport_id,year_input))
321
-
322
- stat_summary_table(df=df,
323
- ax=ax_season_table,
324
- player_input=player_input,
325
- split=input.split_id(),
326
- sport_id=sport_id,
327
- game_type=[input.type_input()])
328
-
329
- # break_plot(df=df_plot,ax=ax2)
330
- for x,y,z in zip([input.plot_id_1(),input.plot_id_2(),input.plot_id_3()],[ax_plot_1,ax_plot_2,ax_plot_3],[1,3,5]):
331
- if x == 'velocity_kdes':
332
- velocity_kdes(df,
333
- ax=y,
334
- gs=gs,
335
- gs_x=[3,4],
336
- gs_y=[z,z+2],
337
- fig=fig)
338
- if x == 'tj_stuff_roling':
339
- tj_stuff_roling(df=df,
340
- window=int(input.rolling_window()),
341
- ax=y)
342
-
343
- if x == 'tj_stuff_roling_game':
344
- tj_stuff_roling_game(df=df,
345
- window=int(input.rolling_window()),
346
- ax=y)
347
-
348
- if x == 'break_plot':
349
- break_plot(df = df,ax=y)
350
-
351
- if x == 'location_plot_lhb':
352
- location_plot(df = df,ax=y,hand='L')
353
-
354
- if x == 'location_plot_rhb':
355
- location_plot(df = df,ax=y,hand='R')
356
-
357
- summary_table(df=df,
358
- ax=ax_table)
359
-
360
- plot_footer(ax_footer)
361
-
362
- fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)
363
-
364
-
365
- @output
366
- @render.data_frame
367
- @reactive.event(input.generate_plot, ignore_none=False)
368
- def grid():
369
-
370
- df = cached_data()
371
- df = df.clone()
372
- features_table = ['start_speed',
373
- 'spin_rate',
374
- 'extension',
375
- 'ivb',
376
- 'hb',
377
- 'x0',
378
- 'z0']
379
-
380
-
381
-
382
- selection = ['game_id','pitcher_id','pitcher_name','batter_id','batter_name','pitcher_hand',
383
- 'batter_hand','balls','strikes','play_code','event_type','pitch_type','vaa','haa']+features_table+['tj_stuff_plus']
384
-
385
-
386
-
387
- return render.DataGrid(
388
- df.select(selection).to_pandas().round(1),
389
- row_selection_mode='multiple',
390
- height='700px',
391
- width='fit-content',
392
- filters=True,
393
- )
394
-
395
-
396
- app = App(app_ui, server)
397
-
398
-
399
-
400
- app = App(app_ui, server)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from shiny import App, ui, render, reactive
2
+ import polars as pl
3
+ import numpy as np
4
+ import pandas as pd
5
+ import api_scraper
6
+ scrape = api_scraper.MLB_Scrape()
7
+ from functions import df_update
8
+ from functions import pitch_summary_functions
9
+ update = df_update.df_update()
10
+ from stuff_model import feature_engineering as fe
11
+ from stuff_model import stuff_apply
12
+ import requests
13
+ import joblib
14
+ from matplotlib.gridspec import GridSpec
15
+ import math
16
+ from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme
17
+ theme.tabulator_site()
18
+
19
+ colour_palette = ['#FFB000','#648FFF','#785EF0',
20
+ '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
21
+
22
+ # df = pl.read_csv("data.csv")
23
+ # df = pl.read_parquet("data_small.parquet")[:]
24
+ # df = pl.read_parquet("data.parquet")[:]
25
+ # print('df')
26
+ season = 2024
27
+
28
+ df_mlb = pl.read_parquet("data/data_mlb_2024.parquet")[:]
29
+ df_aaa = pl.read_parquet("data/data_aaa_2024.parquet")[:]
30
+ df_a = pl.read_parquet("data/data_a_2024.parquet")[:]
31
+
32
+
33
+
34
def df_final(df: pl.DataFrame, year_input: int, sport_id: int):
    """
    Join schedule data onto a pitch-level frame and attach tjStuff+ values.

    Steps, in order:
    1. Fetch the schedule for the given season and sport and left-join it
       onto ``df`` by ``game_id``.
    2. Add ``home_away`` (batter side) and ``home_away_pitcher`` columns by
       comparing the batter/pitcher team IDs with the schedule's
       ``away_id`` / ``home_id``; rows matching neither are left null.
    3. Run the stuff model on the feature-engineered frame and the
       ``update`` step on the joined frame, then left-join the model's
       ``tj_stuff_plus`` column back by ``play_id``.

    Parameters:
    - df (pl.DataFrame): Pitch-level data containing at least ``game_id``,
      ``play_id``, ``batter_team_id`` and ``pitcher_team_id``.
    - year_input (int): Season passed to the schedule lookup.
    - sport_id (int): Sport/level ID passed to the schedule lookup.

    Returns:
    - df_total: The updated frame with a ``tj_stuff_plus`` column added.
    """

    def _side_of(team_id_column: str):
        # 'Away' / 'Home' depending on which schedule team ID the given
        # column matches; null when it matches neither.
        return (
            pl.when(pl.col(team_id_column) == pl.col('away_id'))
            .then(pl.lit('Away'))
            .when(pl.col(team_id_column) == pl.col('home_id'))
            .then(pl.lit('Home'))
            .otherwise(None)
        )

    df_schedule = scrape.get_schedule(year_input=[year_input], sport_id=[sport_id])
    df = df.join(df_schedule, on='game_id', how='left')

    # NOTE(review): the grouping tables elsewhere in this file refer to a
    # 'home_away_batter' field, while the batter column created here is
    # named 'home_away' -- confirm which name downstream code expects.
    df = df.with_columns(_side_of('batter_team_id').alias('home_away'))
    df = df.with_columns(_side_of('pitcher_team_id').alias('home_away_pitcher'))

    print('schedule')

    df_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df))
    print('stuff')
    df_up = update.update(df)
    print('update')
    df_total = df_up.join(df_stuff[['play_id', 'tj_stuff_plus']], on='play_id', how='left')
    print('total')
    return df_total
67
+
68
+
69
+ df_mlb_total = df_final(df=df_mlb,year_input=season,sport_id=1)
70
+ df_aaa_total = df_final(df=df_aaa,year_input=season,sport_id=11)
71
+ df_a_total = df_final(df=df_a.drop_nulls(subset=['start_speed']),year_input=season,sport_id=14)
72
+
73
+ rounding_dict = {
74
+ 'pa': 0,
75
+ 'bip': 0,
76
+ 'hits': 0,
77
+ 'k': 0,
78
+ 'bb': 0,
79
+ 'max_launch_speed': 1,
80
+ 'launch_speed_90': 1,
81
+ 'launch_speed': 1,
82
+ 'pitches': 0,
83
+ 'tj_stuff_plus_avg': 0,
84
+ 'avg': 3,
85
+ 'obp': 3,
86
+ 'slg': 3,
87
+ 'ops': 3,
88
+ 'k_percent': 3,
89
+ 'bb_percent': 3,
90
+ 'k_minus_bb_percent': 3,
91
+ 'sweet_spot_percent': 3,
92
+ 'woba_percent': 3,
93
+ 'xwoba_percent': 3,
94
+ 'woba_percent_contact': 3,
95
+ 'xwoba_percent_contact': 3,
96
+ 'hard_hit_percent': 3,
97
+ 'barrel_percent': 3,
98
+ 'zone_contact_percent': 3,
99
+ 'zone_swing_percent': 3,
100
+ 'zone_percent': 3,
101
+ 'chase_percent': 3,
102
+ 'chase_contact': 3,
103
+ 'swing_percent': 3,
104
+ 'whiff_rate': 3,
105
+ 'swstr_rate': 3,
106
+ 'ground_ball_percent': 3,
107
+ 'line_drive_percent': 3,
108
+ 'fly_ball_percent': 3,
109
+ 'pop_up_percent': 3,
110
+ 'heart_zone_swing_percent': 3,
111
+ 'shadow_zone_swing_percent': 3,
112
+ 'chase_zone_swing_percent': 3,
113
+ 'waste_zone_swing_percent': 3,
114
+ 'heart_zone_whiff_percent': 3,
115
+ 'shadow_zone_whiff_percent': 3,
116
+ 'chase_zone_whiff_percent': 3,
117
+ 'waste_zone_whiff_percent': 3,
118
+ 'start_speed_avg': 1,
119
+ 'vb_avg': 1,
120
+ 'ivb_avg': 1,
121
+ 'hb_avg': 1,
122
+ 'z0_avg': 1,
123
+ 'x0_avg': 1,
124
+ 'vaa_avg': 1,
125
+ 'haa_avg': 1,
126
+ 'spin_rate_avg': 0,
127
+ 'extension_avg': 1
128
+ }
129
+
130
def _stat_column(title, field, width=150):
    """Return a plain table column definition (title, field, width)."""
    return {"title": title, "field": field, "width": width}


def _percent_column(title, field, width=150):
    """Return a column definition that uses the 'money' formatter with '%' as its symbol."""
    column = _stat_column(title, field, width)
    column["formatter"] = "money"
    # A fresh params dict per column so no two columns share mutable state.
    column["formatterParams"] = {
        "decimal": ".",
        "thousand": ".",
        "symbol": "%",
        "symbolAfter": "%",
        "negativeSign": True,
        "precision": 1,
    }
    return column


# Column definitions for the stat table, in display order. Each field appears
# exactly once; the mapping of field -> title is derived from this list.
columns = [
    _stat_column("PA", "pa"),
    _stat_column("BBE", "bip"),
    _stat_column("H", "hits"),
    _stat_column("K", "k"),
    _stat_column("BB", "bb"),
    _stat_column("Max EV", "max_launch_speed"),
    _stat_column("90th% EV", "launch_speed_90"),
    _stat_column("EV", "launch_speed"),
    _stat_column("Pitches", "pitches"),
    _stat_column("AVG", "avg"),
    _stat_column("OBP", "obp"),
    _stat_column("SLG", "slg"),
    _stat_column("OPS", "ops"),
    _percent_column("K%", "k_percent"),
    _percent_column("BB%", "bb_percent"),
    _percent_column("K-BB%", "k_minus_bb_percent"),
    _percent_column("SwSpot%", "sweet_spot_percent"),
    _stat_column("wOBA", "woba_percent"),
    _stat_column("xwOBA", "xwoba_percent"),
    _stat_column("wOBACON", "woba_percent_contact"),
    _stat_column("xwOBACON", "xwoba_percent_contact"),
    _percent_column("HardHit%", "hard_hit_percent"),
    _percent_column("Barrel%", "barrel_percent"),
    _percent_column("Z-Contact%", "zone_contact_percent"),
    _percent_column("Z-Swing%", "zone_swing_percent"),
    _percent_column("Zone%", "zone_percent"),
    _percent_column("O-Swing%", "chase_percent"),
    _percent_column("O-Contact%", "chase_contact"),
    _percent_column("Swing%", "swing_percent"),
    _percent_column("Whiff%", "whiff_rate"),
    _percent_column("SwStr%", "swstr_rate"),
    _percent_column("GB%", "ground_ball_percent"),
    _percent_column("LD%", "line_drive_percent"),
    _percent_column("FB%", "fly_ball_percent"),
    _percent_column("PU%", "pop_up_percent"),
    _percent_column("Heart Swing%", "heart_zone_swing_percent"),
    _percent_column("Shadow Swing%", "shadow_zone_swing_percent"),
    _percent_column("Chase Swing%", "chase_zone_swing_percent"),
    _percent_column("Waste Swing%", "waste_zone_swing_percent"),
    _percent_column("Heart Whiff%", "heart_zone_whiff_percent"),
    _percent_column("Shadow Whiff%", "shadow_zone_whiff_percent"),
    _percent_column("Chase Whiff%", "chase_zone_whiff_percent"),
    _percent_column("Waste Whiff%", "waste_zone_whiff_percent"),
    _stat_column("tjStuff+", "tj_stuff_plus_avg"),
    _stat_column("Velocity", "start_speed_avg"),
    _stat_column("Extension", "extension_avg"),
    _stat_column("VB", "vb_avg"),
    _stat_column("iVB", "ivb_avg"),
    _stat_column("HB", "hb_avg"),
    _stat_column("vRel", "z0_avg"),
    _stat_column("hRel", "x0_avg"),
    _stat_column("VAA", "vaa_avg"),
    _stat_column("HAA", "haa_avg"),
    _stat_column("Spin Rate", "spin_rate_avg"),
]
188
+
189
# Field name -> display title for every stat column defined in ``columns``
# (if a field is listed twice, the later title wins, as with dict(zip(...))).
stat_titles = {col["field"]: col["title"] for col in columns}

# Stat field names in first-seen order, without duplicates.
stat_selection = list(stat_titles)

# Grouping dimensions offered by the "Select Aggregation:" input
# (field name -> label shown to the user).
agg_titles = {
    'batter_id': 'Batter ID',
    'batter_name': 'Batter Name',
    'batter_team': 'Batter Team',
    'batter_hand': 'Batter Hand',
    'pitcher_id': 'Pitcher ID',
    'pitcher_name': 'Pitcher Name',
    'pitcher_team': 'Pitcher Team',
    'pitcher_hand': 'Pitcher Hand',
    'pitch_type': 'Pitch Type',
    'pitch_group': 'Pitch Group',
    'home_away_batter': 'Home/Away Batter',
    'home_away_pitcher': 'Home/Away Pitcher',
    'is_swing': 'Is Swing?',
    'is_bip': 'Is BIP?',
    'in_zone_final': 'In Zone?',
    'attack_zone_final': 'Attack Zone',
}

# Tabulator column definitions for the grouping dimensions: one frozen column
# with a text header filter per entry of ``agg_titles``, in the same order.
# The two "*_name" columns are wider (200) than the rest (150).
columns_group = [
    {
        "title": label,
        "field": field,
        "width": 200 if field.endswith("_name") else 150,
        "frozen": True,
        "headerFilter": "input",
    }
    for field, label in agg_titles.items()
]
229
+
230
+
231
# First filter row, rendered statically.  Further rows are inserted at runtime
# by the server into #filter-container and follow the same id scheme
# (filter_row_N / filter_column_N / filter_operator_N / filter_value_N /
# delete_filter_N).  The metric choices start empty and are filled in by the
# server from the selected stats.
_first_filter_row = ui.div(
    {"class": "filter-row", "id": "filter_row_1"},
    ui.row(
        # Column widths 5/3/3/1 leave room for the delete button.
        ui.column(5, ui.input_select("filter_column_1", "Metric", choices={})),
        ui.column(3, ui.input_select("filter_operator_1", "Operator", choices=[">=", "<="])),
        ui.column(3, ui.input_numeric("filter_value_1", "Value", value=0)),
        ui.column(
            1,
            # Spacer so the button sits level with the inputs beside it.
            ui.markdown("&nbsp;"),
            ui.input_action_button(
                "delete_filter_1",
                "",
                class_="btn-danger btn-sm",
                style="padding: 3px 6px;",
                icon='✖',
            ),
        ),
    ),
)

# Sidebar: level, grouping and stat pickers, date range, filter rows and the
# two action buttons.
_sidebar = ui.sidebar(
    ui.input_selectize(
        "level_input",
        "Select Level:",
        choices=['MLB', 'AAA', 'A'],
        multiple=False,
        selected=['MLB'],
    ),
    ui.input_selectize(
        "list_input",
        "Select Aggregation:",
        choices=agg_titles,
        multiple=True,
        selected=['batter_id', 'batter_name'],
    ),
    ui.input_selectize(
        "list_stats",
        "Select Stats:",
        choices=stat_titles,
        multiple=True,
        selected=['pa'],
    ),
    ui.input_date_range(
        "date_id",
        "Select Date Range",
        start=f'{season}-01-01',
        end=f'{season}-12-01',
        min=f'{season}-01-01',
        max=f'{season}-12-01',
    ),
    ui.hr(),
    ui.h4("Filters"),
    ui.div({"id": "filter-container"}, _first_filter_row),
    ui.input_action_button("add_filter", "Add Filter", class_="btn-secondary"),
    ui.br(),
    ui.br(),
    ui.input_action_button("generate_table", "Generate Table", class_="btn-primary"),
    width="400px",
)

# Whole page: the sidebar plus a single "Leaderboard" tab holding the table.
app_ui = ui.page_sidebar(
    _sidebar,
    ui.navset_tab(
        ui.nav_panel(
            "Leaderboard",
            ui.card(output_tabulator("tabulator")),
        ),
    ),
)
330
+
331
def server(input, output, session):
    """Reactive logic of the leaderboard app.

    Responsibilities:
      * keep the "Metric" choices of every filter row in sync with the stats
        picked in "Select Stats:";
      * add and remove filter rows on demand;
      * build the aggregated, filtered and formatted table when
        "Generate Table" is pressed.

    Parameters are the ``input``/``output``/``session`` objects Shiny passes
    to every server function.
    """
    # Local import: lets the code below tell "this input has no value yet"
    # apart from genuine errors instead of swallowing everything.
    from shiny.types import SilentException

    # Highest filter-row id handed out so far (ids are never reused).
    filter_count = reactive.value(1)
    # Ids of the filter rows currently shown in the sidebar.
    active_filters = reactive.value([1])

    def _filter_choices():
        """Return {field: title} for the stats currently selected."""
        return {field: stat_titles[field] for field in input.list_stats()}

    @reactive.effect
    @reactive.event(input.list_stats)
    def _():
        # Refresh every live filter row, not only the first one; otherwise
        # rows added earlier keep offering stats that are no longer selected.
        choices = _filter_choices()
        for row_id in active_filters.get():
            ui.update_select(f"filter_column_{row_id}", choices=choices)

    @reactive.effect
    @reactive.event(input.add_filter)
    def _():
        new_id = filter_count.get() + 1
        choices = _filter_choices()

        ui.insert_ui(
            selector="#filter-container",
            where="beforeEnd",
            ui=ui.div(
                {"class": "filter-row", "id": f"filter_row_{new_id}"},
                ui.row(
                    ui.column(5, ui.input_select(f"filter_column_{new_id}", "Metric", choices=choices)),
                    ui.column(3, ui.input_select(f"filter_operator_{new_id}", "Operator", choices=[">=", "<="])),
                    ui.column(3, ui.input_numeric(f"filter_value_{new_id}", "Value", value=0)),
                    ui.column(
                        1,
                        ui.markdown("&nbsp;"),
                        ui.input_action_button(
                            f"delete_filter_{new_id}",
                            "",
                            class_="btn-danger btn-sm",
                            style="padding: 3px 6px;",
                            # Same icon as the static first row; an empty icon
                            # with an empty label renders a blank button.
                            icon='✖',
                        ),
                    ),
                ),
            ),
        )
        filter_count.set(new_id)
        # Store a NEW list: mutating the stored list and setting the same
        # object back is not seen as a change by the reactive value.
        active_filters.set([*active_filters.get(), new_id])

    @reactive.effect
    def _():
        # Watch every delete button that may exist (ids 1..filter_count).
        for row_id in range(1, filter_count.get() + 1):
            try:
                clicked = input[f"delete_filter_{row_id}"]() > 0
            except SilentException:
                # Button not bound on the client yet; its value will arrive
                # later and re-trigger this effect.
                continue
            if not clicked:
                continue

            # Read without taking a dependency so that updating the list
            # below does not needlessly re-trigger this effect.
            with reactive.isolate():
                remaining = active_filters.get()
            if row_id in remaining:
                ui.remove_ui(f"#filter_row_{row_id}")
                active_filters.set([r for r in remaining if r != row_id])

    @output
    @render_tabulator
    @reactive.event(input.generate_table, ignore_none=False)
    def tabulator():
        selection_list = list(input.list_input())
        stat_list = list(input.list_stats())
        start_date, end_date = (str(day) for day in input.date_id())

        # Pick the pitch-level frame for the requested level.
        level = input.level_input()
        if level == "MLB":
            df_level = df_mlb_total
        elif level == "AAA":
            df_level = df_aaa_total
        elif level == "A":
            df_level = df_a_total
        else:
            raise ValueError(f"Unsupported level: {level!r}")

        in_range = (pl.col('game_date') >= start_date) & (pl.col('game_date') <= end_date)
        df_agg = update.update_summary_select(df=df_level.filter(in_range),
                                              selection=selection_list)
        df_agg = df_agg.select(selection_list + stat_list)

        # Columns using the "money" formatter are shown as percentages but are
        # still fractions at this point.
        percent_fields = {col["field"] for col in columns if col.get("formatter") == "money"}

        # Apply the user-defined filters (active rows only).
        for row_id in active_filters.get():
            try:
                col_name = input[f"filter_column_{row_id}"]()
                operator = input[f"filter_operator_{row_id}"]()
                value = input[f"filter_value_{row_id}"]()
            except SilentException:
                continue  # row inputs not available yet
            # Skip incomplete or stale filters: no metric chosen, empty value
            # box, or a metric that is no longer part of the table.
            if not col_name or value is None or col_name not in df_agg.columns:
                continue
            if col_name in percent_fields:
                value = value / 100  # user types percent, data holds fractions

            if operator == ">=":
                df_agg = df_agg.filter(pl.col(col_name) >= value)
            elif operator == "<=":
                df_agg = df_agg.filter(pl.col(col_name) <= value)

        # Round the stat columns (everything after the grouping columns).
        df_agg = df_agg.with_columns([
            pl.col(col).round(rounding_dict[col])
            for col in df_agg.columns[len(selection_list):]
            if col in rounding_dict
        ])

        # Scale fractions to percentages for display (each field once).
        df_agg = df_agg.with_columns([
            pl.col(col) * 100 for col in df_agg.columns if col in percent_fields
        ])

        # Column definitions: grouping columns first, then stats; a field
        # defined more than once in the source lists is emitted only once.
        table_columns = []
        seen_fields = set()
        for column in [*columns_group, *columns]:
            field = column.get("field")
            if field in df_agg.columns and field not in seen_fields:
                seen_fields.add(field)
                table_columns.append(column)

        return Tabulator(
            df_agg.to_pandas(),
            table_options=TableOptions(
                height=800,
                columns=table_columns,
            ),
        )


app = App(app_ui, server)
data/data_a_2024.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11ddeb5b35ced8d2a3c627af2cded75c78fbb333fa4331569b78a7fb29ddce1f
3
+ size 44964738
data/data_aaa_2024.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf48b06263a3b0a25b701e98c589f7e11bdf0ec3665562082ea209a61b55468e
3
+ size 117704536
data/data_mlb_2024.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0f2c52277a98ecbba343650996f32bbb8a847631d676bce2f07dbd3931f09a8
3
+ size 128295721
functions/__pycache__/df_update.cpython-39.pyc CHANGED
Binary files a/functions/__pycache__/df_update.cpython-39.pyc and b/functions/__pycache__/df_update.cpython-39.pyc differ
 
functions/__pycache__/pitch_summary_functions.cpython-39.pyc CHANGED
Binary files a/functions/__pycache__/pitch_summary_functions.cpython-39.pyc and b/functions/__pycache__/pitch_summary_functions.cpython-39.pyc differ
 
functions/df_update.py CHANGED
@@ -1,475 +1,579 @@
1
- import polars as pl
2
- import numpy as np
3
- import joblib
4
-
5
# Pre-trained models, deserialised once when this module is imported.
# Paths are relative to the current working directory.
# NOTE(review): joblib.load unpickles its input — only load trusted files.
loaded_model = joblib.load('joblib_model/barrel_model.joblib')  # not referenced in the code visible here
# Both zone models are fed (px, pz, sz_top, sz_bot) in df_update.update.
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
# Queried with predict_proba on (launch_angle, launch_speed).
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
# Regressions used to estimate px / pz from the 'x' / 'y' columns.
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
11
-
12
-
13
- class df_update:
14
def __init__(self):
    """Create the updater; nothing is stored on the instance."""
    pass
16
-
17
def update(self, df_clone: pl.DataFrame):
    """Return a copy of the pitch-level frame with derived columns added.

    The input is cloned and left untouched.  Added or overwritten columns
    include plate-appearance / at-bat / on-base flags, swing, whiff and CSW
    counters, batted-ball quality flags (barrel, sweet spot, hard hit),
    wOBA and model-based xwOBA values, zone and attack-zone flags (missing
    values are filled from the zone models) and one indicator column per
    batted-ball trajectory.

    Parameters:
        df_clone (pl.DataFrame): Pitch-level data.  The code reads, among
            others, 'type_ab', 'is_pitch', 'is_swing', 'sz_top', 'sz_bot',
            'zone', 'x', 'y', 'px', 'pz', 'event_type', 'play_description',
            'play_code', 'launch_speed', 'launch_angle' and 'trajectory'.

    Returns:
        pl.DataFrame: The enriched copy.
    """

    def case_when(conditions, choices):
        """Chain when/then pairs in order; rows matching none become null."""
        expr = pl.when(conditions[0]).then(choices[0])
        for condition, choice in zip(conditions[1:], choices[1:]):
            expr = expr.when(condition).then(choice)
        return expr.otherwise(None)

    df = df_clone.clone()

    hit_codes = ['single',
                 'double', 'home_run', 'triple']

    ab_codes = ['single', 'strikeout', 'field_out',
                'grounded_into_double_play', 'fielders_choice', 'force_out',
                'double', 'field_error', 'home_run', 'triple',
                'double_play',
                'fielders_choice_out', 'strikeout_double_play',
                'other_out', 'triple_play']

    obp_true_codes = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']

    obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                 'hit_by_pitch', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play',
                 'other_out', 'triple_play']

    bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

    conditions_barrel = [
        df['launch_speed'].is_null(),
        (df['launch_speed'] * 1.5 - df['launch_angle'] >= 117) &
        (df['launch_speed'] + df['launch_angle'] >= 124) &
        (df['launch_speed'] >= 98) &
        (df['launch_angle'] >= 4) & (df['launch_angle'] <= 50)
    ]
    choices_barrel = [False, True]

    conditions_tb = [
        (df['event_type'] == 'single'),
        (df['event_type'] == 'double'),
        (df['event_type'] == 'triple'),
        (df['event_type'] == 'home_run')
    ]
    choices_tb = [1, 2, 3, 4]

    conditions_woba = [
        df['event_type'].is_in(['strikeout', 'field_out', 'sac_fly', 'force_out', 'grounded_into_double_play', 'fielders_choice', 'field_error', 'sac_bunt', 'double_play', 'fielders_choice_out', 'strikeout_double_play', 'sac_fly_double_play', 'other_out']),
        df['event_type'] == 'walk',
        df['event_type'] == 'hit_by_pitch',
        df['event_type'] == 'single',
        df['event_type'] == 'double',
        df['event_type'] == 'triple',
        df['event_type'] == 'home_run'
    ]
    # wOBA weight per outcome, aligned with conditions_woba.
    choices_woba = [0, 0.689, 0.720, 0.881, 1.254, 1.589, 2.048]

    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch', 'double', 'sac_fly', 'force_out', 'home_run', 'grounded_into_double_play', 'fielders_choice', 'field_error', 'triple', 'sac_bunt', 'double_play', 'fielders_choice_out', 'strikeout_double_play', 'sac_fly_double_play', 'other_out']

    # Pass 1: per-pitch flags, model predictions and outcome values.
    df = df.with_columns([
        pl.when(df['type_ab'].is_not_null()).then(1).otherwise(0).alias('pa'),
        pl.when(df['is_pitch']).then(1).otherwise(0).alias('pitches'),
        # A strike-zone edge of 0 is treated as missing.
        pl.when(df['sz_top'] == 0).then(None).otherwise(df['sz_top']).alias('sz_top'),
        pl.when(df['sz_bot'] == 0).then(None).otherwise(df['sz_bot']).alias('sz_bot'),
        pl.when(df['zone'] > 0).then(df['zone'] < 10).otherwise(None).alias('in_zone'),
        pl.Series(px_model.predict(df[['x']].fill_null(0).to_numpy())[:, 0]).alias('px_predict'),
        pl.Series(pz_model.predict(df[['y']].fill_null(0).to_numpy())[:, 0] + 3.2).alias('pz_predict'),
        pl.Series(in_zone_model.predict(df[['px', 'pz', 'sz_top', 'sz_bot']].fill_null(0).to_numpy())[:]).alias('in_zone_predict'),
        pl.Series(attack_zone_model.predict(df[['px', 'pz', 'sz_top', 'sz_bot']].fill_null(0).to_numpy())[:]).alias('attack_zone_predict'),
        pl.when(df['event_type'].is_in(hit_codes)).then(True).otherwise(False).alias('hits'),
        pl.when(df['event_type'].is_in(ab_codes)).then(True).otherwise(False).alias('ab'),
        pl.when(df['event_type'].is_in(obp_true_codes)).then(True).otherwise(False).alias('on_base'),
        pl.when(df['event_type'].is_in(obp_codes)).then(True).otherwise(False).alias('obp'),
        pl.when(df['play_description'].is_in(bip_codes)).then(True).otherwise(False).alias('bip'),
        case_when(conditions_barrel, choices_barrel).alias('barrel'),
        pl.when(df['launch_angle'].is_null()).then(False).when((df['launch_angle'] >= 8) & (df['launch_angle'] <= 32)).then(True).otherwise(None).alias('sweet_spot'),
        pl.when(df['launch_speed'].is_null()).then(False).when(df['launch_speed'] >= 94.5).then(True).otherwise(None).alias('hard_hit'),
        case_when(conditions_tb, choices_tb).alias('tb'),
        case_when(conditions_woba, choices_woba).alias('woba'),
        pl.when((df['play_code'] == 'S') | (df['play_code'] == 'W') | (df['play_code'] == 'T')).then(1).otherwise(0).alias('whiffs'),
        pl.when((df['play_code'] == 'S') | (df['play_code'] == 'W') | (df['play_code'] == 'T') | (df['play_code'] == 'C')).then(1).otherwise(0).alias('csw'),
        pl.when(pl.col('is_swing').cast(pl.Boolean)).then(1).otherwise(0).alias('swings'),
        # 'k' and 'bb' are recomputed in pass 2; kept here so the column
        # order of the result does not change.
        pl.col('event_type').is_in(['strikeout', 'strikeout_double_play']).alias('k'),
        pl.col('event_type').is_in(['walk', 'intent_walk']).alias('bb'),
        pl.lit(None).alias('attack_zone'),
        pl.lit(None).alias('woba_pred'),
        pl.lit(None).alias('woba_pred_contact')
    ])

    # Pass 2: fill gaps from the predictions and derive zone/swing flags.
    df = df.with_columns([
        pl.when(df['event_type'].is_in(woba_codes)).then(1).otherwise(None).alias('woba_codes'),
        pl.when(df['event_type'].is_in(woba_codes)).then(1).otherwise(None).alias('xwoba_codes'),
        pl.when((pl.col('tb') >= 0)).then(df['woba']).otherwise(None).alias('woba_contact'),
        pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
        pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
        pl.when(pl.col('in_zone').is_null()).then(pl.col('in_zone_predict')).otherwise(pl.col('in_zone')).alias('in_zone'),
        pl.when(df['launch_speed'].is_null()).then(None).otherwise(df['barrel']).alias('barrel'),
        pl.lit('average').alias('average'),
        pl.when(pl.col('in_zone') == False).then(True).otherwise(False).alias('out_zone'),
        pl.when((pl.col('in_zone') == True) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('zone_swing'),
        pl.when((pl.col('in_zone') == True) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('zone_contact'),
        pl.when((pl.col('in_zone') == False) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('ozone_swing'),
        pl.when((pl.col('in_zone') == False) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('ozone_contact'),
        pl.when(pl.col('event_type').str.contains('strikeout')).then(True).otherwise(False).alias('k'),
        pl.when(pl.col('event_type').is_in(['walk', 'intent_walk'])).then(True).otherwise(False).alias('bb'),
        pl.when(pl.col('attack_zone').is_null()).then(pl.col('attack_zone_predict')).otherwise(pl.col('attack_zone')).alias('attack_zone'),
    ])

    # Pass 3: attack-zone breakdown (0 heart, 1 shadow, 2 chase, 3 waste).
    df = df.with_columns([
        (df['k'].cast(pl.Float32) - df['bb'].cast(pl.Float32)).alias('k_minus_bb'),
        (df['bb'].cast(pl.Float32) - df['k'].cast(pl.Float32)).alias('bb_minus_k'),
        (df['launch_speed'] > 0).alias('bip_div'),
        (df['attack_zone'] == 0).alias('heart'),
        (df['attack_zone'] == 1).alias('shadow'),
        (df['attack_zone'] == 2).alias('chase'),
        (df['attack_zone'] == 3).alias('waste'),
        ((df['attack_zone'] == 0) & (df['swings'] == 1)).alias('heart_swing'),
        ((df['attack_zone'] == 1) & (df['swings'] == 1)).alias('shadow_swing'),
        ((df['attack_zone'] == 2) & (df['swings'] == 1)).alias('chase_swing'),
        ((df['attack_zone'] == 3) & (df['swings'] == 1)).alias('waste_swing'),
        ((df['attack_zone'] == 0) & (df['whiffs'] == 1)).alias('heart_whiff'),
        ((df['attack_zone'] == 1) & (df['whiffs'] == 1)).alias('shadow_whiff'),
        ((df['attack_zone'] == 2) & (df['whiffs'] == 1)).alias('chase_whiff'),
        ((df['attack_zone'] == 3) & (df['whiffs'] == 1)).alias('waste_whiff')
    ])

    # Expected wOBA on contact: class probabilities from the model weighted
    # by the wOBA values for out / single / double / triple / home run.
    df = df.with_columns([
        pl.Series(
            [sum(x) for x in xwoba_model.predict_proba(df[['launch_angle', 'launch_speed']].fill_null(0).to_numpy()[:]) * ([0, 0.881, 1.254, 1.589, 2.048])]
        ).alias('woba_pred_predict')
    ])

    # Non-contact outcomes get their fixed wOBA value instead.
    df = df.with_columns([
        pl.when(pl.col('event_type').is_in(['walk'])).then(0.689)
        .when(pl.col('event_type').is_in(['hit_by_pitch'])).then(0.720)
        .when(pl.col('event_type').is_in(['strikeout', 'strikeout_double_play'])).then(0)
        .otherwise(pl.col('woba_pred_predict')).alias('woba_pred_predict')
    ])

    df = df.with_columns([
        pl.when(pl.col('woba_codes').is_null()).then(None).otherwise(pl.col('woba_pred_predict')).alias('woba_pred'),
        pl.when(pl.col('bip') != 1).then(None).otherwise(pl.col('woba_pred_predict')).alias('woba_pred_contact'),
    ])

    # Fold bunt trajectories into the regular ones; '' means "no trajectory".
    df = df.with_columns([
        pl.when(pl.col('trajectory').is_in(['bunt_popup'])).then(pl.lit('popup'))
        .when(pl.col('trajectory').is_in(['bunt_grounder'])).then(pl.lit('ground_ball'))
        .when(pl.col('trajectory').is_in(['bunt_line_drive'])).then(pl.lit('line_drive'))
        .when(pl.col('trajectory').is_in([''])).then(pl.lit(None))
        .otherwise(pl.col('trajectory')).alias('trajectory')
    ])

    # One indicator column per trajectory value.
    dummy_df = df.select(pl.col('trajectory')).to_dummies()

    # Guarantee the four expected indicator columns even when a trajectory
    # never occurs in this frame.  (The former identity rename was dropped:
    # it changed nothing and raised before this loop could add a missing
    # column.)
    for col in ['trajectory_fly_ball', 'trajectory_ground_ball', 'trajectory_line_drive', 'trajectory_popup']:
        if col not in dummy_df.columns:
            dummy_df = dummy_df.with_columns(pl.lit(0).alias(col))

    # Attach the indicator columns to the main frame.
    df = df.hstack(dummy_df)

    # The indicator for missing trajectories is not needed.
    if 'trajectory_null' in df.columns:
        df = df.drop('trajectory_null')

    return df
234
-
235
- # Assuming df is your Polars DataFrame
236
def update_summary(self, df: pl.DataFrame, pitcher: bool = True) -> pl.DataFrame:
    """
    Aggregate pitch-level rows into one summary row per player.

    Parameters:
    df (pl.DataFrame): Pitch-level frame holding the counting columns that are summed below.
    pitcher (bool): Group by pitcher_id/pitcher_name when True, by batter_id/batter_name when False.

    Returns:
    pl.DataFrame: One row per player with the raw totals followed by the derived rate columns.
    """

    def total(source, alias=None):
        # Sum of a column, optionally stored under another name.
        return pl.col(source).sum().alias(alias or source)

    def ratio(numerator, denominator, alias):
        # Plain quotient of two aggregated columns.
        return (pl.col(numerator) / pl.col(denominator)).alias(alias)

    role = 'pitcher' if pitcher else 'batter'
    zones = ['heart', 'shadow', 'chase', 'waste']

    # Totals, in the order the output columns are expected.
    aggregations = [
        total('pa'),
        total('ab'),
        total('obp', 'obp_pa'),
        total('hits'),
        total('on_base'),
        total('k'),
        total('bb'),
        total('bb_minus_k'),
        total('csw'),
        total('bip'),
        total('bip_div'),
        total('tb'),
        total('woba'),
        total('woba_contact'),
        total('woba_pred', 'xwoba'),
        total('woba_pred_contact', 'xwoba_contact'),
        total('woba_codes'),
        total('xwoba_codes'),
        total('hard_hit'),
        total('barrel'),
        total('sweet_spot'),
        pl.col('launch_speed').max().alias('max_launch_speed'),
        pl.col('launch_speed').quantile(0.90).alias('launch_speed_90'),
        pl.col('launch_speed').mean().alias('launch_speed'),
        pl.col('launch_angle').mean().alias('launch_angle'),
        total('is_pitch', 'pitches'),
        total('swings'),
        total('in_zone'),
        total('out_zone'),
        total('whiffs'),
        total('zone_swing'),
        total('zone_contact'),
        total('ozone_swing'),
        total('ozone_contact'),
        total('trajectory_ground_ball', 'ground_ball'),
        total('trajectory_line_drive', 'line_drive'),
        total('trajectory_fly_ball', 'fly_ball'),
        total('trajectory_popup', 'pop_up'),
        # Number of pitches with a known attack zone.
        pl.col('attack_zone').count().alias('attack_zone'),
    ]
    aggregations += [total(zone) for zone in zones]
    aggregations += [total(f'{zone}_swing') for zone in zones]
    aggregations += [total(f'{zone}_whiff') for zone in zones]

    summary = df.group_by([f'{role}_id', f'{role}_name']).agg(aggregations)

    # Rates derived from the totals above.
    rates = [
        ratio('hits', 'ab', 'avg'),
        ratio('on_base', 'obp_pa', 'obp'),
        ratio('tb', 'ab', 'slg'),
        (pl.col('on_base') / pl.col('obp_pa') + pl.col('tb') / pl.col('ab')).alias('ops'),
        ratio('k', 'pa', 'k_percent'),
        ratio('bb', 'pa', 'bb_percent'),
        ratio('bb_minus_k', 'pa', 'bb_minus_k_percent'),
        ratio('bb', 'k', 'bb_over_k_percent'),
        ratio('csw', 'pitches', 'csw_percent'),
        ratio('sweet_spot', 'bip_div', 'sweet_spot_percent'),
        ratio('woba', 'woba_codes', 'woba_percent'),
        ratio('woba_contact', 'bip', 'woba_percent_contact'),
        ratio('hard_hit', 'bip_div', 'hard_hit_percent'),
        ratio('barrel', 'bip_div', 'barrel_percent'),
        ratio('zone_contact', 'zone_swing', 'zone_contact_percent'),
        ratio('zone_swing', 'in_zone', 'zone_swing_percent'),
        ratio('in_zone', 'pitches', 'zone_percent'),
        # Swings at pitches outside the zone over all pitches outside the zone.
        (pl.col('ozone_swing') / (pl.col('pitches') - pl.col('in_zone'))).alias('chase_percent'),
        ratio('ozone_contact', 'ozone_swing', 'chase_contact'),
        ratio('swings', 'pitches', 'swing_percent'),
        ratio('whiffs', 'swings', 'whiff_rate'),
        ratio('whiffs', 'pitches', 'swstr_rate'),
        ratio('ground_ball', 'bip', 'ground_ball_percent'),
        ratio('line_drive', 'bip', 'line_drive_percent'),
        ratio('fly_ball', 'bip', 'fly_ball_percent'),
        ratio('pop_up', 'bip', 'pop_up_percent'),
    ]
    rates += [ratio(zone, 'attack_zone', f'{zone}_zone_percent') for zone in zones]
    rates += [ratio(f'{zone}_swing', zone, f'{zone}_zone_swing_percent') for zone in zones]
    rates += [ratio(f'{zone}_whiff', f'{zone}_swing', f'{zone}_zone_whiff_percent') for zone in zones]
    rates += [
        ratio('xwoba', 'xwoba_codes', 'xwoba_percent'),
        ratio('xwoba_contact', 'bip', 'xwoba_percent_contact'),
    ]

    return summary.with_columns(rates)
354
-
355
-
356
-
357
-
358
-
359
-
360
- # Assuming df is your Polars DataFrame
361
- def update_summary_select(self, df: pl.DataFrame, selection: list) -> pl.DataFrame:
362
- """
363
- Update summary statistics for pitchers or batters.
364
-
365
- Parameters:
366
- df (pl.DataFrame): The input Polars DataFrame containing player statistics.
367
- pitcher (bool): A flag indicating whether to calculate statistics for pitchers (True) or batters (False).
368
-
369
- Returns:
370
- pl.DataFrame: A Polars DataFrame with aggregated and calculated summary statistics.
371
- """
372
-
373
- # Group by position_id and position_name, then aggregate various statistics
374
- df_summ = df.group_by(selection).agg([
375
- pl.col('pa').sum().alias('pa'),
376
- pl.col('ab').sum().alias('ab'),
377
- pl.col('obp').sum().alias('obp_pa'),
378
- pl.col('hits').sum().alias('hits'),
379
- pl.col('on_base').sum().alias('on_base'),
380
- pl.col('k').sum().alias('k'),
381
- pl.col('bb').sum().alias('bb'),
382
- pl.col('bb_minus_k').sum().alias('bb_minus_k'),
383
- pl.col('csw').sum().alias('csw'),
384
- pl.col('bip').sum().alias('bip'),
385
- pl.col('bip_div').sum().alias('bip_div'),
386
- pl.col('tb').sum().alias('tb'),
387
- pl.col('woba').sum().alias('woba'),
388
- pl.col('woba_contact').sum().alias('woba_contact'),
389
- pl.col('woba_pred').sum().alias('xwoba'),
390
- pl.col('woba_pred_contact').sum().alias('xwoba_contact'),
391
- pl.col('woba_codes').sum().alias('woba_codes'),
392
- pl.col('xwoba_codes').sum().alias('xwoba_codes'),
393
- pl.col('hard_hit').sum().alias('hard_hit'),
394
- pl.col('barrel').sum().alias('barrel'),
395
- pl.col('sweet_spot').sum().alias('sweet_spot'),
396
- pl.col('launch_speed').max().alias('max_launch_speed'),
397
- pl.col('launch_speed').quantile(0.90).alias('launch_speed_90'),
398
- pl.col('launch_speed').mean().alias('launch_speed'),
399
- pl.col('launch_angle').mean().alias('launch_angle'),
400
- pl.col('is_pitch').sum().alias('pitches'),
401
- pl.col('swings').sum().alias('swings'),
402
- pl.col('in_zone').sum().alias('in_zone'),
403
- pl.col('out_zone').sum().alias('out_zone'),
404
- pl.col('whiffs').sum().alias('whiffs'),
405
- pl.col('zone_swing').sum().alias('zone_swing'),
406
- pl.col('zone_contact').sum().alias('zone_contact'),
407
- pl.col('ozone_swing').sum().alias('ozone_swing'),
408
- pl.col('ozone_contact').sum().alias('ozone_contact'),
409
- pl.col('trajectory_ground_ball').sum().alias('ground_ball'),
410
- pl.col('trajectory_line_drive').sum().alias('line_drive'),
411
- pl.col('trajectory_fly_ball').sum().alias('fly_ball'),
412
- pl.col('trajectory_popup').sum().alias('pop_up'),
413
- pl.col('attack_zone').count().alias('attack_zone'),
414
- pl.col('heart').sum().alias('heart'),
415
- pl.col('shadow').sum().alias('shadow'),
416
- pl.col('chase').sum().alias('chase'),
417
- pl.col('waste').sum().alias('waste'),
418
- pl.col('heart_swing').sum().alias('heart_swing'),
419
- pl.col('shadow_swing').sum().alias('shadow_swing'),
420
- pl.col('chase_swing').sum().alias('chase_swing'),
421
- pl.col('waste_swing').sum().alias('waste_swing'),
422
- pl.col('heart_whiff').sum().alias('heart_whiff'),
423
- pl.col('shadow_whiff').sum().alias('shadow_whiff'),
424
- pl.col('chase_whiff').sum().alias('chase_whiff'),
425
- pl.col('waste_whiff').sum().alias('waste_whiff'),
426
- pl.col('tj_stuff_plus').sum().alias('tj_stuff_plus')
427
- ])
428
-
429
- # Add calculated columns to the summary DataFrame
430
- df_summ = df_summ.with_columns([
431
- (pl.col('hits') / pl.col('ab')).alias('avg'),
432
- (pl.col('on_base') / pl.col('obp_pa')).alias('obp'),
433
- (pl.col('tb') / pl.col('ab')).alias('slg'),
434
- (pl.col('on_base') / pl.col('obp_pa') + pl.col('tb') / pl.col('ab')).alias('ops'),
435
- (pl.col('k') / pl.col('pa')).alias('k_percent'),
436
- (pl.col('bb') / pl.col('pa')).alias('bb_percent'),
437
- (pl.col('bb_minus_k') / pl.col('pa')).alias('bb_minus_k_percent'),
438
- (pl.col('bb') / pl.col('k')).alias('bb_over_k_percent'),
439
- (pl.col('csw') / pl.col('pitches')).alias('csw_percent'),
440
- (pl.col('sweet_spot') / pl.col('bip_div')).alias('sweet_spot_percent'),
441
- (pl.col('woba') / pl.col('woba_codes')).alias('woba_percent'),
442
- (pl.col('woba_contact') / pl.col('bip')).alias('woba_percent_contact'),
443
- (pl.col('hard_hit') / pl.col('bip_div')).alias('hard_hit_percent'),
444
- (pl.col('barrel') / pl.col('bip_div')).alias('barrel_percent'),
445
- (pl.col('zone_contact') / pl.col('zone_swing')).alias('zone_contact_percent'),
446
- (pl.col('zone_swing') / pl.col('in_zone')).alias('zone_swing_percent'),
447
- (pl.col('in_zone') / pl.col('pitches')).alias('zone_percent'),
448
- (pl.col('ozone_swing') / (pl.col('pitches') - pl.col('in_zone'))).alias('chase_percent'),
449
- (pl.col('ozone_contact') / pl.col('ozone_swing')).alias('chase_contact'),
450
- (pl.col('swings') / pl.col('pitches')).alias('swing_percent'),
451
- (pl.col('whiffs') / pl.col('swings')).alias('whiff_rate'),
452
- (pl.col('whiffs') / pl.col('pitches')).alias('swstr_rate'),
453
- (pl.col('ground_ball') / pl.col('bip')).alias('ground_ball_percent'),
454
- (pl.col('line_drive') / pl.col('bip')).alias('line_drive_percent'),
455
- (pl.col('fly_ball') / pl.col('bip')).alias('fly_ball_percent'),
456
- (pl.col('pop_up') / pl.col('bip')).alias('pop_up_percent'),
457
- (pl.col('heart') / pl.col('attack_zone')).alias('heart_zone_percent'),
458
- (pl.col('shadow') / pl.col('attack_zone')).alias('shadow_zone_percent'),
459
- (pl.col('chase') / pl.col('attack_zone')).alias('chase_zone_percent'),
460
- (pl.col('waste') / pl.col('attack_zone')).alias('waste_zone_percent'),
461
- (pl.col('heart_swing') / pl.col('heart')).alias('heart_zone_swing_percent'),
462
- (pl.col('shadow_swing') / pl.col('shadow')).alias('shadow_zone_swing_percent'),
463
- (pl.col('chase_swing') / pl.col('chase')).alias('chase_zone_swing_percent'),
464
- (pl.col('waste_swing') / pl.col('waste')).alias('waste_zone_swing_percent'),
465
- (pl.col('heart_whiff') / pl.col('heart_swing')).alias('heart_zone_whiff_percent'),
466
- (pl.col('shadow_whiff') / pl.col('shadow_swing')).alias('shadow_zone_whiff_percent'),
467
- (pl.col('chase_whiff') / pl.col('chase_swing')).alias('chase_zone_whiff_percent'),
468
- (pl.col('waste_whiff') / pl.col('waste_swing')).alias('waste_zone_whiff_percent'),
469
- (pl.col('xwoba') / pl.col('xwoba_codes')).alias('xwoba_percent'),
470
- (pl.col('xwoba_contact') / pl.col('bip')).alias('xwoba_percent_contact'),
471
- (pl.col('tj_stuff_plus') / pl.col('pitches')).alias('tj_stuff_plus_avg'),
472
-
473
- ])
474
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
  return df_summ
 
1
+ import polars as pl
2
+ import numpy as np
3
+ import joblib
4
+
5
# Pre-trained models are deserialized once, when this module is imported.
# The 'joblib_model/...' paths are relative, so they resolve against the
# process's current working directory.
# NOTE(review): `loaded_model` (barrel model) is not referenced by the
# df_update class below -- confirm whether another module still needs it.
loaded_model = joblib.load('joblib_model/barrel_model.joblib')
# Classifier used to fill in `in_zone` when the feed's zone value is missing.
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
# Classifier whose output is compared against 0/1/2/3 (heart/shadow/chase/waste).
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
# Probabilistic model over (launch_angle, launch_speed) used for expected wOBA.
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
# Regressors mapping the `x` / `y` columns to predicted px / pz values.
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
11
+
12
+
13
class df_update:
    """Derive per-pitch indicator columns and roll them up into rate summaries.

    ``update`` adds outcome, swing, zone, batted-ball and pitch-trajectory
    columns to a pitch-level Polars DataFrame.  ``update_summary`` and
    ``update_summary_select`` group the frame produced by ``update`` and
    compute counting totals plus ratio statistics.

    The class relies on the module-level models ``px_model``, ``pz_model``,
    ``in_zone_model``, ``attack_zone_model`` and ``xwoba_model``.
    """

    # Pitch-type code -> pitch group.  Codes mapped to None get a null group;
    # codes missing from the table pass through unchanged (see ``update``).
    _PITCH_GROUPS = {
        'FA': None,
        'FF': 'Fastball',
        'FT': 'Fastball',
        'FC': 'Fastball',
        'FS': 'Off-Speed',
        'FO': 'Off-Speed',
        'SI': 'Fastball',
        'ST': 'Breaking',
        'SL': 'Breaking',
        'CU': 'Breaking',
        'KC': 'Breaking',
        'SC': 'Off-Speed',
        'GY': 'Off-Speed',
        'SV': 'Breaking',
        'CS': 'Breaking',
        'CH': 'Off-Speed',
        'KN': 'Off-Speed',
        'EP': 'Breaking',
        'UN': None,
        'IN': None,
        'PO': None,
        'AB': None,
        'AS': None,
        'NP': None,
    }

    # Attack-zone labels, in the order of the model classes 0, 1, 2, 3.
    _ATTACK_ZONES = ('heart', 'shadow', 'chase', 'waste')

    # Pitch-metric columns that the extended summary totals and then divides
    # by the pitch count.
    _PITCH_METRICS = ('start_speed', 'vb', 'ivb', 'hb', 'x0', 'z0',
                      'vaa', 'haa', 'spin_rate', 'extension')

    def __init__(self):
        pass

    @staticmethod
    def _chain_when(conditions, values):
        """Fold parallel condition/value lists into one when/then chain.

        The first matching condition wins; rows matching none become null.
        """
        expr = pl.when(conditions[0]).then(values[0])
        for condition, value in zip(conditions[1:], values[1:]):
            expr = expr.when(condition).then(value)
        return expr.otherwise(None)

    @staticmethod
    def _summary_aggregations(extended: bool = False) -> list:
        """Build the group-by aggregation expressions shared by both summaries.

        With ``extended=True`` the list additionally contains ``k_minus_bb``,
        ``tj_stuff_plus`` and the pitch-metric totals used by
        ``update_summary_select``.  Expression order fixes output column order.
        """
        def total(column, alias=None):
            return pl.col(column).sum().alias(alias or column)

        zones = df_update._ATTACK_ZONES

        aggs = [
            total('pa'),
            total('ab'),
            total('obp', 'obp_pa'),
            total('hits'),
            total('on_base'),
            total('k'),
            total('bb'),
            total('bb_minus_k'),
        ]
        if extended:
            aggs.append(total('k_minus_bb'))
        aggs += [
            total('csw'),
            total('bip'),
            total('bip_div'),
            total('tb'),
            total('woba'),
            total('woba_contact'),
            total('woba_pred', 'xwoba'),
            total('woba_pred_contact', 'xwoba_contact'),
            total('woba_codes'),
            total('xwoba_codes'),
            total('hard_hit'),
            total('barrel'),
            total('sweet_spot'),
            pl.col('launch_speed').max().alias('max_launch_speed'),
            pl.col('launch_speed').quantile(0.90).alias('launch_speed_90'),
            pl.col('launch_speed').mean().alias('launch_speed'),
            pl.col('launch_angle').mean().alias('launch_angle'),
            total('is_pitch', 'pitches'),
            total('swings'),
            # NOTE(review): this totals the raw 'in_zone' column, not the
            # model-filled 'in_zone_final' that the zone_swing/ozone_swing
            # flags are built from -- confirm which one is intended.
            total('in_zone'),
            total('out_zone'),
            total('whiffs'),
            total('zone_swing'),
            total('zone_contact'),
            total('ozone_swing'),
            total('ozone_contact'),
            total('trajectory_ground_ball', 'ground_ball'),
            total('trajectory_line_drive', 'line_drive'),
            total('trajectory_fly_ball', 'fly_ball'),
            total('trajectory_popup', 'pop_up'),
            # NOTE(review): update() writes 'attack_zone' as a null literal and
            # puts the model output in 'attack_zone_final'; unless a caller
            # fills 'attack_zone', this count has nothing non-null to count --
            # confirm whether 'attack_zone_final' was intended.
            pl.col('attack_zone').count().alias('attack_zone'),
        ]
        aggs += [total(zone) for zone in zones]
        aggs += [total(f'{zone}_swing') for zone in zones]
        aggs += [total(f'{zone}_whiff') for zone in zones]
        if extended:
            aggs.append(total('tj_stuff_plus'))
            aggs += [total(metric) for metric in df_update._PITCH_METRICS]
        return aggs

    @staticmethod
    def _summary_rates(extended: bool = False) -> list:
        """Build the ratio expressions computed from the aggregated totals.

        With ``extended=True`` the list additionally contains
        ``k_minus_bb_percent``, ``tj_stuff_plus_avg`` and one ``<metric>_avg``
        per pitch metric (total divided by pitch count).
        """
        def ratio(numerator, denominator, alias):
            return (pl.col(numerator) / pl.col(denominator)).alias(alias)

        zones = df_update._ATTACK_ZONES

        rates = [
            ratio('hits', 'ab', 'avg'),
            ratio('on_base', 'obp_pa', 'obp'),
            ratio('tb', 'ab', 'slg'),
            (pl.col('on_base') / pl.col('obp_pa') + pl.col('tb') / pl.col('ab')).alias('ops'),
            ratio('k', 'pa', 'k_percent'),
            ratio('bb', 'pa', 'bb_percent'),
            ratio('bb_minus_k', 'pa', 'bb_minus_k_percent'),
        ]
        if extended:
            rates.append(ratio('k_minus_bb', 'pa', 'k_minus_bb_percent'))
        rates += [
            ratio('bb', 'k', 'bb_over_k_percent'),
            ratio('csw', 'pitches', 'csw_percent'),
            ratio('sweet_spot', 'bip_div', 'sweet_spot_percent'),
            ratio('woba', 'woba_codes', 'woba_percent'),
            ratio('woba_contact', 'bip', 'woba_percent_contact'),
            ratio('hard_hit', 'bip_div', 'hard_hit_percent'),
            ratio('barrel', 'bip_div', 'barrel_percent'),
            ratio('zone_contact', 'zone_swing', 'zone_contact_percent'),
            ratio('zone_swing', 'in_zone', 'zone_swing_percent'),
            ratio('in_zone', 'pitches', 'zone_percent'),
            (pl.col('ozone_swing') / (pl.col('pitches') - pl.col('in_zone'))).alias('chase_percent'),
            ratio('ozone_contact', 'ozone_swing', 'chase_contact'),
            ratio('swings', 'pitches', 'swing_percent'),
            ratio('whiffs', 'swings', 'whiff_rate'),
            ratio('whiffs', 'pitches', 'swstr_rate'),
            ratio('ground_ball', 'bip', 'ground_ball_percent'),
            ratio('line_drive', 'bip', 'line_drive_percent'),
            ratio('fly_ball', 'bip', 'fly_ball_percent'),
            ratio('pop_up', 'bip', 'pop_up_percent'),
        ]
        rates += [ratio(zone, 'attack_zone', f'{zone}_zone_percent') for zone in zones]
        rates += [ratio(f'{zone}_swing', zone, f'{zone}_zone_swing_percent') for zone in zones]
        rates += [ratio(f'{zone}_whiff', f'{zone}_swing', f'{zone}_zone_whiff_percent') for zone in zones]
        rates += [
            ratio('xwoba', 'xwoba_codes', 'xwoba_percent'),
            ratio('xwoba_contact', 'bip', 'xwoba_percent_contact'),
        ]
        if extended:
            rates.append(ratio('tj_stuff_plus', 'pitches', 'tj_stuff_plus_avg'))
            rates += [ratio(metric, 'pitches', f'{metric}_avg')
                      for metric in df_update._PITCH_METRICS]
        return rates

    def update(self, df_clone: pl.DataFrame):
        """Return a copy of ``df_clone`` with derived per-pitch columns added.

        Parameters:
        df_clone (pl.DataFrame): Pitch-level data. The columns read here
            include ``type_ab``, ``is_pitch``, ``is_swing``, ``sz_top``,
            ``sz_bot``, ``zone``, ``x``, ``y``, ``px``, ``pz``, ``event_type``,
            ``play_description``, ``play_code``, ``launch_speed``,
            ``launch_angle``, ``trajectory``, ``pitch_type``, ``pitcher_hand``,
            ``hb``, and the kinematic columns ``vx0``/``vy0``/``vz0``,
            ``ax``/``ay``/``az``, ``x0``/``y0``.

        Returns:
        pl.DataFrame: The cloned frame with the added/overwritten columns.
            The input frame itself is not modified.
        """
        df = df_clone.clone()

        # Event-type groups used for the counting-stat flags.
        hit_codes = ['single', 'double', 'home_run', 'triple']

        ab_codes = ['single', 'strikeout', 'field_out',
                    'grounded_into_double_play', 'fielders_choice', 'force_out',
                    'double', 'field_error', 'home_run', 'triple',
                    'double_play',
                    'fielders_choice_out', 'strikeout_double_play',
                    'other_out', 'triple_play']

        obp_true_codes = ['single', 'walk',
                          'double', 'home_run', 'triple',
                          'hit_by_pitch', 'intent_walk']

        obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                     'grounded_into_double_play', 'fielders_choice', 'force_out',
                     'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                     'hit_by_pitch', 'double_play', 'intent_walk',
                     'fielders_choice_out', 'strikeout_double_play',
                     'sac_fly_double_play',
                     'other_out', 'triple_play']

        bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

        # Barrel: False when there is no launch speed, True when the
        # speed/angle thresholds are all met, null otherwise.
        conditions_barrel = [
            df['launch_speed'].is_null(),
            (df['launch_speed'] * 1.5 - df['launch_angle'] >= 117) &
            (df['launch_speed'] + df['launch_angle'] >= 124) &
            (df['launch_speed'] >= 98) &
            (df['launch_angle'] >= 4) & (df['launch_angle'] <= 50)
        ]
        choices_barrel = [False, True]

        # Total bases per hit type.
        conditions_tb = [
            (df['event_type'] == 'single'),
            (df['event_type'] == 'double'),
            (df['event_type'] == 'triple'),
            (df['event_type'] == 'home_run')
        ]
        choices_tb = [1, 2, 3, 4]

        # wOBA weight per event; the listed out/error events score 0.
        conditions_woba = [
            df['event_type'].is_in(['strikeout', 'field_out', 'sac_fly', 'force_out', 'grounded_into_double_play', 'fielders_choice', 'field_error', 'sac_bunt', 'double_play', 'fielders_choice_out', 'strikeout_double_play', 'sac_fly_double_play', 'other_out']),
            df['event_type'] == 'walk',
            df['event_type'] == 'hit_by_pitch',
            df['event_type'] == 'single',
            df['event_type'] == 'double',
            df['event_type'] == 'triple',
            df['event_type'] == 'home_run'
        ]
        choices_woba = [0, 0.689, 0.720, 0.881, 1.254, 1.589, 2.048]

        # Events that count toward the wOBA / xwOBA denominators.
        woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch', 'double', 'sac_fly', 'force_out', 'home_run', 'grounded_into_double_play', 'fielders_choice', 'field_error', 'triple', 'sac_bunt', 'double_play', 'fielders_choice_out', 'strikeout_double_play', 'sac_fly_double_play', 'other_out']

        chain = df_update._chain_when

        # Both zone models take the same feature matrix (nulls replaced by 0),
        # built from the columns as they are before this step modifies them.
        zone_features = df[['px', 'pz', 'sz_top', 'sz_bot']].fill_null(0).to_numpy()

        df = df.with_columns([
            pl.when(df['type_ab'].is_not_null()).then(1).otherwise(0).alias('pa'),
            pl.when(df['is_pitch']).then(1).otherwise(0).alias('pitches'),
            # A strike-zone edge of exactly 0 is treated as missing.
            pl.when(df['sz_top'] == 0).then(None).otherwise(df['sz_top']).alias('sz_top'),
            pl.when(df['sz_bot'] == 0).then(None).otherwise(df['sz_bot']).alias('sz_bot'),
            # Zone codes 1-9 are in the zone; non-positive (or null) codes stay null.
            pl.when(df['zone'] > 0).then(df['zone'] < 10).otherwise(None).alias('in_zone'),
            pl.Series(px_model.predict(df[['x']].fill_null(0).to_numpy())[:, 0]).alias('px_predict'),
            pl.Series(pz_model.predict(df[['y']].fill_null(0).to_numpy())[:, 0] + 3.2).alias('pz_predict'),
            pl.Series(in_zone_model.predict(zone_features)).alias('in_zone_predict'),
            pl.Series(attack_zone_model.predict(zone_features)).alias('attack_zone_predict'),
            # when/then/otherwise (rather than a bare is_in) so that a null
            # event_type yields False instead of null.
            pl.when(df['event_type'].is_in(hit_codes)).then(True).otherwise(False).alias('hits'),
            pl.when(df['event_type'].is_in(ab_codes)).then(True).otherwise(False).alias('ab'),
            pl.when(df['event_type'].is_in(obp_true_codes)).then(True).otherwise(False).alias('on_base'),
            pl.when(df['event_type'].is_in(obp_codes)).then(True).otherwise(False).alias('obp'),
            pl.when(df['play_description'].is_in(bip_codes)).then(True).otherwise(False).alias('bip'),
            chain(conditions_barrel, choices_barrel).alias('barrel'),
            pl.when(df['launch_angle'].is_null()).then(False).when((df['launch_angle'] >= 8) & (df['launch_angle'] <= 32)).then(True).otherwise(None).alias('sweet_spot'),
            pl.when(df['launch_speed'].is_null()).then(False).when(df['launch_speed'] >= 94.5).then(True).otherwise(None).alias('hard_hit'),
            chain(conditions_tb, choices_tb).alias('tb'),
            chain(conditions_woba, choices_woba).alias('woba'),
            pl.when((df['play_code'] == 'S') | (df['play_code'] == 'W') | (df['play_code'] == 'T')).then(1).otherwise(0).alias('whiffs'),
            pl.when((df['play_code'] == 'S') | (df['play_code'] == 'W') | (df['play_code'] == 'T') | (df['play_code'] == 'C')).then(1).otherwise(0).alias('csw'),
            pl.when(pl.col('is_swing').cast(pl.Boolean)).then(1).otherwise(0).alias('swings'),
            # 'k' and 'bb' are recomputed two steps below; they are created
            # here only so their position in the output column order is kept.
            pl.col('event_type').is_in(['strikeout', 'strikeout_double_play']).alias('k'),
            pl.col('event_type').is_in(['walk', 'intent_walk']).alias('bb'),
            # Placeholders: 'attack_zone' stays null; the two woba_pred columns
            # are filled after the xwOBA model has run.
            pl.lit(None).alias('attack_zone'),
            pl.lit(None).alias('woba_pred'),
            pl.lit(None).alias('woba_pred_contact')
        ])

        df = df.with_columns([
            pl.when(df['event_type'].is_in(woba_codes)).then(1).otherwise(None).alias('woba_codes'),
            pl.when(df['event_type'].is_in(woba_codes)).then(1).otherwise(None).alias('xwoba_codes'),
            # 'tb' is non-null only for hits, so this keeps wOBA on hits only.
            pl.when((pl.col('tb') >= 0)).then(df['woba']).otherwise(None).alias('woba_contact'),
            # Fall back to the model predictions where the measured value is null.
            pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
            pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
            pl.when(pl.col('in_zone').is_null()).then(pl.col('in_zone_predict')).otherwise(pl.col('in_zone')).alias('in_zone_final'),
        ])

        df = df.with_columns([
            pl.when(df['launch_speed'].is_null()).then(None).otherwise(df['barrel']).alias('barrel'),
            pl.lit('average').alias('average'),
            pl.when(pl.col('in_zone_final') == False).then(True).otherwise(False).alias('out_zone'),
            pl.when((pl.col('in_zone_final') == True) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('zone_swing'),
            pl.when((pl.col('in_zone_final') == True) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('zone_contact'),
            pl.when((pl.col('in_zone_final') == False) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('ozone_swing'),
            pl.when((pl.col('in_zone_final') == False) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('ozone_contact'),
            pl.when(pl.col('event_type').str.contains('strikeout')).then(True).otherwise(False).alias('k'),
            pl.when(pl.col('event_type').is_in(['walk', 'intent_walk'])).then(True).otherwise(False).alias('bb'),
            pl.when(pl.col('attack_zone').is_null()).then(pl.col('attack_zone_predict')).otherwise(pl.col('attack_zone')).alias('attack_zone_final'),
        ])

        df = df.with_columns([
            (df['k'].cast(pl.Float32) - df['bb'].cast(pl.Float32)).alias('k_minus_bb'),
            (df['bb'].cast(pl.Float32) - df['k'].cast(pl.Float32)).alias('bb_minus_k'),
            # Denominator flag for the batted-ball quality rates.
            (df['launch_speed'] > 0).alias('bip_div'),
            (df['attack_zone_final'] == 0).alias('heart'),
            (df['attack_zone_final'] == 1).alias('shadow'),
            (df['attack_zone_final'] == 2).alias('chase'),
            (df['attack_zone_final'] == 3).alias('waste'),
            ((df['attack_zone_final'] == 0) & (df['swings'] == 1)).alias('heart_swing'),
            ((df['attack_zone_final'] == 1) & (df['swings'] == 1)).alias('shadow_swing'),
            ((df['attack_zone_final'] == 2) & (df['swings'] == 1)).alias('chase_swing'),
            ((df['attack_zone_final'] == 3) & (df['swings'] == 1)).alias('waste_swing'),
            ((df['attack_zone_final'] == 0) & (df['whiffs'] == 1)).alias('heart_whiff'),
            ((df['attack_zone_final'] == 1) & (df['whiffs'] == 1)).alias('shadow_whiff'),
            ((df['attack_zone_final'] == 2) & (df['whiffs'] == 1)).alias('chase_whiff'),
            ((df['attack_zone_final'] == 3) & (df['whiffs'] == 1)).alias('waste_whiff')
        ])

        # Expected wOBA from batted-ball data: weight each class probability
        # and sum across the row (one vectorized pass instead of a per-row loop).
        # Presumably the five classes are out/1B/2B/3B/HR, matching the
        # weights -- verify against the trained model's class order.
        outcome_weights = np.array([0, 0.881, 1.254, 1.589, 2.048])
        outcome_proba = xwoba_model.predict_proba(
            df[['launch_angle', 'launch_speed']].fill_null(0).to_numpy()
        )
        df = df.with_columns([
            pl.Series((outcome_proba * outcome_weights).sum(axis=1)).alias('woba_pred_predict')
        ])

        # Walks, hit-by-pitches and strikeouts get their fixed wOBA weight
        # instead of the batted-ball prediction.
        df = df.with_columns([
            pl.when(pl.col('event_type').is_in(['walk'])).then(0.689)
            .when(pl.col('event_type').is_in(['hit_by_pitch'])).then(0.720)
            .when(pl.col('event_type').is_in(['strikeout', 'strikeout_double_play'])).then(0)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred_predict')
        ])

        df = df.with_columns([
            pl.when(pl.col('woba_codes').is_null()).then(None).otherwise(pl.col('woba_pred_predict')).alias('woba_pred'),
            pl.when(pl.col('bip') != 1).then(None).otherwise(pl.col('woba_pred_predict')).alias('woba_pred_contact'),
        ])

        # Fold bunt trajectories into the regular categories; '' becomes null.
        df = df.with_columns([
            pl.when(pl.col('trajectory').is_in(['bunt_popup'])).then(pl.lit('popup'))
            .when(pl.col('trajectory').is_in(['bunt_grounder'])).then(pl.lit('ground_ball'))
            .when(pl.col('trajectory').is_in(['bunt_line_drive'])).then(pl.lit('line_drive'))
            .when(pl.col('trajectory').is_in([''])).then(pl.lit(None))
            .otherwise(pl.col('trajectory')).alias('trajectory')
        ])

        # One-hot encode the trajectory column.
        dummy_df = df.select(pl.col('trajectory')).to_dummies()

        # Back-fill any category that does not occur in this data with zeros,
        # so the summary aggregations always find all four columns.  (The
        # previous identity rename of these columns was a no-op that raised
        # when a category was absent, before this back-fill could run.)
        for col in ['trajectory_fly_ball', 'trajectory_ground_ball', 'trajectory_line_drive', 'trajectory_popup']:
            if col not in dummy_df.columns:
                dummy_df = dummy_df.with_columns(pl.lit(0).alias(col))

        # Attach the one-hot columns to the main frame.
        df = df.hstack(dummy_df)

        # The dummy column generated for null trajectories is not needed.
        if 'trajectory_null' in df.columns:
            df = df.drop('trajectory_null')

        pitch_cat = df_update._PITCH_GROUPS
        df = df.with_columns(
            df["pitch_type"].map_elements(lambda x: pitch_cat.get(x, x)).alias("pitch_group")
        )

        # Constant-acceleration kinematics: y-velocity once the ball has
        # travelled from y0 to y = 17/12 (v_f^2 = v_0^2 - 2*a*(y0 - 17/12)),
        # taking the negative root.
        df = df.with_columns([
            (-(pl.col('vy0')**2 - (2 * pl.col('ay') * (pl.col('y0') - 17/12)))**0.5).alias('vy_f'),
        ])

        # Time taken to reach that point.
        df = df.with_columns([
            ((pl.col('vy_f') - pl.col('vy0')) / pl.col('ay')).alias('t'),
        ])

        # Vertical and horizontal velocity at that time.
        df = df.with_columns([
            (pl.col('vz0') + (pl.col('az') * pl.col('t'))).alias('vz_f'),
            (pl.col('vx0') + (pl.col('ax') * pl.col('t'))).alias('vx_f')
        ])

        # Approach angles, converted from radians to degrees.
        df = df.with_columns([
            (-np.arctan(pl.col('vz_f') / pl.col('vy_f')) * (180 / np.pi)).alias('vaa'),
            (-np.arctan(pl.col('vx_f') / pl.col('vy_f')) * (180 / np.pi)).alias('haa')
        ])

        # Mirror horizontal acceleration for left-handed pitchers.  This must
        # stay after the vx_f/haa computation above, which uses the raw 'ax'.
        df = df.with_columns(
            pl.when(pl.col('pitcher_hand') == 'L')
            .then(-pl.col('ax'))
            .otherwise(pl.col('ax'))
            .alias('ax')
        )

        # Mirror horizontal break for left-handed pitchers.
        df = df.with_columns(
            pl.when(pl.col('pitcher_hand') == 'L')
            .then(-pl.col('hb'))
            .otherwise(pl.col('hb'))
            .alias('hb')
        )

        # NOTE(review): unlike 'ax' and 'hb', the sign of 'x0' is flipped for
        # every pitcher who is NOT left-handed -- confirm this is the intended
        # convention for the horizontal release point.
        df = df.with_columns(
            pl.when(pl.col('pitcher_hand') == 'L')
            .then(pl.col('x0'))
            .otherwise(-pl.col('x0'))
            .alias('x0')
        )

        # Expose the derived swing / ball-in-play flags under 'is_*' names.
        df = df.with_columns([
            pl.when(df['swings'].is_null()).then(None).otherwise(df['swings']).alias('is_swing'),
            pl.when(df['bip'].is_null()).then(None).otherwise(df['bip']).alias('is_bip')])

        return df

    def update_summary(self, df: pl.DataFrame, pitcher: bool = True) -> pl.DataFrame:
        """
        Aggregate per-pitch rows into one summary row per pitcher or batter.

        Parameters:
        df (pl.DataFrame): Pitch-level frame containing the columns added by ``update``.
        pitcher (bool): Group by ``pitcher_id``/``pitcher_name`` when True,
            by ``batter_id``/``batter_name`` when False.

        Returns:
        pl.DataFrame: One row per player with counting totals and ratio statistics.
        """
        position = 'pitcher' if pitcher else 'batter'

        df_summ = df.group_by([f'{position}_id', f'{position}_name']).agg(
            self._summary_aggregations(extended=False)
        )

        # Ratio statistics are derived from the aggregated totals.
        df_summ = df_summ.with_columns(self._summary_rates(extended=False))

        return df_summ

    def update_summary_select(self, df: pl.DataFrame, selection: list) -> pl.DataFrame:
        """
        Aggregate per-pitch rows into one summary row per group of ``selection``.

        Compared with ``update_summary`` this also totals ``k_minus_bb``,
        ``tj_stuff_plus`` and the pitch-metric columns (``start_speed``,
        ``vb``, ``ivb``, ``hb``, ``x0``, ``z0``, ``vaa``, ``haa``,
        ``spin_rate``, ``extension``) and adds a ``*_avg`` column for each,
        computed as the total divided by the pitch count.

        Parameters:
        df (pl.DataFrame): Pitch-level frame containing the columns added by
            ``update`` plus ``tj_stuff_plus`` and the pitch-metric columns.
        selection (list): Column names to group by.

        Returns:
        pl.DataFrame: One row per group with counting totals and ratio statistics.
        """
        df_summ = df.group_by(selection).agg(
            self._summary_aggregations(extended=True)
        )

        # Ratio statistics are derived from the aggregated totals.
        df_summ = df_summ.with_columns(self._summary_rates(extended=True))

        return df_summ
functions/pitch_summary_functions.py CHANGED
@@ -662,18 +662,21 @@ def summary_table(df: pl.DataFrame, ax: plt.Axes):
662
  format_cells(['count_percent', 'zone_percent', 'chase_percent', 'whiff_percent'], '{:,.1%}')
663
  format_cells(['tj_stuff_plus', 'pitch_grade', 'spin_rate'], '{:,.0f}')
664
 
665
- # Create legend for pitch types
666
- items_in_order = (df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy())
667
- colour_pitches = [dict_colour[x] for x in items_in_order]
668
- label = [dict_pitch[x] for x in items_in_order]
669
- handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]
670
- if len(label) > 5:
671
- ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
672
- fancybox=True, loc='lower center', fontsize=16, framealpha=1.0, markerscale=1.7, prop={'family': 'calibi', 'size': 16})
673
- else:
674
- ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
675
- fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2, prop={'family': 'calibi', 'size': 20})
676
  ax.axis('off')
 
 
 
677
 
678
  def plot_footer(ax: plt.Axes):
679
  """
@@ -685,8 +688,8 @@ def plot_footer(ax: plt.Axes):
685
  The axis to add the footer text to.
686
  """
687
  # Add footer text
688
- ax.text(0, 1, 'By: @TJStats', ha='left', va='top', fontsize=24)
689
- ax.text(0.5, 0.25,
690
  '''
691
  Colour Coding Compares to League Average By Pitch
692
  tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
@@ -694,7 +697,31 @@ def plot_footer(ax: plt.Axes):
694
  Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type
695
  ''',
696
  ha='center', va='bottom', fontsize=12)
697
- ax.text(1, 1, 'Data: MLB, Fangraphs\nImages: MLB, ESPN', ha='right', va='top', fontsize=24)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698
  ax.axis('off')
699
 
700
  # Function to get an image from a URL and display it on the given axis
@@ -725,7 +752,7 @@ def player_headshot(player_input: str, ax: plt.Axes, sport_id: int, season: int)
725
  img = Image.open(BytesIO(response.content))
726
 
727
  # Display the image on the axis
728
- ax.set_xlim(0, 1.3)
729
  ax.set_ylim(0, 1)
730
  ax.imshow(img, extent=[0, 1, 0, 1] if sport_id == 1 else [1/6, 5/6, 0, 1], origin='upper')
731
  except PIL.UnidentifiedImageError:
@@ -735,7 +762,18 @@ def player_headshot(player_input: str, ax: plt.Axes, sport_id: int, season: int)
735
  # Turn off the axis
736
  ax.axis('off')
737
 
738
- def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
 
 
 
 
 
 
 
 
 
 
 
739
  """
740
  Display the player's bio information on the given axis.
741
 
@@ -750,6 +788,9 @@ def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
750
  year_input : int
751
  The season year.
752
  """
 
 
 
753
  # Construct the URL to fetch player data
754
  url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
755
 
@@ -764,19 +805,24 @@ def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
764
  weight = data['people'][0]['weight']
765
 
766
  # Display the player's name, handedness, age, height, and weight on the axis
767
- ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=56)
768
- ax.text(0.5, 0.7, f'{pitcher_hand}HP, Age: {age}, {height}/{weight}', va='top', ha='center', fontsize=30)
769
- ax.text(0.5, 0.45, f'Season Pitching Summary', va='top', ha='center', fontsize=40)
770
 
 
771
  # Make API call to retrieve sports information
772
  response = requests.get(url='https://statsapi.mlb.com/api/v1/sports').json()
773
-
774
- # Convert the JSON response into a Polars DataFrame
775
  df_sport_id = pl.DataFrame(response['sports'])
776
  abb = df_sport_id.filter(pl.col('id') == sport_id)['abbreviation'][0]
 
 
 
 
 
777
 
778
  # Display the season and sport abbreviation
779
- ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')
 
780
 
781
  # Turn off the axis
782
  ax.axis('off')
@@ -861,9 +907,9 @@ def plot_logo(pitcher_id: str, ax: plt.Axes, df_team: pl.DataFrame, df_players:
861
  img = Image.open(BytesIO(response.content))
862
 
863
  # Display the image on the axis
864
- ax.set_xlim(0, 1.3)
865
  ax.set_ylim(0, 1)
866
- ax.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')
867
 
868
  # Turn off the axis
869
  ax.axis('off')
 
662
  format_cells(['count_percent', 'zone_percent', 'chase_percent', 'whiff_percent'], '{:,.1%}')
663
  format_cells(['tj_stuff_plus', 'pitch_grade', 'spin_rate'], '{:,.0f}')
664
 
665
+ # # Create legend for pitch types
666
+ # items_in_order = (df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy())
667
+ # colour_pitches = [dict_colour[x] for x in items_in_order]
668
+ # label = [dict_pitch[x] for x in items_in_order]
669
+ # handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]
670
+ # if len(label) > 5:
671
+ # ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
672
+ # fancybox=True, loc='lower center', fontsize=16, framealpha=1.0, markerscale=1.7, prop={'family': 'calibi', 'size': 16})
673
+ # else:
674
+ # ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
675
+ # fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2, prop={'family': 'calibi', 'size': 20})
676
  ax.axis('off')
677
+ # ax.set_title(f'{df["pitcher_name"][0]}', fontdict=font_properties_titles)
678
+ # ax.text(x=0.5,y=0.90,s=f'{df["pitcher_name"][0]}',
679
+ # fontsize=30, ha='center', va='bottom',)
680
 
681
  def plot_footer(ax: plt.Axes):
682
  """
 
688
  The axis to add the footer text to.
689
  """
690
  # Add footer text
691
+ ax.text(0, 1, 'By: Thomas Nestico\n @TJStats', ha='left', va='top', fontsize=24)
692
+ ax.text(0.5, 0.15,
693
  '''
694
  Colour Coding Compares to League Average By Pitch
695
  tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
 
697
  Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type
698
  ''',
699
  ha='center', va='bottom', fontsize=12)
700
+ ax.text(1, 1, 'Data: MLB\nImages: MLB, ESPN', ha='right', va='top', fontsize=24)
701
+ ax.axis('off')
702
+
703
+
704
+
705
def plot_footer_break(ax: plt.Axes):
    """
    Draw the compact footer: author credit on the left, source credits on the right.

    Both captions are bottom-aligned at y=0.25 with a font size of 24. No
    centred explanatory paragraph is drawn by this function.

    Parameters
    ----------
    ax : plt.Axes
        The axis to add the footer text to.
    """
    baseline = 0.25
    shared_kwargs = {'va': 'bottom', 'fontsize': 24}

    # (x position, horizontal alignment, caption) for each footer corner
    captions = (
        (0, 'left', 'By: Thomas Nestico\n @TJStats'),
        (1, 'right', 'Data: MLB\nImages: MLB, ESPN'),
    )
    for x_pos, h_align, caption in captions:
        ax.text(x_pos, baseline, caption, ha=h_align, **shared_kwargs)

    # Turn off the axis
    ax.axis('off')
726
 
727
  # Function to get an image from a URL and display it on the given axis
 
752
  img = Image.open(BytesIO(response.content))
753
 
754
  # Display the image on the axis
755
+ ax.set_xlim(0, 1)
756
  ax.set_ylim(0, 1)
757
  ax.imshow(img, extent=[0, 1, 0, 1] if sport_id == 1 else [1/6, 5/6, 0, 1], origin='upper')
758
  except PIL.UnidentifiedImageError:
 
762
  # Turn off the axis
763
  ax.axis('off')
764
 
765
# Suffix added to the date-range line of the player bio for each split key.
splits_title = dict(
    all='',
    left=' vs LHH',
    right=' vs RHH',
)

# Label shown in the player bio header for each game-type code.
type_dict = dict(
    R='Regular Season',
    S='Spring',
    P='Playoffs',
)
776
+ def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int,game_type: str, year_input: int,split: str,df: pl.DataFrame):
777
  """
778
  Display the player's bio information on the given axis.
779
 
 
788
  year_input : int
789
  The season year.
790
  """
791
+ start_date = df['game_date'][0]
792
+ end_date = df['game_date'][-1]
793
+
794
  # Construct the URL to fetch player data
795
  url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
796
 
 
805
  weight = data['people'][0]['weight']
806
 
807
  # Display the player's name, handedness, age, height, and weight on the axis
808
+ ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=36)
809
+ ax.text(0.5, 0.68, f'{pitcher_hand}HP, Age: {age}, {height}/{weight}', va='top', ha='center', fontsize=24)
 
810
 
811
+ # Convert the JSON response into a Polars DataFrame
812
  # Make API call to retrieve sports information
813
  response = requests.get(url='https://statsapi.mlb.com/api/v1/sports').json()
814
+
 
815
  df_sport_id = pl.DataFrame(response['sports'])
816
  abb = df_sport_id.filter(pl.col('id') == sport_id)['abbreviation'][0]
817
+ # title = f'{df["game_date"][0]} vs {df["batter_team"][0]} ({type_dict[game_type[0]]}){split_title[split]}'
818
+ ax.text(0.5, 0.45, f'{year_input} {abb} {type_dict[game_type]}', va='top', ha='center', fontsize=24)
819
+
820
+
821
+
822
 
823
  # Display the season and sport abbreviation
824
+ ax.text(0.5, 0.20, f'{start_date} to {end_date}{splits_title[split]}', va='top', ha='center', fontsize=24, fontstyle='italic')
825
+
826
 
827
  # Turn off the axis
828
  ax.axis('off')
 
907
  img = Image.open(BytesIO(response.content))
908
 
909
  # Display the image on the axis
910
+ ax.set_xlim(0, 1)
911
  ax.set_ylim(0, 1)
912
+ ax.imshow(img, extent=[0, 1, 0, 1], origin='upper')
913
 
914
  # Turn off the axis
915
  ax.axis('off')
stuff_model/__pycache__/feature_engineering.cpython-39.pyc CHANGED
Binary files a/stuff_model/__pycache__/feature_engineering.cpython-39.pyc and b/stuff_model/__pycache__/feature_engineering.cpython-39.pyc differ