nesticot committed on
Commit
e025fd6
·
verified ·
1 Parent(s): 518da23

Upload 29 files

Browse files
api_scraper.py CHANGED
The diff for this file is too large to render. See raw diff
 
functions/__pycache__/app.cpython-39.pyc ADDED
Binary file (10.5 kB). View file
 
functions/__pycache__/heat_map_functions.cpython-39.pyc ADDED
Binary file (11.3 kB). View file
 
functions/__pycache__/pitch_summary_functions.cpython-39.pyc CHANGED
Binary files a/functions/__pycache__/pitch_summary_functions.cpython-39.pyc and b/functions/__pycache__/pitch_summary_functions.cpython-39.pyc differ
 
functions/app.py ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+ import numpy as np
3
+ import pandas as pd
4
+ import api_scraper
5
+ scrape = api_scraper.MLB_Scrape()
6
+ from functions import df_update
7
+ from functions import pitch_summary_functions
8
+ update = df_update.df_update()
9
+ from stuff_model import feature_engineering as fe
10
+ from stuff_model import stuff_apply
11
+ import requests
12
+ import joblib
13
+ from matplotlib.gridspec import GridSpec
14
+ from shiny import App, reactive, ui, render
15
+ from shiny.ui import h2, tags
16
+ import matplotlib.pyplot as plt
17
+ import matplotlib.gridspec as gridspec
18
+ import seaborn as sns
19
+ from functions.pitch_summary_functions import *
20
+ from functions.df_update import *
21
+ from shiny import App, reactive, ui, render
22
+ from shiny.ui import h2, tags
23
+ from functions.heat_map_functions import *
24
+
25
# Hex colours used as the seaborn palette for the figure.
colour_palette = ['#FFB000', '#648FFF', '#785EF0',
                  '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']

# Seasons offered in the "Select Season" dropdown.
year_list = [2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]

# Level dropdown choices: the key is the value handed to the scraper as
# ``sport_id`` (after ``int(...)``), the value is the label shown to the user.
level_dict = {
    '1': 'MLB',
    '11': 'AAA',
    '12': 'AA',
    '13': 'A+',
    '14': 'A',
    '17': 'AFL',
    '22': 'College',
    '21': 'Prospects',
    '51': 'International',
}

# Plot identifier -> display label.
# NOTE(review): not referenced in the visible part of this module; the
# 'roling' spelling is part of the keys and is kept as-is for any consumer.
function_dict = {
    'velocity_kdes': 'Velocity Distributions',
    'break_plot': 'Pitch Movement',
    'tj_stuff_roling': 'Rolling tjStuff+ by Pitch',
    'tj_stuff_roling_game': 'Rolling tjStuff+ by Game',
    'location_plot_lhb': 'Locations vs LHB',
    'location_plot_rhb': 'Locations vs RHB',
}

# Batter-handedness split: display label and the hand codes it covers.
split_dict = {
    'all': 'All',
    'left': 'LHH',
    'right': 'RHH',
}

split_dict_hand = {
    'all': ['L', 'R'],
    'left': ['L'],
    'right': ['R'],
}

# Game-type dropdown choices (code -> label).
type_dict = {
    'R': 'Regular Season',
    'S': 'Spring',
    'P': 'Playoffs',
}

# ``str.format`` spec for each summary-table statistic.
# (The original defined this dictionary twice with identical contents; a
# single definition is kept.)
format_dict = {
    'pitch_percent': '{:.1%}',
    'pitches': '{:.0f}',
    'heart_zone_percent': '{:.1%}',
    'shadow_zone_percent': '{:.1%}',
    'chase_zone_percent': '{:.1%}',
    'waste_zone_percent': '{:.1%}',
    'csw_percent': '{:.1%}',
    'whiff_rate': '{:.1%}',
    'chase_percent': '{:.1%}',
    'bip': '{:.0f}',
    'xwoba_percent_contact': '{:.3f}',
}

# Row label shown for each summary-table statistic.  'Shadow%' and 'Chase%'
# were previously truncated to 'Shado%' / 'Chas%'.
label_translation_dict = {
    'pitch_percent': 'Pitch%',
    'pitches': 'Pitches',
    'heart_zone_percent': 'Heart%',
    'shadow_zone_percent': 'Shadow%',
    'chase_zone_percent': 'Chase%',
    'waste_zone_percent': 'Waste%',
    'csw_percent': 'CSW%',
    'whiff_rate': 'Whiff%',
    'chase_percent': 'O-Swing%',
    'bip': 'BBE',
    'xwoba_percent_contact': 'xwOBACON',
}
106
+
107
+
108
# Unnamed colour ramps built from hex anchor colours.
# cmap_sum runs blue -> white -> gold; cmap_sum2 runs white -> gold -> orange.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list(
    name="",
    colors=['#648FFF', '#FFFFFF', '#FFB000'],
)
cmap_sum2 = matplotlib.colors.LinearSegmentedColormap.from_list(
    name="",
    colors=['#FFFFFF', '#FFB000', '#FE6100'],
)
110
+
111
+
112
+ from shiny import App, reactive, ui, render
113
+ from shiny.ui import h2, tags
114
+
115
# UI layout: a sidebar of controls next to a main panel holding the plot.

# Sidebar controls, top to bottom: season / level / game type selectors, the
# button that loads the player list, the three server-rendered controls
# (player, pitch type, date range) and the button that triggers plotting.
# NOTE(review): the second positional argument of ``ui.output_ui`` looks like
# it is ``inline`` rather than a label, so the strings below would act as a
# truthy flag -- confirm against the installed Shiny version before changing.
_sidebar_controls = ui.panel_sidebar(
    ui.row(
        ui.column(4, ui.input_select('year_input', 'Select Season', year_list, selected=2024)),
        ui.column(4, ui.input_select('level_input', 'Select Level', level_dict)),
        ui.column(4, ui.input_select('type_input', 'Select Type', type_dict, selected='R')),
    ),
    ui.row(ui.input_action_button("player_button", "Get Player List", class_="btn-primary")),
    ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))),
    ui.row(ui.column(12, ui.output_ui('pitch_type_ui', 'Select Pitch Type'))),
    ui.row(ui.column(12, ui.output_ui('date_id', 'Select Date'))),
    ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")),
)

# Main panel: a single tab with the status text and the 16:9 plot output.
_summary_tab = ui.nav(
    "Pitching Summary",
    ui.output_text("status"),
    ui.output_plot('plot', width='1440px', height=f'{900/1600*1440}px'),
)
_main_panel = ui.panel_main(ui.navset_tab(_summary_tab))

app_ui = ui.page_fluid(ui.layout_sidebar(_sidebar_controls, _main_panel))
153
+
154
+
155
# Statistics shown in the per-handedness summary tables, in display order.
_SUMMARY_TABLE_COLUMNS = (
    'pitch_percent',
    'pitches',
    'heart_zone_percent',
    'shadow_zone_percent',
    'chase_zone_percent',
    'waste_zone_percent',
    'csw_percent',
    'whiff_rate',
    'chase_percent',
    'bip',
    'xwoba_percent_contact',
)


def _hand_summary_table(df_all, df_pitch, hand):
    """Summarise ``df_pitch`` against batters of ``hand`` and add ``pitch_percent``.

    ``pitch_percent`` is the summary's ``pitches`` divided by the number of
    rows of ``df_all`` (every pitch type) thrown to batters of that hand.
    """
    table = df_update().update_summary_select(
        df=df_pitch.filter(pl.col('batter_hand') == hand),
        selection=['pitcher_hand'],
    )
    return table.with_columns(
        (pl.col('pitches') / len(df_all.filter(pl.col('batter_hand') == hand))).alias('pitch_percent')
    )


def _pivot_cell_colours(pivot_table, pitch_percent):
    """Return a pandas frame of cell colours for the first three pivot rows.

    Colours are scaled between 0.5x and 1.5x of ``pitch_percent``; column 0 is
    forced to white afterwards.
    """
    normalize = mcolors.Normalize(vmin=pitch_percent * 0.5,
                                  vmax=pitch_percent * 1.5)
    colours = pd.DataFrame(
        data=[[get_color(x, normalize, cmap_sum2) for x in pivot_table[row]]
              for row in range(3)]
    )
    colours[0] = '#ffffff'
    return colours


def _display_table(table):
    """Select the display statistics and transpose to one row per statistic.

    Missing values become an em dash, the placeholder ``table_plot`` passes
    through unformatted.  The original tried to do this by replacing the
    strings 'nan%' / 'nan' on still-numeric data (a no-op) and only for the
    right-hand table.
    """
    return table.select(list(_SUMMARY_TABLE_COLUMNS)).to_pandas().T.fillna('—')


def _add_hand_image(ax, path, xy, box_alignment):
    """Anchor the image at ``path`` on ``ax`` at data position ``xy``."""
    imagebox = OffsetImage(mpimg.imread(path), zoom=0.58)  # adjust zoom as needed
    ax.add_artist(AnnotationBbox(imagebox, xy, box_alignment=box_alignment, frameon=False))


def server(input, output, session):
    """Shiny server function: wires the sidebar inputs to the pitch-frequency plot.

    Registers a cached data calculation, three server-rendered controls
    (player, pitch type, date range), the status text and the plot output.
    """

    @reactive.calc
    @reactive.event(input.pitcher_id, input.date_id)
    def cached_data():
        """Scrape and prepare the selected pitcher's pitches.

        Re-runs only when the pitcher or the date range changes.  Adds a
        ``pitch_count`` column holding the number of rows per ``pitch_type``.
        """
        year_input = int(input.year_input())
        sport_id = int(input.level_input())
        player_input = int(input.pitcher_id())
        start_date = str(input.date_id()[0])
        end_date = str(input.date_id()[1])

        game_list = scrape.get_player_games_list(sport_id=sport_id,
                                                 season=year_input,
                                                 player_id=player_input,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 game_type=[input.type_input()])

        data_list = scrape.get_data(game_list_input=game_list[:])
        pitches = scrape.get_data_df(data_list=data_list).filter(
            (pl.col("pitcher_id") == player_input) &
            (pl.col("is_pitch") == True)
        )
        df = update.update(pitches).with_columns(
            pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
        )
        return df

    @render.ui
    @reactive.event(input.player_button, ignore_none=False)
    def player_select_ui():
        """Render the pitcher dropdown for the selected level, season and game type."""
        df_pitcher_info = scrape.get_players(
            sport_id=int(input.level_input()),
            season=int(input.year_input()),
            game_type=[input.type_input()],
        ).filter(pl.col("position").is_in(['P', 'TWP'])).sort("name")

        # player id -> name, used as the select input's choices
        pitcher_dict = dict(zip(df_pitcher_info['player_id'], df_pitcher_info['name']))
        return ui.input_select("pitcher_id", "Select Pitcher", pitcher_dict, selectize=True)

    @render.ui
    @reactive.event(input.pitcher_id, ignore_none=False)
    def pitch_type_ui():
        """Render the pitch-type dropdown from the pitches in the cached data."""
        df = cached_data().clone()
        pitch_dict = dict(zip(df['pitch_type'], df['pitch_description']))
        return ui.input_select("pitch_type_input", "Select Pitch Type", pitch_dict, selectize=True)

    @render.ui
    @reactive.event(input.player_button, ignore_none=False)
    def date_id():
        """Render a date-range input spanning the selected calendar year."""
        year = int(input.year_input())
        return ui.input_date_range("date_id", "Select Date Range",
                                   start=f"{year}-01-01",
                                   end=f"{year}-12-31",
                                   min=f"{year}-01-01",
                                   max=f"{year}-12-31")

    @output
    @render.text
    def status():
        """Status line above the plot; currently always empty.

        The original compared ``input.generate`` (an input id that does not
        exist in the UI, and not called) with 0 and returned "" on both
        branches, so the check is dropped.
        """
        return ""

    @output
    @render.plot
    @reactive.event(input.generate_plot, ignore_none=False)
    def plot():
        """Draw the pitch-frequency figure for the selected pitcher and pitch type."""
        # Local imports kept from the original: these names are not imported
        # at the top of the visible module.
        from matplotlib.cm import ScalarMappable
        from matplotlib.colors import Normalize

        # Show progress/loading notification
        with ui.Progress(min=0, max=1) as p:
            p.set(message="Generating plot", detail="This may take a while...")

            p.set(0.3, "Gathering data...")
            year_input = int(input.year_input())
            sport_id = int(input.level_input())
            player_input = int(input.pitcher_id())
            start_date = str(input.date_id()[0])
            end_date = str(input.date_id()[1])

            print(year_input, sport_id, player_input, start_date, end_date)

            df = cached_data().clone()
            pitch_input = input.pitch_type_input()

            df_plot = pitch_heat_map(pitch_input, df)
            pivot_table_l = pitch_prop(df=df_plot, hand='L')
            pivot_table_r = pitch_prop(df=df_plot, hand='R')

            table_left = _hand_summary_table(df, df_plot, 'L')
            table_right = _hand_summary_table(df, df_plot, 'R')

            df_colour_left = _pivot_cell_colours(pivot_table_l, table_left['pitch_percent'])
            df_colour_right = _pivot_cell_colours(pivot_table_r, table_right['pitch_percent'])

            table_left = _display_table(table_left)
            table_right = _display_table(table_right)

            p.set(0.6, "Creating plot...")

            fig = plt.figure(figsize=(16, 9))
            fig.set_facecolor('white')
            sns.set_theme(style="whitegrid", palette=colour_palette)
            gs = GridSpec(3, 5, height_ratios=[2, 9, 1], width_ratios=[1, 9, 1, 9, 1])
            gs.update(hspace=0.2, wspace=0.3)

            # Header across the top, one heat map per batter hand, footer below.
            ax_header = fig.add_subplot(gs[0, :])
            ax_left = fig.add_subplot(gs[1, 1])
            ax_right = fig.add_subplot(gs[1, 3])
            axfooter = fig.add_subplot(gs[-1, :])

            heat_map_plot(df=df_plot, ax=ax_left, cmap=cmap_sum2, hand='L')
            heat_map_plot(df=df_plot, ax=ax_right, cmap=cmap_sum2, hand='R')

            _add_hand_image(ax_left, 'images/left.png', (1.25, -0.5), (0, 0))
            _add_hand_image(ax_right, 'images/right.png', (-1.25, -0.5), (1, 0))

            table_plot(ax=ax_left, table=table_left, hand='L')
            table_plot_pivot(ax=ax_left, pivot_table=pivot_table_l, df_colour=df_colour_left)

            table_plot(ax=ax_right, table=table_right, hand='R')
            table_plot_pivot(ax=ax_right, pivot_table=pivot_table_r, df_colour=df_colour_right)

            # Colour bar legend: same colormap as the heat maps, labelled only
            # at its two ends.
            sm = ScalarMappable(cmap=cmap_sum2, norm=Normalize(vmin=0, vmax=1))
            cbar = fig.colorbar(sm, ax=axfooter, orientation='horizontal', aspect=100)
            cbar.set_ticks([])
            cbar.set_ticks([sm.norm.vmin, sm.norm.vmax])
            cbar.ax.set_xticklabels(['Least', 'Most'])
            cbar.ax.tick_params(labeltop=True, labelbottom=False, labelsize=14)

            labels = cbar.ax.get_xticklabels()
            labels[0].set_horizontalalignment('left')
            labels[-1].set_horizontalalignment('right')
            labels = cbar.ax.get_xticklabels()
            cbar.ax.set_xticklabels(labels)
            cbar.ax.tick_params(length=0)

            axfooter.text(x=0.02, y=1, s='By: Thomas Nestico\n     @TJStats', fontname='Calibri', ha='left', fontsize=18, va='top')
            axfooter.text(x=1-0.02, y=1, s='Data: MLB', ha='right', fontname='Calibri', fontsize=18, va='top')
            axfooter.axis('off')

            # Header text: pitcher, pitch, season/level and covered date span.
            ax_header.set_xlim(-12, 12)
            ax_header.set_ylim(0, 2)
            ax_header.text(x=0, y=2, s=f"{df_plot['pitcher_name'][0]} - {df_plot['pitcher_hand'][0]}HP\n{df_plot['pitch_description'][0]} Pitch Frequency", ha='center', fontsize=24, va='top')
            ax_header.text(x=0, y=0.75, s=f"{year_input} {level_dict[str(sport_id)]} Season", ha='center', fontsize=16, va='top')
            ax_header.text(x=0, y=0.35, s=f"{df_plot['game_date'][0]} to {df_plot['game_date'][-1]}", ha='center', fontsize=16, va='top', fontstyle='italic')
            ax_header.axis('off')

            plot_header(pitcher_id=player_input,
                        ax=ax_header,
                        df_team=scrape.get_teams(),
                        df_players=scrape.get_players(sport_id, year_input),
                        sport_id=sport_id,)

            fig.subplots_adjust(left=0.03, right=0.97, top=0.97, bottom=0.03)

            # Hand the figure to Shiny explicitly instead of relying on the
            # implicit "current pyplot figure" fallback.
            return fig
443
+
444
+
445
+
446
+
447
# Build the Shiny application object once (the original constructed it twice
# with identical arguments).
app = App(app_ui, server)
functions/heat_map_functions.py CHANGED
@@ -39,8 +39,8 @@ label_translation_dict = {
39
  'pitch_percent': 'Pitch%',
40
  'pitches': 'Pitches',
41
  'heart_zone_percent': 'Heart%',
42
- 'shadow_zone_percent': 'Shado%',
43
- 'chase_zone_percent': 'Chas%',
44
  'waste_zone_percent': 'Waste%',
45
  'csw_percent': 'CSW%',
46
  'whiff_rate': 'Whiff%',
@@ -160,16 +160,20 @@ def table_plot(ax:plt.Axes,
160
 
161
  if hand == 'R':
162
  bbox_data = Bbox.from_bounds(1.7, -0.5, 2.5, 5)
163
- ax.text(s='Against RHH',x=1.7,y=4.65,fontsize=18,fontweight='bold')
164
-
165
  else:
166
  bbox_data = Bbox.from_bounds(-4.2, -0.5, 2.5, 5) # replace width and height with the desired values
167
- ax.text(s='Against LHH',x=-3.6,y=4.65,fontsize=18,fontweight='bold')
168
-
169
 
170
 
171
  bbox_axes = trans.transform_bbox(bbox_data)
172
 
 
 
 
 
 
 
173
  table = table.apply(lambda x: format_dict[x.name].format(x[0]) if x[0] != '—' else '—', axis=1)
174
  table.index = [label_translation_dict[x] for x in table.index]
175
 
@@ -205,7 +209,7 @@ def table_plot_pivot(ax:plt.Axes,
205
 
206
  table_plot_pivot = ax.table(cellText=[[format_as_percentage(val) for val in row] for row in pivot_table.select(pivot_table.columns[-4:]).to_numpy()],
207
  colLabels =pivot_table.columns[-4:],
208
- rowLabels =[' 0 ',' 1 ',' 2 '],
209
  loc='center',
210
  cellLoc='center',
211
  colWidths=[0.3,0.3,0.30,0.3],
@@ -220,8 +224,8 @@ def table_plot_pivot(ax:plt.Axes,
220
  table_plot_pivot.set_fontsize(min_font_size)
221
 
222
 
223
- ax.text(x=-1.72, y=5.08, s='Strikes', rotation=90,fontweight='bold')
224
- ax.text(x=-1.72, y=5.08, s='Strikes', rotation=90,fontweight='bold')
225
 
226
 
227
  def plot_header(pitcher_id: str, ax: plt.Axes, df_team: pl.DataFrame, df_players: pl.DataFrame,sport_id:int):
 
39
  'pitch_percent': 'Pitch%',
40
  'pitches': 'Pitches',
41
  'heart_zone_percent': 'Heart%',
42
+ 'shadow_zone_percent': 'Shadow%',
43
+ 'chase_zone_percent': 'Chase%',
44
  'waste_zone_percent': 'Waste%',
45
  'csw_percent': 'CSW%',
46
  'whiff_rate': 'Whiff%',
 
160
 
161
  if hand == 'R':
162
  bbox_data = Bbox.from_bounds(1.7, -0.5, 2.5, 5)
163
+
 
164
  else:
165
  bbox_data = Bbox.from_bounds(-4.2, -0.5, 2.5, 5) # replace width and height with the desired values
166
+
 
167
 
168
 
169
  bbox_axes = trans.transform_bbox(bbox_data)
170
 
171
+ if hand == 'R':
172
+ ax.text(s='Against RHH',x=2.95,y=4.65,fontsize=18,fontweight='bold',ha='center')
173
+ else:
174
+ ax.text(s='Against LHH',x=-2.95,y=4.65,fontsize=18,fontweight='bold',ha='center')
175
+
176
+
177
  table = table.apply(lambda x: format_dict[x.name].format(x[0]) if x[0] != '—' else '—', axis=1)
178
  table.index = [label_translation_dict[x] for x in table.index]
179
 
 
209
 
210
  table_plot_pivot = ax.table(cellText=[[format_as_percentage(val) for val in row] for row in pivot_table.select(pivot_table.columns[-4:]).to_numpy()],
211
  colLabels =pivot_table.columns[-4:],
212
+ rowLabels =[' 0 ',' 1 ',' 2 '],
213
  loc='center',
214
  cellLoc='center',
215
  colWidths=[0.3,0.3,0.30,0.3],
 
224
  table_plot_pivot.set_fontsize(min_font_size)
225
 
226
 
227
+ ax.text(x=-1.8, y=5.08, s='Strikes', rotation=90,fontweight='bold')
228
+ ax.text(x=0, y=6.05, s='Balls',fontweight='bold',ha='center')
229
 
230
 
231
  def plot_header(pitcher_id: str, ax: plt.Axes, df_team: pl.DataFrame, df_players: pl.DataFrame,sport_id:int):
functions/pitch_summary_functions.py CHANGED
@@ -1055,8 +1055,7 @@ def stat_summary_table(df: pl.DataFrame,
1055
  player_input: int,
1056
  sport_id: int,
1057
  ax: plt.Axes,
1058
- split: str = 'All',
1059
- game_type: list = ['R']):
1060
  """
1061
  Create a summary table of player statistics.
1062
 
@@ -1073,18 +1072,6 @@ def stat_summary_table(df: pl.DataFrame,
1073
  split : str, optional
1074
  The split type (default is 'All').
1075
  """
1076
-
1077
- type_dict = {'R':'Regular Season',
1078
- 'S':'Spring',
1079
- 'P':'Playoffs' }
1080
-
1081
- split_title = {
1082
- 'all':'',
1083
- 'right':' vs RHH',
1084
- 'left':' vs LHH'
1085
- }
1086
-
1087
-
1088
  # Format start and end dates
1089
  start_date_format = str(pd.to_datetime(df['game_date'][0]).strftime('%m/%d/%Y'))
1090
  end_date_format = str(pd.to_datetime(df['game_date'][-1]).strftime('%m/%d/%Y'))
@@ -1092,11 +1079,9 @@ def stat_summary_table(df: pl.DataFrame,
1092
  # Determine app context based on sport ID
1093
  appContext = 'majorLeague' if sport_id == 1 else 'minorLeague'
1094
 
1095
- game_type_str = ','.join([str(x) for x in game_type])
1096
-
1097
  # Fetch player stats from MLB API
1098
  pitcher_stats_call = requests.get(
1099
- f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},startDate={start_date_format},endDate={end_date_format},gameType=[{game_type_str}])'
1100
  ).json()
1101
 
1102
  # Extract stats and create DataFrame
@@ -1118,11 +1103,11 @@ def stat_summary_table(df: pl.DataFrame,
1118
  if df['game_id'][0] == df['game_id'][-1]:
1119
  pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'earnedRuns', 'hits', 'strikeOuts', 'baseOnBalls', 'hitByPitch', 'homeRuns', 'strikePercentage', 'whiffs'])
1120
  new_column_names = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{ER}$', '$\\bf{H}$', '$\\bf{K}$', '$\\bf{BB}$', '$\\bf{HBP}$', '$\\bf{HR}$', '$\\bf{Strike\%}$', '$\\bf{Whiffs}$']
1121
- title = f'{df["game_date"][0]} vs {df["batter_team"][0]} ({type_dict[game_type[0]]}){split_title[split]}'
1122
- elif sport_id != 1 or game_type[0] in ['S','P']:
1123
  pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'whip', 'era', 'fip', 'k_percent', 'bb_percent', 'k_bb_percent', 'strikePercentage'])
1124
  new_column_names = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{WHIP}$', '$\\bf{ERA}$', '$\\bf{FIP}$', '$\\bf{K\%}$', '$\\bf{BB\%}$', '$\\bf{K-BB\%}$', '$\\bf{Strike\%}$']
1125
- title = f'{df["game_date"][0]} to {df["game_date"][-1]} ({type_dict[game_type[0]]}{split_title[split]})'
1126
  else:
1127
  fangraphs_table(df=df, ax=ax, player_input=player_input, season=int(df['game_date'][0][0:4]), split=split)
1128
  return
@@ -1136,3 +1121,149 @@ def stat_summary_table(df: pl.DataFrame,
1136
  # Add title to the plot
1137
  ax.text(0.5, 0.9, title, va='bottom', ha='center', fontsize=36, fontstyle='italic')
1138
  ax.axis('off')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1055
  player_input: int,
1056
  sport_id: int,
1057
  ax: plt.Axes,
1058
+ split: str = 'All'):
 
1059
  """
1060
  Create a summary table of player statistics.
1061
 
 
1072
  split : str, optional
1073
  The split type (default is 'All').
1074
  """
 
 
 
 
 
 
 
 
 
 
 
 
1075
  # Format start and end dates
1076
  start_date_format = str(pd.to_datetime(df['game_date'][0]).strftime('%m/%d/%Y'))
1077
  end_date_format = str(pd.to_datetime(df['game_date'][-1]).strftime('%m/%d/%Y'))
 
1079
  # Determine app context based on sport ID
1080
  appContext = 'majorLeague' if sport_id == 1 else 'minorLeague'
1081
 
 
 
1082
  # Fetch player stats from MLB API
1083
  pitcher_stats_call = requests.get(
1084
+ f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},startDate={start_date_format},endDate={end_date_format})'
1085
  ).json()
1086
 
1087
  # Extract stats and create DataFrame
 
1103
  if df['game_id'][0] == df['game_id'][-1]:
1104
  pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'earnedRuns', 'hits', 'strikeOuts', 'baseOnBalls', 'hitByPitch', 'homeRuns', 'strikePercentage', 'whiffs'])
1105
  new_column_names = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{ER}$', '$\\bf{H}$', '$\\bf{K}$', '$\\bf{BB}$', '$\\bf{HBP}$', '$\\bf{HR}$', '$\\bf{Strike\%}$', '$\\bf{Whiffs}$']
1106
+ title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'
1107
+ elif sport_id != 1:
1108
  pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'whip', 'era', 'fip', 'k_percent', 'bb_percent', 'k_bb_percent', 'strikePercentage'])
1109
  new_column_names = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{WHIP}$', '$\\bf{ERA}$', '$\\bf{FIP}$', '$\\bf{K\%}$', '$\\bf{BB\%}$', '$\\bf{K-BB\%}$', '$\\bf{Strike\%}$']
1110
+ title = f'{df["game_date"][0]} to {df["game_date"][-1]}'
1111
  else:
1112
  fangraphs_table(df=df, ax=ax, player_input=player_input, season=int(df['game_date'][0][0:4]), split=split)
1113
  return
 
1121
  # Add title to the plot
1122
  ax.text(0.5, 0.9, title, va='bottom', ha='center', fontsize=36, fontstyle='italic')
1123
  ax.axis('off')
1124
+
1125
+
1126
+
1127
# Output column name -> key in a boxscore player's ``stats.pitching`` mapping.
# Insertion order is the column order of the assembled DataFrame.
_BOXSCORE_PITCHING_FIELDS = {
    'summary': 'summary',
    'hits': 'hits',
    'k': 'strikeOuts',
    'bb': 'baseOnBalls',
    'pa': 'battersFaced',
    'hbp': 'hitByPitch',
    'strikes': 'strikes',
    'hr': 'homeRuns',
    'ip': 'inningsPitched',
    'er': 'earnedRuns',
    'pitches': 'pitchesThrown',
}


def _boxscore_pitching_columns(data):
    """Collect one row per (game, pitcher) from the games' boxscores.

    Returns a dict of equal-length lists keyed by column name.  Rows are
    ordered game by game, away pitchers before home pitchers.
    """
    columns = {'game_id': [], 'pitcher_id': []}
    columns.update({name: [] for name in _BOXSCORE_PITCHING_FIELDS})

    for game in data:
        game_pk = game['gameData']['game']['pk']
        teams = game['liveData']['boxscore']['teams']
        for side in ('away', 'home'):
            team = teams[side]
            for pitcher_id in team['pitchers']:
                pitching = team['players']['ID' + str(pitcher_id)]['stats']['pitching']
                columns['game_id'].append(game_pk)
                columns['pitcher_id'].append(pitcher_id)
                for name, key in _BOXSCORE_PITCHING_FIELDS.items():
                    columns[name].append(pitching[key])
    return columns


def stat_daily_summary(df: pl.DataFrame,
                       data: list,
                       player_input: int,
                       sport_id: int,
                       ax: plt.Axes) -> None:
    """
    Draw a boxscore pitching line for one pitcher as a table on ``ax``.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch-level data; ``is_whiff`` is summed for the whiff count and the
        first ``game_date`` / ``batter_team`` values form the title.
    data : list
        Game feed dictionaries; each must provide ``gameData.game.pk`` and
        ``liveData.boxscore.teams.{away,home}`` with ``pitchers`` and
        ``players``.
    player_input : int
        Pitcher id whose boxscore rows are shown.
    sport_id : int
        Unused; kept so the signature matches existing callers.
    ax : plt.Axes
        Axis that receives the table and the title.

    Notes
    -----
    Fixes relative to the first version:

    * Away pitchers' ``pitchesThrown`` was read from the home team's players,
      which gave wrong values and column lengths that disagreed whenever the
      two teams used a different number of pitchers.
    * ``str.concat('%')`` joins a column's values vertically rather than
      appending a suffix; the percent sign is now appended to each value.
    """
    pitcher_summary_df = pl.DataFrame(data=_boxscore_pitching_columns(data))

    # Keep the requested pitcher and add the derived columns.
    strike_rate = (pl.col('strikes') / pl.col('pitches') * 100).round(1).cast(pl.Utf8)
    pitcher_summary_df = pitcher_summary_df.filter(pl.col('pitcher_id') == player_input).with_columns(
        pl.lit(df['is_whiff'].sum()).alias('whiffs'),
        (strike_rate + pl.lit('%')).alias('strikePercentage'),
    )

    pitcher_stats_call_df_small = pitcher_summary_df.select(['ip',
                                                             'pa',
                                                             'er',
                                                             'hits',
                                                             'k',
                                                             'bb',
                                                             'hbp',
                                                             'hr',
                                                             'strikePercentage',
                                                             'whiffs'])

    # Bold mathtext headers, in the same order as the selected columns.
    new_column_names = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{ER}$', '$\\bf{H}$', '$\\bf{K}$',
                        '$\\bf{BB}$', '$\\bf{HBP}$', '$\\bf{HR}$', '$\\bf{Strike\\%}$', '$\\bf{Whiffs}$']
    title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'

    table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(),
                        colLabels=pitcher_stats_call_df_small.columns,
                        cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])

    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    # Row 0 of the matplotlib table holds the column labels.
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    # Add title to the plot
    ax.text(0.5, 0.9, title, va='bottom', ha='center', fontsize=36, fontstyle='italic')
    ax.axis('off')
stuff_model/__pycache__/feature_engineering.cpython-39.pyc CHANGED
Binary files a/stuff_model/__pycache__/feature_engineering.cpython-39.pyc and b/stuff_model/__pycache__/feature_engineering.cpython-39.pyc differ
 
stuff_model/feature_engineering.py CHANGED
@@ -51,7 +51,7 @@ def feature_engineering(df: pl.DataFrame) -> pl.DataFrame:
51
  )
52
 
53
  # Define the pitch types to be considered
54
- pitch_types = ['SI', 'FF', 'FC']
55
 
56
  # Filter the DataFrame to include only the specified pitch types
57
  df_filtered = df.filter(pl.col('pitch_type').is_in(pitch_types))
 
51
  )
52
 
53
  # Define the pitch types to be considered
54
+ pitch_types = ['SI', 'FF', 'FC','FA']
55
 
56
  # Filter the DataFrame to include only the specified pitch types
57
  df_filtered = df.filter(pl.col('pitch_type').is_in(pitch_types))
stuff_model/tj_stuff_plus_pitch.csv CHANGED
@@ -9,6 +9,7 @@ FC,98.83449547008738,5.811964883678063,98.54483029899575,83.20928731685326,119.7
9
  FS,98.25541635267653,6.898952096824192,98.46204303842217,72.25450024197754,114.88400714657823,73.39595959354874,114.78967217449389
10
  FO,98.15224613640243,1.081819065809178,99.94816563615653,94.0023252668585,100.50624750619224,94.0142169475971,100.50513134245217
11
  FF,97.29024735737988,6.078459125845886,97.09670890504734,81.2230917971995,118.10419744965911,81.32311771953398,117.7938724746093
 
12
  SC,97.27958020025409,1.2452898498180456,97.27958020025409,93.536223938276,101.02293646223218,93.54371065079995,101.01544974970822
13
  CH,96.35866365133434,6.178939251378385,95.80884625564597,81.28802319264824,121.14136334013493,82.02275793969746,119.09639344796777
14
  SI,95.14161603816645,4.9734372581529955,95.11657827702109,82.5850956341191,112.99618112461533,82.8856383780296,112.72626192694757
 
9
  FS,98.25541635267653,6.898952096824192,98.46204303842217,72.25450024197754,114.88400714657823,73.39595959354874,114.78967217449389
10
  FO,98.15224613640243,1.081819065809178,99.94816563615653,94.0023252668585,100.50624750619224,94.0142169475971,100.50513134245217
11
  FF,97.29024735737988,6.078459125845886,97.09670890504734,81.2230917971995,118.10419744965911,81.32311771953398,117.7938724746093
12
+ FA,97.29024735737988,6.078459125845886,97.09670890504734,81.2230917971995,118.10419744965911,81.32311771953398,117.7938724746093
13
  SC,97.27958020025409,1.2452898498180456,97.27958020025409,93.536223938276,101.02293646223218,93.54371065079995,101.01544974970822
14
  CH,96.35866365133434,6.178939251378385,95.80884625564597,81.28802319264824,121.14136334013493,82.02275793969746,119.09639344796777
15
  SI,95.14161603816645,4.9734372581529955,95.11657827702109,82.5850956341191,112.99618112461533,82.8856383780296,112.72626192694757