nesticot commited on
Commit
f36f4c7
·
verified ·
1 Parent(s): 473da44

Update functions/pitch_summary_functions.py

Browse files
Files changed (1) hide show
  1. functions/pitch_summary_functions.py +206 -187
functions/pitch_summary_functions.py CHANGED
@@ -189,12 +189,7 @@ def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
189
 
190
  return ax.add_patch(ellipse)
191
  ### VELOCITY KDES ###
192
- def velocity_kdes(df: pl.DataFrame,
193
- ax: plt.Axes,
194
- gs: gridspec.GridSpec,
195
- gs_x: list,
196
- gs_y: list,
197
- fig: plt.Figure):
198
  """
199
  Plot the velocity KDEs for different pitch types.
200
 
@@ -213,46 +208,30 @@ def velocity_kdes(df: pl.DataFrame,
213
  fig : plt.Figure
214
  The figure to plot on.
215
  """
216
- # Join the original DataFrame on 'pitch_type' with sorted counts to reorder
217
- items_in_order = (df
218
- .sort("pitch_count", descending=True)['pitch_type']
219
- .unique(maintain_order=True)
220
- .to_numpy()
221
- )
222
 
223
  # Create the inner subplot inside the outer subplot
224
- import matplotlib.gridspec as gridspec
225
  ax.axis('off')
226
  ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})
227
-
228
  inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1, subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
229
  ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]
230
 
231
  for idx, i in enumerate(items_in_order):
232
  pitch_data = df.filter(pl.col('pitch_type') == i)['start_speed']
233
  if np.unique(pitch_data).size == 1: # Check if all values are the same
234
- ax_top[idx].plot([np.unique(pitch_data), np.unique(pitch_data)], [0, 1], linewidth=4,
235
- color=dict_colour[i], zorder=20)
236
  else:
237
- sns.kdeplot(pitch_data, ax=ax_top[idx], fill=True,
238
- clip=(pitch_data.min(), pitch_data.max()),
239
- color=dict_colour[i])
240
 
241
  # Plot the mean release speed for the current data
242
  df_average = df.filter(df['pitch_type'] == i)['start_speed']
243
- ax_top[idx].plot([df_average.mean(), df_average.mean()],
244
- [ax_top[idx].get_ylim()[0], ax_top[idx].get_ylim()[1]],
245
- color=dict_colour[i],
246
- linestyle='--')
247
- df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')
248
 
249
  # Plot the mean release speed for the statcast group data
 
250
  df_average = df_statcast_group.filter(df_statcast_group['pitch_type'] == i)['release_speed']
251
- ax_top[idx].plot([df_average.mean(), df_average.mean()],
252
- [ax_top[idx].get_ylim()[0], ax_top[idx].get_ylim()[1]],
253
- color=dict_colour[i],
254
- linestyle=':')
255
-
256
 
257
  ax_top[idx].set_xlim(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5)
258
  ax_top[idx].set_xlabel('')
@@ -266,8 +245,7 @@ def velocity_kdes(df: pl.DataFrame,
266
  ax_top[idx].set_xticks(range(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5, 5))
267
  ax_top[idx].set_yticks([])
268
  ax_top[idx].grid(axis='x', linestyle='--')
269
- ax_top[idx].text(-0.01, 0.5, i, transform=ax_top[idx].transAxes,
270
- fontsize=14, va='center', ha='right')
271
 
272
  ax_top[-1].spines['top'].set_visible(False)
273
  ax_top[-1].spines['right'].set_visible(False)
@@ -275,7 +253,6 @@ def velocity_kdes(df: pl.DataFrame,
275
  ax_top[-1].set_xticks(list(range(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5, 5)))
276
  ax_top[-1].set_xlabel('Velocity (mph)')
277
 
278
-
279
  ### TJ STUFF+ ROLLING ###
280
  def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
281
  """
@@ -290,36 +267,29 @@ def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
290
  ax : plt.Axes
291
  The axis to plot on.
292
  """
293
- # Join the original DataFrame on 'pitch_type' with sorted counts to reorder
294
- items_in_order = (
295
- df.sort("pitch_count", descending=True)['pitch_type']
296
- .unique(maintain_order=True)
297
- .to_numpy()
298
- )
299
 
300
  # Plot the rolling average for each pitch type
301
  for i in items_in_order:
302
- if max(df.filter(pl.col('pitch_type') == i)['pitch_count']) >= window:
303
- print('LENGTH',
304
- len(range(1, max(df.filter(pl.col('pitch_type') == i)['pitch_count']) + 1)),
305
- len(df.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window)))
306
  sns.lineplot(
307
- x=range(1, max(df.filter(pl.col('pitch_type') == i)['pitch_count']) + 1),
308
- y=df.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window),
309
  color=dict_colour[i],
310
  ax=ax,
311
  linewidth=3
312
  )
313
 
314
  # Adjust x-axis limits to start from 1
315
- ax.set_xlim(window, max(df['pitch_count']))
316
  ax.set_ylim(70, 130)
317
  ax.set_xlabel('Pitches', fontdict=font_properties_axes)
318
  ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
319
  ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
320
  ax.xaxis.set_major_locator(MaxNLocator(integer=True))
321
 
322
-
323
  ### TJ STUFF+ ROLLING ###
324
  def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
325
  """
@@ -368,15 +338,15 @@ def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
368
  ])
369
 
370
  sns.lineplot(x=range(1, max(df_item['start_number_right']) + 1),
371
- y=df_item.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window),
372
- color=dict_colour[i],
373
- ax=ax, linewidth=3)
374
 
375
  # Highlight missing game data points
376
  for n in range(len(df_item)):
377
  if df_item['game_id'].is_null()[n]:
378
  sns.scatterplot(x=[df_item['start_number_right'][n]],
379
- y=[df_item['tj_stuff_plus'][n]],
380
  color='white',
381
  ec='black',
382
  ax=ax,
@@ -390,7 +360,6 @@ def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
390
  ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
391
  ax.xaxis.set_major_locator(MaxNLocator(integer=True))
392
 
393
-
394
  def break_plot(df: pl.DataFrame, ax: plt.Axes):
395
  """
396
  Plot the pitch breaks for different pitch types.
@@ -416,9 +385,9 @@ def break_plot(df: pl.DataFrame, ax: plt.Axes):
416
 
417
  # Plot scatter plot for pitch breaks
418
  if df['pitcher_hand'][0] == 'R':
419
- sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'] * 1, hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)
420
- if df['pitcher_hand'][0] == 'L':
421
- sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'] * 1, hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)
422
 
423
  # Set axis limits
424
  ax.set_xlim((-25, 25))
@@ -446,7 +415,7 @@ def break_plot(df: pl.DataFrame, ax: plt.Axes):
446
  bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
447
  ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
448
  bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
449
- if df['pitcher_hand'][0] == 'L':
450
  ax.invert_xaxis()
451
  ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
452
  bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
@@ -705,58 +674,81 @@ def summary_table(df: pl.DataFrame, ax: plt.Axes):
705
  fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2, prop={'family': 'calibi', 'size': 20})
706
  ax.axis('off')
707
 
708
- def plot_footer(ax:plt.Axes):
709
- # Add footer text
 
 
 
 
 
 
 
 
710
  ax.text(0, 1, 'By: @TJStats', ha='left', va='top', fontsize=24)
711
  ax.text(0.5, 0.25,
712
- '''
713
- Colour Coding Compares to League Average By Pitch
714
- tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
715
- tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
716
- Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type
717
- ''',
718
- ha='center', va='bottom', fontsize=12)
719
  ax.text(1, 1, 'Data: MLB, Fangraphs\nImages: MLB, ESPN', ha='right', va='top', fontsize=24)
720
  ax.axis('off')
721
 
722
-
723
  # Function to get an image from a URL and display it on the given axis
724
- def player_headshot(player_input: str, ax: plt.Axes, sport_id: int,season: int):
725
- # Construct the URL for the player's headshot image
726
- print('SPORT ID',sport_id)
 
 
 
 
 
 
 
 
 
 
 
 
727
  try:
 
728
  if int(sport_id) == 1:
729
- url = f'https://img.mlbstatic.com/mlb-photos/image/'\
730
- f'upload/d_people:generic:headshot:67:current.png'\
731
- f'/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png'
732
-
733
- # Send a GET request to the URL
734
- response = requests.get(url)
735
-
736
- # Open the image from the response content
737
- img = Image.open(BytesIO(response.content))
738
-
739
-
740
- # Display the image on the axis
741
- ax.set_xlim(0, 1.3)
742
- ax.set_ylim(0, 1)
743
- ax.imshow(img, extent=[0, 1, 0, 1], origin='upper')
744
  else:
745
  url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'
746
- response = requests.get(url)
747
- img = Image.open(BytesIO(response.content))
748
- ax.set_xlim(0, 1.3)
749
- ax.set_ylim(0, 1)
750
- ax.imshow(img, extent=[1/6, 5/6, 0, 1], origin='upper')
751
- except PIL.UnidentifiedImageError as e:
 
 
 
 
752
  ax.axis('off')
753
  return
754
 
755
  # Turn off the axis
756
  ax.axis('off')
757
 
 
 
 
758
 
759
- def player_bio(pitcher_id: str, ax: plt.Axes,sport_id: int,year_input: int):
 
 
 
 
 
 
 
 
 
 
760
  # Construct the URL to fetch player data
761
  url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
762
 
@@ -782,14 +774,28 @@ def player_bio(pitcher_id: str, ax: plt.Axes,sport_id: int,year_input: int):
782
  df_sport_id = pl.DataFrame(response['sports'])
783
  abb = df_sport_id.filter(pl.col('id') == sport_id)['abbreviation'][0]
784
 
 
785
  ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')
786
 
787
  # Turn off the axis
788
  ax.axis('off')
789
 
 
 
 
790
 
791
- def plot_logo(pitcher_id: str, ax: plt.Axes,df_team: pl.DataFrame,df_players : pl.DataFrame):
792
- # List of MLB teams and their corresponding ESPN logo URLs
 
 
 
 
 
 
 
 
 
 
793
  mlb_teams = [
794
  {"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"},
795
  {"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"},
@@ -823,11 +829,13 @@ def plot_logo(pitcher_id: str, ax: plt.Axes,df_team: pl.DataFrame,df_players : p
823
  {"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"},
824
  {"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"},
825
  ]
 
826
  try:
827
  # Create a DataFrame from the list of dictionaries
828
  df_image = pd.DataFrame(mlb_teams)
829
- image_dict = df_image.set_index('team')['logo_url'].to_dict()
830
-
 
831
  team_id = df_players.filter(pl.col('player_id') == pitcher_id)['team'][0]
832
 
833
  # Construct the URL to fetch team data
@@ -839,8 +847,7 @@ def plot_logo(pitcher_id: str, ax: plt.Axes,df_team: pl.DataFrame,df_players : p
839
  # Extract the team abbreviation
840
  if data_team['teams'][0]['id'] in df_team['parent_org_id']:
841
  team_abb = df_team.filter(pl.col('team_id') == data_team['teams'][0]['id'])['parent_org_abbreviation'][0]
842
-
843
- else:
844
  team_abb = df_team.filter(pl.col('parent_org_id') == data_team['teams'][0]['parentOrgId'])['parent_org_abbreviation'][0]
845
 
846
  # Get the logo URL from the image dictionary using the team abbreviation
@@ -859,7 +866,7 @@ def plot_logo(pitcher_id: str, ax: plt.Axes,df_team: pl.DataFrame,df_players : p
859
 
860
  # Turn off the axis
861
  ax.axis('off')
862
- except KeyError as e:
863
  ax.axis('off')
864
  return
865
 
@@ -910,29 +917,55 @@ def fangraphs_pitching_leaderboards(season: int,
910
  df = pl.DataFrame(data=data['data'], infer_schema_length=1000)
911
  return df
912
 
913
- def fangrpahs_splits_scrape(player_input: str, year_input: int, start_date: str, end_date: str, split: str):
914
- ### FANGRAPHS SPLITS SCRAPE ###
915
- split_dict = {'all':[],
916
- 'left':['5'],
917
- 'right':['6']
918
- }
919
- import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
920
  url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
921
 
922
- fg_id = str(fangraphs_pitching_leaderboards(year_input,
923
- split='All',
924
- start_date = f'{year_input}-01-01',
925
- end_date = f'{year_input}-12-31').filter(pl.col('xMLBAMID')==player_input)['playerid'][0])
926
- print('start_date',start_date)
927
- print('start_date',end_date)
 
 
 
928
  payload = {
929
- "strPlayerId": str(fg_id),
930
  "strSplitArr": split_dict[split],
931
  "strGroup": "season",
932
  "strPosition": "P",
933
  "strType": "2",
934
- "strStartDate": str(pd.to_datetime(start_date).strftime('%Y-%m-%d')),
935
- "strEndDate": str(pd.to_datetime(end_date).strftime('%Y-%m-%d')),
936
  "strSplitTeams": False,
937
  "dctFilters": [],
938
  "strStatType": "player",
@@ -945,39 +978,23 @@ def fangrpahs_splits_scrape(player_input: str, year_input: int, start_date: str,
945
  "arrWxElevation": None,
946
  "arrWxWindSpeed": None
947
  }
948
- json_payload = json.dumps(payload)
949
- headers = {'Content-Type': 'application/json'}
950
- response = requests.post(url, data=json_payload, headers=headers)
951
  data_pull = response.json()['data'][0]
952
 
953
- payload_advanced = {
954
- "strPlayerId": str(fg_id),
955
- "strSplitArr": split_dict[split],
956
- "strGroup": "season",
957
- "strPosition": "P",
958
- "strType": "1",
959
- "strStartDate": str(pd.to_datetime(start_date).strftime('%Y-%m-%d')),
960
- "strEndDate": str(pd.to_datetime(end_date).strftime('%Y-%m-%d')),
961
- "strSplitTeams": False,
962
- "dctFilters": [],
963
- "strStatType": "player",
964
- "strAutoPt": False,
965
- "arrPlayerId": [],
966
- "strSplitArrPitch": [],
967
- "arrWxTemperature": None,
968
- "arrWxPressure": None,
969
- "arrWxAirDensity": None,
970
- "arrWxElevation": None,
971
- "arrWxWindSpeed": None
972
- }
973
 
974
- json_payload_advanced = json.dumps(payload_advanced)
975
- headers = {'Content-Type': 'application/json'}
976
- response_advanced = requests.post(url, data=json_payload_advanced, headers=headers)
977
  data_pull_advanced = response_advanced.json()['data'][0]
978
 
 
979
  data_pull.update(data_pull_advanced)
980
  df_pull = pl.DataFrame(data_pull)
 
981
  return df_pull
982
 
983
 
@@ -1002,20 +1019,11 @@ def fangraphs_table(df: pl.DataFrame,
1002
  start_date = df['game_date'][0]
1003
  end_date = df['game_date'][-1]
1004
 
1005
- # Fetch Fangraphs pitching leaderboards data
1006
- # df_fangraphs = fangraphs_pitching_leaderboards(season=season,
1007
- # split=split,
1008
- # start_date=start_date,
1009
- # end_date=end_date).filter(pl.col('xMLBAMID') == player_input)
1010
-
1011
- # df_fangraphs = df_fangraphs.with_columns(
1012
- # ((pl.col('Strikes')/pl.col('Pitches'))).alias('strikePercentage'),
1013
-
1014
- # )
1015
- df_fangraphs = fangrpahs_splits_scrape(player_input=player_input,
1016
  year_input=season,
1017
  start_date=start_date,
1018
- end_date=start_date,
1019
  split=split)
1020
 
1021
  # Select relevant columns for the table
@@ -1044,61 +1052,72 @@ def fangraphs_table(df: pl.DataFrame,
1044
 
1045
 
1046
  def stat_summary_table(df: pl.DataFrame,
1047
- player_input: int,
1048
- sport_id: int,
1049
- ax: plt.Axes,
1050
- split: str = 'All'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1051
  start_date_format = str(pd.to_datetime(df['game_date'][0]).strftime('%m/%d/%Y'))
1052
  end_date_format = str(pd.to_datetime(df['game_date'][-1]).strftime('%m/%d/%Y'))
1053
 
1054
- if sport_id == 1:
1055
- appContext = 'majorLeague'
1056
- else:
1057
- appContext = 'minorLeague'
 
 
 
1058
 
1059
- pitcher_stats_call = requests.get(f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},startDate={start_date_format},endDate={end_date_format})').json()
1060
  pitcher_stats_call_header = [x for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']]
1061
  pitcher_stats_call_values = [pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat'][x] for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']]
1062
- pitcher_stats_call_df = pl.DataFrame(data=dict(zip(pitcher_stats_call_header,pitcher_stats_call_values)))
1063
 
 
1064
  pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
1065
  pl.lit(df['is_whiff'].sum()).alias('whiffs'),
1066
- (pl.col('strikeOuts')/pl.col('battersFaced')*100).round(1).cast(pl.Utf8).str.concat('%').alias('k_percent'),
1067
- (pl.col('baseOnBalls')/pl.col('battersFaced')*100).round(1).cast(pl.Utf8).str.concat('%').alias('bb_percent'),
1068
- ((pl.col('strikeOuts') - pl.col('baseOnBalls'))/pl.col('battersFaced')*100).round(1).cast(pl.Utf8).str.concat('%').alias('k_bb_percent'),
1069
- (((pl.col('homeRuns')*13 + 3*((pl.col('baseOnBalls'))+(pl.col('hitByPitch')))-2*(pl.col('strikeOuts'))))/((pl.col('outs'))/3)+3.15).round(2).map_elements(lambda x: f"{x:.2f}") .alias('fip'),
1070
- ((pl.col('strikes')/pl.col('numberOfPitches')*100)).round(1).cast(pl.Utf8).str.concat('%').alias('strikePercentage'),
1071
  )
1072
 
1073
-
1074
  if df['game_id'][0] == df['game_id'][-1]:
1075
- pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched','battersFaced','earnedRuns','hits','strikeOuts','baseOnBalls','hitByPitch','homeRuns','strikePercentage','whiffs'])
1076
- new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{ER}$','$\\bf{H}$','$\\bf{K}$','$\\bf{BB}$','$\\bf{HBP}$','$\\bf{HR}$','$\\bf{Strike\%}$','$\\bf{Whiffs}$']
1077
  title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'
1078
  elif sport_id != 1:
1079
- pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched','battersFaced','whip','era','fip','k_percent','bb_percent','k_bb_percent','strikePercentage'])
1080
- new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{WHIP}$','$\\bf{ERA}$','$\\bf{FIP}$','$\\bf{K\%}$','$\\bf{BB\%}$','$\\bf{K-BB\%}$','$\\bf{Strike\%}$']
1081
  title = f'{df["game_date"][0]} to {df["game_date"][-1]}'
1082
  else:
1083
- fangraphs_table(df=df,
1084
- ax=ax,
1085
- player_input=player_input,
1086
- season=2024,
1087
- split=split)
1088
  return
1089
-
1090
- import matplotlib.pyplot as plt
1091
- table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(), colLabels=pitcher_stats_call_df_small.columns, cellLoc='center',
1092
- bbox=[0.0, 0.1, 1, 0.7])
1093
-
1094
- min_font_size = 20
1095
- table_fg.set_fontsize(min_font_size)
1096
 
1097
- # #new_column_names = ['Pitch Name', 'Pitch%', 'Velocity', 'Spin Rate','Exit Velocity', 'Whiff%', 'CSW%']
 
 
1098
  for i, col_name in enumerate(new_column_names):
1099
  table_fg.get_celld()[(0, i)].get_text().set_text(col_name)
1100
-
1101
- ax.text(0.5, 0.9, title, va='bottom', ha='center',
1102
- fontsize=36, fontstyle='italic')
1103
 
 
 
1104
  ax.axis('off')
 
189
 
190
  return ax.add_patch(ellipse)
191
  ### VELOCITY KDES ###
192
+ def velocity_kdes(df: pl.DataFrame, ax: plt.Axes, gs: gridspec.GridSpec, gs_x: list, gs_y: list, fig: plt.Figure):
 
 
 
 
 
193
  """
194
  Plot the velocity KDEs for different pitch types.
195
 
 
208
  fig : plt.Figure
209
  The figure to plot on.
210
  """
211
+ # Get unique pitch types sorted by pitch count
212
+ items_in_order = df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy()
 
 
 
 
213
 
214
  # Create the inner subplot inside the outer subplot
 
215
  ax.axis('off')
216
  ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})
 
217
  inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1, subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
218
  ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]
219
 
220
  for idx, i in enumerate(items_in_order):
221
  pitch_data = df.filter(pl.col('pitch_type') == i)['start_speed']
222
  if np.unique(pitch_data).size == 1: # Check if all values are the same
223
+ ax_top[idx].plot([np.unique(pitch_data), np.unique(pitch_data)], [0, 1], linewidth=4, color=dict_colour[i], zorder=20)
 
224
  else:
225
+ sns.kdeplot(pitch_data, ax=ax_top[idx], fill=True, clip=(pitch_data.min(), pitch_data.max()), color=dict_colour[i])
 
 
226
 
227
  # Plot the mean release speed for the current data
228
  df_average = df.filter(df['pitch_type'] == i)['start_speed']
229
+ ax_top[idx].plot([df_average.mean(), df_average.mean()], [ax_top[idx].get_ylim()[0], ax_top[idx].get_ylim()[1]], color=dict_colour[i], linestyle='--')
 
 
 
 
230
 
231
  # Plot the mean release speed for the statcast group data
232
+ df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')
233
  df_average = df_statcast_group.filter(df_statcast_group['pitch_type'] == i)['release_speed']
234
+ ax_top[idx].plot([df_average.mean(), df_average.mean()], [ax_top[idx].get_ylim()[0], ax_top[idx].get_ylim()[1]], color=dict_colour[i], linestyle=':')
 
 
 
 
235
 
236
  ax_top[idx].set_xlim(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5)
237
  ax_top[idx].set_xlabel('')
 
245
  ax_top[idx].set_xticks(range(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5, 5))
246
  ax_top[idx].set_yticks([])
247
  ax_top[idx].grid(axis='x', linestyle='--')
248
+ ax_top[idx].text(-0.01, 0.5, i, transform=ax_top[idx].transAxes, fontsize=14, va='center', ha='right')
 
249
 
250
  ax_top[-1].spines['top'].set_visible(False)
251
  ax_top[-1].spines['right'].set_visible(False)
 
253
  ax_top[-1].set_xticks(list(range(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5, 5)))
254
  ax_top[-1].set_xlabel('Velocity (mph)')
255
 
 
256
  ### TJ STUFF+ ROLLING ###
257
  def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
258
  """
 
267
  ax : plt.Axes
268
  The axis to plot on.
269
  """
270
+ # Get unique pitch types sorted by pitch count
271
+ items_in_order = df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy()
 
 
 
 
272
 
273
  # Plot the rolling average for each pitch type
274
  for i in items_in_order:
275
+ pitch_data = df.filter(pl.col('pitch_type') == i)
276
+ if pitch_data['pitch_count'].max() >= window:
 
 
277
  sns.lineplot(
278
+ x=range(1, pitch_data['pitch_count'].max() + 1),
279
+ y=pitch_data['tj_stuff_plus'].rolling_mean(window),
280
  color=dict_colour[i],
281
  ax=ax,
282
  linewidth=3
283
  )
284
 
285
  # Adjust x-axis limits to start from 1
286
+ ax.set_xlim(window, df['pitch_count'].max())
287
  ax.set_ylim(70, 130)
288
  ax.set_xlabel('Pitches', fontdict=font_properties_axes)
289
  ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
290
  ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
291
  ax.xaxis.set_major_locator(MaxNLocator(integer=True))
292
 
 
293
  ### TJ STUFF+ ROLLING ###
294
  def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
295
  """
 
338
  ])
339
 
340
  sns.lineplot(x=range(1, max(df_item['start_number_right']) + 1),
341
+ y=df_item.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window),
342
+ color=dict_colour[i],
343
+ ax=ax, linewidth=3)
344
 
345
  # Highlight missing game data points
346
  for n in range(len(df_item)):
347
  if df_item['game_id'].is_null()[n]:
348
  sns.scatterplot(x=[df_item['start_number_right'][n]],
349
+ y=[df_item['tj_stuff_plus'].rolling_mean(window)[n]],
350
  color='white',
351
  ec='black',
352
  ax=ax,
 
360
  ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
361
  ax.xaxis.set_major_locator(MaxNLocator(integer=True))
362
 
 
363
  def break_plot(df: pl.DataFrame, ax: plt.Axes):
364
  """
365
  Plot the pitch breaks for different pitch types.
 
385
 
386
  # Plot scatter plot for pitch breaks
387
  if df['pitcher_hand'][0] == 'R':
388
+ sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)
389
+ elif df['pitcher_hand'][0] == 'L':
390
+ sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)
391
 
392
  # Set axis limits
393
  ax.set_xlim((-25, 25))
 
415
  bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
416
  ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
417
  bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
418
+ elif df['pitcher_hand'][0] == 'L':
419
  ax.invert_xaxis()
420
  ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
421
  bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
 
674
  fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2, prop={'family': 'calibi', 'size': 20})
675
  ax.axis('off')
676
 
677
+ def plot_footer(ax: plt.Axes):
678
+ """
679
+ Add footer text to the plot.
680
+
681
+ Parameters
682
+ ----------
683
+ ax : plt.Axes
684
+ The axis to add the footer text to.
685
+ """
686
+ # Add footer text
687
  ax.text(0, 1, 'By: @TJStats', ha='left', va='top', fontsize=24)
688
  ax.text(0.5, 0.25,
689
+ '''
690
+ Colour Coding Compares to League Average By Pitch
691
+ tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
692
+ tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
693
+ Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type
694
+ ''',
695
+ ha='center', va='bottom', fontsize=12)
696
  ax.text(1, 1, 'Data: MLB, Fangraphs\nImages: MLB, ESPN', ha='right', va='top', fontsize=24)
697
  ax.axis('off')
698
 
 
699
  # Function to get an image from a URL and display it on the given axis
700
+ def player_headshot(player_input: str, ax: plt.Axes, sport_id: int, season: int):
701
+ """
702
+ Display the player's headshot image on the given axis.
703
+
704
+ Parameters
705
+ ----------
706
+ player_input : str
707
+ The player's ID.
708
+ ax : plt.Axes
709
+ The axis to display the image on.
710
+ sport_id : int
711
+ The sport ID (1 for MLB, other for minor leagues).
712
+ season : int
713
+ The season year.
714
+ """
715
  try:
716
+ # Construct the URL for the player's headshot image based on sport ID
717
  if int(sport_id) == 1:
718
+ url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
  else:
720
  url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'
721
+
722
+ # Send a GET request to the URL and open the image from the response content
723
+ response = requests.get(url)
724
+ img = Image.open(BytesIO(response.content))
725
+
726
+ # Display the image on the axis
727
+ ax.set_xlim(0, 1.3)
728
+ ax.set_ylim(0, 1)
729
+ ax.imshow(img, extent=[0, 1, 0, 1] if sport_id == 1 else [1/6, 5/6, 0, 1], origin='upper')
730
+ except PIL.UnidentifiedImageError:
731
  ax.axis('off')
732
  return
733
 
734
  # Turn off the axis
735
  ax.axis('off')
736
 
737
+ def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
738
+ """
739
+ Display the player's bio information on the given axis.
740
 
741
+ Parameters
742
+ ----------
743
+ pitcher_id : str
744
+ The player's ID.
745
+ ax : plt.Axes
746
+ The axis to display the bio information on.
747
+ sport_id : int
748
+ The sport ID (1 for MLB, other for minor leagues).
749
+ year_input : int
750
+ The season year.
751
+ """
752
  # Construct the URL to fetch player data
753
  url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
754
 
 
774
  df_sport_id = pl.DataFrame(response['sports'])
775
  abb = df_sport_id.filter(pl.col('id') == sport_id)['abbreviation'][0]
776
 
777
+ # Display the season and sport abbreviation
778
  ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')
779
 
780
  # Turn off the axis
781
  ax.axis('off')
782
 
783
+ def plot_logo(pitcher_id: str, ax: plt.Axes, df_team: pl.DataFrame, df_players: pl.DataFrame):
784
+ """
785
+ Display the team logo for the given pitcher on the specified axis.
786
 
787
+ Parameters
788
+ ----------
789
+ pitcher_id : str
790
+ The ID of the pitcher.
791
+ ax : plt.Axes
792
+ The axis to display the logo on.
793
+ df_team : pl.DataFrame
794
+ The DataFrame containing team data.
795
+ df_players : pl.DataFrame
796
+ The DataFrame containing player data.
797
+ """
798
+ # List of MLB teams and their corresponding ESPN logo URLs
799
  mlb_teams = [
800
  {"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"},
801
  {"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"},
 
829
  {"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"},
830
  {"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"},
831
  ]
832
+
833
  try:
834
  # Create a DataFrame from the list of dictionaries
835
  df_image = pd.DataFrame(mlb_teams)
836
+ image_dict = df_image.set_index('team')['logo_url'].to_dict()
837
+
838
+ # Get the team ID for the given pitcher
839
  team_id = df_players.filter(pl.col('player_id') == pitcher_id)['team'][0]
840
 
841
  # Construct the URL to fetch team data
 
847
  # Extract the team abbreviation
848
  if data_team['teams'][0]['id'] in df_team['parent_org_id']:
849
  team_abb = df_team.filter(pl.col('team_id') == data_team['teams'][0]['id'])['parent_org_abbreviation'][0]
850
+ else:
 
851
  team_abb = df_team.filter(pl.col('parent_org_id') == data_team['teams'][0]['parentOrgId'])['parent_org_abbreviation'][0]
852
 
853
  # Get the logo URL from the image dictionary using the team abbreviation
 
866
 
867
  # Turn off the axis
868
  ax.axis('off')
869
+ except KeyError:
870
  ax.axis('off')
871
  return
872
 
 
917
  df = pl.DataFrame(data=data['data'], infer_schema_length=1000)
918
  return df
919
 
920
+ def fangraphs_splits_scrape(player_input: str, year_input: int, start_date: str, end_date: str, split: str) -> pl.DataFrame:
921
+ """
922
+ Scrape Fangraphs splits data for a specific player.
923
+
924
+ Parameters
925
+ ----------
926
+ player_input : str
927
+ The player's ID.
928
+ year_input : int
929
+ The season year.
930
+ start_date : str
931
+ The start date for the data.
932
+ end_date : str
933
+ The end date for the data.
934
+ split : str
935
+ The split type (e.g., 'all', 'left', 'right').
936
+
937
+ Returns
938
+ -------
939
+ pl.DataFrame
940
+ The DataFrame containing the splits data.
941
+ """
942
+ split_dict = {
943
+ 'all': [],
944
+ 'left': ['5'],
945
+ 'right': ['6']
946
+ }
947
+
948
+
949
+
950
  url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
951
 
952
+ # Get Fangraphs player ID
953
+ fg_id = str(fangraphs_pitching_leaderboards(
954
+ year_input,
955
+ split='All',
956
+ start_date=f'{year_input}-01-01',
957
+ end_date=f'{year_input}-12-31'
958
+ ).filter(pl.col('xMLBAMID') == player_input)['playerid'][0])
959
+
960
+ # Payload for basic stats
961
  payload = {
962
+ "strPlayerId": fg_id,
963
  "strSplitArr": split_dict[split],
964
  "strGroup": "season",
965
  "strPosition": "P",
966
  "strType": "2",
967
+ "strStartDate": pd.to_datetime(start_date).strftime('%Y-%m-%d'),
968
+ "strEndDate": pd.to_datetime(end_date).strftime('%Y-%m-%d'),
969
  "strSplitTeams": False,
970
  "dctFilters": [],
971
  "strStatType": "player",
 
978
  "arrWxElevation": None,
979
  "arrWxWindSpeed": None
980
  }
981
+
982
+ # Fetch basic stats
983
+ response = requests.post(url, data=json.dumps(payload), headers={'Content-Type': 'application/json'})
984
  data_pull = response.json()['data'][0]
985
 
986
+ # Payload for advanced stats
987
+ payload_advanced = payload.copy()
988
+ payload_advanced["strType"] = "1"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
989
 
990
+ # Fetch advanced stats
991
+ response_advanced = requests.post(url, data=json.dumps(payload_advanced), headers={'Content-Type': 'application/json'})
 
992
  data_pull_advanced = response_advanced.json()['data'][0]
993
 
994
+ # Combine basic and advanced stats
995
  data_pull.update(data_pull_advanced)
996
  df_pull = pl.DataFrame(data_pull)
997
+
998
  return df_pull
999
 
1000
 
 
1019
  start_date = df['game_date'][0]
1020
  end_date = df['game_date'][-1]
1021
 
1022
+ # Fetch Fangraphs splits data
1023
+ df_fangraphs = fangraphs_splits_scrape(player_input=player_input,
 
 
 
 
 
 
 
 
 
1024
  year_input=season,
1025
  start_date=start_date,
1026
+ end_date=end_date,
1027
  split=split)
1028
 
1029
  # Select relevant columns for the table
 
1052
 
1053
 
1054
  def stat_summary_table(df: pl.DataFrame,
1055
+ player_input: int,
1056
+ sport_id: int,
1057
+ ax: plt.Axes,
1058
+ split: str = 'All'):
1059
+ """
1060
+ Create a summary table of player statistics.
1061
+
1062
+ Parameters
1063
+ ----------
1064
+ df : pl.DataFrame
1065
+ The DataFrame containing pitch data.
1066
+ player_input : int
1067
+ The player's ID.
1068
+ sport_id : int
1069
+ The sport ID (1 for MLB, other for minor leagues).
1070
+ ax : plt.Axes
1071
+ The axis to plot the table on.
1072
+ split : str, optional
1073
+ The split type (default is 'All').
1074
+ """
1075
+ # Format start and end dates
1076
  start_date_format = str(pd.to_datetime(df['game_date'][0]).strftime('%m/%d/%Y'))
1077
  end_date_format = str(pd.to_datetime(df['game_date'][-1]).strftime('%m/%d/%Y'))
1078
 
1079
+ # Determine app context based on sport ID
1080
+ appContext = 'majorLeague' if sport_id == 1 else 'minorLeague'
1081
+
1082
+ # Fetch player stats from MLB API
1083
+ pitcher_stats_call = requests.get(
1084
+ f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},startDate={start_date_format},endDate={end_date_format})'
1085
+ ).json()
1086
 
1087
+ # Extract stats and create DataFrame
1088
  pitcher_stats_call_header = [x for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']]
1089
  pitcher_stats_call_values = [pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat'][x] for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']]
1090
+ pitcher_stats_call_df = pl.DataFrame(data=dict(zip(pitcher_stats_call_header, pitcher_stats_call_values)))
1091
 
1092
+ # Add additional calculated columns
1093
  pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
1094
  pl.lit(df['is_whiff'].sum()).alias('whiffs'),
1095
+ (pl.col('strikeOuts') / pl.col('battersFaced') * 100).round(1).cast(pl.Utf8).str.concat('%').alias('k_percent'),
1096
+ (pl.col('baseOnBalls') / pl.col('battersFaced') * 100).round(1).cast(pl.Utf8).str.concat('%').alias('bb_percent'),
1097
+ ((pl.col('strikeOuts') - pl.col('baseOnBalls')) / pl.col('battersFaced') * 100).round(1).cast(pl.Utf8).str.concat('%').alias('k_bb_percent'),
1098
+ (((pl.col('homeRuns') * 13 + 3 * ((pl.col('baseOnBalls')) + (pl.col('hitByPitch'))) - 2 * (pl.col('strikeOuts')))) / ((pl.col('outs')) / 3) + 3.15).round(2).map_elements(lambda x: f"{x:.2f}").alias('fip'),
1099
+ ((pl.col('strikes') / pl.col('numberOfPitches') * 100)).round(1).cast(pl.Utf8).str.concat('%').alias('strikePercentage'),
1100
  )
1101
 
1102
+ # Determine columns and title based on game count and sport ID
1103
  if df['game_id'][0] == df['game_id'][-1]:
1104
+ pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'earnedRuns', 'hits', 'strikeOuts', 'baseOnBalls', 'hitByPitch', 'homeRuns', 'strikePercentage', 'whiffs'])
1105
+ new_column_names = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{ER}$', '$\\bf{H}$', '$\\bf{K}$', '$\\bf{BB}$', '$\\bf{HBP}$', '$\\bf{HR}$', '$\\bf{Strike\%}$', '$\\bf{Whiffs}$']
1106
  title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'
1107
  elif sport_id != 1:
1108
+ pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'whip', 'era', 'fip', 'k_percent', 'bb_percent', 'k_bb_percent', 'strikePercentage'])
1109
+ new_column_names = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{WHIP}$', '$\\bf{ERA}$', '$\\bf{FIP}$', '$\\bf{K\%}$', '$\\bf{BB\%}$', '$\\bf{K-BB\%}$', '$\\bf{Strike\%}$']
1110
  title = f'{df["game_date"][0]} to {df["game_date"][-1]}'
1111
  else:
1112
+ fangraphs_table(df=df, ax=ax, player_input=player_input, season=int(df['game_date'][0][0:4]), split=split)
 
 
 
 
1113
  return
 
 
 
 
 
 
 
1114
 
1115
+ # Create and format the table
1116
+ table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(), colLabels=pitcher_stats_call_df_small.columns, cellLoc='center', bbox=[0.0, 0.1, 1, 0.7])
1117
+ table_fg.set_fontsize(20)
1118
  for i, col_name in enumerate(new_column_names):
1119
  table_fg.get_celld()[(0, i)].get_text().set_text(col_name)
 
 
 
1120
 
1121
+ # Add title to the plot
1122
+ ax.text(0.5, 0.9, title, va='bottom', ha='center', fontsize=36, fontstyle='italic')
1123
  ax.axis('off')