Update functions/pitch_summary_functions.py
Browse files- functions/pitch_summary_functions.py +206 -187
functions/pitch_summary_functions.py
CHANGED
|
@@ -189,12 +189,7 @@ def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
|
|
| 189 |
|
| 190 |
return ax.add_patch(ellipse)
|
| 191 |
### VELOCITY KDES ###
|
| 192 |
-
def velocity_kdes(df: pl.DataFrame,
|
| 193 |
-
ax: plt.Axes,
|
| 194 |
-
gs: gridspec.GridSpec,
|
| 195 |
-
gs_x: list,
|
| 196 |
-
gs_y: list,
|
| 197 |
-
fig: plt.Figure):
|
| 198 |
"""
|
| 199 |
Plot the velocity KDEs for different pitch types.
|
| 200 |
|
|
@@ -213,46 +208,30 @@ def velocity_kdes(df: pl.DataFrame,
|
|
| 213 |
fig : plt.Figure
|
| 214 |
The figure to plot on.
|
| 215 |
"""
|
| 216 |
-
#
|
| 217 |
-
items_in_order = (
|
| 218 |
-
.sort("pitch_count", descending=True)['pitch_type']
|
| 219 |
-
.unique(maintain_order=True)
|
| 220 |
-
.to_numpy()
|
| 221 |
-
)
|
| 222 |
|
| 223 |
# Create the inner subplot inside the outer subplot
|
| 224 |
-
import matplotlib.gridspec as gridspec
|
| 225 |
ax.axis('off')
|
| 226 |
ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})
|
| 227 |
-
|
| 228 |
inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1, subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
|
| 229 |
ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]
|
| 230 |
|
| 231 |
for idx, i in enumerate(items_in_order):
|
| 232 |
pitch_data = df.filter(pl.col('pitch_type') == i)['start_speed']
|
| 233 |
if np.unique(pitch_data).size == 1: # Check if all values are the same
|
| 234 |
-
ax_top[idx].plot([np.unique(pitch_data), np.unique(pitch_data)], [0, 1], linewidth=4,
|
| 235 |
-
color=dict_colour[i], zorder=20)
|
| 236 |
else:
|
| 237 |
-
sns.kdeplot(pitch_data, ax=ax_top[idx], fill=True,
|
| 238 |
-
clip=(pitch_data.min(), pitch_data.max()),
|
| 239 |
-
color=dict_colour[i])
|
| 240 |
|
| 241 |
# Plot the mean release speed for the current data
|
| 242 |
df_average = df.filter(df['pitch_type'] == i)['start_speed']
|
| 243 |
-
ax_top[idx].plot([df_average.mean(), df_average.mean()],
|
| 244 |
-
[ax_top[idx].get_ylim()[0], ax_top[idx].get_ylim()[1]],
|
| 245 |
-
color=dict_colour[i],
|
| 246 |
-
linestyle='--')
|
| 247 |
-
df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')
|
| 248 |
|
| 249 |
# Plot the mean release speed for the statcast group data
|
|
|
|
| 250 |
df_average = df_statcast_group.filter(df_statcast_group['pitch_type'] == i)['release_speed']
|
| 251 |
-
ax_top[idx].plot([df_average.mean(), df_average.mean()],
|
| 252 |
-
[ax_top[idx].get_ylim()[0], ax_top[idx].get_ylim()[1]],
|
| 253 |
-
color=dict_colour[i],
|
| 254 |
-
linestyle=':')
|
| 255 |
-
|
| 256 |
|
| 257 |
ax_top[idx].set_xlim(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5)
|
| 258 |
ax_top[idx].set_xlabel('')
|
|
@@ -266,8 +245,7 @@ def velocity_kdes(df: pl.DataFrame,
|
|
| 266 |
ax_top[idx].set_xticks(range(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5, 5))
|
| 267 |
ax_top[idx].set_yticks([])
|
| 268 |
ax_top[idx].grid(axis='x', linestyle='--')
|
| 269 |
-
ax_top[idx].text(-0.01, 0.5, i, transform=ax_top[idx].transAxes,
|
| 270 |
-
fontsize=14, va='center', ha='right')
|
| 271 |
|
| 272 |
ax_top[-1].spines['top'].set_visible(False)
|
| 273 |
ax_top[-1].spines['right'].set_visible(False)
|
|
@@ -275,7 +253,6 @@ def velocity_kdes(df: pl.DataFrame,
|
|
| 275 |
ax_top[-1].set_xticks(list(range(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5, 5)))
|
| 276 |
ax_top[-1].set_xlabel('Velocity (mph)')
|
| 277 |
|
| 278 |
-
|
| 279 |
### TJ STUFF+ ROLLING ###
|
| 280 |
def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
|
| 281 |
"""
|
|
@@ -290,36 +267,29 @@ def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
|
|
| 290 |
ax : plt.Axes
|
| 291 |
The axis to plot on.
|
| 292 |
"""
|
| 293 |
-
#
|
| 294 |
-
items_in_order = (
|
| 295 |
-
df.sort("pitch_count", descending=True)['pitch_type']
|
| 296 |
-
.unique(maintain_order=True)
|
| 297 |
-
.to_numpy()
|
| 298 |
-
)
|
| 299 |
|
| 300 |
# Plot the rolling average for each pitch type
|
| 301 |
for i in items_in_order:
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
len(range(1, max(df.filter(pl.col('pitch_type') == i)['pitch_count']) + 1)),
|
| 305 |
-
len(df.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window)))
|
| 306 |
sns.lineplot(
|
| 307 |
-
x=range(1,
|
| 308 |
-
y=
|
| 309 |
color=dict_colour[i],
|
| 310 |
ax=ax,
|
| 311 |
linewidth=3
|
| 312 |
)
|
| 313 |
|
| 314 |
# Start the x-axis at `window`, the first pitch with a full rolling window
|
| 315 |
-
ax.set_xlim(window,
|
| 316 |
ax.set_ylim(70, 130)
|
| 317 |
ax.set_xlabel('Pitches', fontdict=font_properties_axes)
|
| 318 |
ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
|
| 319 |
ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
|
| 320 |
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
| 321 |
|
| 322 |
-
|
| 323 |
### TJ STUFF+ ROLLING (BY GAME) ###
|
| 324 |
def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
|
| 325 |
"""
|
|
@@ -368,15 +338,15 @@ def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
|
|
| 368 |
])
|
| 369 |
|
| 370 |
sns.lineplot(x=range(1, max(df_item['start_number_right']) + 1),
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
|
| 375 |
# Highlight missing game data points
|
| 376 |
for n in range(len(df_item)):
|
| 377 |
if df_item['game_id'].is_null()[n]:
|
| 378 |
sns.scatterplot(x=[df_item['start_number_right'][n]],
|
| 379 |
-
y=[df_item['tj_stuff_plus'][n]],
|
| 380 |
color='white',
|
| 381 |
ec='black',
|
| 382 |
ax=ax,
|
|
@@ -390,7 +360,6 @@ def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
|
|
| 390 |
ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
|
| 391 |
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
| 392 |
|
| 393 |
-
|
| 394 |
def break_plot(df: pl.DataFrame, ax: plt.Axes):
|
| 395 |
"""
|
| 396 |
Plot the pitch breaks for different pitch types.
|
|
@@ -416,9 +385,9 @@ def break_plot(df: pl.DataFrame, ax: plt.Axes):
|
|
| 416 |
|
| 417 |
# Plot scatter plot for pitch breaks
|
| 418 |
if df['pitcher_hand'][0] == 'R':
|
| 419 |
-
sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb']
|
| 420 |
-
|
| 421 |
-
sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb']
|
| 422 |
|
| 423 |
# Set axis limits
|
| 424 |
ax.set_xlim((-25, 25))
|
|
@@ -446,7 +415,7 @@ def break_plot(df: pl.DataFrame, ax: plt.Axes):
|
|
| 446 |
bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
|
| 447 |
ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
|
| 448 |
bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
|
| 449 |
-
|
| 450 |
ax.invert_xaxis()
|
| 451 |
ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
|
| 452 |
bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
|
|
@@ -705,58 +674,81 @@ def summary_table(df: pl.DataFrame, ax: plt.Axes):
|
|
| 705 |
fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2, prop={'family': 'calibi', 'size': 20})
|
| 706 |
ax.axis('off')
|
| 707 |
|
| 708 |
-
def plot_footer(ax:plt.Axes):
|
| 709 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
ax.text(0, 1, 'By: @TJStats', ha='left', va='top', fontsize=24)
|
| 711 |
ax.text(0.5, 0.25,
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
ax.text(1, 1, 'Data: MLB, Fangraphs\nImages: MLB, ESPN', ha='right', va='top', fontsize=24)
|
| 720 |
ax.axis('off')
|
| 721 |
|
| 722 |
-
|
| 723 |
# Function to get an image from a URL and display it on the given axis
|
| 724 |
-
def player_headshot(player_input: str, ax: plt.Axes, sport_id: int,season: int):
|
| 725 |
-
|
| 726 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 727 |
try:
|
|
|
|
| 728 |
if int(sport_id) == 1:
|
| 729 |
-
url = f'https://img.mlbstatic.com/mlb-photos/image/'
|
| 730 |
-
f'upload/d_people:generic:headshot:67:current.png'\
|
| 731 |
-
f'/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png'
|
| 732 |
-
|
| 733 |
-
# Send a GET request to the URL
|
| 734 |
-
response = requests.get(url)
|
| 735 |
-
|
| 736 |
-
# Open the image from the response content
|
| 737 |
-
img = Image.open(BytesIO(response.content))
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
# Display the image on the axis
|
| 741 |
-
ax.set_xlim(0, 1.3)
|
| 742 |
-
ax.set_ylim(0, 1)
|
| 743 |
-
ax.imshow(img, extent=[0, 1, 0, 1], origin='upper')
|
| 744 |
else:
|
| 745 |
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 752 |
ax.axis('off')
|
| 753 |
return
|
| 754 |
|
| 755 |
# Turn off the axis
|
| 756 |
ax.axis('off')
|
| 757 |
|
|
|
|
|
|
|
|
|
|
| 758 |
|
| 759 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 760 |
# Construct the URL to fetch player data
|
| 761 |
url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
|
| 762 |
|
|
@@ -782,14 +774,28 @@ def player_bio(pitcher_id: str, ax: plt.Axes,sport_id: int,year_input: int):
|
|
| 782 |
df_sport_id = pl.DataFrame(response['sports'])
|
| 783 |
abb = df_sport_id.filter(pl.col('id') == sport_id)['abbreviation'][0]
|
| 784 |
|
|
|
|
| 785 |
ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')
|
| 786 |
|
| 787 |
# Turn off the axis
|
| 788 |
ax.axis('off')
|
| 789 |
|
|
|
|
|
|
|
|
|
|
| 790 |
|
| 791 |
-
|
| 792 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
mlb_teams = [
|
| 794 |
{"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"},
|
| 795 |
{"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"},
|
|
@@ -823,11 +829,13 @@ def plot_logo(pitcher_id: str, ax: plt.Axes,df_team: pl.DataFrame,df_players : p
|
|
| 823 |
{"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"},
|
| 824 |
{"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"},
|
| 825 |
]
|
|
|
|
| 826 |
try:
|
| 827 |
# Create a DataFrame from the list of dictionaries
|
| 828 |
df_image = pd.DataFrame(mlb_teams)
|
| 829 |
-
image_dict = df_image.set_index('team')['logo_url'].to_dict()
|
| 830 |
-
|
|
|
|
| 831 |
team_id = df_players.filter(pl.col('player_id') == pitcher_id)['team'][0]
|
| 832 |
|
| 833 |
# Construct the URL to fetch team data
|
|
@@ -839,8 +847,7 @@ def plot_logo(pitcher_id: str, ax: plt.Axes,df_team: pl.DataFrame,df_players : p
|
|
| 839 |
# Extract the team abbreviation
|
| 840 |
if data_team['teams'][0]['id'] in df_team['parent_org_id']:
|
| 841 |
team_abb = df_team.filter(pl.col('team_id') == data_team['teams'][0]['id'])['parent_org_abbreviation'][0]
|
| 842 |
-
|
| 843 |
-
else:
|
| 844 |
team_abb = df_team.filter(pl.col('parent_org_id') == data_team['teams'][0]['parentOrgId'])['parent_org_abbreviation'][0]
|
| 845 |
|
| 846 |
# Get the logo URL from the image dictionary using the team abbreviation
|
|
@@ -859,7 +866,7 @@ def plot_logo(pitcher_id: str, ax: plt.Axes,df_team: pl.DataFrame,df_players : p
|
|
| 859 |
|
| 860 |
# Turn off the axis
|
| 861 |
ax.axis('off')
|
| 862 |
-
except KeyError
|
| 863 |
ax.axis('off')
|
| 864 |
return
|
| 865 |
|
|
@@ -910,29 +917,55 @@ def fangraphs_pitching_leaderboards(season: int,
|
|
| 910 |
df = pl.DataFrame(data=data['data'], infer_schema_length=1000)
|
| 911 |
return df
|
| 912 |
|
| 913 |
-
def
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 920 |
url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
|
| 921 |
|
| 922 |
-
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
|
|
|
|
|
|
|
|
|
| 928 |
payload = {
|
| 929 |
-
"strPlayerId":
|
| 930 |
"strSplitArr": split_dict[split],
|
| 931 |
"strGroup": "season",
|
| 932 |
"strPosition": "P",
|
| 933 |
"strType": "2",
|
| 934 |
-
"strStartDate":
|
| 935 |
-
"strEndDate":
|
| 936 |
"strSplitTeams": False,
|
| 937 |
"dctFilters": [],
|
| 938 |
"strStatType": "player",
|
|
@@ -945,39 +978,23 @@ def fangrpahs_splits_scrape(player_input: str, year_input: int, start_date: str,
|
|
| 945 |
"arrWxElevation": None,
|
| 946 |
"arrWxWindSpeed": None
|
| 947 |
}
|
| 948 |
-
|
| 949 |
-
|
| 950 |
-
response = requests.post(url, data=
|
| 951 |
data_pull = response.json()['data'][0]
|
| 952 |
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
|
| 956 |
-
"strGroup": "season",
|
| 957 |
-
"strPosition": "P",
|
| 958 |
-
"strType": "1",
|
| 959 |
-
"strStartDate": str(pd.to_datetime(start_date).strftime('%Y-%m-%d')),
|
| 960 |
-
"strEndDate": str(pd.to_datetime(end_date).strftime('%Y-%m-%d')),
|
| 961 |
-
"strSplitTeams": False,
|
| 962 |
-
"dctFilters": [],
|
| 963 |
-
"strStatType": "player",
|
| 964 |
-
"strAutoPt": False,
|
| 965 |
-
"arrPlayerId": [],
|
| 966 |
-
"strSplitArrPitch": [],
|
| 967 |
-
"arrWxTemperature": None,
|
| 968 |
-
"arrWxPressure": None,
|
| 969 |
-
"arrWxAirDensity": None,
|
| 970 |
-
"arrWxElevation": None,
|
| 971 |
-
"arrWxWindSpeed": None
|
| 972 |
-
}
|
| 973 |
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
response_advanced = requests.post(url, data=json_payload_advanced, headers=headers)
|
| 977 |
data_pull_advanced = response_advanced.json()['data'][0]
|
| 978 |
|
|
|
|
| 979 |
data_pull.update(data_pull_advanced)
|
| 980 |
df_pull = pl.DataFrame(data_pull)
|
|
|
|
| 981 |
return df_pull
|
| 982 |
|
| 983 |
|
|
@@ -1002,20 +1019,11 @@ def fangraphs_table(df: pl.DataFrame,
|
|
| 1002 |
start_date = df['game_date'][0]
|
| 1003 |
end_date = df['game_date'][-1]
|
| 1004 |
|
| 1005 |
-
# Fetch Fangraphs
|
| 1006 |
-
|
| 1007 |
-
# split=split,
|
| 1008 |
-
# start_date=start_date,
|
| 1009 |
-
# end_date=end_date).filter(pl.col('xMLBAMID') == player_input)
|
| 1010 |
-
|
| 1011 |
-
# df_fangraphs = df_fangraphs.with_columns(
|
| 1012 |
-
# ((pl.col('Strikes')/pl.col('Pitches'))).alias('strikePercentage'),
|
| 1013 |
-
|
| 1014 |
-
# )
|
| 1015 |
-
df_fangraphs = fangrpahs_splits_scrape(player_input=player_input,
|
| 1016 |
year_input=season,
|
| 1017 |
start_date=start_date,
|
| 1018 |
-
end_date=
|
| 1019 |
split=split)
|
| 1020 |
|
| 1021 |
# Select relevant columns for the table
|
|
@@ -1044,61 +1052,72 @@ def fangraphs_table(df: pl.DataFrame,
|
|
| 1044 |
|
| 1045 |
|
| 1046 |
def stat_summary_table(df: pl.DataFrame,
|
| 1047 |
-
|
| 1048 |
-
|
| 1049 |
-
|
| 1050 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1051 |
start_date_format = str(pd.to_datetime(df['game_date'][0]).strftime('%m/%d/%Y'))
|
| 1052 |
end_date_format = str(pd.to_datetime(df['game_date'][-1]).strftime('%m/%d/%Y'))
|
| 1053 |
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
|
|
|
|
|
|
|
|
|
| 1058 |
|
| 1059 |
-
|
| 1060 |
pitcher_stats_call_header = [x for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']]
|
| 1061 |
pitcher_stats_call_values = [pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat'][x] for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']]
|
| 1062 |
-
pitcher_stats_call_df = pl.DataFrame(data=dict(zip(pitcher_stats_call_header,pitcher_stats_call_values)))
|
| 1063 |
|
|
|
|
| 1064 |
pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
|
| 1065 |
pl.lit(df['is_whiff'].sum()).alias('whiffs'),
|
| 1066 |
-
(pl.col('strikeOuts')/pl.col('battersFaced')*100).round(1).cast(pl.Utf8).str.concat('%').alias('k_percent'),
|
| 1067 |
-
(pl.col('baseOnBalls')/pl.col('battersFaced')*100).round(1).cast(pl.Utf8).str.concat('%').alias('bb_percent'),
|
| 1068 |
-
((pl.col('strikeOuts') - pl.col('baseOnBalls'))/pl.col('battersFaced')*100).round(1).cast(pl.Utf8).str.concat('%').alias('k_bb_percent'),
|
| 1069 |
-
(((pl.col('homeRuns')*13 + 3*((pl.col('baseOnBalls'))+(pl.col('hitByPitch')))-2*(pl.col('strikeOuts'))))/((pl.col('outs'))/3)+3.15).round(2).map_elements(lambda x: f"{x:.2f}")
|
| 1070 |
-
((pl.col('strikes')/pl.col('numberOfPitches')*100)).round(1).cast(pl.Utf8).str.concat('%').alias('strikePercentage'),
|
| 1071 |
)
|
| 1072 |
|
| 1073 |
-
|
| 1074 |
if df['game_id'][0] == df['game_id'][-1]:
|
| 1075 |
-
pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched','battersFaced','earnedRuns','hits','strikeOuts','baseOnBalls','hitByPitch','homeRuns','strikePercentage','whiffs'])
|
| 1076 |
-
new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{ER}$','$\\bf{H}$','$\\bf{K}$','$\\bf{BB}$','$\\bf{HBP}$','$\\bf{HR}$','$\\bf{Strike\%}$','$\\bf{Whiffs}$']
|
| 1077 |
title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'
|
| 1078 |
elif sport_id != 1:
|
| 1079 |
-
pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched','battersFaced','whip','era','fip','k_percent','bb_percent','k_bb_percent','strikePercentage'])
|
| 1080 |
-
new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{WHIP}$','$\\bf{ERA}$','$\\bf{FIP}$','$\\bf{K\%}$','$\\bf{BB\%}$','$\\bf{K-BB\%}$','$\\bf{Strike\%}$']
|
| 1081 |
title = f'{df["game_date"][0]} to {df["game_date"][-1]}'
|
| 1082 |
else:
|
| 1083 |
-
fangraphs_table(df=df,
|
| 1084 |
-
ax=ax,
|
| 1085 |
-
player_input=player_input,
|
| 1086 |
-
season=2024,
|
| 1087 |
-
split=split)
|
| 1088 |
return
|
| 1089 |
-
|
| 1090 |
-
import matplotlib.pyplot as plt
|
| 1091 |
-
table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(), colLabels=pitcher_stats_call_df_small.columns, cellLoc='center',
|
| 1092 |
-
bbox=[0.0, 0.1, 1, 0.7])
|
| 1093 |
-
|
| 1094 |
-
min_font_size = 20
|
| 1095 |
-
table_fg.set_fontsize(min_font_size)
|
| 1096 |
|
| 1097 |
-
#
|
|
|
|
|
|
|
| 1098 |
for i, col_name in enumerate(new_column_names):
|
| 1099 |
table_fg.get_celld()[(0, i)].get_text().set_text(col_name)
|
| 1100 |
-
|
| 1101 |
-
ax.text(0.5, 0.9, title, va='bottom', ha='center',
|
| 1102 |
-
fontsize=36, fontstyle='italic')
|
| 1103 |
|
|
|
|
|
|
|
| 1104 |
ax.axis('off')
|
|
|
|
| 189 |
|
| 190 |
return ax.add_patch(ellipse)
|
| 191 |
### VELOCITY KDES ###
|
| 192 |
+
def velocity_kdes(df: pl.DataFrame, ax: plt.Axes, gs: gridspec.GridSpec, gs_x: list, gs_y: list, fig: plt.Figure):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
"""
|
| 194 |
Plot the velocity KDEs for different pitch types.
|
| 195 |
|
|
|
|
| 208 |
fig : plt.Figure
|
| 209 |
The figure to plot on.
|
| 210 |
"""
|
| 211 |
+
# Get unique pitch types sorted by pitch count
|
| 212 |
+
items_in_order = df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
# Create the inner subplot inside the outer subplot
|
|
|
|
| 215 |
ax.axis('off')
|
| 216 |
ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})
|
|
|
|
| 217 |
inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1, subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
|
| 218 |
ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]
|
| 219 |
|
| 220 |
for idx, i in enumerate(items_in_order):
|
| 221 |
pitch_data = df.filter(pl.col('pitch_type') == i)['start_speed']
|
| 222 |
if np.unique(pitch_data).size == 1: # Check if all values are the same
|
| 223 |
+
ax_top[idx].plot([np.unique(pitch_data), np.unique(pitch_data)], [0, 1], linewidth=4, color=dict_colour[i], zorder=20)
|
|
|
|
| 224 |
else:
|
| 225 |
+
sns.kdeplot(pitch_data, ax=ax_top[idx], fill=True, clip=(pitch_data.min(), pitch_data.max()), color=dict_colour[i])
|
|
|
|
|
|
|
| 226 |
|
| 227 |
# Plot the mean release speed for the current data
|
| 228 |
df_average = df.filter(df['pitch_type'] == i)['start_speed']
|
| 229 |
+
ax_top[idx].plot([df_average.mean(), df_average.mean()], [ax_top[idx].get_ylim()[0], ax_top[idx].get_ylim()[1]], color=dict_colour[i], linestyle='--')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
# Plot the mean release speed for the statcast group data
|
| 232 |
+
df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')
|
| 233 |
df_average = df_statcast_group.filter(df_statcast_group['pitch_type'] == i)['release_speed']
|
| 234 |
+
ax_top[idx].plot([df_average.mean(), df_average.mean()], [ax_top[idx].get_ylim()[0], ax_top[idx].get_ylim()[1]], color=dict_colour[i], linestyle=':')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
ax_top[idx].set_xlim(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5)
|
| 237 |
ax_top[idx].set_xlabel('')
|
|
|
|
| 245 |
ax_top[idx].set_xticks(range(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5, 5))
|
| 246 |
ax_top[idx].set_yticks([])
|
| 247 |
ax_top[idx].grid(axis='x', linestyle='--')
|
| 248 |
+
ax_top[idx].text(-0.01, 0.5, i, transform=ax_top[idx].transAxes, fontsize=14, va='center', ha='right')
|
|
|
|
| 249 |
|
| 250 |
ax_top[-1].spines['top'].set_visible(False)
|
| 251 |
ax_top[-1].spines['right'].set_visible(False)
|
|
|
|
| 253 |
ax_top[-1].set_xticks(list(range(math.floor(df['start_speed'].min() / 5) * 5, math.ceil(df['start_speed'].max() / 5) * 5, 5)))
|
| 254 |
ax_top[-1].set_xlabel('Velocity (mph)')
|
| 255 |
|
|
|
|
| 256 |
### TJ STUFF+ ROLLING ###
|
| 257 |
def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
|
| 258 |
"""
|
|
|
|
| 267 |
ax : plt.Axes
|
| 268 |
The axis to plot on.
|
| 269 |
"""
|
| 270 |
+
# Get unique pitch types sorted by pitch count
|
| 271 |
+
items_in_order = df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
# Plot the rolling average for each pitch type
|
| 274 |
for i in items_in_order:
|
| 275 |
+
pitch_data = df.filter(pl.col('pitch_type') == i)
|
| 276 |
+
if pitch_data['pitch_count'].max() >= window:
|
|
|
|
|
|
|
| 277 |
sns.lineplot(
|
| 278 |
+
x=range(1, pitch_data['pitch_count'].max() + 1),
|
| 279 |
+
y=pitch_data['tj_stuff_plus'].rolling_mean(window),
|
| 280 |
color=dict_colour[i],
|
| 281 |
ax=ax,
|
| 282 |
linewidth=3
|
| 283 |
)
|
| 284 |
|
| 285 |
# Start the x-axis at `window`, the first pitch with a full rolling window
|
| 286 |
+
ax.set_xlim(window, df['pitch_count'].max())
|
| 287 |
ax.set_ylim(70, 130)
|
| 288 |
ax.set_xlabel('Pitches', fontdict=font_properties_axes)
|
| 289 |
ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
|
| 290 |
ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
|
| 291 |
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
| 292 |
|
|
|
|
| 293 |
### TJ STUFF+ ROLLING (BY GAME) ###
|
| 294 |
def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
|
| 295 |
"""
|
|
|
|
| 338 |
])
|
| 339 |
|
| 340 |
sns.lineplot(x=range(1, max(df_item['start_number_right']) + 1),
|
| 341 |
+
y=df_item.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window),
|
| 342 |
+
color=dict_colour[i],
|
| 343 |
+
ax=ax, linewidth=3)
|
| 344 |
|
| 345 |
# Highlight missing game data points
|
| 346 |
for n in range(len(df_item)):
|
| 347 |
if df_item['game_id'].is_null()[n]:
|
| 348 |
sns.scatterplot(x=[df_item['start_number_right'][n]],
|
| 349 |
+
y=[df_item['tj_stuff_plus'].rolling_mean(window)[n]],
|
| 350 |
color='white',
|
| 351 |
ec='black',
|
| 352 |
ax=ax,
|
|
|
|
| 360 |
ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
|
| 361 |
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
| 362 |
|
|
|
|
| 363 |
def break_plot(df: pl.DataFrame, ax: plt.Axes):
|
| 364 |
"""
|
| 365 |
Plot the pitch breaks for different pitch types.
|
|
|
|
| 385 |
|
| 386 |
# Plot scatter plot for pitch breaks
|
| 387 |
if df['pitcher_hand'][0] == 'R':
|
| 388 |
+
sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)
|
| 389 |
+
elif df['pitcher_hand'][0] == 'L':
|
| 390 |
+
sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)
|
| 391 |
|
| 392 |
# Set axis limits
|
| 393 |
ax.set_xlim((-25, 25))
|
|
|
|
| 415 |
bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
|
| 416 |
ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
|
| 417 |
bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
|
| 418 |
+
elif df['pitcher_hand'][0] == 'L':
|
| 419 |
ax.invert_xaxis()
|
| 420 |
ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
|
| 421 |
bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
|
|
|
|
| 674 |
fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2, prop={'family': 'calibi', 'size': 20})
|
| 675 |
ax.axis('off')
|
| 676 |
|
| 677 |
+
def plot_footer(ax: plt.Axes):
    """
    Write the credit, methodology notes, and data sources onto a footer axis.

    Parameters
    ----------
    ax : plt.Axes
        The axis that receives the three footer text elements. Its frame,
        ticks, and labels are hidden once the text has been placed.
    """
    # Methodology blurb. The empty first and last entries reproduce the
    # leading and trailing newline of the text block.
    explainer = '\n'.join((
        '',
        'Colour Coding Compares to League Average By Pitch',
        'tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type',
        'tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
        'Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type',
        '',
    ))
    sources = 'Data: MLB, Fangraphs\nImages: MLB, ESPN'

    # Left: author credit, anchored to the top-left corner.
    ax.text(0, 1, 'By: @TJStats', ha='left', va='top', fontsize=24)
    # Centre: small-print explanation, sitting on y=0.25.
    ax.text(0.5, 0.25, explainer, ha='center', va='bottom', fontsize=12)
    # Right: data and image attributions, anchored to the top-right corner.
    ax.text(1, 1, sources, ha='right', va='top', fontsize=24)

    ax.axis('off')
|
| 698 |
|
|
|
|
| 699 |
# Function to get an image from a URL and display it on the given axis
|
| 700 |
+
def player_headshot(player_input: str, ax: plt.Axes, sport_id: int, season: int):
    """
    Display the player's headshot image on the given axis.

    The image is downloaded from img.mlbstatic.com. If the download fails or
    the response cannot be decoded as an image, the axis is simply left blank.

    Parameters
    ----------
    player_input : str
        The player's ID, interpolated into the headshot URL.
    ax : plt.Axes
        The axis to display the image on.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues). Anything accepted
        by ``int()`` works, e.g. the string ``"1"``.
    season : int
        The season year. Not used by this function; kept so existing callers
        do not need to change.

    Raises
    ------
    ValueError
        If ``sport_id`` cannot be converted with ``int()``.
    """
    # Imported locally so the except clause does not depend on a module-level
    # `import PIL`, which is not visible in this part of the file.
    from PIL import UnidentifiedImageError

    # Decide MLB vs. minor leagues once, so the URL and the image extent can
    # never disagree (e.g. when sport_id arrives as the string "1").
    is_mlb = int(sport_id) == 1
    if is_mlb:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png'
        extent = [0, 1, 0, 1]
    else:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'
        # Minor-league images are drawn narrower and centred in the unit box.
        extent = [1/6, 5/6, 0, 1]

    try:
        # A timeout keeps a stalled connection from hanging the whole figure.
        response = requests.get(url, timeout=10)
        img = Image.open(BytesIO(response.content))
    except (requests.RequestException, UnidentifiedImageError):
        # No usable image: leave the axis empty rather than failing the plot.
        ax.axis('off')
        return

    # Display the image on the axis; the x-range extends past the image to
    # leave empty space on its right.
    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    ax.imshow(img, extent=extent, origin='upper')

    # Turn off the axis
    ax.axis('off')
|
| 736 |
|
| 737 |
+
def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
|
| 738 |
+
"""
|
| 739 |
+
Display the player's bio information on the given axis.
|
| 740 |
|
| 741 |
+
Parameters
|
| 742 |
+
----------
|
| 743 |
+
pitcher_id : str
|
| 744 |
+
The player's ID.
|
| 745 |
+
ax : plt.Axes
|
| 746 |
+
The axis to display the bio information on.
|
| 747 |
+
sport_id : int
|
| 748 |
+
The sport ID (1 for MLB, other for minor leagues).
|
| 749 |
+
year_input : int
|
| 750 |
+
The season year.
|
| 751 |
+
"""
|
| 752 |
# Construct the URL to fetch player data
|
| 753 |
url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
|
| 754 |
|
|
|
|
| 774 |
df_sport_id = pl.DataFrame(response['sports'])
|
| 775 |
abb = df_sport_id.filter(pl.col('id') == sport_id)['abbreviation'][0]
|
| 776 |
|
| 777 |
+
# Display the season and sport abbreviation
|
| 778 |
ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')
|
| 779 |
|
| 780 |
# Turn off the axis
|
| 781 |
ax.axis('off')
|
| 782 |
|
| 783 |
+
def plot_logo(pitcher_id: str, ax: plt.Axes, df_team: pl.DataFrame, df_players: pl.DataFrame):
|
| 784 |
+
"""
|
| 785 |
+
Display the team logo for the given pitcher on the specified axis.
|
| 786 |
|
| 787 |
+
Parameters
|
| 788 |
+
----------
|
| 789 |
+
pitcher_id : str
|
| 790 |
+
The ID of the pitcher.
|
| 791 |
+
ax : plt.Axes
|
| 792 |
+
The axis to display the logo on.
|
| 793 |
+
df_team : pl.DataFrame
|
| 794 |
+
The DataFrame containing team data.
|
| 795 |
+
df_players : pl.DataFrame
|
| 796 |
+
The DataFrame containing player data.
|
| 797 |
+
"""
|
| 798 |
+
# List of MLB teams and their corresponding ESPN logo URLs
|
| 799 |
mlb_teams = [
|
| 800 |
{"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"},
|
| 801 |
{"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"},
|
|
|
|
| 829 |
{"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"},
|
| 830 |
{"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"},
|
| 831 |
]
|
| 832 |
+
|
| 833 |
try:
|
| 834 |
# Create a DataFrame from the list of dictionaries
|
| 835 |
df_image = pd.DataFrame(mlb_teams)
|
| 836 |
+
image_dict = df_image.set_index('team')['logo_url'].to_dict()
|
| 837 |
+
|
| 838 |
+
# Get the team ID for the given pitcher
|
| 839 |
team_id = df_players.filter(pl.col('player_id') == pitcher_id)['team'][0]
|
| 840 |
|
| 841 |
# Construct the URL to fetch team data
|
|
|
|
| 847 |
# Extract the team abbreviation
|
| 848 |
if data_team['teams'][0]['id'] in df_team['parent_org_id']:
|
| 849 |
team_abb = df_team.filter(pl.col('team_id') == data_team['teams'][0]['id'])['parent_org_abbreviation'][0]
|
| 850 |
+
else:
|
|
|
|
| 851 |
team_abb = df_team.filter(pl.col('parent_org_id') == data_team['teams'][0]['parentOrgId'])['parent_org_abbreviation'][0]
|
| 852 |
|
| 853 |
# Get the logo URL from the image dictionary using the team abbreviation
|
|
|
|
| 866 |
|
| 867 |
# Turn off the axis
|
| 868 |
ax.axis('off')
|
| 869 |
+
except KeyError:
|
| 870 |
ax.axis('off')
|
| 871 |
return
|
| 872 |
|
|
|
|
| 917 |
df = pl.DataFrame(data=data['data'], infer_schema_length=1000)
|
| 918 |
return df
|
| 919 |
|
| 920 |
+
def fangraphs_splits_scrape(player_input: str, year_input: int, start_date: str, end_date: str, split: str) -> pl.DataFrame:
|
| 921 |
+
"""
|
| 922 |
+
Scrape Fangraphs splits data for a specific player.
|
| 923 |
+
|
| 924 |
+
Parameters
|
| 925 |
+
----------
|
| 926 |
+
player_input : str
|
| 927 |
+
The player's ID.
|
| 928 |
+
year_input : int
|
| 929 |
+
The season year.
|
| 930 |
+
start_date : str
|
| 931 |
+
The start date for the data.
|
| 932 |
+
end_date : str
|
| 933 |
+
The end date for the data.
|
| 934 |
+
split : str
|
| 935 |
+
The split type (e.g., 'all', 'left', 'right').
|
| 936 |
+
|
| 937 |
+
Returns
|
| 938 |
+
-------
|
| 939 |
+
pl.DataFrame
|
| 940 |
+
The DataFrame containing the splits data.
|
| 941 |
+
"""
|
| 942 |
+
split_dict = {
|
| 943 |
+
'all': [],
|
| 944 |
+
'left': ['5'],
|
| 945 |
+
'right': ['6']
|
| 946 |
+
}
|
| 947 |
+
|
| 948 |
+
|
| 949 |
+
|
| 950 |
url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
|
| 951 |
|
| 952 |
+
# Get Fangraphs player ID
|
| 953 |
+
fg_id = str(fangraphs_pitching_leaderboards(
|
| 954 |
+
year_input,
|
| 955 |
+
split='All',
|
| 956 |
+
start_date=f'{year_input}-01-01',
|
| 957 |
+
end_date=f'{year_input}-12-31'
|
| 958 |
+
).filter(pl.col('xMLBAMID') == player_input)['playerid'][0])
|
| 959 |
+
|
| 960 |
+
# Payload for basic stats
|
| 961 |
payload = {
|
| 962 |
+
"strPlayerId": fg_id,
|
| 963 |
"strSplitArr": split_dict[split],
|
| 964 |
"strGroup": "season",
|
| 965 |
"strPosition": "P",
|
| 966 |
"strType": "2",
|
| 967 |
+
"strStartDate": pd.to_datetime(start_date).strftime('%Y-%m-%d'),
|
| 968 |
+
"strEndDate": pd.to_datetime(end_date).strftime('%Y-%m-%d'),
|
| 969 |
"strSplitTeams": False,
|
| 970 |
"dctFilters": [],
|
| 971 |
"strStatType": "player",
|
|
|
|
| 978 |
"arrWxElevation": None,
|
| 979 |
"arrWxWindSpeed": None
|
| 980 |
}
|
| 981 |
+
|
| 982 |
+
# Fetch basic stats
|
| 983 |
+
response = requests.post(url, data=json.dumps(payload), headers={'Content-Type': 'application/json'})
|
| 984 |
data_pull = response.json()['data'][0]
|
| 985 |
|
| 986 |
+
# Payload for advanced stats
|
| 987 |
+
payload_advanced = payload.copy()
|
| 988 |
+
payload_advanced["strType"] = "1"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 989 |
|
| 990 |
+
# Fetch advanced stats
|
| 991 |
+
response_advanced = requests.post(url, data=json.dumps(payload_advanced), headers={'Content-Type': 'application/json'})
|
|
|
|
| 992 |
data_pull_advanced = response_advanced.json()['data'][0]
|
| 993 |
|
| 994 |
+
# Combine basic and advanced stats
|
| 995 |
data_pull.update(data_pull_advanced)
|
| 996 |
df_pull = pl.DataFrame(data_pull)
|
| 997 |
+
|
| 998 |
return df_pull
|
| 999 |
|
| 1000 |
|
|
|
|
| 1019 |
start_date = df['game_date'][0]
|
| 1020 |
end_date = df['game_date'][-1]
|
| 1021 |
|
| 1022 |
+
# Fetch Fangraphs splits data
|
| 1023 |
+
df_fangraphs = fangraphs_splits_scrape(player_input=player_input,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1024 |
year_input=season,
|
| 1025 |
start_date=start_date,
|
| 1026 |
+
end_date=end_date,
|
| 1027 |
split=split)
|
| 1028 |
|
| 1029 |
# Select relevant columns for the table
|
|
|
|
| 1052 |
|
| 1053 |
|
| 1054 |
def stat_summary_table(df: pl.DataFrame,
                       player_input: int,
                       sport_id: int,
                       ax: plt.Axes,
                       split: str = 'All'):
    """
    Create a summary table of player statistics.

    Three layouts are produced depending on the data:

    * all pitches come from one game  -> single-game box-score line;
    * several games, ``sport_id != 1`` -> rate-stat line built from the MLB
      Stats API response;
    * several games, ``sport_id == 1`` -> rendering is delegated to
      ``fangraphs_table`` and nothing else is drawn here.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The columns ``game_date``,
        ``game_id``, ``is_whiff`` and ``batter_team`` are read; the first and
        last rows are used as the start and end of the date range.
    player_input : int
        The player's ID.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues).
    ax : plt.Axes
        The axis to plot the table on.
    split : str, optional
        The split type (default is 'All'). Only forwarded to
        ``fangraphs_table``.

    Returns
    -------
    None
        The table and title are drawn on ``ax``.
    """
    def _percent_text(ratio: pl.Expr, name: str) -> pl.Expr:
        # Render a ratio as text such as "25.3%". The suffix is appended
        # row-wise; `str.concat` is a vertical aggregation and would leave a
        # one-row column without the percent sign.
        return ((ratio * 100).round(1).cast(pl.Utf8) + pl.lit('%')).alias(name)

    # Format start and end dates
    start_date_format = str(pd.to_datetime(df['game_date'][0]).strftime('%m/%d/%Y'))
    end_date_format = str(pd.to_datetime(df['game_date'][-1]).strftime('%m/%d/%Y'))

    is_single_game = df['game_id'][0] == df['game_id'][-1]

    # MLB data spanning several games is rendered from Fangraphs instead, so
    # hand off before spending a request on Stats API data that is not used.
    if not is_single_game and sport_id == 1:
        fangraphs_table(df=df, ax=ax, player_input=player_input, season=int(df['game_date'][0][0:4]), split=split)
        return

    # Determine app context based on sport ID
    appContext = 'majorLeague' if sport_id == 1 else 'minorLeague'

    # Fetch player stats from MLB API
    stats_url = (
        f'https://statsapi.mlb.com/api/v1/people/{player_input}'
        f'?appContext={appContext}'
        f'&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},'
        f'startDate={start_date_format},endDate={end_date_format})'
    )
    pitcher_stats_call = requests.get(stats_url).json()

    # Extract stats and create a one-row DataFrame (the last split is used)
    stat_line = pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']
    pitcher_stats_call_df = pl.DataFrame(data=dict(stat_line))

    # Add additional calculated columns
    batters_faced = pl.col('battersFaced')
    fip_value = (
        (pl.col('homeRuns') * 13
         + 3 * (pl.col('baseOnBalls') + pl.col('hitByPitch'))
         - 2 * pl.col('strikeOuts'))
        / (pl.col('outs') / 3)
        + 3.15
    )
    pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
        pl.lit(df['is_whiff'].sum()).alias('whiffs'),
        _percent_text(pl.col('strikeOuts') / batters_faced, 'k_percent'),
        _percent_text(pl.col('baseOnBalls') / batters_faced, 'bb_percent'),
        _percent_text((pl.col('strikeOuts') - pl.col('baseOnBalls')) / batters_faced, 'k_bb_percent'),
        fip_value.round(2).map_elements(lambda x: f"{x:.2f}", return_dtype=pl.Utf8).alias('fip'),
        _percent_text(pl.col('strikes') / pl.col('numberOfPitches'), 'strikePercentage'),
    )

    # Determine columns and title based on game count.
    # Header labels are raw strings so that `\b` and `\%` reach matplotlib's
    # mathtext unchanged without relying on invalid escape sequences.
    if is_single_game:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'earnedRuns', 'hits', 'strikeOuts', 'baseOnBalls', 'hitByPitch', 'homeRuns', 'strikePercentage', 'whiffs'])
        new_column_names = [r'$\bf{IP}$', r'$\bf{PA}$', r'$\bf{ER}$', r'$\bf{H}$', r'$\bf{K}$', r'$\bf{BB}$', r'$\bf{HBP}$', r'$\bf{HR}$', r'$\bf{Strike\%}$', r'$\bf{Whiffs}$']
        title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'
    else:
        # Several games below MLB level (sport_id != 1)
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched', 'battersFaced', 'whip', 'era', 'fip', 'k_percent', 'bb_percent', 'k_bb_percent', 'strikePercentage'])
        new_column_names = [r'$\bf{IP}$', r'$\bf{PA}$', r'$\bf{WHIP}$', r'$\bf{ERA}$', r'$\bf{FIP}$', r'$\bf{K\%}$', r'$\bf{BB\%}$', r'$\bf{K-BB\%}$', r'$\bf{Strike\%}$']
        title = f'{df["game_date"][0]} to {df["game_date"][-1]}'

    # Create and format the table
    table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(), colLabels=pitcher_stats_call_df_small.columns, cellLoc='center', bbox=[0.0, 0.1, 1, 0.7])
    table_fg.set_fontsize(20)
    # Row 0 holds the header cells; swap the raw column names for display labels
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    # Add title to the plot
    ax.text(0.5, 0.9, title, va='bottom', ha='center', fontsize=36, fontstyle='italic')
    ax.axis('off')
|