|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import json |
|
|
from matplotlib.ticker import FuncFormatter |
|
|
from matplotlib.ticker import MaxNLocator |
|
|
import math |
|
|
from matplotlib.patches import Ellipse |
|
|
import matplotlib.transforms as transforms |
|
|
import matplotlib.colors |
|
|
import matplotlib.colors as mcolors |
|
|
import seaborn as sns |
|
|
import matplotlib.pyplot as plt |
|
|
import requests |
|
|
import polars as pl |
|
|
from PIL import Image |
|
|
import requests |
|
|
from io import BytesIO |
|
|
from matplotlib.offsetbox import OffsetImage, AnnotationBbox |
|
|
import matplotlib.pyplot as plt |
|
|
import matplotlib.gridspec as gridspec |
|
|
import PIL |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Master pitch table: pitch-type code -> display colour (hex) and display name.
# Rows are kept in the original order; the blank-line groups mirror the
# grouping of the original literal.
pitch_colours = {
    code: {'colour': colour, 'name': name}
    for code, colour, name in (
        ('FF', '#FF007D', '4-Seam Fastball'),
        ('FA', '#FF007D', 'Fastball'),
        ('SI', '#98165D', 'Sinker'),
        ('FC', '#BE5FA0', 'Cutter'),

        ('CH', '#F79E70', 'Changeup'),
        ('FS', '#FE6100', 'Splitter'),
        ('SC', '#F08223', 'Screwball'),
        ('FO', '#FFB000', 'Forkball'),

        ('SL', '#67E18D', 'Slider'),
        ('ST', '#1BB999', 'Sweeper'),
        ('SV', '#376748', 'Slurve'),

        ('KC', '#311D8B', 'Knuckle Curve'),
        ('CU', '#3025CE', 'Curveball'),
        ('CS', '#274BFC', 'Slow Curve'),
        ('EP', '#648FFF', 'Eephus'),

        ('KN', '#867A08', 'Knuckleball'),
        ('PO', '#472C30', 'Pitch Out'),
        ('UN', '#9C8975', 'Unknown'),
    )
}

# Lookup tables derived from the master table.
# code -> colour
dict_colour = {code: info['colour'] for code, info in pitch_colours.items()}
# code -> display name
dict_pitch = {code: info['name'] for code, info in pitch_colours.items()}
# display name -> code, plus an alternate fastball spelling and the 'All' pseudo-entry
dict_pitch_desc_type = {info['name']: code for code, info in pitch_colours.items()}
dict_pitch_desc_type['Four-Seam Fastball'] = 'FF'
dict_pitch_desc_type['All'] = 'All'
# display name -> colour, plus the alternate fastball spelling
dict_pitch_name = {info['name']: info['colour'] for info in pitch_colours.values()}
dict_pitch_name['Four-Seam Fastball'] = '#FF007D'
|
|
|
|
|
# Font dictionaries passed as ``fontdict`` to matplotlib text calls:
# body text (12), titles (20) and axis labels (16).
# NOTE(review): 'calibi' is probably a typo for 'calibri' -- confirm before
# changing, because correcting it would alter the rendered font.
font_properties = dict(family='calibi', size=12)
font_properties_titles = dict(family='calibi', size=20)
font_properties_axes = dict(family='calibi', size=16)
|
|
|
|
|
# Three-stop colormap running blue (#648FFF) -> white -> gold (#FFB000).
cmap_sum = mcolors.LinearSegmentedColormap.from_list(
    "",
    ['#648FFF', '#FFFFFF', '#FFB000'],
)
|
|
|
|
|
|
|
|
# Display metadata for each stat key: the mathtext column header and the
# ``format()`` spec used to render the value.
# Backslashes are written as explicit ``\\`` so the literals contain no invalid
# escape sequences (``\/`` and ``\%`` warn on recent Python versions); the
# resulting string values are unchanged.
fangraphs_stats_dict = {
    'IP': {'table_header': '$\\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': '$\\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': '$\\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': '$\\bf{K\\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': '$\\bf{BB\\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': '$\\bf{K\\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': '$\\bf{HR\\/9}$', 'format': '.2f'},
    'K%': {'table_header': '$\\bf{K\\%}$', 'format': '.1%'},
    'BB%': {'table_header': '$\\bf{BB\\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': '$\\bf{K-BB\\%}$', 'format': '.1%'},
    'WHIP': {'table_header': '$\\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': '$\\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': '$\\bf{LOB\\%}$', 'format': '.1%'},
    'xFIP': {'table_header': '$\\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': '$\\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': '$\\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': '$\\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': '$\\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': '$\\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': '$\\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': '$\\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': '$\\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': '$\\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': '$\\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': '$\\bf{SO}$', 'format': '.0f'},
    # OBP and SLG are fractional rates: '.0f' would print every value as 0 or 1,
    # so they use the same three-decimal format as AVG and wOBA.
    'OBP': {'table_header': '$\\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': '$\\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': '$\\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': '$\\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': '$\\bf{G}$', 'format': '.0f'},
    'strikePercentage': {'table_header': '$\\bf{Strike\\%}$', 'format': '.1%'},
}
|
|
|
|
|
# General-purpose accent colours, addressed by index elsewhere in the module.
colour_palette = [
    '#FFB000',
    '#648FFF',
    '#785EF0',
    '#DC267F',
    '#FE6100',
    '#3D1EB2',
    '#894D80',
    '#16AA02',
    '#B5592B',
    '#A3C1ED',
]
|
|
|
|
|
|
|
|
def get_color(value, normalize, cmap_sum):
    """
    Translate a numeric value into a hexadecimal colour string.

    Parameters
    ----------
    value : float
        The value to colour.
    normalize : matplotlib.colors.Normalize
        Callable applied to ``value`` before the colormap lookup.
    cmap_sum : matplotlib.colors.Colormap
        Callable colormap applied to the normalised value.

    Returns
    -------
    str
        The colour returned by the colormap, converted with ``mcolors.to_hex``.
    """
    scaled_value = normalize(value)
    rgba = cmap_sum(scaled_value)
    return mcolors.to_hex(rgba)
|
|
|
|
|
|
|
|
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Both are converted to float NumPy arrays, so lists,
        arrays and Series-like objects are all accepted.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.

    facecolor : color, optional
        Fill colour of the ellipse (default ``'none'``).

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to ``ax``. ``None`` is returned, and nothing is drawn,
        when the data cannot define an ellipse (fewer than two points, a
        non-finite covariance, or zero variance along either axis) or when a
        ``ValueError`` is raised while building the patch.

    Raises
    ------
    ValueError
        If *x* and *y* do not have the same length.
    """
    if len(x) != len(y):
        raise ValueError("x and y must be the same size")

    try:
        x_values = np.asarray(x, dtype=float)
        y_values = np.asarray(y, dtype=float)

        # A covariance needs at least two observations.
        if x_values.size < 2:
            return None

        cov = np.cov(x_values, y_values)
        var_x, var_y = cov[0, 0], cov[1, 1]

        # Zero or non-finite variance would make the correlation NaN and
        # produce an ellipse with NaN radii, so bail out explicitly instead.
        if not np.isfinite(cov).all() or var_x <= 0 or var_y <= 0:
            return None

        # Clip to [-1, 1]: floating-point rounding on (nearly) collinear data
        # can push the ratio just outside that range, which would make one of
        # the square roots below NaN.
        pearson = np.clip(cov[0, 1] / np.sqrt(var_x * var_y), -1.0, 1.0)

        # Radii of the ellipse for the standardised (unit-variance) data.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, linewidth=2, linestyle='--', **kwargs)

        # Scale by n_std standard deviations along each axis and centre the
        # ellipse on the data means.
        scale_x = np.sqrt(var_x) * n_std
        mean_x = x_values.mean()

        scale_y = np.sqrt(var_y) * n_std
        mean_y = y_values.mean()

        transf = (
            transforms.Affine2D()
            .rotate_deg(45)
            .scale(scale_x, scale_y)
            .translate(mean_x, mean_y)
        )

        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        # Same best-effort behaviour as before: skip the ellipse, do not crash.
        return None

    return ax.add_patch(ellipse)
|
|
|
|
|
def velocity_kdes(df: pl.DataFrame, ax: plt.Axes, gs: gridspec.GridSpec, gs_x: list, gs_y: list, fig: plt.Figure):
    """
    Plot the velocity KDEs for different pitch types.

    One stacked sub-axis is created per pitch type (ordered by descending
    ``pitch_count``). Each shows the distribution of ``start_speed``, a dashed
    line at the mean of ``df`` for that pitch type, and a dotted line at the
    mean ``release_speed`` of the matching rows in
    ``functions/statcast_2024_grouped.csv`` (presumably a league-wide
    reference -- confirm against the data file).

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The columns ``pitch_count``,
        ``pitch_type`` and ``start_speed`` are read.
    ax : plt.Axes
        The axis to plot on; it is switched off and only carries the title.
    gs : GridSpec
        The GridSpec for the subplot layout.
    gs_x : list
        The x-coordinates for the GridSpec (first and last entries are used
        as the row slice).
    gs_y : list
        The y-coordinates for the GridSpec (first and last entries are used
        as the column slice).
    fig : plt.Figure
        The figure to plot on.
    """
    items_in_order = df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy()

    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})

    # Nothing to draw: a zero-row grid cannot be built and there is no bottom
    # axis to label.
    if len(items_in_order) == 0:
        return

    inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1, subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
    ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]

    # Loop invariants: read the reference table once and compute the shared
    # x-range (rounded outwards to multiples of 5) once.
    df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')
    x_low = math.floor(df['start_speed'].min() / 5) * 5
    x_high = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = list(range(x_low, x_high, 5))
    last_idx = len(items_in_order) - 1

    for idx, i in enumerate(items_in_order):
        panel = ax_top[idx]
        colour = dict_colour[i]
        pitch_data = df.filter(pl.col('pitch_type') == i)['start_speed']

        unique_speeds = np.unique(pitch_data)
        if unique_speeds.size == 1:
            # A single distinct velocity is drawn as a vertical bar instead of
            # a KDE.
            panel.plot([unique_speeds, unique_speeds], [0, 1], linewidth=4, color=colour, zorder=20)
        else:
            sns.kdeplot(pitch_data, ax=panel, fill=True, clip=(pitch_data.min(), pitch_data.max()), color=colour)

        # Dashed line: mean velocity of this pitch type in ``df``.
        pitcher_mean = pitch_data.mean()
        panel.plot([pitcher_mean, pitcher_mean], list(panel.get_ylim()), color=colour, linestyle='--')

        # Dotted line: mean release speed of this pitch type in the reference
        # table. The y-limits are re-read because the previous plot call may
        # have changed them.
        reference_mean = df_statcast_group.filter(df_statcast_group['pitch_type'] == i)['release_speed'].mean()
        panel.plot([reference_mean, reference_mean], list(panel.get_ylim()), color=colour, linestyle=':')

        panel.set_xlim(x_low, x_high)
        panel.set_xlabel('')
        panel.set_ylabel('')
        if idx < last_idx:
            # Every panel except the bottom one hides its frame and x tick
            # labels.
            panel.spines['top'].set_visible(False)
            panel.spines['right'].set_visible(False)
            panel.spines['left'].set_visible(False)
            panel.tick_params(axis='x', colors='none')

        panel.set_xticks(x_ticks)
        panel.set_yticks([])
        panel.grid(axis='x', linestyle='--')
        panel.text(-0.01, 0.5, i, transform=panel.transAxes, fontsize=14, va='center', ha='right')

    # The bottom panel keeps its x tick labels and carries the axis label.
    bottom = ax_top[-1]
    bottom.spines['top'].set_visible(False)
    bottom.spines['right'].set_visible(False)
    bottom.spines['left'].set_visible(False)
    bottom.set_xticks(x_ticks)
    bottom.set_xlabel('Velocity (mph)')
|
|
|
|
|
|
|
|
def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Draw one rolling-mean tjStuff+ line per pitch type.

    Pitch types are visited in order of descending ``pitch_count``. A pitch
    type is drawn only when its largest ``pitch_count`` is at least
    ``window``.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; the columns ``pitch_count``, ``pitch_type`` and
        ``tj_stuff_plus`` are read.
    window : int
        Number of pitches in each rolling-mean window.
    ax : plt.Axes
        The axis the lines are drawn on.
    """
    pitch_types = (
        df.sort("pitch_count", descending=True)['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    for pitch_type in pitch_types:
        pitch_rows = df.filter(pl.col('pitch_type') == pitch_type)
        last_pitch = pitch_rows['pitch_count'].max()
        if last_pitch < window:
            # Too few pitches to fill a single window.
            continue

        rolling_stuff = pitch_rows['tj_stuff_plus'].rolling_mean(window)
        sns.lineplot(
            x=range(1, last_pitch + 1),
            y=rolling_stuff,
            color=dict_colour[pitch_type],
            ax=ax,
            linewidth=3
        )

    # Axis ranges, labels and title.
    ax.set_xlim(window, df['pitch_count'].max())
    ax.set_ylim(70, 130)
    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
|
|
|
|
|
|
|
def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Plot the rolling average of tjStuff+ for different pitch types over games.

    Games are numbered 1..N in order of first appearance of ``game_id``. For
    each pitch type the per-game mean ``tj_stuff_plus`` is computed, games in
    which the pitch was not thrown are filled from neighbouring games, and a
    rolling mean over ``window`` games is drawn. Filled-in games are marked
    with a white dot outlined in the pitch colour.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The columns ``game_id``,
        ``game_date``, ``pitch_type``, ``pitch_description`` and
        ``tj_stuff_plus`` are read.
    window : int
        The window size (in games) for calculating the rolling average.
    ax : plt.Axes
        The axis to plot on.
    """
    # Sequential game number for every distinct game id, in order of appearance.
    date_to_number = {date: i + 1 for i, date in enumerate(df['game_id'].unique(maintain_order=True))}

    df_plot = df.with_columns(
        pl.col("game_id").map_elements(lambda x: date_to_number.get(x, x)).alias("start_number")
    )

    # Mean tjStuff+ per game and pitch type.
    plot_game_roll = df_plot.group_by(['start_number', 'game_id', 'game_date', 'pitch_type', 'pitch_description']).agg(
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus')
    ).sort('start_number', descending=False)

    # Pitch types ordered from most to least thrown.
    sorted_value_counts = df['pitch_type'].value_counts().sort('count', descending=True)
    items_in_order = sorted_value_counts['pitch_type'].to_list()

    # One row per game number; joined against each pitch type so that games
    # without that pitch still get a row. Loop-invariant, so built once.
    all_games = pl.DataFrame({"start_number": list(date_to_number.values())})

    for i in items_in_order:
        df_item = plot_game_roll.filter(pl.col('pitch_type') == i)
        df_item = df_item.with_columns(
            pl.col("start_number").cast(pl.Int64)
        ).join(
            all_games,
            on="start_number",
            how="outer"
        ).sort("start_number_right").with_columns([
            pl.col("start_number").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("tj_stuff_plus").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_type").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_description").fill_null(strategy="forward").fill_null(strategy="backward")
        ])

        # Computed once per pitch type and shared by the line and the markers
        # (it used to be recomputed for every row). After the fills above
        # every row carries pitch type ``i``, so no further filtering is
        # needed.
        rolling_stuff = df_item['tj_stuff_plus'].rolling_mean(window, min_periods=1)
        game_numbers = df_item['start_number_right']
        colour = dict_colour[i]

        sns.lineplot(x=range(1, max(game_numbers) + 1),
                     y=rolling_stuff,
                     color=colour,
                     ax=ax, linewidth=3)

        # Rows whose game_id is null come only from the join, i.e. games in
        # which this pitch type was not thrown.
        not_thrown = df_item['game_id'].is_null()
        for n, is_missing in enumerate(not_thrown):
            if is_missing:
                sns.scatterplot(x=[game_numbers[n]],
                                y=[rolling_stuff[n]],
                                color='white',
                                ec=colour,
                                ax=ax,
                                zorder=100)

    # Span every game. The limit used to be taken from the last pitch type
    # processed, which truncated the axis when that pitch was not thrown in the
    # final games and failed outright when there were no pitch types at all.
    ax.set_xlim(1, max(len(date_to_number), 1))
    ax.set_ylim(70, 130)
    ax.set_xlabel('Games', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
|
|
|
|
def break_plot(df: pl.DataFrame, ax: plt.Axes):
    """
    Plot the pitch breaks for different pitch types.

    Every pitch is drawn as a point at (``hb``, ``ivb``) coloured by pitch
    type. Pitch types with more than four pitches also get a 2-standard-
    deviation confidence ellipse. For left-handed pitchers
    (``pitcher_hand == 'L'``) the x-axis is inverted.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The columns ``pitch_count``,
        ``pitch_type``, ``hb``, ``ivb`` and ``pitcher_hand`` are read; the
        handedness is taken from the first row.
    ax : plt.Axes
        The axis to plot on.
    """
    label_labels = df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type'].unique(maintain_order=True).to_numpy()

    # Confidence ellipses, one per pitch type with enough pitches.
    for label in label_labels:
        subset = df.filter(pl.col('pitch_type') == label)
        if len(subset) > 4:
            try:
                confidence_ellipse(subset['hb'], subset['ivb'], ax=ax, edgecolor=dict_colour[label], n_std=2, facecolor=dict_colour[label], alpha=0.2)
            except ValueError:
                # Skip only this ellipse; a bare ``return`` here used to leave
                # the rest of the plot undrawn.
                continue

    pitcher_hand = df['pitcher_hand'][0]

    # The scatter call was identical for both hands, so it is made once.
    sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)

    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    # Dashed reference lines through the origin.
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)

    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    # The hue legend created by the scatter call is not wanted on this axis.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    # Side labels in the bottom corners; left-handers get a mirrored x-axis.
    if pitcher_hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
    elif pitcher_hand == 'L':
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)

    ax.set_aspect('equal', adjustable='box')
    # Show tick values as integers.
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
|
|
|
|
|
|
|
|
# Corner points of the strike-zone rectangle. The first corner is repeated at
# the end so that plotting the columns in order draws a closed outline.
strike_zone = pl.DataFrame(
    dict(
        PlateLocSide=[-0.9, -0.9, 0.9, 0.9, -0.9],
        PlateLocHeight=[1.5, 3.5, 3.5, 1.5, 1.5],
    )
)
|
|
|
|
|
|
|
|
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """
    Draw the strike-zone outline and a home-plate outline on an axis.

    Parameters
    ----------
    axis : matplotlib.axes.Axes
        The axis to draw on.
    alpha_spot : float, optional
        Transparency applied to every line (default is 1).
    catcher_p : bool, optional
        Selects which of the two home-plate outlines is drawn; ``True``
        (the default) is the catcher's-perspective outline.
    """
    # Strike zone rectangle.
    axis.plot(
        strike_zone['PlateLocSide'].to_list(),
        strike_zone['PlateLocHeight'].to_list(),
        color='black', linewidth=1.3, zorder=3, alpha=alpha_spot,
    )

    # Home plate as five straight segments, each given as (xs, ys).
    if catcher_p:
        plate_segments = (
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        )
    else:
        plate_segments = (
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-0.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        )

    for xs, ys in plate_segments:
        axis.plot(xs, ys, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
|
|
|
|
|
def location_plot(df: pl.DataFrame, ax: plt.Axes, hand: str):
    """
    Draw average pitch locations, by pitch type, against one batter hand.

    For each pitch type with at least five pitches against ``hand`` a
    1.5-standard-deviation confidence ellipse of (``px``, ``pz``) is drawn.
    The mean location of every pitch type is then plotted as a marker whose
    size scales with that pitch type's share of the pitches thrown to
    ``hand``.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; the columns ``pitch_count``, ``pitch_type``,
        ``batter_hand``, ``start_speed``, ``px`` and ``pz`` are read.
    ax : plt.Axes
        The axis to draw on.
    hand : str
        The batter hand ('L' for left-handed, 'R' for right-handed).
    """
    pitch_types = (
        df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )
    versus_hand = pl.col('batter_hand') == hand

    # Location spread per pitch type.
    for pitch_type in pitch_types:
        subset = df.filter((pl.col('pitch_type') == pitch_type) & versus_hand)
        if len(subset) < 5:
            continue
        colour = dict_colour[pitch_type]
        confidence_ellipse(subset['px'], subset['pz'], ax=ax, edgecolor=colour, n_std=1.5, facecolor=colour, alpha=0.3)

    # Pitch count and mean location per pitch type.
    grouped = df.filter(versus_hand).group_by("pitch_type")
    pitch_location_group = grouped.agg([
        pl.col("start_speed").count().alias("pitches"),
        pl.col("px").mean().alias("px"),
        pl.col("pz").mean().alias("pz")
    ])

    # Usage share, used for the marker size.
    total_pitches = pitch_location_group['pitches'].sum()
    usage_share = (pl.col("pitches") / total_pitches).alias("pitch_percent")
    pitch_location_group = pitch_location_group.with_columns(usage_share)

    sns.scatterplot(
        ax=ax,
        x=pitch_location_group['px'],
        y=pitch_location_group['pz'],
        hue=pitch_location_group['pitch_type'],
        palette=dict_colour,
        ec='black',
        s=pitch_location_group['pitch_percent'] * 750,
        linewidth=2,
        zorder=2,
    )

    ax.axis('square')
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))

    # The legend is only removed when the scatter call had data to plot.
    has_points = len(pitch_location_group['px']) > 0
    if has_points:
        ax.get_legend().remove()

    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{total_pitches} Pitches", fontdict=font_properties_titles)
|
|
|
|
|
|
|
|
def summary_table(df: pl.DataFrame, ax: plt.Axes):
    """
    Render a per-pitch-type summary table, plus an 'All' row, on an axis.

    The table has one row per ``pitch_description`` (sorted by pitch count,
    descending) followed by an 'All' row. Selected columns are shaded relative
    to the matching row of ``functions/statcast_2024_grouped.csv``, or to a
    fixed range for tjStuff+ and Grade. A legend of pitch colours is drawn
    above the table.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data.
    ax : plt.Axes
        The axis to plot the table on.
    """
    # ---- aggregation expressions shared by the per-pitch and 'All' rows ----
    total_pitches = df.select(pl.col('is_pitch').sum())
    pitch_count = pl.col('is_pitch').sum().alias('count')
    pitch_share = (pl.col('is_pitch').sum() / total_pitches).alias('count_percent')
    mean_extension = pl.col('extension').mean().alias('extension')
    mean_stuff = pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus')
    outcome_rates = [
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon'),
    ]

    # Mean spin direction, offset by 180 degrees, turned into an "H:MM"
    # clock-face string.
    spin_axis = (pl.col('spin_direction').mean() + 180) % 360
    clock_time = (
        (spin_axis // 30)
        + ((spin_axis % 30 / 30 / 100 * 60).round(2) * 10).round(0) // 1.5 / 4
    ).cast(pl.Float64).map_elements(
        lambda x: f"{int(x)}:{int((x % 1) * 60):02d}", return_dtype=pl.Utf8
    ).alias('clock_time')

    # ---- one row per pitch description ----
    per_pitch = df.group_by("pitch_description").agg(
        pitch_count,
        pitch_share,
        *[pl.col(name).mean().alias(name)
          for name in ('start_speed', 'ivb', 'hb', 'spin_rate', 'vaa', 'haa')],
        pl.col('release_pos_z').mean().alias('z0'),
        pl.col('release_pos_x').mean().alias('x0'),
        mean_extension,
        clock_time,
        mean_stuff,
        pl.col('pitch_grade').mean().alias('pitch_grade'),
        *outcome_rates,
    ).sort("count", descending=True)

    # ---- the 'All' row: same columns, with per-pitch-only stats left empty ----
    overall = df.group_by(pl.lit("All").alias("pitch_description")).agg(
        pitch_count,
        pitch_share,
        *[pl.lit(None).alias(name)
          for name in ('start_speed', 'ivb', 'hb', 'spin_rate', 'vaa', 'haa', 'z0', 'x0')],
        mean_extension,
        pl.lit(None).alias('clock_time'),
        mean_stuff,
        pl.lit(None).alias('pitch_grade'),
        *outcome_rates,
    )

    df_agg = pl.concat([per_pitch, overall]).fill_nan(None)

    # Reference values used for the colour scales.
    statcast_pitch_summary = pl.read_csv('functions/statcast_2024_grouped.csv')

    # ---- build the table; missing values are shown as an em dash ----
    table = ax.table(
        cellText=df_agg.fill_nan('—').fill_null('—').to_numpy(),
        colLabels=df_agg.columns,
        cellLoc='center',
        colWidths=[2.3] + [1] * 18,
        bbox=[0.0, 0, 1, 0.8],
    )

    table.auto_set_font_size(False)
    table.set_fontsize(14)
    table.scale(1, 0.5)

    cells = table.get_celld()
    n_rows = len(df_agg)
    n_cols = len(df_agg.columns)

    # Body cells (everything below the header row) use a larger font.
    for row in range(1, n_rows + 1):
        for col in range(n_cols):
            cells[row, col].set_fontsize(18)

    cmap_sum = mcolors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])
    cmap_sum_r = mcolors.LinearSegmentedColormap.from_list("", ['#FFB000', '#FFFFFF', '#648FFF'])

    # (table column, reference column or None, low, high). With a reference
    # column, low/high multiply its mean; with None they are used as absolute
    # colour limits.
    columns_to_color = (
        (3, 'release_speed', 0.95, 1.05),
        (11, 'release_extension', 0.9, 1.1),
        (13, None, 80, 120),
        (14, None, 30, 70),
        (15, 'in_zone_rate', 0.7, 1.3),
        (16, 'chase_rate', 0.7, 1.3),
        (17, 'whiff_rate', 0.7, 1.3),
        (18, 'xwoba', 0.7, 1.3),
    )

    for i in range(n_rows):
        pitch_check = dict_pitch_desc_type[df_agg['pitch_description'][i]]
        name_cell = cells[(i + 1, 0)]
        cell_text = name_cell.get_text().get_text()

        if cell_text != 'All':
            # The pitch-name cell is filled with the pitch colour.
            name_cell.set_facecolor(dict_pitch_name[cell_text])
            # NOTE(review): 'Split-Finger' is not a key of dict_pitch_name, so
            # it can never reach this point without the lookup above failing --
            # possibly 'Splitter' was intended; confirm before changing.
            if cell_text in ['Split-Finger', 'Slider', 'Changeup']:
                text_colour = '#000000'
            else:
                text_colour = '#ffffff'
            name_cell.set_text_props(color=text_colour, fontweight='bold')
            if cell_text == 'Four-Seam Fastball':
                name_cell.get_text().set_text('4-Seam')

        select_df = statcast_pitch_summary.filter(statcast_pitch_summary['pitch_type'] == pitch_check)

        for col, stat, vmin_factor, vmax_factor in columns_to_color:
            stat_cell = cells[(i + 1, col)]
            cell_value = stat_cell.get_text().get_text()
            if cell_value == '—':
                continue

            if stat:
                reference_mean = select_df[stat].mean()
                vmin = reference_mean * vmin_factor
                vmax = reference_mean * vmax_factor
            else:
                vmin = vmin_factor
                vmax = vmax_factor

            normalize = mcolors.Normalize(vmin=vmin, vmax=vmax)
            # Column 18 uses the reversed colormap.
            cmap = cmap_sum_r if col == 18 else cmap_sum
            stat_cell.set_facecolor(get_color(float(cell_value.strip('%')), normalize, cmap))

    # The name cell of the last row ('All') is bold black.
    cells[(n_rows, 0)].set_text_props(color='#000000', fontweight='bold')

    # ---- header labels ----
    new_column_names = [
        '$\\bf{Pitch\\ Name}$', '$\\bf{Count}$', '$\\bf{Pitch\\%}$', '$\\bf{Velocity}$', '$\\bf{iVB}$',
        '$\\bf{HB}$', '$\\bf{Spin}$', '$\\bf{VAA}$', '$\\bf{HAA}$', '$\\bf{vRel}$', '$\\bf{hRel}$',
        '$\\bf{Ext.}$', '$\\bf{Axis}$', '$\\bf{tjStuff+}$', '$\\bf{Grade}$', '$\\bf{Zone\\%}$',
        '$\\bf{Chase\\%}$', '$\\bf{Whiff\\%}$', '$\\bf{xwOBA}$\n$\\bf{Contact}$',
    ]
    for header_idx, header_text in enumerate(new_column_names):
        cells[(0, header_idx)].get_text().set_text(header_text)

    # ---- number formatting ----
    def format_cells(columns, fmt):
        # Re-render every non-missing cell of the named columns with ``fmt``.
        for column_name in columns:
            col_idx = df_agg.columns.index(column_name)
            for row in range(1, n_rows + 1):
                text_obj = cells[(row, col_idx)].get_text()
                raw_text = text_obj.get_text()
                if raw_text != '—':
                    text_obj.set_text(fmt.format(float(raw_text.strip('%'))))

    format_cells(['start_speed', 'ivb', 'hb', 'vaa', 'haa', 'z0', 'x0', 'extension'], '{:,.1f}')
    format_cells(['xwobacon'], '{:,.3f}')
    format_cells(['count_percent', 'zone_percent', 'chase_percent', 'whiff_percent'], '{:,.1%}')
    format_cells(['tj_stuff_plus', 'pitch_grade', 'spin_rate'], '{:,.0f}')

    # ---- pitch-colour legend above the table ----
    items_in_order = df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy()
    colour_pitches = [dict_colour[x] for x in items_in_order]
    label = [dict_pitch[x] for x in items_in_order]
    handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]

    # More than five entries: smaller font and markers.
    if len(label) > 5:
        legend_size, legend_marker_scale = 16, 1.7
    else:
        legend_size, legend_marker_scale = 20, 2

    ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
              fancybox=True, loc='lower center', fontsize=legend_size, framealpha=1.0,
              markerscale=legend_marker_scale, prop={'family': 'calibi', 'size': legend_size})
    ax.axis('off')
|
|
|
|
|
def plot_footer(ax: plt.Axes):
    """
    Write the footer (credit, explanatory notes and data sources) on an axis.

    Parameters
    ----------
    ax : plt.Axes
        The axis that receives the footer text; its frame is switched off.
    """
    explanation = '''
Colour Coding Compares to League Average By Pitch
tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type
'''

    # Credit at the top left, notes centred, sources at the top right.
    ax.text(0, 1, 'By: @TJStats', ha='left', va='top', fontsize=24)
    ax.text(0.5, 0.25, explanation, ha='center', va='bottom', fontsize=12)
    ax.text(1, 1, 'Data: MLB, Fangraphs\nImages: MLB, ESPN', ha='right', va='top', fontsize=24)
    ax.axis('off')
|
|
|
|
|
|
|
|
def player_headshot(player_input: str, ax: plt.Axes, sport_id: int, season: int):
    """
    Display the player's headshot image on the given axis.

    The image is downloaded from img.mlbstatic.com. If the response cannot be
    decoded as an image the axis is simply switched off and left empty.

    Parameters
    ----------
    player_input : str
        The player's ID.
    ax : plt.Axes
        The axis to display the image on.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues). Anything accepted
        by ``int()`` works, e.g. the string ``'1'``.
    season : int
        The season year. Not used by this function.
    """
    # Decide MLB vs. minor league once, so that the URL and the image extent
    # always agree (previously a string '1' selected the MLB URL but the
    # minor-league extent).
    is_mlb = int(sport_id) == 1

    if is_mlb:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png'
    else:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'

    try:
        # A timeout keeps a stalled connection from hanging the whole figure.
        response = requests.get(url, timeout=30)
        img = Image.open(BytesIO(response.content))
    except PIL.UnidentifiedImageError:
        # The server answered with something that is not an image.
        ax.axis('off')
        return

    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    # The two image sources are placed with different horizontal extents.
    extent = [0, 1, 0, 1] if is_mlb else [1/6, 5/6, 0, 1]
    ax.imshow(img, extent=extent, origin='upper')

    ax.axis('off')
|
|
|
|
|
def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int):
    """
    Display the player's bio information on the given axis.

    The name, throwing hand, age, height and weight are read from the MLB
    Stats API ``people`` endpoint; the league abbreviation is read from the
    ``sports`` endpoint.

    Parameters
    ----------
    pitcher_id : str
        The player's ID.
    ax : plt.Axes
        The axis to display the bio information on.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues). Anything accepted
        by ``int()`` works, e.g. the string ``'1'``.
    year_input : int
        The season year.
    """
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"

    # A timeout keeps a stalled connection from hanging the whole figure.
    data = requests.get(url, timeout=30).json()
    person = data['people'][0]

    player_name = person['fullName']
    pitcher_hand = person['pitchHand']['code']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=56)
    ax.text(0.5, 0.7, f'{pitcher_hand}HP, Age:{age}, {height}/{weight}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.45, 'Season Pitching Summary', va='top', ha='center', fontsize=40)

    # Look up the abbreviation of the league identified by sport_id.
    response = requests.get(url='https://statsapi.mlb.com/api/v1/sports', timeout=30).json()
    df_sport_id = pl.DataFrame(response['sports'])
    # Normalise to int, matching player_headshot, so a string id still
    # matches the 'id' column.
    abb = df_sport_id.filter(pl.col('id') == int(sport_id))['abbreviation'][0]

    ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')

    ax.axis('off')
|
|
|
|
|
def plot_logo(pitcher_id: str, ax: plt.Axes, df_team: pl.DataFrame, df_players: pl.DataFrame):
    """
    Display the parent club's logo for the given pitcher on the specified axis.

    The pitcher's team is read from ``df_players``, looked up through the MLB
    Stats API teams endpoint, mapped to a parent-organisation abbreviation via
    ``df_team`` and finally to an ESPN logo image. If any of those lookups
    fails (unknown player, unknown team, abbreviation without a logo, or a
    response that is not a readable image) the axis is simply left blank.

    Parameters
    ----------
    pitcher_id : str
        The ID of the pitcher, matched against the ``player_id`` column of
        ``df_players``.
    ax : plt.Axes
        The axis to display the logo on.
    df_team : pl.DataFrame
        Team data; the ``team_id``, ``parent_org_id`` and
        ``parent_org_abbreviation`` columns are read.
    df_players : pl.DataFrame
        Player data; the ``player_id`` and ``team`` columns are read.
    """
    # Team abbreviation -> file slug used in the ESPN logo URL. Most slugs are
    # the lower-cased abbreviation; AZ, CWS and ATH are the exceptions.
    logo_slugs = {
        'AZ': 'ari', 'ATL': 'atl', 'BAL': 'bal', 'BOS': 'bos', 'CHC': 'chc',
        'CWS': 'chw', 'CIN': 'cin', 'CLE': 'cle', 'COL': 'col', 'DET': 'det',
        'HOU': 'hou', 'KC': 'kc', 'LAA': 'laa', 'LAD': 'lad', 'MIA': 'mia',
        'MIL': 'mil', 'MIN': 'min', 'NYM': 'nym', 'NYY': 'nyy', 'OAK': 'oak',
        'PHI': 'phi', 'PIT': 'pit', 'SD': 'sd', 'SF': 'sf', 'SEA': 'sea',
        'STL': 'stl', 'TB': 'tb', 'TEX': 'tex', 'TOR': 'tor', 'WSH': 'wsh',
        'ATH': 'oak',
    }
    logo_url_template = 'https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/{slug}.png&h=500&w=500'

    try:
        team_id = df_players.filter(pl.col('player_id') == pitcher_id)['team'][0]

        team = requests.get(f'https://statsapi.mlb.com/api/v1/teams/{team_id}').json()['teams'][0]

        # A team whose id appears among the parent organisation ids is looked
        # up by its own id; any other team is resolved through its parentOrgId.
        if team['id'] in df_team['parent_org_id']:
            team_abb = df_team.filter(pl.col('team_id') == team['id'])['parent_org_abbreviation'][0]
        else:
            team_abb = df_team.filter(pl.col('parent_org_id') == team['parentOrgId'])['parent_org_abbreviation'][0]

        logo_url = logo_url_template.format(slug=logo_slugs[team_abb])
        img = Image.open(BytesIO(requests.get(logo_url).content))
    except (KeyError, IndexError, PIL.UnidentifiedImageError):
        # KeyError: missing API field or abbreviation without a logo.
        # IndexError: no matching row in df_players / df_team.
        # UnidentifiedImageError: the logo response was not a decodable image.
        ax.axis('off')
        return

    # The x-range is wider than the image so the logo sits at the right edge.
    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    ax.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')

    ax.axis('off')
|
|
|
|
|
# Numeric code for each supported split name.
# NOTE(review): the consumer of these codes is not visible in this part of the
# file; presumably they are split identifiers for a stats provider - confirm.
splits = {
    'all': 0,
    'left': 13,
    'right': 14,
}

# Suffix appended to the table title for each split name.
splits_title = {
    'all': '',
    'left': ' vs LHH',
    'right': ' vs RHH',
}
|
|
|
|
|
|
|
|
def fangraphs_pitching_leaderboards(season: int,
                                    split: str,
                                    start_date: str = '2024-01-01',
                                    end_date: str = '2024-12-31') -> pl.DataFrame:
    """
    Fetch pitching leaderboards data from Fangraphs.

    Parameters
    ----------
    season : int
        The season year, sent as both ``season`` and ``season1``.
    split : str
        Accepted for interface compatibility; it is not part of the request
        and does not influence the result.
    start_date : str, optional
        The start date for the data (default is '2024-01-01').
    end_date : str, optional
        The end date for the data (default is '2024-12-31').

    Returns
    -------
    pl.DataFrame
        The DataFrame built from the ``data`` field of the JSON response.
    """
    url = 'https://www.fangraphs.com/api/leaders/major-league/data'

    # The query is passed separately from the URL so that the request contains
    # exactly these key/value pairs, with no stray whitespace or line breaks
    # inside any value.
    params = {
        'age': '',
        'pos': 'all',
        'stats': 'pit',
        'lg': 'all',
        'season': season,
        'season1': season,
        'startdate': start_date,
        'enddate': end_date,
        'ind': 0,
        'qual': 0,
        'type': 8,
        'month': 1000,
        'pageitems': 500000,
    }

    data = requests.get(url, params=params).json()
    return pl.DataFrame(data=data['data'], infer_schema_length=1000)
|
|
|
|
|
def fangraphs_splits_scrape(player_input: str, year_input: int, start_date: str, end_date: str, split: str) -> pl.DataFrame:
    """
    Pull one player's Fangraphs splits row and return it as a DataFrame.

    The player's Fangraphs ID is resolved from the season pitching
    leaderboard by matching ``xMLBAMID``. The splits endpoint is then queried
    twice, first with ``strType`` "2" and then with "1", and the first row of
    the second response is merged over the first row of the first response.

    Parameters
    ----------
    player_input : str
        The player's ID, compared against the leaderboard's ``xMLBAMID`` column.
    year_input : int
        The season year used for the leaderboard lookup.
    start_date : str
        The start date for the data; reformatted to ``YYYY-MM-DD``.
    end_date : str
        The end date for the data; reformatted to ``YYYY-MM-DD``.
    split : str
        The split type: 'all', 'left' or 'right'.

    Returns
    -------
    pl.DataFrame
        The DataFrame containing the merged splits data.
    """
    # Split name -> list of codes sent in "strSplitArr".
    split_codes = {
        'all': [],
        'left': ['5'],
        'right': ['6']
    }

    endpoint = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
    json_headers = {'Content-Type': 'application/json'}

    # Resolve the Fangraphs player id from the full-season leaderboard.
    leaderboard = fangraphs_pitching_leaderboards(
        year_input,
        split='All',
        start_date=f'{year_input}-01-01',
        end_date=f'{year_input}-12-31'
    )
    player_rows = leaderboard.filter(pl.col('xMLBAMID') == player_input)
    fg_id = str(player_rows['playerid'][0])

    base_payload = {
        "strPlayerId": fg_id,
        "strSplitArr": split_codes[split],
        "strGroup": "season",
        "strPosition": "P",
        "strType": "2",
        "strStartDate": pd.to_datetime(start_date).strftime('%Y-%m-%d'),
        "strEndDate": pd.to_datetime(end_date).strftime('%Y-%m-%d'),
        "strSplitTeams": False,
        "dctFilters": [],
        "strStatType": "player",
        "strAutoPt": False,
        "arrPlayerId": [],
        "strSplitArrPitch": [],
        "arrWxTemperature": None,
        "arrWxPressure": None,
        "arrWxAirDensity": None,
        "arrWxElevation": None,
        "arrWxWindSpeed": None
    }

    def first_row(request_body):
        # POST the JSON-encoded body and return the first entry of 'data'.
        reply = requests.post(endpoint, data=json.dumps(request_body), headers=json_headers)
        return reply.json()['data'][0]

    merged_row = first_row(base_payload)

    # Second request: identical body except for "strType"; on key clashes its
    # values replace those from the first response.
    merged_row.update(first_row({**base_payload, "strType": "1"}))

    return pl.DataFrame(merged_row)
|
|
|
|
|
|
|
|
def fangraphs_table(df: pl.DataFrame,
                    ax: plt.Axes,
                    player_input: str,
                    season: int,
                    split: str):
    """
    Draw a one-row table of Fangraphs pitching stats for a player.

    The date range is taken from the first and last ``game_date`` values of
    ``df``; the stats themselves come from ``fangraphs_splits_scrape``.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data; only the first and last ``game_date`` values are read.
    ax : plt.Axes
        The axis to plot the table on.
    player_input : str
        The player's ID, forwarded to ``fangraphs_splits_scrape``.
    season : int
        The season year.
    split : str
        The split type; must be a key of ``splits_title``
        ('all', 'left' or 'right').
    """
    range_start = df['game_date'][0]
    range_end = df['game_date'][-1]

    fangraphs_row = fangraphs_splits_scrape(player_input=player_input,
                                            year_input=season,
                                            start_date=range_start,
                                            end_date=range_end,
                                            split=split)

    stat_columns = ['IP', 'WHIP', 'ERA', 'TBF', 'FIP', 'K%', 'BB%', 'K-BB%']
    plot_table = fangraphs_row.select(stat_columns)

    # Format each value with its configured format spec; the '---'
    # placeholder is passed through untouched.
    cell_values = []
    for column in plot_table.columns:
        raw_value = plot_table[column][0]
        if raw_value != '---':
            cell_values.append(format(raw_value, fangraphs_stats_dict[column]['format']))
        else:
            cell_values.append('---')

    table_fg = ax.table(cellText=[cell_values],
                        colLabels=plot_table.columns,
                        cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])
    table_fg.set_fontsize(20)

    # Swap the raw column names in the header row for their display headers.
    display_headers = [fangraphs_stats_dict[column]['table_header'] for column in plot_table.columns]
    header_cells = table_fg.get_celld()
    for position, header in enumerate(display_headers):
        header_cells[(0, position)].get_text().set_text(header)

    ax.text(0.5, 0.9, f'{range_start} to {range_end}{splits_title[split]}',
            va='bottom', ha='center', fontsize=36, fontstyle='italic')
    ax.axis('off')
|
|
|
|
|
|
|
|
def stat_summary_table(df: pl.DataFrame,
                       player_input: int,
                       sport_id: int,
                       ax: plt.Axes,
                       split: str = 'All'):
    """
    Create a summary table of player statistics.

    Three layouts are possible:

    * first and last ``game_id`` in ``df`` are equal: a single-game box-score
      line built from the MLB Stats API;
    * several games and ``sport_id`` is not 1: a rate-stat line built from the
      MLB Stats API;
    * several games and ``sport_id`` is 1: the table is delegated to
      ``fangraphs_table`` and no Stats API request is made.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. The ``game_date``, ``game_id``,
        ``is_whiff`` and ``batter_team`` columns are read.
    player_input : int
        The player's ID.
    sport_id : int
        The sport ID (1 for MLB, other for minor leagues).
    ax : plt.Axes
        The axis to plot the table on.
    split : str, optional
        The split type (default is 'All'). Only used by the Fangraphs layout,
        where it is lower-cased before being forwarded because the split
        lookup tables are keyed 'all', 'left' and 'right'.
    """
    first_date = df['game_date'][0]
    last_date = df['game_date'][-1]
    single_game = df['game_id'][0] == df['game_id'][-1]

    # The Fangraphs layout needs nothing from the Stats API, so hand over
    # before any request is made.
    if not single_game and sport_id == 1:
        fangraphs_table(df=df, ax=ax, player_input=player_input,
                        season=int(first_date[0:4]), split=split.lower())
        return

    start_date_format = pd.to_datetime(first_date).strftime('%m/%d/%Y')
    end_date_format = pd.to_datetime(last_date).strftime('%m/%d/%Y')

    appContext = 'majorLeague' if sport_id == 1 else 'minorLeague'

    pitcher_stats_call = requests.get(
        f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},startDate={start_date_format},endDate={end_date_format})'
    ).json()

    # The last split of the first stats group holds the stat line that is shown.
    stat_line = pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']
    pitcher_stats_call_df = pl.DataFrame(data=dict(stat_line))

    def as_percent(ratio: pl.Expr) -> pl.Expr:
        # Render a ratio as a one-decimal percentage string such as "25.0%".
        # The sign is appended by string concatenation on every row.
        return (ratio * 100).round(1).cast(pl.Utf8) + pl.lit('%')

    pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
        pl.lit(df['is_whiff'].sum()).alias('whiffs'),
        as_percent(pl.col('strikeOuts') / pl.col('battersFaced')).alias('k_percent'),
        as_percent(pl.col('baseOnBalls') / pl.col('battersFaced')).alias('bb_percent'),
        as_percent((pl.col('strikeOuts') - pl.col('baseOnBalls')) / pl.col('battersFaced')).alias('k_bb_percent'),
        # (13*HR + 3*(BB + HBP) - 2*K) / IP + 3.15, with IP derived from outs.
        ((pl.col('homeRuns') * 13
          + 3 * (pl.col('baseOnBalls') + pl.col('hitByPitch'))
          - 2 * pl.col('strikeOuts'))
         / (pl.col('outs') / 3) + 3.15).round(2).map_elements(lambda x: f"{x:.2f}").alias('fip'),
        as_percent(pl.col('strikes') / pl.col('numberOfPitches')).alias('strikePercentage'),
    )

    # Header labels are mathtext; raw strings keep the backslashes literal
    # without relying on unrecognised escape sequences such as "\%".
    if single_game:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(
            ['inningsPitched', 'battersFaced', 'earnedRuns', 'hits', 'strikeOuts',
             'baseOnBalls', 'hitByPitch', 'homeRuns', 'strikePercentage', 'whiffs'])
        new_column_names = [r'$\bf{IP}$', r'$\bf{PA}$', r'$\bf{ER}$', r'$\bf{H}$', r'$\bf{K}$',
                            r'$\bf{BB}$', r'$\bf{HBP}$', r'$\bf{HR}$', r'$\bf{Strike\%}$', r'$\bf{Whiffs}$']
        title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'
    else:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(
            ['inningsPitched', 'battersFaced', 'whip', 'era', 'fip',
             'k_percent', 'bb_percent', 'k_bb_percent', 'strikePercentage'])
        new_column_names = [r'$\bf{IP}$', r'$\bf{PA}$', r'$\bf{WHIP}$', r'$\bf{ERA}$', r'$\bf{FIP}$',
                            r'$\bf{K\%}$', r'$\bf{BB\%}$', r'$\bf{K-BB\%}$', r'$\bf{Strike\%}$']
        title = f'{df["game_date"][0]} to {df["game_date"][-1]}'

    table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(),
                        colLabels=pitcher_stats_call_df_small.columns,
                        cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])
    table_fg.set_fontsize(20)

    # Replace the raw column names in the header row with the display labels.
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    ax.text(0.5, 0.9, title, va='bottom', ha='center', fontsize=36, fontstyle='italic')
    ax.axis('off')
|
|
|