milb_statcast / app.py
nesticot's picture
Update app.py
dc447d5 verified
import polars as pl
import api_scraper
import pandas as pd
scrape = api_scraper.MLB_Scrape()
# import df_update
# update = df_update.df_update()
from matplotlib.colors import LinearSegmentedColormap, Normalize
import numpy as np
import requests
from io import BytesIO
from PIL import Image
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import PIL
# Display label for each sport ID, keyed by the ID as a string
# (player_bio looks levels up with level_dict[str(sport_id)]).
# NOTE(review): level_dict is rebound further down this module (there '14'
# maps to 'A (FSL)'), so these values look superseded before any plot is
# drawn — confirm and keep only one definition.
level_dict = {'1':'MLB',
              '11':'AAA',
              '14':'A',
              '17':'AFL',}
def player_bio(pitcher_id: str, ax: plt.Axes, sport_id: int, year_input: int, timeout: float = 10.0):
    """
    Display the player's bio information on the given axis.

    Looks the player up in the MLB Stats API and writes four centred text
    rows on ``ax``: the full name, a summary line (position, bats/throws,
    age, height/weight), a "Season Pitching/Batting Percentiles" heading
    chosen by the player's primary position, and a
    ``"<year> <level> Season"`` label. The axis frame is then hidden.

    Parameters
    ----------
    pitcher_id : str
        The player's ID (sent as ``personIds`` to the Stats API).
    ax : plt.Axes
        The axis to display the bio information on.
    sport_id : int
        The sport ID; ``str(sport_id)`` must be a key of ``level_dict``.
    year_input : int
        The season year.
    timeout : float, optional
        Seconds to wait for the Stats API before giving up.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or times out.
    KeyError, IndexError
        If the response lacks an expected field / player entry, or if
        ``sport_id`` is not present in ``level_dict``.
    """
    # Construct the URL to fetch player data
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"

    # A timeout keeps a stalled API call from blocking the plot forever.
    data = requests.get(url, timeout=timeout).json()

    # Extract player information from the JSON data
    person = data['people'][0]
    player_name = person['fullName']
    position = person['primaryPosition']['abbreviation']
    bat_side = person['batSide']['code']
    pitcher_hand = person['pitchHand']['code']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    # Display the player's name, handedness, age, height, and weight on the axis
    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.65, f'{position}, B/T: {bat_side}/{pitcher_hand}, Age: {age}, {height}/{weight}', va='top', ha='center', fontsize=20)
    if position == 'P':
        ax.text(0.5, 0.38, 'Season Pitching Percentiles', va='top', ha='center', fontsize=16)
    else:
        ax.text(0.5, 0.41, 'Season Batting Percentiles', va='top', ha='center', fontsize=16)

    # The level label comes from the module-level mapping; no extra API call
    # to the /sports endpoint is needed for it.
    abb = level_dict[str(sport_id)]

    # Display the season and sport abbreviation
    ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=14, fontstyle='italic')

    # Turn off the axis
    ax.axis('off')
# Fetch the team table once, when the module is imported.
df_teams = scrape.get_teams()
# Lookup from team_id to parent_org_abbreviation; the plot code uses the
# resulting abbreviation as the key into image_dict to pick a logo.
team_dict = dict(zip(df_teams['team_id'],df_teams['parent_org_abbreviation']))
# ESPN logo URL for every MLB organisation, keyed by team abbreviation.
# "ZZZ" is a placeholder entry that points at the generic MLB league logo.
image_dict = {
    "AZ": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500",
    "ATH": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500",
    "ATL": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500",
    "BAL": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bal.png&h=500&w=500",
    "BOS": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bos.png&h=500&w=500",
    "CHC": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chc.png&h=500&w=500",
    "CWS": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chw.png&h=500&w=500",
    "CIN": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cin.png&h=500&w=500",
    "CLE": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cle.png&h=500&w=500",
    "COL": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/col.png&h=500&w=500",
    "DET": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/det.png&h=500&w=500",
    "HOU": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/hou.png&h=500&w=500",
    "KC": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/kc.png&h=500&w=500",
    "LAA": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/laa.png&h=500&w=500",
    "LAD": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/lad.png&h=500&w=500",
    "MIA": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mia.png&h=500&w=500",
    "MIL": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mil.png&h=500&w=500",
    "MIN": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/min.png&h=500&w=500",
    "NYM": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nym.png&h=500&w=500",
    "NYY": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nyy.png&h=500&w=500",
    "PHI": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/phi.png&h=500&w=500",
    "PIT": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/pit.png&h=500&w=500",
    "SD": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sd.png&h=500&w=500",
    "SF": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sf.png&h=500&w=500",
    "SEA": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sea.png&h=500&w=500",
    "STL": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/stl.png&h=500&w=500",
    "TB": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tb.png&h=500&w=500",
    "TEX": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tex.png&h=500&w=500",
    "TOR": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tor.png&h=500&w=500",
    "WSH": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500",
    "ZZZ": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500",
}

# The same data as a list of {"team", "logo_url"} records and as a
# two-column DataFrame, in the order listed above.
mlb_teams = [
    {"team": abbreviation, "logo_url": logo}
    for abbreviation, logo in image_dict.items()
]
df_image = pd.DataFrame(mlb_teams)

# Reverse lookup (logo URL -> team abbreviation); this is what the
# "Select Custom Logo" input receives as its choices.
image_dict_flip = {logo: abbreviation for abbreviation, logo in image_dict.items()}
# Levels offered in the "Select Level" input: sport ID (as a string) -> label.
level_dict = {
    "1": "MLB",
    "11": "AAA",
    "14": "A (FSL)",
    "17": "AFL",
}

# Sport ID -> level token used when building summary file names.
level_dict_file = {
    "1": "mlb",
    "11": "aaa",
    "14": "lo_a",
    "17": "afl",
}

# Sport ID -> numeric threshold for each level.
level_dict_filter = {
    "1": 350,
    "11": 300,
    "14": 200,
    "17": 50,
}

# Seasons offered in the "Select Season" input.
year_list = [2024, 2025]
import requests
import os
# Credentials and fallback passwords come from the environment; any of them
# may be unset (None).
CAMPAIGN_ID = os.getenv("CAMPAIGN_ID")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
BACKUP_PW = os.getenv("BACKUP_PW")
ADMIN_PW = os.getenv("ADMIN_PW")

# Patreon tier whose members may log in with their email address.
SUPERSTAR_TIER_ID = '9078921'

url = f"https://www.patreon.com/api/oauth2/v2/campaigns/{CAMPAIGN_ID}/members"
headers = {
    "Authorization": f"Bearer {ACCESS_TOKEN}"
}
# Simple parameters, requesting the member's email and currently entitled tiers
# NOTE(review): only a single page is requested and no "next" cursor is
# followed, so members beyond the first page would be missed. Also confirm
# against the Patreon API docs that "page[size]" is the honoured parameter
# name (the docs appear to describe "page[count]").
params = {
    "fields[member]": "full_name,email",  # Request the member's email
    "include": "currently_entitled_tiers",  # Include the currently entitled tiers
    "page[size]": 1000  # Fetch up to 1000 patrons per request
}

# Everything accepted by the login form: entitled patrons' emails plus the
# configured backup/admin passwords.
VALID_PASSWORDS = []

try:
    # A timeout keeps app start-up from hanging on a stalled connection.
    response = requests.get(url, headers=headers, params=params, timeout=30)
except requests.RequestException as exc:
    # Fall back to the backup/admin passwords instead of failing to start.
    print(f"Patreon member lookup failed: {exc}")
    response = None

if response is not None and response.status_code == 200:
    data = response.json()
    for patron in data.get('data', []):
        try:
            tiers = patron['relationships']['currently_entitled_tiers']['data']
            if not any(tier['id'] == SUPERSTAR_TIER_ID for tier in tiers):
                continue
            email = patron['attributes']['email']
        except KeyError:
            # Member record without the expected fields: skip it.
            continue
        # Never store a missing/empty email: an empty entry would let an
        # empty password field pass the login check.
        if email:
            VALID_PASSWORDS.append(email)

# Same rule for the fallback passwords: ignore unset or empty variables.
VALID_PASSWORDS.extend(pw for pw in (BACKUP_PW, ADMIN_PW) if pw)
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags
# Define the login UI: a single card with the access instructions, the
# password/email field, the "authenticated" flag checkbox that the server
# flips on success, the login button and a message line.
login_ui = ui.page_fluid(
    ui.card(
        ui.h2([
            "TJStats MiLB Statcast App ",
            ui.tags.a("(@TJStats)", href="https://twitter.com/TJStats", target="_blank")
        ]),
        ui.p(
            "This App is available to Superstar Patrons and One-Time Purchasers. If you are a Superstar Patron, please enter your Patreon email address in the box below. If you are a One-Time Buyer (or having trouble), please refer to the ",
            ui.tags.a("Patreon post", href="https://www.patreon.com/posts/118363824", target="_blank"),
            "."
        ),
        ui.input_password("password", "Enter Patreon Email (or Password from Link):", width="25%"),
        # Disabled checkbox used purely as state: the conditional panels in
        # app_ui read input.authenticated, and the server sets it via
        # ui.update_checkbox("authenticated", ...).
        ui.tags.input(
            type="checkbox",
            id="authenticated",
            value=False,
            disabled=True
        ),
        ui.input_action_button("login", "Login", class_="btn-primary"),
        ui.output_text("login_message"),
    )
)
# Main application layout: a sidebar with the season/level/player/logo
# controls, and a content area with the page header, descriptive text and
# the "Batter Summary" / "Pitcher Summary" plot tabs.
main_ui = ui.page_sidebar(
    # Sidebar content
    ui.sidebar(
        # Row for selecting season and level
        ui.row(
            ui.column(6, ui.input_select('year_input', 'Select Season', year_list, selected=2025)),
            ui.column(6, ui.input_select('level_input', 'Select Level', level_dict)),
        ),
        # Row for the action button to get player list
        ui.row(ui.input_action_button("player_button", "Get Player List", class_="btn-primary")),
        # Row for selecting the player (filled in by the server's
        # player_select_ui output).
        # NOTE(review): 'Select Player' is passed positionally; output_ui's
        # second positional parameter appears to be `inline`, not a label —
        # confirm whether this argument is intended.
        ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))),
        # Row for overriding the team logo; the select's choices map each
        # logo URL to its team abbreviation.
        ui.row(
            ui.column(6, ui.input_switch("switch", "Custom Team?", False)),
            ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False))
        ),
        # Row for the action button to generate plot
        ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")),
        width="400px"
    ),
    # Main content area with header and tabs
    ui.tags.div(
        {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
        # Inline CSS for the header sizes. The string content is kept
        # verbatim, so its lines are intentionally not indented.
        ui.tags.style(
            """
h4 {
margin-top: 1em;font-size:35px;
}
h2{
font-size:25px;
}
"""
        ),
        ui.tags.h4("TJStats"),
        ui.tags.i("Baseball Analytics and Visualizations"),
        ui.markdown("""<a href='https://x.com/TJStats'>Follow me on Twitter</a><sup>1</sup>"""),
        ui.markdown("""<a href='https://www.patreon.com/tj_stats'>Support me on Patreon for Access to 2025 Apps</a><sup>1</sup>"""),
        ui.markdown("### MiLB Statcast Batting Summaries"),
        ui.markdown("""This Shiny App allows you to generate Baseball Savant-style percentile bars for MiLB players in the 2024 & 2025 Season.
Currently, MiLB Statcast is only available for AAA and A (Florida State League) level.
"""),
        ui.markdown("""
For ease of sharing, you can right-click (desktop) or press+hold (mobile) to save/copy the image.
"""),
        # Main content area with tabs; the selected tab's title is exposed
        # as input.tabset and drives which player list the server builds.
        ui.navset_tab(
            ui.nav_panel("Batter Summary",
                ui.output_text("status_batter"),
                ui.output_plot('batter_plot', width='1200px', height='1200px')
            ),
            ui.nav_panel("Pitcher Summary",
                ui.output_text("status_pitcher"),
                ui.output_plot('pitcher_plot', width='1200px', height='1200px')
            ),
            id="tabset"
        )
    )
)
# Combined UI with conditional panel: the login card is shown while
# input.authenticated is falsy, and the main UI once it becomes truthy.
# NOTE(review): panel_conditional conditions are JavaScript expressions
# evaluated in the browser, so this only toggles visibility — confirm
# whether the server-side outputs also need to check authentication.
app_ui = ui.page_fluid(
    ui.tags.head(
        # Loads script.js into the page head (the file itself is not part
        # of this module).
        ui.tags.script(src="script.js")
    ),
    ui.panel_conditional(
        "!input.authenticated",
        login_ui
    ),
    ui.panel_conditional(
        "input.authenticated",
        main_ui
    )
)
def server(input, output, session):
# Text shown under the login form. "login_message" is an output, so it cannot
# be changed with ui.update_text (that only talks to inputs); instead the
# effect below writes this value and the output re-renders from it.
login_error = reactive.Value("")

@reactive.Effect
@reactive.event(input.login)
def check_password():
    """Validate the submitted password each time the Login button is clicked.

    On a match the "authenticated" checkbox is switched on, which reveals
    the main UI; otherwise an error message is published and the password
    field is cleared.
    """
    if input.password() in VALID_PASSWORDS:
        ui.update_checkbox("authenticated", value=True)
        login_error.set("")
    else:
        login_error.set("Invalid password!")
        ui.update_text("password", value="")

@output
@render.text
def login_message():
    """Return the current login status text (empty until a login fails)."""
    return login_error()
@render.ui
@reactive.event(input.player_button,input.level_input,input.year_input,input.tabset, ignore_none=False)
def player_select_ui():
    """Build the player selector for the active tab.

    Reads the season/level summary file for batters or pitchers (depending
    on the selected tab) and returns a select input whose choices map each
    player ID to ``"<name> - <position>"``. Players whose position is not
    known from the roster lookup are shown with ``"N/A"``.

    Returns
    -------
    A ``ui.input_select`` with id ``"batter_id"`` or ``"pitcher_id"``, or
    ``None`` when the active tab is neither summary tab.
    """
    # Active tab -> (file/column prefix, input id, label, keep pitchers?)
    tab_settings = {
        "Batter Summary": ("batter", "batter_id", "Select Player", False),
        "Pitcher Summary": ("pitcher", "pitcher_id", "Select Pitcher", True),
    }
    settings = tab_settings.get(input.tabset())
    if settings is None:
        return None
    role, select_id, label, want_pitchers = settings

    year = int(input.year_input())
    sport_id = int(input.level_input())

    # Roster for the selected level/season, restricted to pitchers (P/TWP)
    # on the pitcher tab and to everyone else on the batter tab.
    is_pitcher = pl.col("position").is_in(['P', 'TWP'])
    df_player_info = scrape.get_players(sport_id=sport_id, season=year).filter(
        is_pitcher if want_pitchers else ~is_pitcher
    ).sort("name")
    position_by_id = dict(zip(df_player_info['player_id'], df_player_info['position']))

    # Season summary for the selected level, ordered by player name.
    summary = pl.read_parquet(
        f"hf://datasets/TJStatsApps/mlb_data/summary/{role}_summary_{level_dict_file[str(sport_id)]}_{year}.parquet"
    ).sort(f'{role}_name', descending=False)

    # Attach each player's position. The fallback must be a string so the
    # column keeps a single dtype and can be concatenated with the name.
    summary = summary.with_columns(
        pl.col(f"{role}_id")
        .map_elements(lambda x: position_by_id.get(x) or "N/A", return_dtype=pl.Utf8)
        .alias("position")
    )

    # Choices: player ID -> "Name - Position"
    choices = dict(zip(summary[f'{role}_id'], summary[f'{role}_name'] + ' - ' + summary['position']))
    return ui.input_select(select_id, label, choices, selectize=True)
@output
@render.plot
@reactive.event(input.generate_plot, ignore_none=False)
def batter_plot():
    """Render the percentile card for the batter selected in the sidebar.

    Loads the batter summary for the selected season/level, ranks the
    selected batter against batters meeting the plate-appearance threshold,
    and draws a Baseball Savant-style figure (bio, headshot, logo, season
    table, scouting grades and percentile bars).

    Returns
    -------
    matplotlib.figure.Figure
        The finished card, or a placeholder figure reading
        "No Statcast Data For This Batter" when the batter has no summary
        row or a ``ValueError`` occurs while drawing.
    """
    # Summary column -> number format, whether the percentile is inverted
    # (shown as 1 - rank), and the label drawn next to the bar.
    merged_dict = {
        "woba_percent": { "format": '.3f', "percentile_flip": False, "stat_title": "wOBA" },
        "xwoba_percent": { "format": '.3f', "percentile_flip": False, "stat_title": "xwOBA" },
        "launch_speed": { "format": '.1f', "percentile_flip": False, "stat_title": "Average EV"},
        "launch_speed_90": { "format": '.1f', "percentile_flip": False, "stat_title": "90th% EV"},
        "max_launch_speed": { "format": '.1f', "percentile_flip": False, "stat_title": "Max EV"},
        "barrel_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Barrel%" },
        "hard_hit_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Hard-Hit%" },
        "sweet_spot_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "LA Sweet-Spot%" },
        #"zone_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Zone%" },
        "zone_swing_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Z-Swing%" },
        "chase_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "O-Swing%" },
        "whiff_rate": { "format": '.1%', "percentile_flip": True, "stat_title": "Whiff%" },
        "zone_contact_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Z-Contact%" },
        "k_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "K%" },
        "bb_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "BB%" },
        "pull_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Pull%" },
        "pulled_air_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Pull Air%" },
    }

    # Seconds to wait for each image download before giving up.
    request_timeout = 30

    def no_data_figure():
        """Placeholder figure used when the batter cannot be plotted."""
        placeholder = plt.figure(figsize=(26,26))
        placeholder.text(x=0.1,y=0.9,s='No Statcast Data For This Batter',fontsize=36,ha='left')
        return placeholder

    # Show progress/loading notification
    with ui.Progress(min=0, max=1) as p:

        def draw_baseball_savant_percentiles(new_player_metrics, new_player_percentiles, colors=None,
                                             sport_id=None,
                                             year_input=None):
            """
            Draw Baseball Savant-style percentile bars with proper alignment and scaling.

            :param new_player_metrics: DataFrame containing new player metrics.
            :param new_player_percentiles: DataFrame containing new player percentiles.
            :param colors: Ignored; bar colours are always derived from the percentiles.
            :param sport_id: Sport ID used for the bio label and headshot URL.
            :param year_input: Season year shown in the bio.
            :return: The finished figure, or the "no data" placeholder on ValueError.
            """
            batter_id = new_player_metrics['batter_id'][0]
            stat_keys = list(merged_dict)
            stats = [merged_dict[key]['stat_title'] for key in stat_keys]

            fig = None
            try:
                # Calculate percentiles and values. int() raises ValueError on
                # NaN, which is what routes incomplete rows to the placeholder.
                percentiles = [
                    int((1 - x) * 100) if merged_dict[stat]["percentile_flip"] else int(x * 100)
                    for x, stat in zip(new_player_percentiles.select(stat_keys).to_numpy()[0], stat_keys)
                ]
                percentiles = np.clip(percentiles, 1, 100)
                values = [
                    f'{x:{merged_dict[stat]["format"]}}'.strip('%')
                    for x, stat in zip(new_player_metrics.select(stat_keys).to_numpy()[0], stat_keys)
                ]

                # Get team logo URL, falling back to the generic league logo
                try:
                    logo_url = image_dict[team_dict[player_team_dict[batter_id]]]
                except KeyError:
                    logo_url = "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"

                # Create a custom colormap
                color_list = ['#3661AD', '#B4CFD1', '#D82129']
                cmap = LinearSegmentedColormap.from_list("custom_cmap", color_list)
                norm = Normalize(vmin=0.1, vmax=0.9)
                norm_percentiles = norm(percentiles / 100)
                colors = [cmap(value) for value in norm_percentiles]

                # Figure setup
                bar_height = 4.5
                spacing = 1
                fig_height = (bar_height + spacing) * len(stats)
                fig = plt.figure(figsize=(12, 12))
                gs = GridSpec(6, 5, height_ratios=[0.1, 1.5, 0.9, 0.9, 7.6, 0.1], width_ratios=[0.2, 1.5, 7, 1.5, 0.2])

                # Define subplots
                ax_title = fig.add_subplot(gs[1, 2])
                ax_table = fig.add_subplot(gs[2, :])
                ax_fv_table = fig.add_subplot(gs[3, :])
                ax_fv_table.axis('off')
                ax = fig.add_subplot(gs[4, :])
                ax_logo = fig.add_subplot(gs[1, 3])
                ax.set_xlim(-1, 99)
                ax.set_ylim(-1, 99)
                ax.set_aspect("equal")
                ax.axis("off")

                # Draw each bar
                fs = 14
                for i, (stat, percentile, value, color) in enumerate(zip(stats, percentiles, values, colors)):
                    y = fig_height - (i + 1) * (bar_height + spacing)
                    ax.add_patch(patches.Rectangle((0, y + bar_height / 4), 100, bar_height / 2, color="#C7DCDC", lw=0))
                    ax.add_patch(patches.Rectangle((0, y), percentile, bar_height, color=color, lw=0))
                    circle_y = y + bar_height - bar_height / 2
                    circle = plt.Circle((percentile, circle_y), bar_height / 2, color=color, ec='white', lw=1.5, zorder=10)
                    ax.add_patch(circle)
                    ax.text(percentile, circle_y, f"{percentile}", ha="center", va="center", fontsize=10, color='white', zorder=10, fontweight='bold')
                    ax.text(-5, y + bar_height / 2, stat, ha="right", va="center", fontsize=fs)
                    ax.text(115, y + bar_height / 2, str(value), ha="right", va="center", fontsize=fs, zorder=5)
                    # Dashed separators above every row except the first
                    if i > 0:
                        ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=-33, xmax=0)
                        ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=100, xmax=115)

                # Draw vertical lines for 10%, 50%, and 90% with labels
                for x, label, align, color in zip([10, 50, 90], ["Poor", "Average", "Great"], ['center', 'center', 'center'], color_list):
                    ax.axvline(x=x, ymin=0, ymax=1, color='#FFF', linestyle='-', lw=1, zorder=1, alpha=0.5)
                    ax.text(x, fig_height + 4, label, ha=align, va='center', fontsize=12, fontweight='bold', color=color)
                    triangle = patches.RegularPolygon((x, fig_height + 1), 3, radius=1, orientation=0, color=color, zorder=2)
                    ax.add_patch(triangle)

                # Title / bio
                player_bio(batter_id, ax=ax_title, sport_id=sport_id, year_input=year_input)

                # Add team logo (custom selection wins when the switch is on)
                if input.switch():
                    response = requests.get(input.logo_select(), timeout=request_timeout)
                else:
                    response = requests.get(logo_url, timeout=request_timeout)
                img = Image.open(BytesIO(response.content))
                ax_logo.imshow(img)
                ax_logo.axis("off")
                ax.axis('equal')

                # Metrics data table
                metrics_data = {
                    "Pitches": new_player_metrics['pitches'][0],
                    "PA": new_player_metrics['pa'][0],
                    "BIP": new_player_metrics['bip'][0],
                    "HR": f"{new_player_metrics['home_run'][0]:.0f}",
                    "AVG": f"{new_player_metrics['avg'][0]:.3f}",
                    "OBP": f"{new_player_metrics['obp'][0]:.3f}",
                    "SLG": f"{new_player_metrics['slg'][0]:.3f}",
                    "OPS": f"{new_player_metrics['obp'][0] + new_player_metrics['slg'][0]:.3f}",
                }
                df_table = pd.DataFrame(metrics_data, index=[0])
                ax_table.axis('off')
                table = ax_table.table(cellText=df_table.values, colLabels=df_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
                for key, cell in table.get_celld().items():
                    if key[0] == 0:
                        cell.set_text_props(fontweight='bold')
                table.auto_set_font_size(False)
                table.set_fontsize(12)
                table.scale(1, 1.5)

                # Additional subplots for spacing
                ax_top = fig.add_subplot(gs[0, :])
                ax_bot = fig.add_subplot(gs[-1, :])
                ax_top.axis('off')
                ax_bot.axis('off')
                ax_bot.text(0.05, 2, "By: Thomas Nestico (@TJStats)", ha="left", va="center", fontsize=14)
                ax_bot.text(0.95, 2, "Data: MLB, Fangraphs", ha="right", va="center", fontsize=14)
                fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)

                # Player headshot: MLB uses the "silo" image at full width,
                # other levels use the MiLB image drawn in the middle two thirds.
                ax_headshot = fig.add_subplot(gs[1, 1])
                if int(sport_id) == 1:
                    headshot_url = f'https://img.mlbstatic.com/mlb-photos/image/upload/w_640,d_people:generic:headshot:silo:current.png,q_auto:best,f_auto/v1/people/{batter_id}/headshot/silo/current.png'
                    headshot_extent = [0, 1, 0, 1]
                else:
                    headshot_url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{batter_id}/headshot/milb/current.png'
                    headshot_extent = [1/6, 5/6, 0, 1]
                try:
                    response = requests.get(headshot_url, timeout=request_timeout)
                    img = Image.open(BytesIO(response.content))
                    ax_headshot.set_xlim(0, 1)
                    ax_headshot.set_ylim(0, 1)
                    ax_headshot.imshow(img, extent=headshot_extent, origin='upper')
                except PIL.UnidentifiedImageError:
                    # No usable headshot: leave the panel empty.
                    pass
                ax_headshot.axis('off')
                ax_table.set_title('Season Summary', style='italic')

                # Fangraphs scouting grades table
                df_fv_table = None
                if batter_id in dict_mlb_fg:
                    df_fv_table = df_prospects[(df_prospects['minorMasterId'] == dict_mlb_fg[batter_id])][['cFV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']].reset_index(drop=True)
                if df_fv_table is None or df_fv_table.empty:
                    ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
                    return fig
                df_fv_table.columns = ['FV', 'Hit', 'Game', 'Raw', 'Spd', 'Fld']
                table_fv = ax_fv_table.table(cellText=df_fv_table.values, colLabels=df_fv_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
                for key, cell in table_fv.get_celld().items():
                    if key[0] == 0:
                        cell.set_text_props(fontweight='bold')
                table_fv.auto_set_font_size(False)
                table_fv.set_fontsize(12)
                table_fv.scale(1, 1.5)
                ax_fv_table.set_title('Fangraphs Scouting Grades', style='italic')
                return fig
            except ValueError:
                # Discard a half-built card so it neither leaks nor gets
                # picked up as the current figure.
                if fig is not None:
                    plt.close(fig)
                return no_data_figure()

        def calculate_new_player_percentiles(player_id, new_player_metrics, player_summary_filtered):
            """
            Calculate percentiles for a new player's metrics.

            :param player_id: ID of the player.
            :param new_player_metrics: DataFrame containing new player metrics.
            :param player_summary_filtered: Filtered player summary DataFrame.
            :return: DataFrame containing new player percentiles.
            """
            # Rank the player against the qualified pool; the player's own row
            # is removed from the pool first so it is counted exactly once.
            pool = player_summary_filtered[['batter_id'] + stat_list].filter(pl.col('batter_id') != player_id)
            combined_data = pl.concat([pool, new_player_metrics], how="vertical").to_pandas()
            combined_percentiles = pl.DataFrame(pd.concat([combined_data['batter_id'], combined_data[stat_list].rank(pct=True)], axis=1))
            return combined_percentiles.filter(pl.col('batter_id') == player_id)

        p.set(message="Generating plot", detail="This may take a while...")
        p.set(0.3, "Gathering data...")

        # Current selections
        year = int(input.year_input())
        sport_id = int(input.level_input())
        batter_id = int(input.batter_id())

        df_player = scrape.get_players(sport_id=sport_id,season=year)
        player_team_dict = dict(zip(df_player['player_id'],df_player['team']))

        batter_summary = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/summary/batter_summary_{level_dict_file[str(sport_id)]}_{year}.parquet")
        df_prospects = pd.read_csv(f'data/prospects/prospects_{year}.csv')
        df_rosters = pd.read_csv(f'data/rosters/fangraphs_rosters_{year}.csv')
        df_small = df_rosters[['minorbamid','minormasterid']].dropna()
        dict_mlb_fg = dict(zip(df_small['minorbamid'].astype(int),df_small['minormasterid']))

        # Plate-appearance threshold per level: grows with the days elapsed
        # since the listed date, floored at 1 and capped per level.
        from datetime import datetime
        today = datetime.today()
        days_since_march_28 = (today - datetime(year, 3, 28)).days
        days_since_april_4 = (today - datetime(year, 4, 4)).days
        days_since_october_6 = (today - datetime(year, 10, 6)).days
        min_pa_by_level = {'1':min(max(days_since_march_28*3,1),350),
                           '11':min(max(days_since_march_28*3,1),300),
                           '14':min(max(days_since_april_4*2.5,1),200),
                           '17':min(max(days_since_october_6*2.5,1),50),}

        batter_summary_filter = batter_summary.filter((pl.col('pa') >= min_pa_by_level[str(sport_id)]) & (pl.col('launch_speed') >= 0))
        stat_list = [item for item in batter_summary.columns if item not in ['batter_id', 'batter_name']]

        new_player_metrics = batter_summary.filter(pl.col('batter_id') == batter_id)[['batter_id'] + stat_list]
        if len(new_player_metrics) == 0:
            return no_data_figure()

        # Get percentiles for the new player
        new_player_percentiles = calculate_new_player_percentiles(batter_id, new_player_metrics, batter_summary_filter)

        p.set(0.6, "Creating plot...")

        # Draw Baseball Savant-style percentile bars and hand the figure to
        # the renderer explicitly rather than via pyplot's current figure.
        return draw_baseball_savant_percentiles(new_player_metrics=new_player_metrics,
                                                new_player_percentiles=new_player_percentiles,
                                                sport_id=sport_id,
                                                year_input=year)
@output
@render.plot
@reactive.event(input.generate_plot, ignore_none=False)
def pitcher_plot():
merged_dict = {
"avg_start_speed_ff": { "format": '.1f', "percentile_flip": False, "stat_title": "Fastball Velocity" },
"extension": { "format": '.1f', "percentile_flip": False, "stat_title": "Extension" },
"woba_percent": { "format": '.3f', "percentile_flip": True, "stat_title": "wOBA" },
"xwoba_percent": { "format": '.3f', "percentile_flip": True, "stat_title": "xwOBA" },
"launch_speed": { "format": '.1f', "percentile_flip": True, "stat_title": "Average EV"},
"barrel_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "Barrel%" },
"hard_hit_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "Hard-Hit%" },
"whiff_rate": { "format": '.1%', "percentile_flip": False, "stat_title": "Whiff%" },
"zone_contact_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "Z-Contact%" },
"zone_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "Zone%" },
"chase_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "O-Swing%" },
"csw_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "CSW%" },
"k_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "K%" },
"bb_percent": { "format": '.1%', "percentile_flip": True, "stat_title": "BB%" },
"k_minus_bb_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "K - BB%" },
"ground_ball_percent": { "format": '.1%', "percentile_flip": False, "stat_title": "GB%" },
}
with ui.Progress(min=0, max=1) as p:
def draw_baseball_savant_percentiles(new_player_metrics, new_player_percentiles, colors=None,
sport_id=None,
year_input=None):
"""
Draw Baseball Savant-style percentile bars with proper alignment and scaling.
:param new_player_metrics: DataFrame containing new player metrics.
:param new_player_percentiles: DataFrame containing new player percentiles.
:param colors: List of colors for bars (optional, red/blue default).
"""
# Extract player information
pitcher_id = new_player_metrics['pitcher_id'][0]
player_name = new_player_metrics['pitcher_id'][0]
# player_name = pitcher_name_id[pitcher_id]
stats = [merged_dict[x]['stat_title'] for x in merged_dict.keys()]
# Calculate percentiles and values
percentiles = [int((1 - x) * 100) if merged_dict[stat]["percentile_flip"] else int(x * 100) for x, stat in zip(new_player_percentiles.select(merged_dict.keys()).to_numpy()[0], merged_dict.keys())]
percentiles = np.clip(percentiles, 1, 100)
values = [str(f'{x:{merged_dict[stat]["format"]}}').strip('%') for x, stat in zip(new_player_metrics.select(merged_dict.keys()).to_numpy()[0], merged_dict.keys())]
# Get team logo URL
try:
logo_url = image_dict[team_dict[player_team_dict[pitcher_id]]]
except KeyError:
logo_url = "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"
# Create a custom colormap
color_list = ['#3661AD', '#B4CFD1', '#D82129']
cmap = LinearSegmentedColormap.from_list("custom_cmap", color_list)
norm = Normalize(vmin=0.1, vmax=0.9)
norm_percentiles = norm(percentiles / 100)
colors = [cmap(p) for p in norm_percentiles]
# Figure setup
num_stats = len(stats)
bar_height = 4.4
spacing = 0.7
fig_height = (bar_height + spacing) * num_stats
fig = plt.figure(figsize=(12, 12))
gs = GridSpec(7, 5, height_ratios=[0.05, 1.5, 0.75, 0.75,0.75, 7.7, 0.1], width_ratios=[0.2, 1.5, 7, 1.5, 0.2])
# Define subplots
ax_title = fig.add_subplot(gs[1, 2])
ax_table = fig.add_subplot(gs[2, :])
ax_fv_table = fig.add_subplot(gs[3, :])
ax_fv_table.axis('off')
ax_stuff = fig.add_subplot(gs[4, :])
ax = fig.add_subplot(gs[5, :])
ax_logo = fig.add_subplot(gs[1, 3])
ax.set_xlim(-1, 99)
ax.set_ylim(-1, 99)
ax.set_aspect("equal")
ax.axis("off")
# Draw each bar
for i, (stat, percentile, value, color) in enumerate(zip(stats, percentiles, values, colors)):
y = fig_height - (i + 1) * (bar_height + spacing)
ax.add_patch(patches.Rectangle((0, y + bar_height / 4), 100, bar_height / 2, color="#C7DCDC", lw=0))
ax.add_patch(patches.Rectangle((0, y), percentile, bar_height, color=color, lw=0))
circle_y = y + bar_height - bar_height / 2
circle = plt.Circle((percentile, circle_y), bar_height / 2, color=color, ec='white', lw=1.5, zorder=10)
ax.add_patch(circle)
fs = 14
ax.text(percentile, circle_y, f"{percentile}", ha="center", va="center", fontsize=10, color='white', zorder=10, fontweight='bold')
ax.text(-5, y + bar_height / 2, stat, ha="right", va="center", fontsize=fs)
ax.text(115, y + bar_height / 2, str(value), ha="right", va="center", fontsize=fs, zorder=5)
if i < len(stats) and i > 0:
ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=-33, xmax=0)
ax.hlines(y=y + bar_height + spacing / 2, color='#399098', linestyle=(0, (5, 5)), linewidth=1, xmin=100, xmax=115)
# Draw vertical lines for 10%, 50%, and 90% with labels
for x, label, align, color in zip([10, 50, 90], ["Poor", "Average", "Great"], ['center', 'center', 'center'], color_list):
ax.axvline(x=x, ymin=0, ymax=1, color='#FFF', linestyle='-', lw=1, zorder=1, alpha=0.5)
ax.text(x, fig_height + 4, label, ha=align, va='center', fontsize=12, fontweight='bold', color=color)
triangle = patches.RegularPolygon((x, fig_height + 1), 3, radius=1, orientation=0, color=color, zorder=2)
ax.add_patch(triangle)
# # Title
# ax_title.set_ylim(0, 1)
# ax_title.text(0.5, 0.5, f"{player_name} - {player_position_dict[pitcher_id]}\nPercentile Rankings - 2024 AAA", ha="center", va="center", fontsize=24)
# ax_title.axis("off")
player_bio(pitcher_id, ax=ax_title, sport_id=sport_id, year_input=year_input)
# Add team logo
#response = requests.get(logo_url)
#######if input.switch():
######## response = requests.get(input.logo_select())
######else:
response = requests.get(logo_url)
img = Image.open(BytesIO(response.content))
ax_logo.imshow(img)
ax_logo.axis("off")
ax.axis('equal')
lg_dict = {
11:'all',
14:10
}
levelt = {
11:1,
14:4,
}
if sport_id != 17 and int(sport_id) != 1:
fg_api = f'https://www.fangraphs.com/api/leaders/minor-league/data?pos=all&level={levelt[sport_id]}&lg={lg_dict[sport_id]}&stats=pit&qual=0&type=2&team=&season={year}&seasonEnd={year}&org=&ind=0&splitTeam=false'
response = requests.get(fg_api)
data = response.json()
df_fg = pl.DataFrame(data)
if pitcher_id not in dict_mlb_fg.keys():
#ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
metrics_data = {
"Pitches": new_player_metrics['pitches'][0],
"PA": new_player_metrics['pa'][0],
"BIP": new_player_metrics['bip'][0],
"HR": f"{new_player_metrics['home_run'][0]:.0f}",
"K": f"{new_player_metrics['k'][0]:.0f}",
"BB": f"{new_player_metrics['bb'][0]:.0f}",
}
df_table = pd.DataFrame(metrics_data, index=[0])
table = ax_table.table(cellText=df_table.values, colLabels=df_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
for key, cell in table.get_celld().items():
if key[0] == 0:
cell.set_text_props(fontweight='bold')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1, 1.5)
else:
df_fg_filter = df_fg.filter(pl.col('minormasterid') == dict_mlb_fg[pitcher_id])
# Metrics data table
metrics_data = {
"G": f"{df_fg_filter['G'][0]:.0f}",
"IP": f"{df_fg_filter['IP'][0]:.1f}",
"Pitches": f"{new_player_metrics['pitches'][0]:.0f}",
"PA": f"{df_fg_filter['TBF'][0]:.0f}",
"BIP": new_player_metrics['bip'][0],
"ERA": f"{df_fg_filter['ERA'][0]:.2f}",
"FIP": f"{df_fg_filter['FIP'][0]:.2f}",
"WHIP": f"{df_fg_filter['WHIP'][0]:.2f}",
}
df_table = pd.DataFrame(metrics_data, index=[0])
table = ax_table.table(cellText=df_table.values, colLabels=df_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
for key, cell in table.get_celld().items():
if key[0] == 0:
cell.set_text_props(fontweight='bold')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1, 1.5)
if sport_id == 1:
url_season = url = f"""
https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&season={year}&season1={year}
&startdate={year}-01-01&enddate={year}-12-01&ind=0&qual=0&type=8&month=33&pageitems=500000
"""
data_season = requests.get(url_season).json()
df_season = pl.DataFrame(data=data_season['data'], infer_schema_length=1000)
df_season = df_season.with_columns(pl.lit('Season').alias('Time'))
df_fg_filter = df_season.filter(pl.col('xMLBAMID')==int(pitcher_id))
metrics_data = {
"G": f"{df_fg_filter['G'][0]:.0f}",
"IP": f"{df_fg_filter['IP'][0]:.1f}",
"Pitches": f"{new_player_metrics['pitches'][0]:.0f}",
"PA": f"{df_fg_filter['TBF'][0]:.0f}",
"BIP": new_player_metrics['bip'][0],
"ERA": f"{df_fg_filter['ERA'][0]:.2f}",
"FIP": f"{df_fg_filter['FIP'][0]:.2f}",
"WHIP": f"{df_fg_filter['WHIP'][0]:.2f}",
}
df_table = pd.DataFrame(metrics_data, index=[0])
table = ax_table.table(cellText=df_table.values, colLabels=df_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
for key, cell in table.get_celld().items():
if key[0] == 0:
cell.set_text_props(fontweight='bold')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1, 1.5)
# Additional subplots for spacing
ax_top = fig.add_subplot(gs[0, :])
ax_bot = fig.add_subplot(gs[-1, :])
ax_top.axis('off')
ax_bot.axis('off')
ax_bot.text(0.05, 2, "By: Thomas Nestico (@TJStats)", ha="left", va="center", fontsize=14)
ax_bot.text(0.95, 2, "Data: MLB, Fangraphs", ha="right", va="center", fontsize=14)
ax_table.axis('off')
# Player headshot
ax_headshot = fig.add_subplot(gs[1, 1])
try:
if int(sport_id) == 1:
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/w_640,d_people:generic:headshot:silo:current.png,q_auto:best,f_auto/v1/people/{pitcher_id}/headshot/silo/current.png'
response = requests.get(url)
img = Image.open(BytesIO(response.content))
ax_headshot.set_xlim(0, 1)
ax_headshot.set_ylim(0, 1)
ax_headshot.imshow(img, extent=[0, 1, 0, 1], origin='upper')
else:
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{pitcher_id}/headshot/milb/current.png'
response = requests.get(url)
img = Image.open(BytesIO(response.content))
ax_headshot.set_xlim(0, 1)
ax_headshot.set_ylim(0, 1)
ax_headshot.imshow(img, extent=[1/6, 5/6, 0, 1], origin='upper')
except PIL.UnidentifiedImageError:
ax_headshot.axis('off')
#return
ax_headshot.axis('off')
ax_table.set_title('Season Summary', style='italic')
# Fangraphs scouting grades table
print(pitcher_id)
if pitcher_id not in dict_mlb_fg.keys():
ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
#return
try:
df_fv_table = df_prospects[(df_prospects['minorMasterId'] == dict_mlb_fg[pitcher_id])][['cFV','FB', 'SL', 'CB', 'CH', 'SPL', 'CT','CMD']].dropna(axis=1).reset_index(drop=True)
except KeyError:
df_fv_table = pd.DataFrame()
# ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
#return
ax_fv_table.axis('off')
if df_fv_table.empty:
ax_fv_table.text(x=0.5, y=0.5, s='No Scouting Data', style='italic', ha='center', va='center', fontsize=20, bbox=dict(facecolor='white', alpha=1, pad=10))
#return
else:
df_fv_table.columns = ['FV']+[x.upper() for x in df_fv_table.columns[1:]]
table_fv = ax_fv_table.table(cellText=df_fv_table.values, colLabels=df_fv_table.columns, cellLoc='center', loc='bottom', bbox=[0.07, 0, 0.86, 1])
for key, cell in table_fv.get_celld().items():
if key[0] == 0:
cell.set_text_props(fontweight='bold')
table_fv.auto_set_font_size(False)
table_fv.set_fontsize(12)
table_fv.scale(1, 1.5)
ax_fv_table.set_title('Fangraphs Scouting Grades', style='italic')
# df_stuff_filter = df_stuff.filter(pl.col('pitcher_id')==pitcher_id)
stuff_table = ax_stuff.table(cellText=[df_stuff_filter['tj_stuff_plus']],
colLabels=df_stuff_filter['pitch_type'],
cellLoc='center',
loc='center', bbox=[0.07, 0, 0.86, 1])
stuff_table.auto_set_font_size(False)
stuff_table.set_fontsize(12)
stuff_table.scale(1, 1.5)
ax_stuff.axis('off')
ax_stuff.set_title('tjStuff+', style='italic')
for key, cell in stuff_table.get_celld().items():
if key[0] == 0:
cell.set_text_props(fontweight='bold')
# Color the stuff_table values based on the cmap defined
for (i, j), cell in stuff_table.get_celld().items():
if i == 0:
cell.set_text_props(fontweight='bold')
else:
norm = Normalize(vmin=90, vmax=110)
value = float(cell.get_text().get_text())
color = cmap(norm(value))
cell.set_facecolor(color)
#cell.set_text_props(color='white' if value < 100 else 'black')
fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)
def calculate_new_player_percentiles(player_id, new_player_metrics, player_summary_filtered):
    """
    Compute percentile ranks for one player's metrics against a comparison pool.

    Any rows belonging to ``player_id`` are removed from
    ``player_summary_filtered``, the rows of ``new_player_metrics`` are appended
    in their place, and every column named in ``stat_list`` (read from the
    enclosing scope) is ranked across the combined pool with pandas
    ``rank(pct=True)``.

    :param player_id: ID of the player; matched against the 'pitcher_id' column.
    :param new_player_metrics: polars DataFrame with the player's metrics; it is
        concatenated vertically, so it is expected to share the
        ``['pitcher_id'] + stat_list`` column layout.
    :param player_summary_filtered: polars DataFrame of the comparison population,
        containing 'pitcher_id' and every column in ``stat_list``.
    :return: polars DataFrame with the percentile-rank row(s) for ``player_id``.
    """
    wanted_columns = ['pitcher_id'] + stat_list

    # Comparison pool without the target player, so the player is counted only once.
    peers = (
        player_summary_filtered[wanted_columns]
        .filter(pl.col('pitcher_id') != player_id)
        .clone()
    )

    # Ranking is done in pandas; the IDs are carried alongside so the player's
    # row can be located again afterwards.
    pool = pl.concat([peers, new_player_metrics], how="vertical").to_pandas()
    stat_ranks = pool[stat_list].rank(pct=True)
    ranked = pl.DataFrame(pd.concat([pool['pitcher_id'], stat_ranks], axis=1))

    return ranked.filter(pl.col('pitcher_id') == player_id)
p.set(message="Generating plot", detail="This may take a while...")
p.set(0.3, "Gathering data...")
df_teams = scrape.get_teams()
team_dict = dict(zip(df_teams['team_id'],df_teams['parent_org_abbreviation']))
# Read the selected season, level and pitcher ID from the app inputs
year = int(input.year_input())
sport_id = int(input.level_input())
pitcher_id = int(input.pitcher_id())
df_player = scrape.get_players(sport_id=sport_id,season=2024)
pitcher_name_id = dict(zip(df_player['player_id'],df_player['name']))
player_team_dict = dict(zip(df_player['player_id'],df_player['team']))
player_position_dict = dict(zip(df_player['player_id'],df_player['position']))
player_position_dict = dict(zip(df_player['player_id'],df_player['position']))
pitcher_summary = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/summary/pitcher_summary_{level_dict_file[str(sport_id)]}_{year}.parquet")
df_prospects = pd.read_csv(f'data/prospects/prospects_{year}.csv')
df_rosters = pd.read_csv(f'data/rosters/fangraphs_rosters_{year}.csv')
df_small = df_rosters[['minorbamid','minormasterid']].dropna()
dict_mlb_fg=dict(zip(df_small['minorbamid'].astype(int),df_small['minormasterid']))
df_stuff = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/stuff/stuff_{year}.parquet")
# Separate the per-pitch-type rows from the aggregate "All" rows
filtered_df = df_stuff.filter(pl.col("pitch_type") != "All")
filtered_all_df = df_stuff.filter(pl.col("pitch_type") == "All")
# Calculate total pitches for each pitcher and proportion of each pitch type
result_df = (
filtered_df
.with_columns([
# Total pitches for each pitcher
pl.col("count").sum().over("pitcher_id").alias("total_pitches"),
# Proportion of pitches
(pl.col("count") / pl.col("count").sum().over("pitcher_id")).alias("pitch_proportion"),
])
).filter(pl.col("pitch_proportion") > 0.05)
df_stuff = pl.concat([filtered_all_df.with_columns(
[pl.col("count").sum().over("pitcher_id").alias("total_pitches"),
(pl.col("count") / pl.col("count").sum().over("pitcher_id")).alias("pitch_proportion")]
), result_df])
df_stuff_filter = df_stuff.filter(pl.col('pitcher_id')==pitcher_id)
df_stuff_filter = df_stuff_filter.with_columns(
pl.col("tj_stuff_plus").round(0).cast(pl.Int64) # Round to the nearest integer
)
from datetime import datetime
march_28 = datetime(year, 3, 28)
april_4 = datetime(year, 4, 4)
october_6 = datetime(year, 10, 6)
today = datetime.today()
# Calculate the difference in days
days_since_march_28 = (today - march_28).days
days_since_april_4 = (today - april_4).days
days_since_october_6 = (today - october_6).days
level_dict_filter = {'1':min(max(days_since_march_28*3,1),350),
'11':min(max(days_since_march_28*3,1),300),
'14':min(max(days_since_april_4*3,1),200),
'17':min(max(days_since_october_6*1,1),20),}
pitcher_summary_filter = pitcher_summary.filter((pl.col('pa') >= level_dict_filter[str(sport_id)]) & (pl.col('launch_speed') >= 0))
stat_list = [item for item in pitcher_summary.columns if item not in ['pitcher_id', 'pitcher_name']]
pitcher_summary_filter_pd = pitcher_summary_filter.to_pandas()
new_player_metrics = pitcher_summary.filter(pl.col('pitcher_id') == pitcher_id)[['pitcher_id'] + stat_list]
if len(new_player_metrics) == 0:
fig = plt.figure(figsize=(26,26))
fig.text(x=0.1,y=0.9,s='No Statcast Data For This Pitcher',fontsize=36,ha='left')
return fig
# Get percentiles for the new player
new_player_percentiles = calculate_new_player_percentiles(pitcher_id, new_player_metrics, pitcher_summary_filter)
p.set(0.6, "Creating plot...")
# Draw Baseball Savant-style percentile bars
draw_baseball_savant_percentiles(new_player_metrics=new_player_metrics,
new_player_percentiles=new_player_percentiles,
sport_id=sport_id,
year_input=year)
# Build the `app` object from `app_ui` and `server`.
app = App(app_ui, server)