pitch_plot_select / app_streamlit.py
nesticot's picture
Upload 34 files
b02077b verified
import seaborn as sns
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
import PitchPlotFunctions as ppf
import requests
import polars as pl
from datetime import date
import api_scraper
# Page header: title, author/code/data links and a short "About" blurb,
# rendered as a single markdown block at the top of the app.
APP_HEADER_MD = """
## MLB & AAA Pitch Plots App
##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
##### Code: [GitHub Repo](https://github.com/tnestico/streamlit_pitch_plots)
##### Data: [MLB](https://baseballsavant.mlb.com/)
#### About
This Streamlit app retrieves MLB and AAA Pitching Data for a selected pitcher from the MLB Stats API and is accessed using my [MLB Stats API Scraper](https://github.com/tnestico/mlb_scraper).
The app outputs the pitcher's data into both a plot and table to illustrate and summarize the data.
It can also display data for games currently in progress.
*More information about the data and plots is shown at the bottom of this page.*
"""
st.markdown(APP_HEADER_MD)
# Helper object from the local PitchPlotFunctions module; used further down
# to prepare the dataframe and draw the selected plot.
ploter = ppf.PitchPlotFunctions()
# Scraper object from the local api_scraper module; every player/game/data
# lookup in this script goes through it.
scraper = api_scraper.MLB_Scrape()
# League label shown in the UI -> sport ID handed to the scraper.
sport_id_dict = dict(MLB=1, AAA=11)
# "Plot" section: a two-column row with the league picker on the left and
# the pitcher picker on the right.
st.write("#### Plot")
col_1, col_2 = st.columns(2)

with col_1:
    # The chosen league decides which sport ID is used for every lookup below.
    league_options = list(sport_id_dict)
    selected_league = st.selectbox('##### Select League', league_options)
    selected_sport_id = sport_id_dict[selected_league]

with col_2:
    # Fetch the league's players, keep rows whose position contains "P", and
    # attach a "<name> - <player_id>" label used as the selectbox option.
    label_expr = pl.concat_str(["name", "player_id"], separator=" - ").alias("pitcher_name_id")
    df_player = (
        scraper.get_players(sport_id=selected_sport_id)
        .filter(pl.col('position').str.contains('P'))
        .with_columns(label_expr)
    )

    # Label -> player ID lookup for resolving the selectbox choice.
    pitcher_name_id_dict = {
        label: player_id
        for label, player_id in df_player.select(['pitcher_name_id', 'player_id']).iter_rows()
    }

    # Remember the pitcher chosen on the previous run (None on first load).
    if 'prev_pitcher_id' not in st.session_state:
        st.session_state['prev_pitcher_id'] = None

    # Pitcher picker; the selected label is mapped back to its player ID.
    selected_pitcher = st.selectbox("##### Select Pitcher", list(pitcher_name_id_dict))
    pitcher_id = pitcher_name_id_dict[selected_pitcher]

    # A different pitcher than last run: drop all cached data and record the
    # new selection so the next run does not clear again.
    pitcher_changed = pitcher_id != st.session_state['prev_pitcher_id']
    if pitcher_changed:
        st.cache_data.clear()
        st.session_state['prev_pitcher_id'] = pitcher_id
        st.session_state['cache_cleared'] = False
        st.write('Cache cleared!')

    # Make sure the cache-status flag exists before later code reads it.
    if 'cache_cleared' not in st.session_state:
        st.session_state['cache_cleared'] = False
# Handedness label shown in the UI -> list of batter-side codes passed to the
# dataframe filter.
batter_hand_picker = {
    'All': ['L', 'R'],
    'LHH': ['L'],
    'RHH': ['R'],
}

# Season bounds; both date pickers are restricted to this window.
min_date = date(2024, 3, 20)
max_date = date(2024, 11, 30)

# Default end date: today, clamped into the season window. This is computed
# explicitly instead of passing Streamlit's internal "default_value_today"
# sentinel string, which is not part of the documented `value` contract
# (date / ISO string / "today") and cannot be relied on across versions.
default_end_date = min(max(date.today(), min_date), max_date)

# Filter row: batter handedness, start date, end date.
st.write("##### Filters")
col1, col2, col3 = st.columns(3)

with col1:
    # Selectbox for batter handedness, resolved to its list of side codes.
    batter_hand_select = st.selectbox('Batter Handedness:', list(batter_hand_picker.keys()))
    batter_hand = batter_hand_picker[batter_hand_select]

with col2:
    # Start of the date range; defaults to the first day of the window.
    start_date = st.date_input('Start Date:',
                               value=min_date,
                               min_value=min_date,
                               max_value=max_date,
                               format="YYYY-MM-DD")

with col3:
    # End of the date range; defaults to today clamped into the window.
    end_date = st.date_input('End Date:',
                             value=default_end_date,
                             min_value=min_date,
                             max_value=max_date,
                             format="YYYY-MM-DD")
# Plot label shown in the UI -> plot key passed to the plotting helper.
plot_picker_dict = {
    'Short Form Movement': 'short_form_movement',
    'Long Form Movement': 'long_form_movement',
    'Release Points': 'release_point',
}

# Let the user choose a plot type and resolve the label to its key.
plot_type_labels = list(plot_picker_dict)
plot_picker_select = st.selectbox('Select Plot Type:', plot_type_labels)
plot_picker = plot_picker_dict[plot_picker_select]
# The season is the year portion (first four characters) of the ISO-formatted
# start date.
start_date_str = str(start_date)
end_date_str = str(end_date)
season = start_date_str[:4]

# Ask the scraper for the selected pitcher's games within the date range.
# NOTE(review): game_type 'R'/'P' presumably means regular season and
# postseason - confirm against the scraper.
player_games = scraper.get_player_games_list(
    player_id=pitcher_id,
    season=season,
    start_date=start_date_str,
    end_date=end_date_str,
    sport_id=selected_sport_id,
    game_type=['R', 'P'],
)
# Function to fetch data and cache it
@st.cache_data
def fetch_data(game_ids=None):
    """Download the data for a set of games and return it as a dataframe.

    ``st.cache_data`` keys cached results on the function's arguments, so
    passing the game list makes the cache entry specific to the current
    pitcher / league / date-range selection. The previous zero-argument
    version had a single cache entry and therefore kept returning whatever
    was fetched first, even after the date range (and with it the game list)
    changed.

    Args:
        game_ids: Tuple of game identifiers as produced by
            ``scraper.get_player_games_list``. If omitted, the module-level
            ``player_games`` is used, which keeps the old ``fetch_data()``
            call working (but with the old single-entry caching).

    Returns:
        The dataframe built by ``scraper.get_data_df`` from the downloaded
        game data.
    """
    if game_ids is None:
        game_ids = player_games
    # The scraper is handed a list, as it was before this change.
    data = scraper.get_data(game_list_input=list(game_ids))
    return scraper.get_data_df(data_list=data)


# Fetch (or reuse) the data for exactly the games currently selected. A tuple
# is passed so the cache key is a plain, hashable value.
df_original = fetch_data(tuple(player_games))
# Both branches of the old if/else fetched the data and left this flag True;
# keep the flag in that state for the code that resets it on pitcher change.
st.session_state.cache_cleared = True
# Button to generate plot: everything below runs only on the click.
if st.button('Generate Plot'):
    try:
        # Hand the fetched data to the plotting helper together with the
        # current filters (pitcher, date range, batter side); the result is
        # the dataframe used for both the plot and the summary table.
        df = ploter.df_to_polars(df_original=df_original,
                                 pitcher_id=pitcher_id,
                                 start_date=str(start_date),
                                 end_date=str(end_date),
                                 batter_hand=batter_hand)

        if len(df) == 0:
            # Nothing matched the filters; ask for a different selection.
            st.write('Please select different parameters.')
        else:
            # Draw the selected plot type for this pitcher.
            ploter.final_plot(
                df=df,
                pitcher_id=pitcher_id,
                plot_picker=plot_picker,
                sport_id=selected_sport_id)

            # Summary table, grouped in its own container below the plot.
            with st.container():
                # One row per pitch type: pitch count plus per-metric means.
                per_pitch = df.group_by(['pitcher_id', 'pitch_description']).agg([
                    pl.col('is_pitch').drop_nans().count().alias('pitches'),
                    pl.col('start_speed').drop_nans().mean().round(1).alias('start_speed'),
                    pl.col('vb').drop_nans().mean().round(1).alias('vb'),
                    pl.col('ivb').drop_nans().mean().round(1).alias('ivb'),
                    pl.col('hb').drop_nans().mean().round(1).alias('hb'),
                    pl.col('spin_rate').drop_nans().mean().round(0).alias('spin_rate'),
                    pl.col('x0').drop_nans().mean().round(1).alias('x0'),
                    pl.col('z0').drop_nans().mean().round(1).alias('z0'),
                ])
                # Usage share of each pitch type (percent of the pitcher's
                # total), most-used first, in display column order.
                grouped_df = (
                    per_pitch
                    .with_columns(
                        (pl.col('pitches') / pl.col('pitches').sum().over('pitcher_id') * 100)
                        .round(3)
                        .alias('proportion')
                    )
                    .sort('proportion', descending=True)
                    .select(["pitch_description", "pitches", "proportion", "start_speed",
                             "vb", "ivb", "hb", "spin_rate", "x0", "z0"])
                )

                st.write("#### Pitching Data")

                # Column name -> header label / column formatting in the table.
                column_config_dict = {
                    'pitcher_id': 'Pitcher ID',
                    'pitch_description': 'Pitch Type',
                    'pitches': 'Pitches',
                    'start_speed': 'Velocity',
                    'vb': 'VB',
                    'ivb': 'iVB',
                    'hb': 'HB',
                    'spin_rate': 'Spin Rate',
                    'proportion': st.column_config.NumberColumn("Pitch%", format="%.1f%%"),
                    'x0': 'hRel',
                    'z0': 'vRel',
                }

                # The selectbox label is "<name> - <player_id>". Split on that
                # separator from the right so hyphenated names (e.g.
                # "Yu-Min Lin") are kept whole instead of being cut at their
                # first hyphen.
                pitcher_name = selected_pitcher.rsplit(' - ', 1)[0]
                st.markdown(f"##### {pitcher_name} {selected_league} Pitch Data")

                st.dataframe(grouped_df,
                             hide_index=True,
                             column_config=column_config_dict,
                             width=1500)
    except IndexError:
        # NOTE(review): presumably raised by the plotting helpers when the
        # selection yields no usable data - confirm which call raises it.
        st.write('Please select different parameters.')
# Reference section at the bottom of the page: what each table column and
# plot type means, followed by acknowledgements.
st.markdown("""
#### Column Descriptions
- **`Pitch Type`**: Describes the type of pitch thrown (e.g., 4-Seam Fastball, Curveball, Slider).
- **`Pitches`**: The total number of pitches thrown by the pitcher.
- **`Pitch%`**: Proportion of pitch thrown.
- **`Velocity`**: The initial velocity of the pitch as it leaves the pitcher's hand, measured in miles per hour (mph).
- **`VB`**: Vertical Break (VB), representing the amount of movement of a pitch due to spin and gravity, measured in inches (in).
- **`iVB`**: Induced Vertical Break (iVB), representing the amount of movement of a pitch strictly due to the spin imparted on the ball, measured in inches (in).
- **`HB`**: Horizontal Break (HB), indicating the amount of horizontal movement of a pitch, measured in inches (in).
- **`Spin Rate`**: The rate of spin of the pitch as it is released, measured in revolutions per minute (rpm).
- **`hRel`**: The horizontal release point of the pitch, measured in feet from the center of the pitcher's mound (ft).
- **`vRel`**: The vertical release point of the pitch, measured in feet above the ground (ft).
#### Plot Descriptions
- **`Short Form Movement`**: Illustrates the movement of the pitch due to spin, where (0,0) indicates a pitch with perfect gyro-spin (e.g. Like a Football).
- **`Long Form Movement`**: Illustrates the movement of the pitch due to spin and gravity.
- **`Release Points`**: Illustrates a pitcher's release points from the catcher's perspective.
#### Acknowledgements
Big thanks to [Michael Rosen](https://twitter.com/bymichaelrosen) and [Jeremy Maschino](https://twitter.com/pitchprofiler) for inspiration for this project
Check Out Michael's [Pitch Plotting App](https://pitchplotgenerator.streamlit.app/)
Check Out Jeremy's Website [Pitch Profiler](http://www.mlbpitchprofiler.com/)
"""
)