import json
import os
import urllib.request
from urllib.error import HTTPError

import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode

# Numeric fielding-position codes mapped to their abbreviations. Used further
# down to translate the integer `pos` column into the displayed `Position`.
pos_dict = {
    1: 'P',
    2: 'C',
    3: '1B',
    4: '2B',
    5: '3B',
    6: 'SS',
    7: 'LF',
    8: 'CF',
    9: 'RF',
    10: 'DH',
}

# Use Streamlit's wide page layout.
st.set_page_config(layout="wide")

# Inject the CSS for a centred wrapper capped at 1250px.
st.markdown(
    """
<style>
.main-container {
    max-width: 1250px;
    margin: 0 auto;
}
</style>
""",
    unsafe_allow_html=True,
)

# Open the wrapper div; the matching </div> is emitted at the end of the script.
# NOTE(review): each st.markdown call is rendered as its own element, so this
# div may not actually enclose the widgets that follow -- confirm in the browser.
st.markdown('<div class="main-container">', unsafe_allow_html=True)

# Introductory markdown shown at the top of the page (title, credits, and a
# short explanation of Catch Probability).
markdown_text = """
## Catch Probability Lookup Tool

##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
##### Data: [MLB](https://baseballsavant.mlb.com/)

#### About
This Streamlit app retrieves catch probability data for a selected fielder from [Baseball Savant](https://baseballsavant.mlb.com/leaderboard/catch_probability).
The app displays the fielder's data in a table and allows the user to select a
row to view the corresponding video.

Catch probability data is only available for outfielders.

#### What is Catch Probability?
*From MLB:*

**Catch Probability** expresses the likelihood for a ball to be caught by an outfielder based on opportunity time,
distance needed, and direction. “Opportunity time” starts when the ball is released by the pitcher,
and “distance needed” is the shortest distance needed to make the catch.
[Learn more about how direction is accounted for here](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408).
[Read more about the details of how Catch Probability works here](https://www.mlb.com/news/statcast-introduces-catch-probability-for-2017-c217802340).

"""

# Column legend rendered below the grid; one bullet per displayed column.
markdown_text_end = '''
*Columns:*
- **Game Date**: Date of the game
- **Batter Name**: Name of the batter
- **Pitcher Name**: Name of the pitcher
- **Fielder Name**: Name of the fielder
- **Position**: Position of the fielder
- **Event**: Type of play
- **Out**: Was the ball caught?
- **Wall**: [Did the fielder catch the ball at the wall?](https://www.mlb.com/news/catch-probability-updated-to-account-for-walls-c269814542)
- **Back**: [Did the fielder catch the ball while moving back?](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408)
- **Stars**: [Number of stars assigned to the play](https://baseballsavant.mlb.com/leaderboard/catch_probability)
- **Distance**: Distance required to make the catch in feet
- **Hang Time**: Hang time of the ball in seconds
- **Catch Rate**: Probability of the catch being made
'''

# Render the introductory text at the top of the page.
st.markdown(markdown_text)

import datasets
from datasets import load_dataset

# Season and competition level used both for the dataset file name and for the
# Baseball Savant request further down.
season = 2025
level = 'mlb'


@st.cache_data
def _load_pitch_data(data_level, data_season):
    """Return the pitch-level data for one level/season as a DataFrame.

    Streamlit re-executes the whole script on every widget interaction, so the
    load is cached per (level, season) instead of re-reading the parquet file
    each time.
    """
    ds = load_dataset(
        "TJStatsApps/mlb_data",
        data_files=f"data/{data_level}_pitch_data_{data_season}.parquet",
    )
    return ds["train"].to_pandas()


dataset = _load_pitch_data(level, season)

# Keep one row per play (the last one in the data). The explicit copy makes the
# column assignment below operate on an independent frame.
df = dataset.drop_duplicates(subset=['play_id'], keep='last').copy()
# Label used in the player dropdown: "<batter name> - <batter team>".
df['batter_name_team'] = df['batter_name'] + ' - ' + df['batter_team']

# batter_id -> "Name - Team" label for every distinct batter in the pitch data,
# ordered by batter name. These labels populate the fielder dropdown.
fielders = (
    df.drop_duplicates(['batter_id'])
    .sort_values(['batter_name'])
    .set_index('batter_id')['batter_name_team']
    .to_dict()
)
# Inverse lookup: dropdown label -> player id.
fielders_reversed = {label: player_id for player_id, label in fielders.items()}

st.write("#### Select Fielder")
# A non-empty label is supplied for accessibility and hidden visually, since
# the heading above already serves as the visible caption.
selected_fielder = st.selectbox(
    'Select Fielder',
    list(fielders_reversed),
    label_visibility='collapsed',
)

# With no options the selectbox yields None; stop instead of raising KeyError.
if selected_fielder is None:
    st.write("No players available to select.")
    st.stop()

fielder_select = fielders_reversed[selected_fielder]

# Baseball Savant endpoint queried for the selected player's catch data.
url = f"https://baseballsavant.mlb.com/player-services/range?playerId={fielder_select}&season={season}&playerType=fielder"

# Fetch the selected player's catch data. A timeout keeps the app from hanging
# on a stalled connection, and HTTP or JSON failures are reported to the user
# instead of surfacing as a traceback.
try:
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()
except (requests.RequestException, ValueError) as exc:
    st.error(f"Could not retrieve catch probability data: {exc}")
    st.stop()

df_catch = pd.DataFrame(data)

# Nothing returned for this player: tell the user and end this script run.
if df_catch.empty:
    st.write("No data available for the selected fielder.")
    st.stop()

# Convert to float and round to one decimal place for display.
df_catch['hang_time'] = df_catch['hang_time'].astype(float).round(1)
df_catch['distance'] = df_catch['distance'].astype(float).round(1)

# Attach pitch-level details to every catch-data row. The right join keeps all
# rows of df_catch; pitch-data columns that clash get no suffix, catch-data
# columns that clash get "_fielder".
df_merge = df.merge(df_catch, on='play_id', how='right', suffixes=('', '_fielder'))

# Translate the numeric position code into its abbreviation.
df_merge['pos'] = df_merge['pos'].astype(int)
df_merge['Position'] = df_merge['pos'].map(pos_dict)

# Drop plays where the batter is the same player as `player_id`
# (presumably the selected fielder's id from the Savant response -- confirm).
df_merge = df_merge[df_merge['batter_id'] != df_merge['player_id']]

# Sort chronologically, then renumber the rows. After filtering and sorting,
# the old index labels no longer match row positions, while the video lookup
# below indexes `play_id` positionally; resetting keeps label == position.
df_merge = df_merge.sort_values(by='game_date').reset_index(drop=True)

# Columns shown in the grid and their display headers (parallel lists).
column_names = ['game_date', 'batter_name', 'pitcher_name', 'name_display_first_last', 'Position', 'event', 'out', 'wall', 'back', 'stars', 'distance', 'hang_time', 'catch_rate']
column_names_display = ['Game Date', 'Batter Name', 'Pitcher Name', 'Fielder Name', 'Position', 'Event', 'Out', 'Wall', 'Back', 'Stars', 'Distance', 'Hang Time', 'Catch Rate']

with st.container():
    st.write("#### Fielder Data")

    # Only the display columns are handed to the grid.
    grid_df = df_merge[column_names]
    gb = GridOptionsBuilder.from_dataframe(grid_df)

    # Replace the raw column names with their display headers.
    for col, display_name in zip(column_names, column_names_display):
        gb.configure_column(col, headerName=display_name)

    # Single-row selection via checkbox; the selected row drives the video link.
    gb.configure_selection('single', use_checkbox=True)
    grid_options = gb.build()

    grid_response = AgGrid(
        grid_df,
        gridOptions=grid_options,
        update_mode=GridUpdateMode.SELECTION_CHANGED,
        height=300,
        allow_unsafe_jscode=True,
        width="100%",
    )

# Resolve the selected grid row to a Baseball Savant video link.
selected_rows = grid_response['selected_rows']
try:
    # Only the lookup of the selected row is guarded: it fails with
    # AttributeError when the selection is not a DataFrame (e.g. nothing
    # selected yet) and with IndexError when the selection frame is empty.
    selected_row_index = int(selected_rows.index.values[0])
except (AttributeError, IndexError):
    st.write("#### Select Row to Get Video Link")
else:
    # The index is used as a position into the play_id values.
    play_link = f'https://baseballsavant.mlb.com/sporty-videos?playId={df_merge["play_id"].values[selected_row_index]}'
    st.markdown(f'#### [Link to Video]({play_link})')

# Render the column legend, then close the wrapper div opened near the top of
# the script.
st.markdown(markdown_text_end)
st.markdown('</div>', unsafe_allow_html=True)