File size: 7,449 Bytes
aa49833 c247301 aa49833 07b3375 aa49833 c644018 9e77e0f 1606618 ee029d5 c644018 83173ab c644018 ee029d5 c644018 ee029d5 c644018 c8b3408 07b3375 ee029d5 677f8e6 07b3375 c8b3408 c644018 aa49833 67c2815 a41a992 67c2815 6806b25 b56a26c 06789cc 3f6340f 515500e aa49833 e61a842 f61359b aa49833 737255a aa49833 939ade4 aa49833 939ade4 aa49833 939ade4 aa49833 939ade4 aa49833 939ade4 5de68ce aa49833 939ade4 07b3375 ee029d5 07b3375 1606618 9e77e0f 5de68ce 07b3375 1606618 07b3375 aa49833 8d95317 1606618 e61a842 1606618 07b3375 1606618 aa49833 1606618 2edc113 1606618 aa49833 1606618 aa49833 1606618 dc210c6 1606618 a0660fc dc210c6 a0660fc e86de71 1606618 2e8c439 aa49833 e86de71 aa49833 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
import urllib.request
from urllib.error import HTTPError
import requests
from bs4 import BeautifulSoup
import os
import json
import streamlit as st
import pandas as pd
# import polars as pl
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
# Statcast numeric position codes -> standard position abbreviations.
pos_dict = {
    1: 'P',
    2: 'C',
    3: '1B',
    4: '2B',
    5: '3B',
    6: 'SS',
    7: 'LF',
    8: 'CF',
    9: 'RF',
    10: 'DH',
}
# --- Page setup -------------------------------------------------------------
st.set_page_config(layout="wide")

# CSS that caps the main content container at 1250px and centres it.
_CONTAINER_CSS = """
    <style>
    .main-container {
        max-width: 1250px;
        margin: 0 auto;
    }
    </style>
    """
st.markdown(_CONTAINER_CSS, unsafe_allow_html=True)

# Open the width-limited wrapper div (closed again at the bottom of the page).
st.markdown('<div class="main-container">', unsafe_allow_html=True)
# --- Static page copy -------------------------------------------------------
# Intro/about text rendered immediately below; markdown_text_end (the column
# glossary) is rendered at the bottom of the page, after the data grid.
markdown_text = """
## Catch Probability Lookup Tool
##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
##### Data: [MLB](https://baseballsavant.mlb.com/)
#### About
This Streamlit app retrieves catch probability data for a selected fielder from [Baseball Savant](https://baseballsavant.mlb.com/leaderboard/catch_probability).
The app displays the fielder's data in a table and allows the user to select a
row to view the corresponding video.
Catch probability data is only available for outfielders.
#### What is Catch Probability?
*From MLB:*
**Catch Probability** expresses the likelihood for a ball to be caught by an outfielder based on opportunity time,
distance needed, and direction. “Opportunity time” starts when the ball is released by the pitcher,
and “distance needed” is the shortest distance needed to make the catch.
[Learn more about how direction is accounted for here](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408).
[Read more about the details of how Catch Probability works here](https://www.mlb.com/news/statcast-introduces-catch-probability-for-2017-c217802340).
"""
# Glossary for the columns shown in the AgGrid table further down the page.
markdown_text_end = '''
*Columns:*
- **Batter Name**: Name of the batter
- **Pitcher Name**: Name of the pitcher
- **Fielder Name**: Name of the fielder
- **Position**: Position of the fielder
- **Event**: Type of play
- **Out**: Was the ball caught?
- **Wall**: [Did the fielder catch the ball at the wall?](https://www.mlb.com/news/catch-probability-updated-to-account-for-walls-c269814542)
- **Back**: [Did the fielder catch the ball while moving back?](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408)
- **Stars**: [Number of stars assigned to the play](https://baseballsavant.mlb.com/leaderboard/catch_probability)
- **Distance**: Distance required to make the catch in feet
- **Hang Time**: Hang time of the ball in seconds
- **Catch Rate**: Probability of the catch being made
'''
# Render the intro markdown text in Streamlit
st.markdown(markdown_text)
# --- Data loading -----------------------------------------------------------
import datasets
from datasets import load_dataset

# Season/level of the Statcast pitch data to pull from the Hugging Face hub.
season = 2025
level = 'mlb'
# dataset = load_dataset('TJStatsApps/mlb_data', data_files=[f'{level}_pitch_data_{season}.csv' ])
# Load this season's pitch-level parquet from the TJStatsApps/mlb_data repo.
ds = load_dataset("TJStatsApps/mlb_data", data_files=f"data/{level}_pitch_data_{season}.parquet")
dataset = ds["train"].to_pandas()  # or to_polars() if you want Polars

# One row per play; .copy() so the column assignment below writes into an
# independent frame rather than a view of `dataset` (avoids
# SettingWithCopyWarning and silent no-ops under pandas copy-on-write).
df = dataset.drop_duplicates(subset=['play_id'], keep='last').copy()
df['batter_name_team'] = df['batter_name'] + ' - ' + df['batter_team']

# Build "Name - Team" label -> player id lookup for the selectbox.
# NOTE(review): the lookup is keyed on batter_* columns even though the tool
# selects *fielders* — presumably every player of interest also appears as a
# batter in this data; confirm against the dataset schema.
fielders = df.drop_duplicates(['batter_id']).sort_values(['batter_name']).set_index('batter_id')['batter_name_team'].to_dict()
fielders_reversed = {v: k for k, v in fielders.items()}
# --- Fielder selection ------------------------------------------------------
st.write("#### Select Fielder")
# Selectbox shows the "Name - Team" labels; map the choice back to a player id.
chosen_label = st.selectbox('', list(fielders_reversed.keys()))
chosen_id = fielders_reversed[chosen_label]

# Baseball Savant range-service endpoint for this player/season.
url = (
    "https://baseballsavant.mlb.com/player-services/range"
    f"?playerId={chosen_id}&season={season}&playerType=fielder"
)
# --- Fetch catch-probability rows from Baseball Savant ----------------------
# The range service returns a JSON array of plays for the selected fielder.
# Bound the wait and fail fast on HTTP errors instead of silently proceeding
# with bad data.
response = requests.get(url, timeout=30)
response.raise_for_status()
df_catch = pd.DataFrame(response.json())

# Nothing to show — catch probability only exists for outfielders.
if df_catch.empty:
    st.write("No data available for the selected fielder.")
    st.stop()

# Coerce to float and round to one decimal for display.
df_catch['hang_time'] = df_catch['hang_time'].astype(float).round(1)
df_catch['distance'] = df_catch['distance'].astype(float).round(1)
# --- Join play metadata onto the catch-probability rows ---------------------
merged = df.merge(df_catch, on='play_id', how='right', suffixes=('', '_fielder'))
df_merge = merged.reset_index(drop=True)

# Human-readable position label from the numeric position code.
df_merge['pos'] = df_merge['pos'].astype(int)
df_merge['Position'] = df_merge['pos'].map(pos_dict)

# Exclude rows where the batter is the same player as the fielder, then order
# chronologically.
df_merge = df_merge[df_merge['batter_id'] != df_merge['player_id']]
df_merge = df_merge.sort_values(by='game_date')

# Raw df_merge column names and their display headers (parallel lists).
column_names = ['game_date','batter_name', 'pitcher_name', 'name_display_first_last', 'Position','event', 'out', 'wall', 'back', 'stars', 'distance', 'hang_time', 'catch_rate']
column_names_display = ['Game Date','Batter Name', 'Pitcher Name', 'Fielder Name', 'Position','Event', 'Out', 'Wall', 'Back', 'Stars', 'Distance', 'Hang Time', 'Catch Rate']
# --- Data grid ---------------------------------------------------------------
# Container keeps the AgGrid inside the width-limited page layout.
with st.container():
    st.write("#### Fielder Data")
    display_df = df_merge[column_names]

    # Grid options: friendly headers plus single-row checkbox selection.
    builder = GridOptionsBuilder.from_dataframe(display_df)
    for col, header in zip(column_names, column_names_display):
        builder.configure_column(col, headerName=header)
    builder.configure_selection('single', use_checkbox=True)

    # Render the grid; re-runs the script whenever the selection changes.
    grid_response = AgGrid(
        display_df,
        gridOptions=builder.build(),
        update_mode=GridUpdateMode.SELECTION_CHANGED,
        height=300,
        allow_unsafe_jscode=True,
        width="100%",
    )
# --- Video link for the selected row ----------------------------------------
try:
    # st_aggrid returns the selection as a DataFrame (or None / a list of
    # dicts, depending on version).  Grab the first selected row's index and
    # build the Savant video link from that row's play_id.
    selected_row_index = int(grid_response['selected_rows'].index.values[0])
    play_link = f'https://baseballsavant.mlb.com/sporty-videos?playId={df_merge["play_id"].values[selected_row_index]}'
    st.markdown(f'#### [Link to Video]({play_link})')
except (AttributeError, IndexError, KeyError, TypeError):
    # No selection yet: selected_rows is None (AttributeError), an empty
    # frame (IndexError), or a list in older st_aggrid versions
    # (TypeError/KeyError).  Prompt the user instead of crashing the app.
    st.write("#### Select Row to Get Video Link")
# Footer: render the column glossary and close the width-limiting wrapper div
# opened at the top of the page.  (Stray table-extraction residue removed.)
st.markdown(markdown_text_end)
st.markdown('</div>', unsafe_allow_html=True)