# Hugging Face Spaces app — Catch Probability Lookup Tool.
# (Spaces UI status lines removed; they are not part of the program.)
| import urllib.request | |
| from urllib.error import HTTPError | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import os | |
| import json | |
| import streamlit as st | |
| import pandas as pd | |
| # import polars as pl | |
| from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode | |
# Statcast numeric position codes mapped to standard scorekeeping
# abbreviations (1 = P, 2 = C, ..., 9 = RF, 10 = DH).
_POSITION_ABBREVS = ['P', 'C', '1B', '2B', '3B', 'SS', 'LF', 'CF', 'RF', 'DH']
pos_dict = {code: abbrev for code, abbrev in enumerate(_POSITION_ABBREVS, start=1)}
# Configure the Streamlit page to use the full browser width.
st.set_page_config(layout="wide")
# Inject custom CSS defining a centered, 1250px-wide container class.
st.markdown(
    """
    <style>
    .main-container {
        max-width: 1250px;
        margin: 0 auto;
    }
    </style>
    """,
    unsafe_allow_html=True
)
# Open the container div; the matching closing </div> is emitted at the end
# of the script. NOTE(review): Streamlit renders each st.markdown call as its
# own element, so this div may not actually wrap the widgets between the open
# and close tags — confirm the intended layout effect in the browser.
st.markdown('<div class="main-container">', unsafe_allow_html=True)
# Header/about copy rendered at the top of the app (title, author credit,
# data source, and a short explanation of MLB's Catch Probability metric).
markdown_text = """
## Catch Probability Lookup Tool
##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
##### Data: [MLB](https://baseballsavant.mlb.com/)
#### About
This Streamlit app retrieves catch probability data for a selected fielder from [Baseball Savant](https://baseballsavant.mlb.com/leaderboard/catch_probability).
The app displays the fielder's data in a table and allows the user to select a
row to view the corresponding video.
Catch probability data is only available for outfielders.
#### What is Catch Probability?
*From MLB:*
**Catch Probability** expresses the likelihood for a ball to be caught by an outfielder based on opportunity time,
distance needed, and direction. “Opportunity time” starts when the ball is released by the pitcher,
and “distance needed” is the shortest distance needed to make the catch.
[Learn more about how direction is accounted for here](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408).
[Read more about the details of how Catch Probability works here](https://www.mlb.com/news/statcast-introduces-catch-probability-for-2017-c217802340).
"""
# Column glossary shown below the data table (rendered near the end of the
# script, after the grid and video link).
markdown_text_end = '''
*Columns:*
- **Batter Name**: Name of the batter
- **Pitcher Name**: Name of the pitcher
- **Fielder Name**: Name of the fielder
- **Position**: Position of the fielder
- **Event**: Type of play
- **Out**: Was the ball caught?
- **Wall**: [Did the fielder catch the ball at the wall?](https://www.mlb.com/news/catch-probability-updated-to-account-for-walls-c269814542)
- **Back**: [Did the fielder catch the ball while moving back?](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408)
- **Stars**: [Number of stars assigned to the play](https://baseballsavant.mlb.com/leaderboard/catch_probability)
- **Distance**: Distance required to make the catch in feet
- **Hang Time**: Hang time of the ball in seconds
- **Catch Rate**: Probability of the catch being made
'''
# Render the header/about text defined above.
st.markdown(markdown_text)
# --- Load season pitch data and build the player picker ---------------------
# Import kept local to this section of the script, mirroring the original
# layout (the unused bare `import datasets` has been dropped).
from datasets import load_dataset

season = 2025
level = 'mlb'

# Pull the season's pitch-level parquet file from the Hugging Face dataset repo.
ds = load_dataset("TJStatsApps/mlb_data", data_files=f"data/{level}_pitch_data_{season}.parquet")
dataset = ds["train"].to_pandas()

# One row per play: keep the last record seen for each play_id.
# .copy() makes the frame independent so the column assignment below cannot
# trigger pandas chained-assignment warnings.
df = dataset.drop_duplicates(subset=['play_id'], keep='last').copy()
df['batter_name_team'] = df['batter_name'] + ' - ' + df['batter_team']

# Display-label -> MLB player id lookup. NOTE(review): built from the batter_*
# columns — every player appears as a batter in the pitch data, and the same
# MLB id is then used as the fielder playerId in the Savant request below.
fielders = (
    df.drop_duplicates(['batter_id'])
      .sort_values(['batter_name'])
      .set_index('batter_id')['batter_name_team']
      .to_dict()
)
fielders_reversed = {v: k for k, v in fielders.items()}

# Player picker widget.
st.write("#### Select Fielder")
selected_fielder = st.selectbox('', list(fielders_reversed.keys()))

# Resolve the chosen label back to its MLB player id.
fielder_select = fielders_reversed[selected_fielder]

# Baseball Savant catch-probability ("range") endpoint for this player/season.
url = f"https://baseballsavant.mlb.com/player-services/range?playerId={fielder_select}&season={season}&playerType=fielder"
# Fetch catch-probability rows for the selected fielder.
# Hardened versus the original: a request timeout and an explicit HTTP status
# check, so a Savant outage surfaces as a readable message instead of a hang
# or a JSON decode traceback. (Dead commented-out scraping code removed.)
try:
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()
except requests.RequestException as e:
    st.error(f"Failed to fetch data from Baseball Savant: {e}")
    st.stop()

df_catch = pd.DataFrame(data)

# No rows (e.g. infielders/pitchers have no catch-probability data): tell the
# user and halt the script here.
if df_catch.empty:
    st.write("No data available for the selected fielder.")
    st.stop()
# Round numeric fields for display.
df_catch['hang_time'] = df_catch['hang_time'].astype(float).round(1)
df_catch['distance'] = df_catch['distance'].astype(float).round(1)

# Attach pitch-level context to each catch-probability row; how='right' keeps
# every Savant row even if a play is missing from the pitch dataset.
df_merge = df.merge(df_catch, on='play_id', how='right', suffixes=('', '_fielder')).reset_index(drop=True)

# Numeric position code -> abbreviation (e.g. 8 -> 'CF').
df_merge['pos'] = df_merge['pos'].astype(int)
df_merge['Position'] = df_merge['pos'].map(pos_dict)

# Drop plays where the selected player was the batter rather than the fielder.
df_merge = df_merge[df_merge['batter_id'] != df_merge['player_id']]

# BUG FIX: the original `sort_values(..., inplace=True)` kept the old index
# labels after the filter above, but the video-link code later indexes
# df_merge positionally with the grid's row index. ignore_index=True renumbers
# the index 0..n-1 so labels and positions agree.
df_merge = df_merge.sort_values(by='game_date', ignore_index=True)

# Raw dataframe column names and the header names to show in the grid.
column_names = ['game_date','batter_name', 'pitcher_name', 'name_display_first_last', 'Position','event', 'out', 'wall', 'back', 'stars', 'distance', 'hang_time', 'catch_rate']
column_names_display = ['Game Date','Batter Name', 'Pitcher Name', 'Fielder Name', 'Position','Event', 'Out', 'Wall', 'Back', 'Stars', 'Distance', 'Hang Time', 'Catch Rate']
# Render the merged data in an interactive AgGrid with single-row selection;
# the container scopes the grid's width styling.
with st.container():
    st.write("#### Fielder Data")

    display_df = df_merge[column_names]

    # Build grid options: friendly header names plus checkbox row selection.
    builder = GridOptionsBuilder.from_dataframe(display_df)
    for raw_name, pretty_name in zip(column_names, column_names_display):
        builder.configure_column(raw_name, headerName=pretty_name)
    builder.configure_selection('single', use_checkbox=True)

    # Draw the grid; re-runs the script whenever the selection changes.
    grid_response = AgGrid(
        display_df,
        gridOptions=builder.build(),
        update_mode=GridUpdateMode.SELECTION_CHANGED,
        height=300,
        allow_unsafe_jscode=True,
        width="100%",
    )
# Turn the grid selection into a Baseball Savant video link.
# NOTE(review): the shape of grid_response['selected_rows'] is version-
# dependent in streamlit-aggrid (DataFrame in recent releases, list/None in
# older ones); this code assumes a DataFrame, and the AttributeError branch
# catches the no-selection case (e.g. None has no .index) — confirm against
# the pinned st_aggrid version.
try:
    # First selected row's index label from the grid's returned DataFrame.
    selected_row_index = int(grid_response['selected_rows'].index.values[0])
    # NOTE(review): .values[...] indexes positionally while the grid returns
    # an index label — these only agree when df_merge's index is 0..n-1;
    # verify df_merge's index is reset before this point.
    play_link = f'https://baseballsavant.mlb.com/sporty-videos?playId={df_merge["play_id"].values[selected_row_index]}'
    #a = requests.get(f'https://baseballsavant.mlb.com/sporty-videos?playId={df_merge["play_id"].values[selected_row_index]}')
    #soup = BeautifulSoup(a.content, 'lxml')
    #video_url = str(soup).split('<source src="')[1].split('" ')[0]
    # Share the video through Streamlit
    #st.video(video_url)
    st.markdown( f'#### [Link to Video]({play_link})')
    #st.write("Select Row to Display Video")
except AttributeError:
    # Nothing selected yet: prompt the user instead of linking.
    st.write("#### Select Row to Get Video Link")
# Footer: column glossary, then close the layout div opened at the top.
st.markdown(markdown_text_end)
st.markdown('</div>', unsafe_allow_html=True)