File size: 7,449 Bytes
aa49833
 
 
 
 
 
 
 
c247301
aa49833
 
07b3375
 
 
 
 
 
 
 
 
 
 
 
aa49833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c644018
 
 
 
9e77e0f
 
 
1606618
ee029d5
c644018
83173ab
c644018
 
 
 
 
ee029d5
c644018
 
 
ee029d5
 
c644018
c8b3408
 
 
07b3375
 
 
 
 
 
 
ee029d5
 
677f8e6
07b3375
 
 
c8b3408
c644018
 
 
 
 
aa49833
67c2815
 
 
a41a992
67c2815
6806b25
b56a26c
 
06789cc
3f6340f
515500e
aa49833
 
 
 
 
 
 
e61a842
f61359b
aa49833
 
 
 
 
737255a
aa49833
939ade4
 
aa49833
939ade4
 
 
 
 
aa49833
939ade4
 
 
 
aa49833
939ade4
 
aa49833
939ade4
 
5de68ce
 
 
 
 
aa49833
 
 
 
 
 
939ade4
07b3375
ee029d5
07b3375
 
1606618
9e77e0f
 
 
5de68ce
07b3375
1606618
 
 
07b3375
 
 
aa49833
8d95317
1606618
 
e61a842
1606618
 
 
 
 
07b3375
 
1606618
 
aa49833
1606618
 
 
 
 
 
 
2edc113
1606618
aa49833
1606618
aa49833
 
1606618
 
 
dc210c6
 
 
 
1606618
a0660fc
dc210c6
a0660fc
 
e86de71
1606618
2e8c439
aa49833
e86de71
aa49833
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
import urllib.request
from urllib.error import HTTPError
import requests
from bs4 import BeautifulSoup
import os
import json
import streamlit as st
import pandas as pd
# import polars as pl
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode


# Map Statcast numeric position codes (1-10) to standard abbreviations.
pos_dict = dict(
    zip(
        range(1, 11),
        ['P', 'C', '1B', '2B', '3B', 'SS', 'LF', 'CF', 'RF', 'DH'],
    )
)

# Configure Streamlit to use the full browser width.
st.set_page_config(layout="wide")

# Custom CSS: cap the main content container at 1250px and center it.
_container_css = """
    <style>
    .main-container {
        max-width: 1250px;
        margin: 0 auto;
    }
    </style>
    """
st.markdown(_container_css, unsafe_allow_html=True)

# Open the width-limited wrapper div; it is closed at the end of the script.
st.markdown('<div class="main-container">', unsafe_allow_html=True)

# Page copy: intro/about text shown at the top, and a column glossary
# (markdown_text_end) rendered below the grid at the end of the script.
markdown_text = """
## Catch Probability Lookup Tool

##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
##### Data: [MLB](https://baseballsavant.mlb.com/)

#### About
This Streamlit app retrieves catch probability data for a selected fielder from [Baseball Savant](https://baseballsavant.mlb.com/leaderboard/catch_probability).
The app displays the fielder's data in a table and allows the user to select a 
row to view the corresponding video.

Catch probability data is only available for outfielders.

#### What is Catch Probability?
*From MLB:*

**Catch Probability** expresses the likelihood for a ball to be caught by an outfielder based on opportunity time, 
distance needed, and direction. “Opportunity time” starts when the ball is released by the pitcher, 
and “distance needed” is the shortest distance needed to make the catch.
[Learn more about how direction is accounted for here](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408). 
[Read more about the details of how Catch Probability works here](https://www.mlb.com/news/statcast-introduces-catch-probability-for-2017-c217802340).

"""

# Glossary for the grid columns; displayed after the table (see end of file).
markdown_text_end = '''
*Columns:*
- **Batter Name**: Name of the batter
- **Pitcher Name**: Name of the pitcher
- **Fielder Name**: Name of the fielder
- **Position**: Position of the fielder
- **Event**: Type of play
- **Out**: Was the ball caught?
- **Wall**: [Did the fielder catch the ball at the wall?](https://www.mlb.com/news/catch-probability-updated-to-account-for-walls-c269814542)
- **Back**: [Did the fielder catch the ball while moving back?](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408)
- **Stars**: [Number of stars assigned to the play](https://baseballsavant.mlb.com/leaderboard/catch_probability)
- **Distance**: Distance required to make the catch in feet
- **Hang Time**: Hang time of the ball in seconds
- **Catch Rate**: Probability of the catch being made
'''

# Render the intro/about section.
st.markdown(markdown_text)


# ---- Data loading -------------------------------------------------------
import datasets
from datasets import load_dataset

# Pull the current season's MLB pitch-level data from the Hugging Face hub.
season = 2025
level = 'mlb'
ds = load_dataset("TJStatsApps/mlb_data", data_files=f"data/{level}_pitch_data_{season}.parquet")
dataset = ds["train"].to_pandas()

# Keep only the most recent record for each play.
df = dataset.drop_duplicates(subset=['play_id'], keep='last')
df['batter_name_team'] = df['batter_name'] + ' - ' + df['batter_team']

# Build id -> "Name - Team" lookups for the picker, and the reverse mapping.
# NOTE(review): these are derived from batter_* columns even though they feed
# the "fielder" selectbox — presumably every fielder also appears as a batter;
# confirm against the dataset.
unique_players = df.drop_duplicates(['batter_id']).sort_values(['batter_name'])
fielders = unique_players.set_index('batter_id')['batter_name_team'].to_dict()
fielders_reversed = dict(zip(fielders.values(), fielders.keys()))

# Fielder picker: show "Name - Team" labels, then resolve the chosen label
# back to its MLB player id.
st.write("#### Select Fielder")
selected_fielder = st.selectbox('', list(fielders_reversed.keys()))
fielder_select = fielders_reversed[selected_fielder]

# Baseball Savant endpoint with per-play range/catch data for this fielder.
url = (
    "https://baseballsavant.mlb.com/player-services/range"
    f"?playerId={fielder_select}&season={season}&playerType=fielder"
)

# Fetch the catch-probability rows for the selected fielder as JSON.
# (An earlier version scraped the 'rangeLine' data out of the page HTML;
# this endpoint returns the same records directly.)
#
# Fix: the previous bare `requests.get(url).json()` had no timeout (could
# hang the app indefinitely) and no HTTP status check (a 4xx/5xx would fail
# with a confusing JSON decode error instead of a clear HTTP error).
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
df_catch = pd.DataFrame(data)

# Bail out early when Savant returned nothing for this fielder.
if df_catch.empty:
    st.write("No data available for the selected fielder.")
    st.stop()

# Round the continuous metrics to one decimal place for display.
for metric in ('hang_time', 'distance'):
    df_catch[metric] = df_catch[metric].astype(float).round(1)

# Attach pitch-level context to every catch-probability row; right join keeps
# exactly the plays Savant reported for this fielder.
df_merge = (
    df.merge(df_catch, on='play_id', how='right', suffixes=('', '_fielder'))
      .reset_index(drop=True)
)

# Translate the numeric position code to its abbreviation.
df_merge['pos'] = df_merge['pos'].astype(int)
df_merge['Position'] = df_merge['pos'].map(pos_dict)

# Drop rows where the selected fielder is also the batter of record.
df_merge = df_merge[df_merge['batter_id'] != df_merge['player_id']]

# Chronological order for the grid.
df_merge = df_merge.sort_values(by='game_date')

# Raw column names and the matching headers shown in the grid.
column_names = ['game_date','batter_name', 'pitcher_name', 'name_display_first_last', 'Position','event', 'out', 'wall', 'back', 'stars', 'distance', 'hang_time', 'catch_rate']
column_names_display = ['Game Date','Batter Name', 'Pitcher Name', 'Fielder Name', 'Position','Event', 'Out', 'Wall', 'Back', 'Stars', 'Distance', 'Hang Time', 'Catch Rate']





# Render the merged data in an interactive AgGrid table inside a container
# so the width styling applies.
with st.container():
    st.write("#### Fielder Data")

    display_df = df_merge[column_names]

    # Grid options: friendly header names plus single-row checkbox selection.
    builder = GridOptionsBuilder.from_dataframe(display_df)
    for col, header in dict(zip(column_names, column_names_display)).items():
        builder.configure_column(col, headerName=header)
    builder.configure_selection('single', use_checkbox=True)

    # Re-render whenever the user's selection changes.
    grid_response = AgGrid(
        display_df,
        gridOptions=builder.build(),
        update_mode=GridUpdateMode.SELECTION_CHANGED,
        height=300,
        allow_unsafe_jscode=True,
        width="100%",
    )

# If a row is selected, link to the Baseball Savant video for that play.
#
# Fix: `grid_response['selected_rows']` is None before any selection
# (AttributeError on `.index`), but depending on the st-aggrid version it can
# also be an EMPTY DataFrame — then `.index.values[0]` raises IndexError,
# which the old `except AttributeError` did not catch and crashed the app.
# KeyError/TypeError are included for versions that return a list/missing key.
try:
    selected_row_index = int(grid_response['selected_rows'].index.values[0])
    play_link = f'https://baseballsavant.mlb.com/sporty-videos?playId={df_merge["play_id"].values[selected_row_index]}'
    st.markdown(f'#### [Link to Video]({play_link})')
except (AttributeError, IndexError, KeyError, TypeError):
    # No row selected yet — prompt the user instead of crashing.
    st.write("#### Select Row to Get Video Link")

# Show the column glossary, then close the width-limited wrapper div that was
# opened near the top of the script.
st.markdown(markdown_text_end)
st.markdown('</div>', unsafe_allow_html=True)