File size: 6,884 Bytes
1ebf57c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db329f6
 
 
 
1ebf57c
 
 
 
 
 
3868427
db329f6
 
 
1ebf57c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import seaborn as sns
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
import requests
import polars as pl
from datetime import date
import pandas as pd
import matplotlib



# Render the page header as Markdown: app title, author / code / data credits,
# and a short description of the tjStuff+ metric. The string below is
# user-facing text and is passed to Streamlit verbatim.
st.markdown("""
## tjStuff+ App

##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
##### Code: [GitHub Repo](https://github.com/tnestico/streamlit_tjstuff)
##### Data: [MLB](https://baseballsavant.mlb.com/) ([Gathered from my MLB Scraper](https://github.com/tnestico/mlb_scraper))

#### About
This Streamlit app tabulates and plots my pitching metric, tjStuff+, for all MLB players during the 2024 MLB Season

About tjStuff+:
* tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
* tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
* Pitch Grade is based off tjStuff+ and scales the data to the traditional 20-80 Scouting Scale for a given pitch type

[Learn More about tjStuff+ here](https://github.com/tnestico/tjstuff_plus/tree/main)
                            
"""
)


# Master lookup keyed by pitch-type code. Each entry holds the hex colour used
# for that pitch and its human-readable name.
pitch_colours = {
    # --- Fastballs ---
    'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
    'FA': {'colour': '#FF007D', 'name': 'Fastball'},
    'SI': {'colour': '#98165D', 'name': 'Sinker'},
    'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},

    # --- Offspeed ---
    'CH': {'colour': '#F79E70', 'name': 'Changeup'},
    'FS': {'colour': '#FE6100', 'name': 'Splitter'},
    'SC': {'colour': '#F08223', 'name': 'Screwball'},
    'FO': {'colour': '#FFB000', 'name': 'Forkball'},

    # --- Sliders ---
    'SL': {'colour': '#67E18D', 'name': 'Slider'},
    'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
    'SV': {'colour': '#376748', 'name': 'Slurve'},

    # --- Curveballs ---
    'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
    'CU': {'colour': '#3025CE', 'name': 'Curveball'},
    'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
    'EP': {'colour': '#648FFF', 'name': 'Eephus'},

    # --- Others ---
    'KN': {'colour': '#867A08', 'name': 'Knuckleball'},
    'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
    'UN': {'colour': '#9C8975', 'name': 'Unknown'},
}

# Derived lookups, each in the same order as ``pitch_colours``:
#   dict_colour          code -> colour
#   dict_pitch           code -> name
#   dict_pitch_desc_type name -> code
#   dict_pitch_name      name -> colour
dict_colour = {code: pitch_colours[code]['colour'] for code in pitch_colours}
dict_pitch = {code: pitch_colours[code]['name'] for code in pitch_colours}
dict_pitch_desc_type = {name: code for code, name in dict_pitch.items()}
dict_pitch_name = {dict_pitch[code]: colour for code, colour in dict_colour.items()}

# Three-stop colormap (blue -> white -> gold) used for the table's
# background gradients
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list(
    "",
    ['#648FFF', '#FFFFFF', '#FFB000'],
)

# Seed the cache-status flag when this session has not set it yet
if 'cache_cleared' not in st.session_state:
    st.session_state['cache_cleared'] = False

# Data loader; the st.cache_data decorator lets Streamlit cache the result
@st.cache_data
def fetch_data():
    """Read the tjStuff+ pitch data CSV into a Polars DataFrame.

    Floating-point NaN values are replaced with nulls before the frame is
    returned.
    """
    return pl.read_csv("tjstuff_plus_pitch_data_2024.csv").fill_nan(None)

# Load the data and keep an unfiltered copy for the plotting section
df = fetch_data()
df_plot = df.clone()

# Table data: rows with at least 10 pitches and both metrics present,
# ordered by pitcher name and then pitch type (both ascending)
df = (
    df.filter(pl.col('pitches') >= 10)
    .drop_nulls(subset=['pitch_grade', 'tj_stuff_plus'])
    .sort(['pitcher_name', 'pitch_type'], descending=[False, False])
)

# Store the numeric display columns as 64-bit integers
df = df.with_columns([
    pl.col(column_name).cast(pl.Int64).alias(column_name)
    for column_name in ('tj_stuff_plus', 'pitches', 'pitcher_id', 'pitch_grade')
])

# Column headers / number formats passed to st.dataframe for the table
column_config_dict = {
    'pitcher_id': 'Pitcher ID',
    'pitcher_name': 'Pitcher Name',
    'pitch_type': 'Pitch Type',
    'pitches': 'Pitches',
    'tj_stuff_plus': st.column_config.NumberColumn(label="tjStuff+", format="%.0f"),
    'pitch_grade': st.column_config.NumberColumn(label="Pitch Grade", format="%.0f"),
}

# Options for the pitch-type selector: a leading blank entry, then every pitch
# type present in the table data (sorted by code) shown by its display name.
# Codes without an entry in dict_pitch are shown unchanged.
unique_pitch_types = [''] + [
    dict_pitch.get(code, code)
    for code in sorted(df['pitch_type'].unique().to_list())
]


st.markdown("""
#### tjStuff+ Table

Filter and sort tjStuff+ Data for all MLB Pitchers
""")

# Pitch-type selector; the blank first option means "no pitch-type filter"
selected_pitch_types = st.selectbox(
    label='Select Pitch Types *(leave blank for all pitch types)*',
    options=unique_pitch_types,
)

# Position selector; the blank first option means "no position filter"
selected_position = st.selectbox(
    label='Select Position *(leave blank for all Pitchers)*',
    options=['', 'SP', 'RP'],
)


# Filter the DataFrame based on the selected pitch type.
# The selector shows display names, so translate the choice back to its
# pitch-type code. Options that had no display name (e.g. 'All') were shown as
# the raw pitch_type value, so they are used as-is instead of raising KeyError.
if selected_pitch_types != '':
    pitch_type_code = dict_pitch_desc_type.get(selected_pitch_types, selected_pitch_types)
    df = df.filter(pl.col('pitch_type') == pitch_type_code).sort('tj_stuff_plus', descending=True)

# Filter by position when one is selected
if selected_position != '':
    df = df.filter(pl.col('position') == selected_position).sort('tj_stuff_plus', descending=True)


# Take the display columns, convert to pandas, and shade the two metric
# columns with the shared colormap (tjStuff+ over 80-120, grade over 20-80)
styled_df = (
    df.select(['pitcher_id', 'pitcher_name', 'pitch_type', 'pitches', 'tj_stuff_plus', 'pitch_grade'])
    .to_pandas()
    .style
    .background_gradient(subset=['tj_stuff_plus'], cmap=cmap_sum, vmin=80, vmax=120)
    .background_gradient(subset=['pitch_grade'], cmap=cmap_sum, vmin=20, vmax=80)
)

# Show the styled table in the app
st.dataframe(
    styled_df,
    hide_index=True,
    column_config=column_config_dict,
    width=1500,
)

# Create dictionaries for pitcher information
pitcher_id_name = dict(zip(df_plot['pitcher_id'], df_plot['pitcher_name']))
pitcher_id_name_id = dict(zip(df_plot['pitcher_id'], df_plot['pitcher_name'] + ' - ' + df_plot['pitcher_id']))
pitcher_name_id_id = dict(zip(df_plot['pitcher_name'] + ' - ' + df_plot['pitcher_id'], df_plot['pitcher_id']))

# Map pitcher id -> position.
# Ids and positions must come from the same rows: zipping the full id column
# against a null-dropped position column shifts every pair after the first
# null and silently truncates the mapping. Start with every id (value may be
# None), then overwrite with the rows that do have a position so a non-null
# value wins whenever the pitcher has one.
pitcher_id_position = dict(zip(df_plot['pitcher_id'], df_plot['position']))
df_with_position = df_plot.drop_nulls(subset=['position'])
pitcher_id_position.update(zip(df_with_position['pitcher_id'], df_with_position['position']))


st.markdown("""
#### tjStuff+ Plot

Visualize tjStuff+ and Pitching Grade by Pitcher
""")

# Pitcher selector; options are the "<name> - <id>" labels in sorted order
pitcher_id_name_select = st.selectbox(
    label='Select Pitcher',
    options=sorted(pitcher_name_id_id),
)

# Resolve the chosen label to the pitcher's id, then look up name and position
pitcher_id = pitcher_name_id_id[pitcher_id_name_select]
pitcher_name = pitcher_id_name[pitcher_id]
position = pitcher_id_position[pitcher_id]

# Plotting helper module (imported here, just before its only use).
# pitcher_id, position and pitcher_name are already resolved from the
# selectbox choice earlier in the script, so they are not looked up again.
import tjstuff_plot

# Button to update plot: draw the selected pitcher's plot when clicked
if st.button('Update Plot'):
    # Record in session state that a plot was requested
    st.session_state.update_plot = True
    tjstuff_plot.tjstuff_plot(df_plot, pitcher_id, position, pitcher_name)