nesticot committed on
Commit
fbbefa4
·
verified ·
1 Parent(s): a9dc092

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +172 -24
  2. tjstuff_plot.py +215 -0
  3. tjstuff_plus_pitch_data_2024.csv +0 -0
app.py CHANGED
@@ -1,24 +1,172 @@
1
- import seaborn as sns
2
- import streamlit as st
3
- from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
4
- import requests
5
- import polars as pl
6
- from datetime import date
7
-
8
- # Load data
9
- df = pl.read_csv("tjstuff_plus_pitch_data_2024.csv")
10
-
11
-
12
- column_config_dict = {
13
- 'pitcher_id': 'Pitcher ID',
14
- 'pitcher_name': 'Pitcher Name',
15
- 'pitch_type': 'Pitch Type',
16
- 'pitches': 'Pitches',
17
- 'tj_stuff_plus': 'tjStuff+',
18
- 'pitch_grade': 'Grade'
19
- }
20
-
21
- st.dataframe(df[['pitcher_id', 'pitcher_name', 'pitch_type', 'pitches', 'tj_stuff_plus', 'pitch_grade']],
22
- hide_index=True,
23
- column_config=column_config_dict,
24
- width=1500)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import seaborn as sns
2
+ import streamlit as st
3
+ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
4
+ import requests
5
+ import polars as pl
6
+ from datetime import date
7
+ import pandas as pd
8
+ import matplotlib
9
+
10
+
11
+
12
# Landing text: app title, credits, data source and a short primer on tjStuff+
APP_INTRO_MARKDOWN = """
## tjStuff+ App

##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
##### Code: [GitHub Repo](https://github.com/tnestico/streamlit_tjstuff)
##### Data: [MLB](https://baseballsavant.mlb.com/) ([Gathered from my MLB Scraper](https://github.com/tnestico/mlb_scraper))

#### About
This Streamlit app tabulates and plots my pitching metric, tjStuff+, for all MLB players during the 2024 MLB Season

About tjStuff+:
* tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
* tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
* Pitch Grade is based off tjStuff+ and scales the data to the traditional 20-80 Scouting Scale for a given pitch type

"""

# Render the landing text at the top of the page
st.markdown(APP_INTRO_MARKDOWN)
30
+
31
+
32
# Pitch-type codes, each with the colour and readable name used for display
pitch_colours = {
    ## Fastballs ##
    'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
    'FA': {'colour': '#FF007D', 'name': 'Fastball'},
    'SI': {'colour': '#98165D', 'name': 'Sinker'},
    'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},

    ## Offspeed ##
    'CH': {'colour': '#F79E70', 'name': 'Changeup'},
    'FS': {'colour': '#FE6100', 'name': 'Splitter'},
    'SC': {'colour': '#F08223', 'name': 'Screwball'},
    'FO': {'colour': '#FFB000', 'name': 'Forkball'},

    ## Sliders ##
    'SL': {'colour': '#67E18D', 'name': 'Slider'},
    'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
    'SV': {'colour': '#376748', 'name': 'Slurve'},

    ## Curveballs ##
    'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
    'CU': {'colour': '#3025CE', 'name': 'Curveball'},
    'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
    'EP': {'colour': '#648FFF', 'name': 'Eephus'},

    ## Others ##
    'KN': {'colour': '#867A08', 'name': 'Knuckleball'},
    'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
    'UN': {'colour': '#9C8975', 'name': 'Unknown'},
}

# Lookup tables derived from pitch_colours in a single pass:
#   dict_colour           code -> colour
#   dict_pitch            code -> name
#   dict_pitch_desc_type  name -> code
#   dict_pitch_name       name -> colour
dict_colour = {}
dict_pitch = {}
dict_pitch_desc_type = {}
dict_pitch_name = {}
for _code, _info in pitch_colours.items():
    dict_colour[_code] = _info['colour']
    dict_pitch[_code] = _info['name']
    dict_pitch_desc_type[_info['name']] = _code
    dict_pitch_name[_info['name']] = _info['colour']
68
+
69
# Three-stop colormap (blue -> white -> gold) for the table's background gradients
_gradient_stops = ['#648FFF', '#FFFFFF', '#FFB000']
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", _gradient_stops)

# Create the cache-status flag the first time a session runs the script
if 'cache_cleared' not in st.session_state:
    st.session_state['cache_cleared'] = False
75
+
76
# Cached loader for the pitch-level tjStuff+ data
@st.cache_data
def fetch_data():
    """Read the tjStuff+ pitch CSV into a Polars DataFrame.

    Floating-point NaN values are replaced with nulls before the frame is
    returned. The function is wrapped in ``st.cache_data``.

    Returns:
        The contents of ``tjstuff_plus_pitch_data_2024.csv`` as a Polars
        DataFrame.
    """
    csv_path = "tjstuff_plus_pitch_data_2024.csv"
    return pl.read_csv(csv_path).fill_nan(None)
81
+
82
# Fetch the data; keep an unfiltered copy for the plot section further down
df = fetch_data()
df_plot = df.clone()

# The table only lists pitch types thrown at least 10 times that have both metrics
df = (
    df.filter(pl.col('pitches') >= 10)
    .drop_nulls(subset=['pitch_grade', 'tj_stuff_plus'])
    .sort(['pitcher_name', 'pitch_type'], descending=[False, False])
)

# Cast the numeric columns to integers for display
df = df.with_columns([
    pl.col(column).cast(pl.Int64)
    for column in ('tj_stuff_plus', 'pitches', 'pitcher_id', 'pitch_grade')
])

# Define column configuration for Streamlit
column_config_dict = {
    'pitcher_id': 'Pitcher ID',
    'pitcher_name': 'Pitcher Name',
    'pitch_type': 'Pitch Type',
    'pitches': 'Pitches',
    'tj_stuff_plus': st.column_config.NumberColumn("tjStuff+", format="%.0f"),
    'pitch_grade': st.column_config.NumberColumn("Pitch Grade", format="%.0f")
}

# Selectable pitch types: a blank entry (no filter) followed by readable names.
# Codes without an entry in dict_pitch (e.g. 'All') are shown unchanged.
unique_pitch_types = [''] + sorted(df['pitch_type'].unique().to_list())
unique_pitch_types = [dict_pitch.get(x, x) for x in unique_pitch_types]


st.markdown("""
#### tjStuff+ Table

Filter and sort tjStuff+ Data for all MLB Pitchers
"""
)
# Create a selectbox widget for pitch types
selected_pitch_types = st.selectbox('Select Pitch Types *(leave blank for all pitch types)*', unique_pitch_types)

# Filter the DataFrame based on the selected pitch type
if selected_pitch_types != '':
    # A label with no entry in dict_pitch_desc_type was passed through
    # unchanged when the options were built, so it already is the raw
    # pitch_type value. Falling back to the label itself covers 'All' and
    # avoids a KeyError for any code missing from pitch_colours.
    selected_code = dict_pitch_desc_type.get(selected_pitch_types, selected_pitch_types)
    df = df.filter(pl.col('pitch_type') == selected_code).sort('tj_stuff_plus', descending=True)

# Convert Polars DataFrame to Pandas DataFrame and apply styling
styled_df = df[['pitcher_id', 'pitcher_name', 'pitch_type', 'pitches', 'tj_stuff_plus', 'pitch_grade']].to_pandas().style

# Apply background gradient styling to specific columns
styled_df = styled_df.background_gradient(subset=['tj_stuff_plus'], cmap=cmap_sum, vmin=80, vmax=120)
styled_df = styled_df.background_gradient(subset=['pitch_grade'], cmap=cmap_sum, vmin=20, vmax=80)

# Display the styled DataFrame in Streamlit
st.dataframe(styled_df, hide_index=True, column_config=column_config_dict, width=1500)
135
+
136
# Lookup tables for the pitcher selector, built from the unfiltered data.
# The "<name> - <id>" label keeps pitchers who share a name distinguishable.
_pitcher_labels = df_plot['pitcher_name'] + ' - ' + df_plot['pitcher_id'].cast(pl.Utf8)
pitcher_id_name = dict(zip(df_plot['pitcher_id'], df_plot['pitcher_name']))
pitcher_id_name_id = dict(zip(df_plot['pitcher_id'], _pitcher_labels))
pitcher_name_id_id = dict(zip(_pitcher_labels, df_plot['pitcher_id']))

# Take ids and positions from the SAME null-filtered frame: zipping ids from
# the full frame against a shortened position column would shift every row
# after the first null position onto the wrong pitcher.
_rows_with_position = df_plot.drop_nulls(subset=['position'])
pitcher_id_position = dict(zip(_rows_with_position['pitcher_id'],
                               _rows_with_position['position']))


st.markdown("""
#### tjStuff+ Plot

Visualize tjStuff+ and Pitching Grade by Pitcher
"""
)

# Create a selectbox widget for pitchers
pitcher_id_name_select = st.selectbox('Select Pitcher', sorted(pitcher_name_id_id.keys()))

# Get selected pitcher information
pitcher_id = pitcher_name_id_id[pitcher_id_name_select]
# None when the data holds no position for this pitcher
position = pitcher_id_position.get(pitcher_id)
pitcher_name = pitcher_id_name[pitcher_id]

import tjstuff_plot

# Draw the plot only when the button is pressed
if st.button('Update Plot'):
    st.session_state.update_plot = True
    if position is None:
        # The plot compares the pitcher against others at the same position,
        # so it cannot be drawn without one.
        st.warning(f'No position is available for {pitcher_name}, so the plot cannot be drawn.')
    else:
        tjstuff_plot.tjstuff_plot(df_plot, pitcher_id, position, pitcher_name)
172
+
tjstuff_plot.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+ import matplotlib.pyplot as plt
3
+ import numpy as np
4
+ import pandas as pd
5
+ import seaborn as sns
6
+ from matplotlib.gridspec import GridSpec
7
+ import streamlit as st
8
+
9
+
10
# Pitch-type codes with the colour and display name used when plotting
### PITCH COLOURS ###
pitch_colours = {
    ## Fastballs ##
    'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
    'FA': {'colour': '#FF007D', 'name': 'Fastball'},
    'SI': {'colour': '#98165D', 'name': 'Sinker'},
    'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},

    ## Offspeed ##
    'CH': {'colour': '#F79E70', 'name': 'Changeup'},
    'FS': {'colour': '#FE6100', 'name': 'Splitter'},
    'SC': {'colour': '#F08223', 'name': 'Screwball'},
    'FO': {'colour': '#FFB000', 'name': 'Forkball'},

    ## Sliders ##
    'SL': {'colour': '#67E18D', 'name': 'Slider'},
    'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
    'SV': {'colour': '#376748', 'name': 'Slurve'},

    ## Curveballs ##
    'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
    'CU': {'colour': '#3025CE', 'name': 'Curveball'},
    'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
    'EP': {'colour': '#648FFF', 'name': 'Eephus'},

    ## Others ##
    'KN': {'colour': '#867A08', 'name': 'Knuckleball'},
    'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
    'UN': {'colour': '#9C8975', 'name': 'Unknown'},
}

# Pitch code -> hex colour, plus grey for the 'All' entry of required_pitch_types
dict_colour = {code: info['colour'] for code, info in pitch_colours.items()}
dict_colour['All'] = '#808080'

# Pitch code -> display name
dict_pitch = {code: info['name'] for code, info in pitch_colours.items()}

# Display name -> pitch code
dict_pitch_desc_type = {info['name']: code for code, info in pitch_colours.items()}

# Display name -> hex colour
dict_pitch_name = {info['name']: info['colour'] for info in pitch_colours.values()}

# Pitch types that may appear on the plot, in x-axis order
required_pitch_types = [
    'All', 'FF', 'SI', 'FC', 'CH', 'FS', 'FO', 'SC', 'SL',
    'ST', 'SV', 'CU', 'KC', 'KN',
]

# Pitch type -> its position in required_pitch_types (used as a sort key)
custom_order_dict = dict(zip(required_pitch_types, range(len(required_pitch_types))))
62
+
63
def tjstuff_plot(df:pl.DataFrame,
                 pitcher_id:int,
                 position:str,
                 pitcher_name:str):
    """Render a pitcher's tjStuff+ and Pitch Grade swarm plots in Streamlit.

    Two stacked panels are drawn, tjStuff+ on top and Pitch Grade below. Each
    panel shows a faint swarm of every row with the same ``position`` and at
    least 10 pitches, overlaid with large labelled markers for the selected
    pitcher. Only pitch types listed in ``required_pitch_types`` are plotted.

    Args:
        df: Pitch-level data. The columns read are ``pitcher_id``,
            ``pitches``, ``position``, ``pitch_type``, ``tj_stuff_plus`` and
            ``pitch_grade``.
        pitcher_id: Value matched against the ``pitcher_id`` column.
        position: Value matched against the ``position`` column to pick the
            comparison group; also shown in the title.
        pitcher_name: Name shown in the figure title.

    Returns:
        None. The figure is sent to Streamlit with ``st.pyplot``; if the
        pitcher has no qualifying pitch type a warning is shown instead.
    """
    sns.set_style("ticks")

    df = df.to_pandas()
    qualified = df[df['pitches'] >= 10]

    # Keep only pitch types that get an x-axis slot, so the value labels below
    # can never refer to a category that is not drawn. .copy() makes the frame
    # independent before a column is added to it.
    pitcher_df = qualified[(qualified['pitcher_id'] == pitcher_id) &
                           (qualified['pitch_type'].isin(required_pitch_types))].copy()
    pitcher_df['order'] = pitcher_df['pitch_type'].map(custom_order_dict)
    pitcher_df = pitcher_df.sort_values('order')

    # The pitcher's pitch types, in the fixed display order
    thrown = set(pitcher_df['pitch_type'])
    pitcher_pitches = [x for x in required_pitch_types if x in thrown]

    if not pitcher_pitches:
        st.warning(f'No pitch type with at least 10 pitches is available for {pitcher_name}.')
        return

    # Comparison group: every qualifying row at the same position
    position_df = qualified[qualified['position'] == position].dropna(subset=['pitch_type'])

    # Create the figure and GridSpec layout
    fig = plt.figure(figsize=(10, 8), dpi=450)
    gs = GridSpec(5, 3, height_ratios=[0.1, 10, 10, 2, 0.1], width_ratios=[1, 100, 1])
    gs.update(hspace=0.4, wspace=0.1)

    # Add subplots to the grid
    ax0 = fig.add_subplot(gs[1, 1])
    ax1 = fig.add_subplot(gs[2, 1])
    ax1_left = fig.add_subplot(gs[:, 0])
    ax1_right = fig.add_subplot(gs[:, 2])
    ax1_top = fig.add_subplot(gs[0, :])
    ax1_bot = fig.add_subplot(gs[4, 1])
    ax2 = fig.add_subplot(gs[3, 1])

    # Top panel: tjStuff+
    _draw_metric_panel(ax0, position_df, pitcher_df, pitcher_pitches,
                       'tj_stuff_plus', clip_on=True)
    ax0.set_xlabel('')
    ax0.set_ylabel('tjStuff+')
    ax0.grid(False)
    ax0.set_ylim(70, 130)
    ax0.axhline(y=100, color='black', linestyle='--', alpha=0.2, zorder=0)

    # Bottom panel: Pitch Grade (markers may extend past the axes, unclipped)
    _draw_metric_panel(ax1, position_df, pitcher_df, pitcher_pitches,
                       'pitch_grade', clip_on=False)
    ax1.set_xlabel('Pitch Type')
    ax1.set_ylabel('Pitch Grade')
    ax1.grid(False)
    ax1.set_ylim(20, 80)
    ax1.axhline(y=50, color='black', linestyle='--', alpha=0.2, zorder=0)

    # The remaining axes only carry text, so hide their frames
    for text_ax in (ax2, ax1_left, ax1_right, ax1_top, ax1_bot):
        text_ax.axis('off')

    # Add text annotations
    ax1_bot.text(s='By: @TJStats', x=0, y=1, fontsize=12, ha='left')
    ax1_bot.text(s='Data: MLB', x=1, y=1, fontsize=12, ha='right')

    ax1_top.text(0.5, 0, f'{pitcher_name} tjStuff+ 2024 Season - {position}',
                 fontsize=24, ha='center', va='top')

    ax2.text(x=0.5, y=0.6, s='tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type\n'
                             'tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10\n'
                             'Pitch Grade is based off tjStuff+ and scales the data to the traditional 20-80 Scouting Scale for a given pitch type',
             ha='center', va='top', fontname='Calibri', fontsize=10)

    # Adjust subplot layout
    fig.subplots_adjust(left=0.03, right=0.97, top=0.97, bottom=0.03)
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates; close this one so
    # repeated Streamlit reruns do not accumulate open figures.
    plt.close(fig)


def _draw_metric_panel(ax, position_df, pitcher_df, pitcher_pitches, metric, clip_on):
    """Draw one panel: the position-wide swarm, the pitcher's markers and their labels."""
    # Only pass clip_on when clipping is disabled, leaving the default otherwise
    clip_kwargs = {} if clip_on else {'clip_on': False}

    # Faint background swarm for everyone at the same position
    sns.swarmplot(data=position_df,
                  x='pitch_type',
                  y=metric,
                  palette=dict_colour,
                  alpha=0.3,
                  size=3,
                  ax=ax,
                  order=pitcher_pitches,
                  **clip_kwargs)

    # Large outlined markers for the selected pitcher
    sns.swarmplot(data=pitcher_df,
                  x='pitch_type',
                  y=metric,
                  palette=dict_colour,
                  alpha=1,
                  size=16,
                  ax=ax,
                  order=pitcher_pitches,
                  edgecolor='black',
                  linewidth=1,
                  **clip_kwargs)

    # Write each value inside the pitcher's marker. The x position is looked
    # up from the category order, not from the row number, so a label always
    # lands on its own pitch type.
    for pitch_type, value in zip(pitcher_df['pitch_type'], pitcher_df[metric]):
        if pd.isna(value):
            continue
        ax.text(pitcher_pitches.index(pitch_type),
                value,
                f'{value:.0f}',
                color='white',
                ha="center",
                va="center",
                fontsize=8,
                weight='bold',
                clip_on=False,
                zorder=1000)
tjstuff_plus_pitch_data_2024.csv CHANGED
The diff for this file is too large to render. See raw diff