# MLBPitchGPT / app.py
# ZENLLC's picture
# Duplicate from nesticot/mlb_csw_whiff_rolling_2
# f21c7b1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import matplotlib.ticker as mtick
from matplotlib.ticker import FuncFormatter
# --- Input data ---------------------------------------------------------
# The commented-out block below scrapes a FanGraphs leaderboard, strips the
# 'Stf+ ' prefix from the column names, renames FA -> FF, copies SL into ST and
# melts the table to one row per (Name, Team, IP, variable, value).
# NOTE(review): 'stuff_df_melt.csv' is presumably the cached output of this
# recipe -- confirm before relying on the column names above.
# stuff_df = pd.read_html('https://www.fangraphs.com/leaders.aspx?pos=all&stats=pit&lg=all&qual=0&type=36&season=2023&month=0&season1=2023&ind=0&team=0&rost=0&age=0&filter=&players=0&startdate=2023-01-01&enddate=2023-12-31&page=1_5000')[5]#.droplevel(1)
# stuff_df.columns = stuff_df.columns.droplevel(0)
# stuff_df = stuff_df.iloc[:-1]
# stuff_df = stuff_df[stuff_df.columns[1:]]
# stuff_df.columns = [x.replace('Stf+ ','') for x in stuff_df.columns]
# stuff_df = stuff_df.rename(columns = {'FA':'FF'})
# stuff_df['ST'] = stuff_df.SL
# stuff_df_melt = stuff_df.melt(id_vars=['Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True)
# Both CSV files are read relative to the working directory; the first CSV
# column is used as the DataFrame index.
stuff_df_melt = pd.read_csv('stuff_df_melt.csv',index_col=[0])
exit_velo_df = pd.read_csv('exit_velo_df.csv',index_col=[0])
# Pitch-result descriptions treated as the batter swinging at the pitch.
swing_codes = [
    'Swinging Strike',
    'In play, no out',
    'Foul',
    'In play, out(s)',
    'In play, run(s)',
    'Swinging Strike (Blocked)',
    'Foul Bunt',
    'Foul Tip',
    'Missed Bunt',
    'Foul Pitchout',
    'Swinging Pitchout',
]

# Same descriptions as ``swing_codes``; kept as an independent list object
# because it is the one used to build the ``swings`` flag.
swings_in = list(swing_codes)

# Swing descriptions in which the bat did not put the ball in play or foul it off.
swing_strike_codes = [
    'Swinging Strike',
    'Swinging Strike (Blocked)',
    'Missed Bunt',
    'Foul Tip',
    'Swinging Pitchout',
]

# Swing descriptions in which contact was made.
contact_codes = [
    'In play, no out',
    'Foul',
    'In play, out(s)',
    'In play, run(s)',
    'Foul Bunt',
]

# Every pitch-result description that is kept in the data set.
codes_in = [
    'In play, out(s)',
    'Swinging Strike',
    'Ball',
    'Foul',
    'In play, no out',
    'Called Strike',
    'Foul Tip',
    'In play, run(s)',
    'Hit By Pitch',
    'Ball In Dirt',
    'Pitchout',
    'Swinging Strike (Blocked)',
    'Foul Bunt',
    'Missed Bunt',
    'Foul Pitchout',
    'Intent Ball',
    'Swinging Pitchout',
]
# Keep only pitches whose result is one of the recognised descriptions.
# The explicit copy makes the result an independent frame, so the column
# assignments below cannot trigger pandas' SettingWithCopyWarning.
exit_velo_df = exit_velo_df[exit_velo_df['description'].isin(codes_in)].copy()

# 1-based running pitch number per pitcher, and per pitcher / pitch type,
# in the row order of the file.
exit_velo_df['pitch'] = exit_velo_df.groupby('pitcher_id').cumcount() + 1
exit_velo_df['pitch_type_count'] = exit_velo_df.groupby(['pitcher_id', 'pitch_type']).cumcount() + 1

# Trim to the columns the app uses (copied again for the same reason as above).
exit_velo_df = exit_velo_df[[
    'game_id', 'date', 'pitcher_id', 'pitcher', 'pitch_code', 'pitch_type',
    'pitch_velocity', 'code', 'description', 'pitch', 'pitch_type_count',
]].copy()
print('we made it',len(exit_velo_df))

# 0/1 indicator columns; their sums are the numerators and denominators of
# the rates plotted by the app.
exit_velo_df['pitches'] = 1
# Result codes 'S', 'W' and 'T' are flagged as whiffs; 'C' is additionally
# counted for CSW (presumably called strikes -- confirm against the data source).
exit_velo_df['whiffs'] = exit_velo_df['code'].isin(['S', 'W', 'T']).astype('int64')
exit_velo_df['csw'] = exit_velo_df['code'].isin(['S', 'W', 'T', 'C']).astype('int64')
exit_velo_df['swings'] = exit_velo_df['description'].isin(swings_in).astype('int64')
import difflib  # stdlib fuzzy matching, used to align pitcher names between the two data sets

# Distinct names in the Stuff+ table, built once instead of once per pitcher.
_stuff_name_candidates = stuff_df_melt['Name'].dropna().drop_duplicates().tolist()


def _closest_stuff_name(pitcher_name):
    """Return the Stuff+ name closest to *pitcher_name*, or None when nothing is close enough."""
    if not isinstance(pitcher_name, str):
        # Missing names (NaN) cannot be fuzzy-matched; leave them unmatched.
        return None
    matches = difflib.get_close_matches(pitcher_name, _stuff_name_candidates, n=1)
    return matches[0] if matches else None


# One row per pitcher id with the name as it appears in the pitch data.
_unique_pitchers = (
    exit_velo_df
    .drop_duplicates(subset='pitcher_id')[['pitcher_id', 'pitcher']]
    .reset_index(drop=True)
)
eno_name_list = _unique_pitchers['pitcher'].map(_closest_stuff_name)
df_eno_exit_name = _unique_pitchers.assign(eno_name=eno_name_list)

# Both merges must be many-to-one or every pitch row gets multiplied:
#  * the name lookup is joined on pitcher_id (unique by construction) rather
#    than on the display name, which two different pitchers can share;
#  * the Stuff+ table is reduced to one row per (Name, variable), keeping the
#    first occurrence in file order.
_stuff_by_pitch = stuff_df_melt.drop_duplicates(subset=['Name', 'variable'])
exit_velo_df = (
    exit_velo_df
    .merge(right=df_eno_exit_name[['pitcher_id', 'eno_name']], on='pitcher_id', how='left')
    .merge(right=_stuff_by_pitch, left_on=['eno_name', 'pitch_type'], right_on=['Name', 'variable'], how='left')
)
print('we made it',len(exit_velo_df))
# One row per pitcher id, ordered by pitcher name and indexed by pitcher id.
test_df = (
    exit_velo_df
    .sort_values(by='pitcher')
    .drop_duplicates(subset='pitcher_id')
    .reset_index(drop=True)
    [['pitcher_id', 'pitcher']]
    .set_index('pitcher_id')
)

# {pitcher_id: pitcher name}, used as the choices of the pitcher selector.
pitcher_dict = test_df['pitcher'].to_dict()
# Line colours for the charts, indexed by position (hex RGB strings).
colour_palette = [
    '#FFB000',
    '#648FFF',
    '#785EF0',
    '#DC267F',
    '#FE6100',
    '#3D1EB2',
    '#894D80',
    '#16AA02',
    '#B5592B',
    '#A3C1ED',
]
from shiny import ui, render, App
import matplotlib.image as mpimg
# Page layout. Input ids ("id", "n", "swing") and output ids ("plot3", "plot2",
# "plotwhiff", "plot") must match the names read and defined in ``server``.
app_ui = ui.page_fluid(
    ui.h2('MLB Pitcher Rolling CSW% and Whiff%'),
    # Header row: credits and pitcher selector, headshot, and the two
    # rolling-window sliders.
    ui.row(
        ui.column(
            2,
            ui.row(ui.h6('Created by: @TJStats')),
            ui.row(ui.h6('Data: MLB, Eno Sarris')),
            ui.row(ui.input_select("id", "Select Pitcher", pitcher_dict)),
        ),
        ui.column(
            1,
            ui.output_plot("plot3", height="125px", width="100px"),
            offset=0,
        ),
        # The sliders start at 1: a rolling window of 0 rows has no data in it
        # and would only produce an empty chart.
        ui.column(
            3,
            ui.input_slider("n", "Pitch Rolling Window Size", 1, 100, 10),
            offset=1,
        ),
        ui.column(
            3,
            ui.input_slider("swing", "Swing Rolling Window Size", 1, 50, 5),
            offset=1,
        ),
    ),
    # Overall rolling CSW% (left) and Whiff% (right) for the selected pitcher.
    ui.row(
        ui.column(
            6,
            ui.output_plot("plot2", height="225px", width="900px"),
            offset=0,
        ),
        ui.column(
            6,
            ui.output_plot("plotwhiff", height="225px", width="900px"),
            offset=0,
        ),
    ),
    # Per-pitch-type breakdown.
    ui.h4('Rolling CSW% and Whiff% by Pitch Type'),
    ui.output_plot("plot", height="400px"),
)
from urllib.request import Request, urlopen
# importing OpenCV(cv2) module
#print(app_ui)
def _safe_rate(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or NaN when the denominator is zero."""
    if not denominator:
        return float('nan')
    return float(numerator) / float(denominator)


def _swings_only(df):
    """Return a copy of the rows of *df* flagged as swings, with a 1-based ``whiff_count`` column."""
    # Copy so the new column is not written onto a slice of the caller's frame.
    swings_df = df[df.swings == 1].copy()
    swings_df['whiff_count'] = np.arange(1, len(swings_df) + 1)
    return swings_df


def _draw_rolling_rate(ax, data, x_col, hit_col, total_col, window,
                       league_rate, colour, xlabel, ylabel, x_max=None):
    """Draw a rolling ``hit_col / total_col`` rate on *ax* with two reference lines.

    Args:
        ax: Matplotlib axes to draw on.
        data: Rows to plot, in chronological (row) order.
        x_col: Column holding the x position of each row.
        hit_col: 0/1 column summed for the numerator.
        total_col: 0/1 column summed for the denominator.
        window: Rolling window size in rows; values below 1 are treated as 1.
        league_rate: Rate drawn as the "League Average" reference line.
        colour: Colour of the rolling line.
        xlabel: X axis label.
        ylabel: Y axis label.
        x_max: Right-hand x limit; defaults to the largest value in ``x_col``.
    """
    window = max(1, int(window))
    if x_max is None:
        x_max = data[x_col].max()

    if data.empty or pd.isna(x_max):
        # Nothing to plot (e.g. no swings recorded). A NaN axis limit would
        # make set_xlim raise, so show a placeholder instead.
        ax.text(0.5, 0.5, 'No data', transform=ax.transAxes,
                ha='center', va='center', fontsize=10)
    else:
        pitcher_rate = _safe_rate(data[hit_col].sum(), data[total_col].sum())
        ax.hlines(pitcher_rate, xmin=window, xmax=x_max,
                  label='Pitcher Average ('+"{:.1%}".format(pitcher_rate)+')',
                  color='black', linestyles='-.', alpha=0.4)
        ax.hlines(league_rate, xmin=window, xmax=x_max,
                  label='League Average ('+"{:.1%}".format(league_rate)+')',
                  color='r', linestyles='--', alpha=0.6)
        # Roll only the two numeric columns that are needed: rolling over the
        # whole frame would also try to aggregate the string columns.
        rolled = data[[hit_col, total_col]].rolling(window).sum()
        sns.lineplot(x=data[x_col], y=rolled[hit_col] / rolled[total_col],
                     linewidth=2, ax=ax, color=colour)
        # The first ``window - 1`` rows have no rolling value, so start there.
        ax.set_xlim(window, x_max)
        ax.legend(fontsize=8)
        ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f"{int(value)}"))

    # Axis labels are set after lineplot, which assigns its own labels.
    ticks = [0, 0.2, 0.4, 0.6, 0.8, 1]
    ax.set_yticks(ticks)
    ax.set_ylim(0, 1)
    ax.set_yticklabels(['{:,.0%}'.format(tick) for tick in ticks])
    ax.set_xlabel(xlabel, fontsize=10, fontname='Century Gothic')
    ax.set_ylabel(ylabel, fontsize=10, fontname='Century Gothic')


def server(input, output, session):
    """Register the four plot outputs of the app.

    Args:
        input: Shiny inputs; reads ``id`` (pitcher id), ``n`` (pitch window)
            and ``swing`` (swing window).
        output: Shiny output registry used to register the render functions.
        session: Shiny session (unused).
    """
    # League-wide rates do not depend on any input, so compute them once.
    league_csw_rate = _safe_rate(exit_velo_df.csw.sum(), exit_velo_df.pitches.sum())
    league_whiff_rate = _safe_rate(exit_velo_df.whiffs.sum(), exit_velo_df.swings.sum())

    def _selected_pitcher_df():
        """Return the rows of the currently selected pitcher."""
        return exit_velo_df[exit_velo_df.pitcher_id == int(input.id())]

    @output
    @render.plot(alt='hist')
    def plot3():
        """Headshot of the selected pitcher."""
        import io
        from urllib.request import urlopen

        fig, ax = plt.subplots(figsize=(2, 3))
        fig.set_facecolor('#f7f7f7')
        ax.axis('off')
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/{int(input.id())}/headshot/67/current.png'
        # plt.imread no longer accepts URL strings, so download the image and
        # decode it from an in-memory (seekable) buffer.
        with urlopen(url) as response:
            headshot = plt.imread(io.BytesIO(response.read()), format='png')
        ax.imshow(headshot)
        return fig

    @output
    @render.plot(alt='hist')
    def plot2():
        """Rolling CSW% over all pitches of the selected pitcher."""
        # Apply the theme before the axes exist so the first render matches later ones.
        sns.set_theme(style="whitegrid", palette="pastel")
        pitcher_df = _selected_pitcher_df()
        fig, ax = plt.subplots(1, 1, figsize=(10, 4))
        _draw_rolling_rate(ax, pitcher_df, 'pitch', 'csw', 'pitches', input.n(),
                           league_csw_rate, colour_palette[0], 'Pitch', 'CSW%')
        fig.set_facecolor('white')
        fig.tight_layout()
        return fig

    @output
    @render.plot(alt='hist')
    def plotwhiff():
        """Rolling Whiff% over all swings against the selected pitcher."""
        sns.set_theme(style="whitegrid", palette="pastel")
        swings_df = _swings_only(_selected_pitcher_df())
        fig, ax = plt.subplots(1, 1, figsize=(10, 4))
        _draw_rolling_rate(ax, swings_df, 'whiff_count', 'whiffs', 'swings', input.swing(),
                           league_whiff_rate, colour_palette[1], 'Swings', 'Whiff%')
        fig.set_facecolor('white')
        fig.tight_layout()
        return fig

    @output
    @render.plot(alt="A histogram")
    def plot():
        """Rolling CSW% (top row) and Whiff% (bottom row) for each pitch type used at least 5% of the time."""
        sns.set_theme(style="whitegrid", palette="pastel")
        pitcher_df = _selected_pitcher_df()

        # Share of the pitcher's pitches per (pitch_type, pitch_code), most used first.
        usage = (
            pitcher_df.groupby(['pitch_type', 'pitch_code'])[['pitch_type']].count()
            / pitcher_df['pitch_type'].count()
        )
        usage.columns = ['porp']
        usage = usage.sort_values(by='porp', ascending=False).reset_index()
        usage = usage[usage.porp >= 0.05].reset_index(drop=True)

        fig = plt.figure(figsize=(20, 20))
        n_types = len(usage)
        # All CSW panels share the x range of the pitcher's most-thrown pitch type.
        max_type_count = pitcher_df.pitch_type_count.max()

        for i, pitch in enumerate(usage.itertuples(index=False)):
            colour = colour_palette[i % len(colour_palette)]
            pitch_df = pitcher_df[pitcher_df.pitch_type == pitch.pitch_type]
            league_pitch_df = exit_velo_df[exit_velo_df.pitch_type == pitch.pitch_type]

            # Top row: CSW% for this pitch type.
            ax = fig.add_subplot(2, n_types, i + 1)
            _draw_rolling_rate(
                ax, pitch_df, 'pitch_type_count', 'csw', 'pitches', input.n(),
                _safe_rate(league_pitch_df.csw.sum(), league_pitch_df.pitches.sum()),
                colour, "Pitches", 'CSW%', x_max=max_type_count,
            )
            ax.set_title(
                f"{pitch.pitch_code} - {pitch.pitch_type} ({pitch.porp:.1%}) - {pitch_df.value.max()} Stuff+",
                fontsize=11, fontname='Century Gothic', weight='bold',
            )

            # Bottom row: Whiff% on swings against this pitch type.
            ax = fig.add_subplot(2, n_types, i + 1 + n_types)
            _draw_rolling_rate(
                ax, _swings_only(pitch_df), 'whiff_count', 'whiffs', 'swings', input.swing(),
                _safe_rate(league_pitch_df.whiffs.sum(), league_pitch_df.swings.sum()),
                colour, "Swings", 'Whiff%',
            )

        fig.set_facecolor('white')
        fig.tight_layout()
        return fig
# This is the shiny.App object that ties the layout to the server logic.
# It must be named `app`.
app = App(app_ui, server)