"""Shiny dashboard: rolling CSW% and Whiff% for MLB pitchers.

Reads two pre-built CSV files (`stuff_df_melt.csv`, `exit_velo_df.csv`),
derives per-pitch indicator columns, and serves four plots for the pitcher
chosen in the UI: a headshot, overall rolling CSW%, overall rolling Whiff%,
and a per-pitch-type grid of both.
"""

import difflib
import io
import math
from urllib.request import Request, urlopen

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import FuncFormatter
from shiny import App, render, ui

# Apply the plot theme once, before any figure exists, so the very first
# render is styled the same as later ones.
sns.set_theme(style="whitegrid", palette="pastel")

# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
# How stuff_df_melt.csv was originally produced (kept for provenance):
# stuff_df = pd.read_html('https://www.fangraphs.com/leaders.aspx?pos=all&stats=pit&lg=all&qual=0&type=36&season=2023&month=0&season1=2023&ind=0&team=0&rost=0&age=0&filter=&players=0&startdate=2023-01-01&enddate=2023-12-31&page=1_5000')[5]#.droplevel(1)
# stuff_df.columns = stuff_df.columns.droplevel(0)
# stuff_df = stuff_df.iloc[:-1]
# stuff_df = stuff_df[stuff_df.columns[1:]]
# stuff_df.columns = [x.replace('Stf+ ','') for x in stuff_df.columns]
# stuff_df = stuff_df.rename(columns = {'FA':'FF'})
# stuff_df['ST'] = stuff_df.SL
# stuff_df_melt = stuff_df.melt(id_vars=['Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True)
stuff_df_melt = pd.read_csv('stuff_df_melt.csv', index_col=[0])
exit_velo_df = pd.read_csv('exit_velo_df.csv', index_col=[0])

# Pitch-result descriptions that count as a swing.
swing_codes = ['Swinging Strike', 'In play, no out', 'Foul', 'In play, out(s)',
               'In play, run(s)', 'Swinging Strike (Blocked)', 'Foul Bunt',
               'Foul Tip', 'Missed Bunt', 'Foul Pitchout', 'Swinging Pitchout']
swings_in = list(swing_codes)
swing_strike_codes = ['Swinging Strike', 'Swinging Strike (Blocked)',
                      'Missed Bunt', 'Foul Tip', 'Swinging Pitchout']
contact_codes = ['In play, no out', 'Foul', 'In play, out(s)',
                 'In play, run(s)', 'Foul Bunt']
# Descriptions kept in the data set; every other row is discarded.
codes_in = ['In play, out(s)', 'Swinging Strike', 'Ball', 'Foul',
            'In play, no out', 'Called Strike', 'Foul Tip', 'In play, run(s)',
            'Hit By Pitch', 'Ball In Dirt', 'Pitchout',
            'Swinging Strike (Blocked)', 'Foul Bunt', 'Missed Bunt',
            'Foul Pitchout', 'Intent Ball', 'Swinging Pitchout']

# Values of the `code` column counted as whiffs, and as called-strike-or-whiff.
WHIFF_CODES = ['S', 'W', 'T']
CSW_CODES = WHIFF_CODES + ['C']

# .copy() so the column assignments below act on an independent frame instead
# of a filtered view (avoids pandas' chained-assignment warning).
exit_velo_df = exit_velo_df[exit_velo_df['description'].isin(codes_in)].copy()

# Running pitch numbers per pitcher and per (pitcher, pitch type).
# NOTE(review): this numbers rows in file order - presumably the CSV is
# already chronological; confirm against whatever writes exit_velo_df.csv.
exit_velo_df['pitch'] = exit_velo_df.groupby('pitcher_id').cumcount() + 1
exit_velo_df['pitch_type_count'] = (
    exit_velo_df.groupby(['pitcher_id', 'pitch_type']).cumcount() + 1
)
exit_velo_df = exit_velo_df[['game_id', 'date', 'pitcher_id', 'pitcher',
                             'pitch_code', 'pitch_type', 'pitch_velocity',
                             'code', 'description', 'pitch',
                             'pitch_type_count']].copy()
print('we made it', len(exit_velo_df))

# 0/1 indicator columns; rolling sums of these give the plotted rates.
exit_velo_df['pitches'] = 1
exit_velo_df['whiffs'] = exit_velo_df['code'].isin(WHIFF_CODES).astype(int)
exit_velo_df['csw'] = exit_velo_df['code'].isin(CSW_CODES).astype(int)
exit_velo_df['swings'] = exit_velo_df['description'].isin(swings_in).astype(int)

# ---------------------------------------------------------------------------
# Attach Stuff+ values by fuzzy-matching pitcher names
# ---------------------------------------------------------------------------
# Candidate names are built once rather than once per pitcher.
_stuff_names = stuff_df_melt['Name'].drop_duplicates().tolist()


def _closest_stuff_name(name):
    """Return the closest `Name` in the Stuff+ table to *name*, or None."""
    matches = difflib.get_close_matches(name, _stuff_names, n=1)
    return matches[0] if matches else None


_unique_pitchers = (
    exit_velo_df.drop_duplicates(subset='pitcher_id')[['pitcher_id', 'pitcher']]
    .reset_index(drop=True)
)
eno_name_list = _unique_pitchers['pitcher'].apply(_closest_stuff_name)
df_eno_exit_name = pd.DataFrame(data={
    'pitcher_id': _unique_pitchers['pitcher_id'],
    'pitcher': _unique_pitchers['pitcher'],
    'eno_name': eno_name_list,
})

# One Stuff+ row per (Name, pitch type) so the left merge below can never
# multiply pitch rows.
_stuff_lookup = stuff_df_melt.drop_duplicates(subset=['Name', 'variable'])

# The name mapping is joined on pitcher_id, not on the display name: two
# pitchers sharing a name would otherwise produce a many-to-many join and
# duplicate every one of their pitches.
exit_velo_df = (
    exit_velo_df
    .merge(right=df_eno_exit_name[['pitcher_id', 'eno_name']],
           on='pitcher_id', how='left')
    .merge(right=_stuff_lookup, left_on=['eno_name', 'pitch_type'],
           right_on=['Name', 'variable'], how='left')
)
print('we made it', len(exit_velo_df))

# Choices for the pitcher selector: {pitcher_id: pitcher name}, sorted by name.
test_df = (
    exit_velo_df.sort_values(by='pitcher')
    .drop_duplicates(subset='pitcher_id')
    .reset_index(drop=True)[['pitcher_id', 'pitcher']]
    .set_index('pitcher_id')
)
pitcher_dict = test_df['pitcher'].to_dict()

colour_palette = ['#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
                  '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']


def _safe_rate(hits, total):
    """Return hits / total, or NaN when total is zero."""
    return hits / total if total else float('nan')


# League-wide reference rates never change after load, so compute them once
# instead of re-filtering the whole data set on every render.
LEAGUE_CSW_RATE = _safe_rate(exit_velo_df['csw'].sum(),
                             exit_velo_df['pitches'].sum())
LEAGUE_WHIFF_RATE = _safe_rate(exit_velo_df['whiffs'].sum(),
                               exit_velo_df['swings'].sum())
_league_by_type = exit_velo_df.groupby('pitch_type')[
    ['csw', 'pitches', 'whiffs', 'swings']].sum()
LEAGUE_CSW_BY_TYPE = _league_by_type['csw'] / _league_by_type['pitches']
LEAGUE_WHIFF_BY_TYPE = _league_by_type['whiffs'] / _league_by_type['swings']

# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
app_ui = ui.page_fluid(
    ui.h2('MLB Pitcher Rolling CSW% and Whiff%'),
    ui.row(
        ui.column(
            2,
            ui.row(ui.h6('Created by: @TJStats')),
            ui.row(ui.h6('Data: MLB, Eno Sarris')),
            ui.row(ui.input_select("id", "Select Pitcher", pitcher_dict)),
        ),
        ui.column(
            1,
            ui.output_plot("plot3", height="125px", width="100px"),
            offset=0),
        ui.column(
            3,
            ui.input_slider("n", "Pitch Rolling Window Size", 0, 100, 10),
            offset=1),
        ui.column(
            3,
            ui.input_slider("swing", "Swing Rolling Window Size", 0, 50, 5),
            offset=1),
    ),
    ui.row(
        ui.column(
            6,
            ui.output_plot("plot2", height="225px", width="900px"),
            offset=0),
        ui.column(
            6,
            ui.output_plot("plotwhiff", height="225px", width="900px"),
            offset=0),
    ),
    ui.h4('Rolling CSW% and Whiff% by Pitch Type'),
    ui.output_plot("plot", height="400px"),
)


# ---------------------------------------------------------------------------
# Plot helpers
# ---------------------------------------------------------------------------
def _swings_only(frame):
    """Return the swing rows of *frame* with a 1-based `whiff_count` column."""
    swings = frame[frame['swings'] == 1].copy()
    swings['whiff_count'] = np.arange(1, len(swings) + 1)
    return swings


def _draw_rolling_rate(ax, frame, *, x_col, hits_col, base_col, window,
                       league_rate, colour, xlabel, ylabel, x_max=None):
    """Draw a rolling `hits_col / base_col` rate with reference lines on *ax*.

    Args:
        ax: Matplotlib axes to draw on.
        frame: Rows for one pitcher (optionally one pitch type), in order.
        x_col: Column holding the 1-based running count used as the x value.
        hits_col: 0/1 numerator column (e.g. 'csw' or 'whiffs').
        base_col: 0/1 denominator column (e.g. 'pitches' or 'swings').
        window: Rolling window size from the slider; values below 1 are
            treated as 1 because the slider allows 0.
        league_rate: League-average rate drawn as a dashed reference line.
        colour: Line colour for the rolling rate.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        x_max: Right-hand x limit; defaults to the largest value of `x_col`.
    """
    window = max(int(window), 1)
    own_max = frame[x_col].max() if len(frame) else float('nan')
    if x_max is None:
        x_max = own_max

    drawable = pd.notna(own_max) and own_max >= window and x_max > window
    if drawable:
        pitcher_rate = _safe_rate(frame[hits_col].sum(), frame[base_col].sum())
        ax.hlines(pitcher_rate, xmin=window, xmax=x_max,
                  label=f'Pitcher Average ({pitcher_rate:.1%})',
                  color='black', linestyles='-.', alpha=0.4)
        ax.hlines(league_rate, xmin=window, xmax=x_max,
                  label=f'League Average ({league_rate:.1%})',
                  color='r', linestyles='--', alpha=0.6)
        # Roll only the two columns needed: rolling over the whole frame
        # would also hit the text columns, which newer pandas rejects.
        rolling_rate = (frame[hits_col].rolling(window).sum()
                        / frame[base_col].rolling(window).sum())
        sns.lineplot(x=frame[x_col], y=rolling_rate, linewidth=2, ax=ax,
                     color=colour)
        ax.set_xlim(window, x_max)
        ax.legend(fontsize=8)
    else:
        # Fewer rows than the window: there is no rate to draw and the
        # x limits would be NaN or inverted.
        ax.text(0.5, 0.5, 'Not enough data for this window size',
                transform=ax.transAxes, ha='center', va='center', fontsize=9)

    ax.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1])
    ax.set_ylim(0, 1)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{int(x)}'))
    ax.set_xlabel(xlabel, fontsize=10, fontname='Century Gothic')
    ax.set_ylabel(ylabel, fontsize=10, fontname='Century Gothic')


# ---------------------------------------------------------------------------
# Server
# ---------------------------------------------------------------------------
def server(input, output, session):
    """Register the four plot outputs for one Shiny session."""

    def _selected_pitches():
        """Return all rows for the pitcher currently chosen in the selector."""
        return exit_velo_df[exit_velo_df['pitcher_id'] == int(input.id())]

    @output
    @render.plot(alt='Pitcher headshot')
    def plot3():
        """Show the selected pitcher's headshot from the MLB image server."""
        fig, ax = plt.subplots(figsize=(2, 3))
        fig.set_facecolor('#f7f7f7')
        ax.axis('off')
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/{int(input.id())}/headshot/67/current.png'
        try:
            # matplotlib no longer accepts URLs in imread; fetch the bytes
            # ourselves and decode from a seekable in-memory buffer.
            with urlopen(url, timeout=10) as response:
                image = plt.imread(io.BytesIO(response.read()), format='png')
        except OSError:
            # Network failure: leave the headshot blank rather than fail.
            return fig
        ax.imshow(image)
        return fig

    @output
    @render.plot(alt='Rolling CSW% for the selected pitcher')
    def plot2():
        """Plot rolling CSW% over every pitch thrown by the pitcher."""
        pitches = _selected_pitches()
        fig, ax = plt.subplots(1, 1, figsize=(10, 4))
        _draw_rolling_rate(
            ax, pitches, x_col='pitch', hits_col='csw', base_col='pitches',
            window=input.n(), league_rate=LEAGUE_CSW_RATE,
            colour=colour_palette[0], xlabel='Pitch', ylabel='CSW%')
        fig.set_facecolor('white')
        fig.tight_layout()
        return fig

    @output
    @render.plot(alt='Rolling Whiff% for the selected pitcher')
    def plotwhiff():
        """Plot rolling Whiff% over every swing against the pitcher."""
        swings = _swings_only(_selected_pitches())
        fig, ax = plt.subplots(1, 1, figsize=(10, 4))
        _draw_rolling_rate(
            ax, swings, x_col='whiff_count', hits_col='whiffs',
            base_col='swings', window=input.swing(),
            league_rate=LEAGUE_WHIFF_RATE, colour=colour_palette[1],
            xlabel='Swings', ylabel='Whiff%')
        fig.set_facecolor('white')
        fig.tight_layout()
        return fig

    @output
    @render.plot(alt='Rolling CSW% and Whiff% by pitch type')
    def plot():
        """Plot rolling CSW% (top row) and Whiff% (bottom row) per pitch type.

        Only pitch types making up at least 5% of the pitcher's pitches are
        shown, most used first.
        """
        pitches = _selected_pitches()

        # Share of the pitcher's pitches for each (pitch_type, pitch_code).
        usage = (
            (pitches.groupby(['pitch_type', 'pitch_code']).size()
             / pitches['pitch_type'].count())
            .rename('porp')
            .sort_values(ascending=False)
            .reset_index()
        )
        usage = usage[usage['porp'] >= 0.05]
        n_types = len(usage)

        fig = plt.figure(figsize=(20, 20))
        # The CSW row shares one x range across pitch types.
        shared_x_max = pitches['pitch_type_count'].max()

        for i, row in enumerate(usage.itertuples(index=False)):
            colour = colour_palette[i % len(colour_palette)]
            of_type = pitches[pitches['pitch_type'] == row.pitch_type]

            # Top row: CSW% for this pitch type.
            ax = fig.add_subplot(2, n_types, i + 1)
            _draw_rolling_rate(
                ax, of_type, x_col='pitch_type_count', hits_col='csw',
                base_col='pitches', window=input.n(),
                league_rate=LEAGUE_CSW_BY_TYPE.get(row.pitch_type, np.nan),
                colour=colour, xlabel='Pitches', ylabel='CSW%',
                x_max=shared_x_max)
            ax.set_title(
                row.pitch_code + ' - ' + row.pitch_type
                + ' (' + "{:.1%}".format(row.porp) + ') - '
                + str(of_type['value'].max()) + ' Stuff+',
                fontsize=11, fontname='Century Gothic', weight='bold')

            # Bottom row: Whiff% for this pitch type, x axis in swings.
            ax = fig.add_subplot(2, n_types, i + 1 + n_types)
            _draw_rolling_rate(
                ax, _swings_only(of_type), x_col='whiff_count',
                hits_col='whiffs', base_col='swings', window=input.swing(),
                league_rate=LEAGUE_WHIFF_BY_TYPE.get(row.pitch_type, np.nan),
                colour=colour, xlabel='Swings', ylabel='Whiff%')

        fig.set_facecolor('white')
        fig.tight_layout()
        return fig


# This is a shiny.App object. It must be named `app`.
app = App(app_ui, server)