# Page-header residue captured with this file ("Spaces: Build error");
# it is not part of the program.
import io
import math

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import FuncFormatter
# ---------------------------------------------------------------------------
# Data loading and preparation (runs once at import time).
# ---------------------------------------------------------------------------

# Reference only: the commented-out steps below appear to be how
# 'stuff_df_melt.csv' was originally produced -- TODO confirm.
# stuff_df = pd.read_html('https://www.fangraphs.com/leaders.aspx?pos=all&stats=pit&lg=all&qual=0&type=36&season=2023&month=0&season1=2023&ind=0&team=0&rost=0&age=0&filter=&players=0&startdate=2023-01-01&enddate=2023-12-31&page=1_5000')[5]#.droplevel(1)
# stuff_df.columns = stuff_df.columns.droplevel(0)
# stuff_df = stuff_df.iloc[:-1]
# stuff_df = stuff_df[stuff_df.columns[1:]]
# stuff_df.columns = [x.replace('Stf+ ','') for x in stuff_df.columns]
# stuff_df = stuff_df.rename(columns = {'FA':'FF'})
# stuff_df['ST'] = stuff_df.SL
# stuff_df_melt = stuff_df.melt(id_vars=['Name','Team','IP']).dropna().sort_values(by='IP',ascending=False).reset_index(drop=True)
stuff_df_melt = pd.read_csv('stuff_df_melt.csv', index_col=[0])
exit_velo_df = pd.read_csv('exit_velo_df.csv', index_col=[0])

# Pitch `description` values grouped by outcome.
swing_codes = ['Swinging Strike', 'In play, no out',
               'Foul', 'In play, out(s)',
               'In play, run(s)', 'Swinging Strike (Blocked)',
               'Foul Bunt', 'Foul Tip', 'Missed Bunt', 'Foul Pitchout', 'Swinging Pitchout']
swings_in = ['Swinging Strike', 'In play, no out',
             'Foul', 'In play, out(s)',
             'In play, run(s)', 'Swinging Strike (Blocked)',
             'Foul Bunt', 'Foul Tip', 'Missed Bunt', 'Foul Pitchout', 'Swinging Pitchout']
swing_strike_codes = ['Swinging Strike',
                      'Swinging Strike (Blocked)', 'Missed Bunt', 'Foul Tip', 'Swinging Pitchout']
contact_codes = ['In play, no out',
                 'Foul', 'In play, out(s)',
                 'In play, run(s)',
                 'Foul Bunt']
# Only rows whose `description` is in this list are kept.
codes_in = ['In play, out(s)',
            'Swinging Strike',
            'Ball',
            'Foul',
            'In play, no out',
            'Called Strike',
            'Foul Tip',
            'In play, run(s)',
            'Hit By Pitch',
            'Ball In Dirt',
            'Pitchout',
            'Swinging Strike (Blocked)',
            'Foul Bunt',
            'Missed Bunt',
            'Foul Pitchout',
            'Intent Ball',
            'Swinging Pitchout']

# `.copy()` so the column assignments below write to an independent frame
# rather than to a slice of the frame read from disk.
exit_velo_df = exit_velo_df[exit_velo_df['description'].isin(codes_in)].copy()

# Running counters in file order: per pitcher, and per pitcher + pitch type.
exit_velo_df['pitch'] = exit_velo_df.groupby('pitcher_id').cumcount() + 1
exit_velo_df['pitch_type_count'] = exit_velo_df.groupby(['pitcher_id', 'pitch_type']).cumcount() + 1
exit_velo_df = exit_velo_df[['game_id', 'date', 'pitcher_id', 'pitcher', 'pitch_code', 'pitch_type',
                             'pitch_velocity', 'code', 'description', 'pitch', 'pitch_type_count']].copy()
print('we made it', len(exit_velo_df))

# 0/1 indicator columns used as numerators and denominators of the rates.
exit_velo_df['pitches'] = 1
exit_velo_df['whiffs'] = exit_velo_df['code'].isin(['S', 'W', 'T']).astype(int)
exit_velo_df['csw'] = exit_velo_df['code'].isin(['S', 'W', 'T', 'C']).astype(int)
exit_velo_df['swings'] = exit_velo_df['description'].isin(swings_in).astype(int)

import difflib  # used only for the fuzzy name match against stuff_df_melt

# Candidate names are built once instead of once per pitcher.
_stuff_names = stuff_df_melt['Name'].dropna().drop_duplicates().tolist()


def _closest_stuff_name(name):
    """Return the closest `stuff_df_melt.Name` to *name*, or None if no match."""
    if not isinstance(name, str):
        return None
    matches = difflib.get_close_matches(name, _stuff_names, n=1)
    return matches[0] if matches else None


_unique_pitchers = (exit_velo_df.drop_duplicates(subset='pitcher_id')[['pitcher_id', 'pitcher']]
                    .reset_index(drop=True))
eno_name_list = _unique_pitchers['pitcher'].apply(_closest_stuff_name)
df_eno_exit_name = pd.DataFrame(data={'pitcher_id': _unique_pitchers['pitcher_id'],
                                      'pitcher': _unique_pitchers['pitcher'],
                                      'eno_name': eno_name_list})

# Join the name map on pitcher_id (unique by construction). Joining on the
# display name would duplicate every pitch of pitchers who share a name.
exit_velo_df = exit_velo_df.merge(right=df_eno_exit_name[['pitcher_id', 'eno_name']],
                                  on='pitcher_id', how='left', validate='many_to_one')

# One row per (Name, variable) so this left merge cannot multiply pitch rows;
# the first occurrence in the CSV is the one kept.
_stuff_lookup = stuff_df_melt.drop_duplicates(subset=['Name', 'variable'])
exit_velo_df = exit_velo_df.merge(right=_stuff_lookup,
                                  left_on=['eno_name', 'pitch_type'],
                                  right_on=['Name', 'variable'],
                                  how='left', validate='many_to_one')
# Should print the same row count as the previous 'we made it' line.
print('we made it', len(exit_velo_df))

# {pitcher_id: pitcher name}, ordered by name; used as the select-box choices.
test_df = (exit_velo_df.sort_values(by='pitcher')
           .drop_duplicates(subset='pitcher_id')
           .reset_index(drop=True)[['pitcher_id', 'pitcher']])
test_df = test_df.set_index('pitcher_id')
pitcher_dict = test_df['pitcher'].to_dict()

# Line colours; the by-pitch-type plot uses one entry per pitch type.
colour_palette = ['#FFB000', '#648FFF', '#785EF0',
                  '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']
from shiny import ui, render, App
import matplotlib.image as mpimg
from urllib.request import Request, urlopen

# Page layout. Output ids ("plot3", "plot2", "plotwhiff", "plot") and input
# ids ("id", "n", "swing") are the names the server function refers to.
app_ui = ui.page_fluid(
    ui.h2('MLB Pitcher Rolling CSW% and Whiff%'),
    # Top row: credits + pitcher selector, headshot, and the two window sliders.
    ui.row(
        ui.column(
            2,
            ui.row(ui.h6('Created by: @TJStats')),
            ui.row(ui.h6('Data: MLB, Eno Sarris')),
            ui.row(ui.input_select("id", "Select Pitcher", pitcher_dict)),
        ),
        ui.column(
            1,
            ui.output_plot("plot3", height="125px", width="100px"),
            offset=0,
        ),
        # Slider minimum is 1: a rolling window of 0 rows has no meaning.
        ui.column(
            3,
            ui.input_slider("n", "Pitch Rolling Window Size", min=1, max=100, value=10),
            offset=1,
        ),
        ui.column(
            3,
            ui.input_slider("swing", "Swing Rolling Window Size", min=1, max=50, value=5),
            offset=1,
        ),
    ),
    # Middle row: overall rolling CSW% (left) and Whiff% (right).
    ui.row(
        ui.column(
            6,
            ui.output_plot("plot2", height="225px", width="900px"),
            offset=0,
        ),
        ui.column(
            6,
            ui.output_plot("plotwhiff", height="225px", width="900px"),
            offset=0,
        ),
    ),
    # Bottom: one column of CSW% / Whiff% panels per pitch type.
    ui.h4('Rolling CSW% and Whiff% by Pitch Type'),
    ui.output_plot("plot", height="400px"),
)
def _clamp_window(value):
    """Return *value* as an int rolling-window size of at least 1."""
    return max(1, int(value))


def _rolling_rate(numer, denom, window):
    """Return rolling-sum(numer) / rolling-sum(denom) over *window* rows.

    Both arguments are Series. Only these two columns are rolled, so text
    columns in the parent frame never reach the aggregation. Positions with
    fewer than *window* rows are NaN.
    """
    window = _clamp_window(window)
    top = numer.rolling(window, min_periods=window).sum()
    bottom = denom.rolling(window, min_periods=window).sum()
    return top / bottom


def _overall_rate(frame, numer_col, denom_col):
    """Return sum(numer_col) / sum(denom_col) for *frame*; NaN if denominator is 0."""
    total = frame[denom_col].sum()
    if not total:
        return float('nan')
    return frame[numer_col].sum() / total


def _draw_rate_panel(ax, x, rate, window, x_max, pitcher_avg, league_avg,
                     xlabel, ylabel, colour):
    """Draw one rolling-rate panel on *ax*.

    Plots *rate* against *x*, adds dashed reference lines for the pitcher and
    league averages, and formats the y axis as 0-100%.
    """
    # NaN (no rows) or a window larger than the data would give NaN/inverted
    # limits; collapse to the window start and leave the x limits on auto.
    has_range = bool(x_max > window)
    line_end = x_max if has_range else window

    ax.hlines(pitcher_avg, xmin=window, xmax=line_end,
              label=f'Pitcher Average ({pitcher_avg:.1%})',
              color='black', linestyles='-.', alpha=0.4)
    ax.hlines(league_avg, xmin=window, xmax=line_end,
              label=f'League Average ({league_avg:.1%})',
              color='r', linestyles='--', alpha=0.6)
    sns.lineplot(x=x, y=rate, linewidth=2, ax=ax, color=colour)

    ax.set_xlabel(xlabel, fontsize=10, fontname='Century Gothic')
    ax.set_ylabel(ylabel, fontsize=10, fontname='Century Gothic')
    if has_range:
        ax.set_xlim(window, x_max)
    ax.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1])
    ax.set_ylim(0, 1)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
    ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f'{int(value)}'))
    ax.legend(fontsize=8)


def server(input, output, session):
    """Shiny server function: registers the four plot outputs.

    Reads `input.id()` (selected pitcher id), `input.n()` (pitch window) and
    `input.swing()` (swing window), and draws from the module-level
    `exit_velo_df` and `colour_palette`.
    """

    def _selected_pitches():
        # Rows of the currently selected pitcher.
        return exit_velo_df[exit_velo_df.pitcher_id == int(input.id())]

    @output
    @render.plot(alt='Headshot of the selected pitcher')
    def plot3():
        """Show the selected pitcher's headshot (blank if the download fails)."""
        fig, ax = plt.subplots(figsize=(2, 3))
        fig.set_facecolor('#f7f7f7')
        ax.axis('off')

        pitcher_id = int(input.id())
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/{pitcher_id}/headshot/67/current.png'
        # Current Matplotlib refuses URLs in imread, so download the bytes
        # ourselves and decode from an in-memory buffer.
        try:
            with urlopen(url, timeout=10) as response:
                payload = response.read()
        except OSError:
            # URLError/HTTPError/timeouts are OSError subclasses: the picture
            # is decorative, so fall back to an empty panel.
            return fig
        ax.imshow(plt.imread(io.BytesIO(payload), format='png'))
        return fig

    @output
    @render.plot(alt='Rolling CSW% for the selected pitcher')
    def plot2():
        """Rolling `csw` / `pitches` over all pitches of the selected pitcher."""
        window = _clamp_window(input.n())
        pitches = _selected_pitches()

        # Theme first, so it applies to the axes created just below.
        sns.set_theme(style="whitegrid", palette="pastel")
        fig, ax = plt.subplots(1, 1, figsize=(10, 4))
        _draw_rate_panel(
            ax,
            x=pitches['pitch'],
            rate=_rolling_rate(pitches['csw'], pitches['pitches'], window),
            window=window,
            x_max=pitches['pitch'].max(),
            pitcher_avg=_overall_rate(pitches, 'csw', 'pitches'),
            league_avg=_overall_rate(exit_velo_df, 'csw', 'pitches'),
            xlabel='Pitch',
            ylabel='CSW%',
            colour=colour_palette[0],
        )
        fig.set_facecolor('white')
        fig.tight_layout()
        return fig

    @output
    @render.plot(alt='Rolling Whiff% for the selected pitcher')
    def plotwhiff():
        """Rolling `whiffs` / `swings` over the selected pitcher's swings."""
        window = _clamp_window(input.swing())
        pitches = _selected_pitches()
        swings = pitches[pitches['swings'] == 1]
        # Number the swings 1..N; `assign` returns a new frame, so nothing is
        # written to a slice of exit_velo_df.
        swings = swings.assign(whiff_count=np.arange(1, len(swings) + 1))

        sns.set_theme(style="whitegrid", palette="pastel")
        fig, ax = plt.subplots(1, 1, figsize=(10, 4))
        _draw_rate_panel(
            ax,
            x=swings['whiff_count'],
            rate=_rolling_rate(swings['whiffs'], swings['swings'], window),
            window=window,
            x_max=swings['whiff_count'].max(),
            pitcher_avg=_overall_rate(swings, 'whiffs', 'swings'),
            league_avg=_overall_rate(exit_velo_df, 'whiffs', 'swings'),
            xlabel='Swings',
            ylabel='Whiff%',
            colour=colour_palette[1],
        )
        fig.set_facecolor('white')
        fig.tight_layout()
        return fig

    @output
    @render.plot(alt='Rolling CSW% and Whiff% by pitch type')
    def plot():
        """Two rows of panels (CSW% above, Whiff% below), one column per pitch type.

        Only pitch types making up at least 5% of the pitcher's pitches are shown.
        """
        pitch_window = _clamp_window(input.n())
        swing_window = _clamp_window(input.swing())
        pitches = _selected_pitches()

        # Share of each (pitch_type, pitch_code) among the pitcher's pitches,
        # most-used first.
        usage = (
            (pitches.groupby(['pitch_type', 'pitch_code']).size()
             / pitches['pitch_type'].count())
            .rename('porp')
            .reset_index()
            .sort_values(by='porp', ascending=False)
        )
        usage = usage[usage['porp'] >= 0.05].reset_index(drop=True)
        n_types = len(usage)

        sns.set_theme(style="whitegrid", palette="pastel")
        fig = plt.figure(figsize=(20, 20))

        # Every CSW% panel uses the same x range: the largest per-type count.
        shared_x_max = pitches['pitch_type_count'].max()

        for column, pitch in enumerate(usage.itertuples(index=False)):
            of_type = pitches[pitches['pitch_type'] == pitch.pitch_type]
            league_of_type = exit_velo_df[exit_velo_df['pitch_type'] == pitch.pitch_type]
            # Wrap around rather than index past the end of the palette.
            colour = colour_palette[column % len(colour_palette)]

            # Top row: CSW% for this pitch type.
            csw_ax = fig.add_subplot(2, n_types, column + 1)
            _draw_rate_panel(
                csw_ax,
                x=of_type['pitch_type_count'],
                rate=_rolling_rate(of_type['csw'], of_type['pitches'], pitch_window),
                window=pitch_window,
                x_max=shared_x_max,
                pitcher_avg=_overall_rate(of_type, 'csw', 'pitches'),
                league_avg=_overall_rate(league_of_type, 'csw', 'pitches'),
                xlabel='Pitches',
                ylabel='CSW%',
                colour=colour,
            )
            title = f'{pitch.pitch_code} - {pitch.pitch_type} ({pitch.porp:.1%})'
            stuff_plus = of_type['value'].max()
            if pd.notna(stuff_plus):
                # Omit the suffix when no Stuff+ row matched, instead of "nan".
                title += f' - {stuff_plus} Stuff+'
            csw_ax.set_title(title, fontsize=11, fontname='Century Gothic', weight='bold')

            # Bottom row: Whiff% on swings against this pitch type.
            swings = of_type[of_type['swings'] == 1]
            swings = swings.assign(whiff_count=np.arange(1, len(swings) + 1))
            whiff_ax = fig.add_subplot(2, n_types, column + 1 + n_types)
            _draw_rate_panel(
                whiff_ax,
                x=swings['whiff_count'],
                rate=_rolling_rate(swings['whiffs'], swings['swings'], swing_window),
                window=swing_window,
                x_max=swings['whiff_count'].max(),
                pitcher_avg=_overall_rate(swings, 'whiffs', 'swings'),
                league_avg=_overall_rate(league_of_type, 'whiffs', 'swings'),
                xlabel='Swings',
                ylabel='Whiff%',
                colour=colour,
            )

        fig.set_facecolor('white')
        fig.tight_layout()
        return fig
# The shiny.App object that ties the layout to the server function.
# It must be named `app`.
app = App(ui=app_ui, server=server)