Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
| import json | |
| from matplotlib.ticker import FuncFormatter | |
| from matplotlib.ticker import MaxNLocator | |
| import math | |
| from matplotlib.patches import Ellipse | |
| import matplotlib.transforms as transforms | |
| import matplotlib.colors | |
| import matplotlib.colors as mcolors | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| import requests | |
# Shared matplotlib font dictionaries, passed as ``fontdict`` by the plotting
# helpers in this file (tick labels, titles and axis labels respectively).
# The family was previously misspelled 'calibi', which matplotlib cannot
# resolve; 'Calibri' matches the rcParams family this module sets elsewhere.
font_properties = {'family': 'Calibri', 'size': 12}
font_properties_titles = {'family': 'Calibri', 'size': 20}
font_properties_axes = {'family': 'Calibri', 'size': 16}

# General-purpose hex palette; the break plot uses entry 8 for its zero lines.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
    '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]

# Season window (ISO dates) and the season year interpolated into the
# FanGraphs leaderboard URL below.
season_start = '2024-03-20'
season_end = '2024-09-29'
season_fg = 2024
# Download the season's FanGraphs pitching leaderboard once, at import time.
# It is only used to build the MLBAM <-> FanGraphs id lookups below.
# A timeout keeps a stalled connection from hanging the import forever, and
# raise_for_status() surfaces an HTTP error directly instead of as a JSON
# decoding failure on an error page.
_chad_fg_response = requests.get(
    f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching',
    timeout=30,
)
_chad_fg_response.raise_for_status()
chad_fg = _chad_fg_response.json()

# Diverging blue -> white -> gold colormap used to shade summary-table cells.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])

# One row per leaderboard entry: MLBAM id, FanGraphs id and display name.
chadwick_df_small = pd.DataFrame(data={
    'key_mlbam': [x['xMLBAMID'] for x in chad_fg['data']],
    'key_fangraphs': [x['playerid'] for x in chad_fg['data']],
    'Name': [x['PlayerName'] for x in chad_fg['data']],
})

# MLBAM id -> player name (insertion order follows the sorted names).
pitcher_dicts = chadwick_df_small.set_index('key_mlbam')['Name'].sort_values().to_dict()
# MLBAM id -> FanGraphs player id; consumed by fangraphs_scrape().
mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict()
| ### DF UPDATE CODE ### | |
def df_update_code(df):
    """Add plate-crossing kinematics and per-pitch outcome flags to pitch data.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pitch.  The code reads the columns ``vy0``, ``ay``,
        ``y0``, ``vz0``, ``az``, ``vx0``, ``ax``, ``event_type``, ``play_id``,
        ``start_speed``, ``zone``, ``play_description``, ``play_code``,
        ``launch_speed`` and ``launch_angle``.

    Returns
    -------
    pandas.DataFrame
        A new frame restricted to unique ``play_id`` rows that have a
        ``start_speed`` and a recognised ``play_description``, with the
        derived columns described in the comments below.

    Notes
    -----
    The kinematic columns (``vy_f``, ``t``, ``vz_f``, ``vaa``, ``vx_f``,
    ``haa``) and the ``pa`` / ``k`` / ``bb`` / ``k_minus_bb`` flags are
    written onto the caller's frame in place; every later column exists only
    on the returned frame.
    """
    print('Starting')

    # Constant-acceleration kinematics evaluated at y = 17/12.
    # NOTE(review): 17/12 is presumably the 17-inch front edge of home plate
    # expressed in feet - confirm against the tracking data's units.
    df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5
    df['t'] = (df['vy_f'] - df['vy0']) / df['ay']
    df['vz_f'] = df['vz0'] + df['az'] * df['t']
    df['vaa'] = -np.arctan(df['vz_f'] / df['vy_f']) * (180 / np.pi)  # degrees
    df['vx_f'] = df['vx0'] + df['ax'] * df['t']
    df['haa'] = -np.arctan(df['vx_f'] / df['vy_f']) * (180 / np.pi)  # degrees

    # Event types that end a plate appearance.
    end_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                 'double', 'sac_fly', 'force_out', 'home_run',
                 'grounded_into_double_play', 'fielders_choice', 'field_error',
                 'triple', 'sac_bunt', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play', 'catcher_interf', 'other_out']
    df['pa'] = df['event_type'].isin(end_codes)

    # Any observed event type containing 'strikeout' / 'walk' counts, so
    # compound events such as 'strikeout_double_play' or 'intent_walk' match.
    seen_events = df['event_type'].fillna('None').unique()
    df['k'] = df['event_type'].isin([e for e in seen_events if 'strikeout' in e])
    df['bb'] = df['event_type'].isin([e for e in seen_events if 'walk' in e])
    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)

    # From here on work on an explicit copy: the caller's frame is no longer
    # touched and column assignment never targets a filtered view.
    df = df.drop_duplicates(subset=['play_id']).dropna(subset=['start_speed']).copy()

    # Pitch descriptions treated as a swing.
    swings_in = ['Swinging Strike', 'In play, no out',
                 'Foul', 'In play, out(s)',
                 'In play, run(s)', 'Swinging Strike (Blocked)',
                 'Foul Bunt', 'Foul Tip', 'Missed Bunt', 'Foul Pitchout',
                 'Swinging Pitchout']
    # Pitch descriptions kept in the output; anything else is dropped.
    codes_in = ['In play, out(s)', 'Swinging Strike', 'Ball', 'Foul',
                'In play, no out', 'Called Strike', 'Foul Tip',
                'In play, run(s)', 'Hit By Pitch', 'Ball In Dirt', 'Pitchout',
                'Swinging Strike (Blocked)', 'Foul Bunt', 'Missed Bunt',
                'Foul Pitchout', 'Intent Ball', 'Swinging Pitchout']

    # NOTE(review): a missing ``zone`` compares False here, so with a plain
    # numeric column such rows are kept as out-of-zone and the dropna below
    # removes nothing - confirm that is intended.
    df['in_zone'] = df['zone'] < 10
    df_codes = df[df['play_description'].isin(codes_in)].dropna(subset=['in_zone']).copy()

    launch_speed = df_codes['launch_speed']
    launch_angle = df_codes['launch_angle']
    no_speed = launch_speed.isna()
    df_codes['bip'] = ~no_speed

    # The three batted-ball flags are floats: 1.0 when the condition holds,
    # 0.0 when the launch measurement is missing, NaN for every other row.
    is_barrel = (
        (launch_speed * 1.5 - launch_angle >= 117)
        & (launch_speed + launch_angle >= 124)
        & (launch_speed > 98)
        & (launch_angle >= 8)
        & (launch_angle <= 50)
    )
    df_codes['barrel'] = np.select([no_speed, is_barrel], [False, True], default=np.nan)
    df_codes['sweet_spot'] = np.select(
        [launch_angle.isna(), (launch_angle >= 8) & (launch_angle <= 32)],
        [False, True], default=np.nan)
    df_codes['hard_hit'] = np.select(
        [no_speed, launch_speed >= 94.5], [False, True], default=np.nan)

    event_type = df_codes['event_type']

    # Total bases for hits, NaN for everything else.
    df_codes['tb'] = np.select(
        [event_type == 'single', event_type == 'double',
         event_type == 'triple', event_type == 'home_run'],
        [1, 2, 3, 4], default=np.nan)

    # Per-event wOBA weights, NaN for events without a weight.
    df_codes['woba'] = np.select(
        [event_type == 'walk', event_type == 'hit_by_pitch',
         event_type == 'single', event_type == 'double',
         event_type == 'triple', event_type == 'home_run'],
        [0.705, 0.688, 0.897, 1.233, 1.612, 2.013], default=np.nan)

    # 1 for events in the wOBA denominator list, NaN otherwise.  The list is
    # end_codes without 'intent_walk' and 'catcher_interf'.
    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']
    df_codes['woba_codes'] = np.select([event_type.isin(woba_codes)], [1], default=np.nan)

    # Integer 0/1 counters derived from the pitch result code / description.
    df_codes['pitches'] = 1
    df_codes['whiffs'] = df_codes['play_code'].isin(['S', 'W', 'T']).astype(int)
    df_codes['csw'] = df_codes['play_code'].isin(['S', 'W', 'T', 'C']).astype(int)
    df_codes['swings'] = df_codes['play_description'].isin(swings_in).astype(int)

    # Zone / chase splits; "contact" means a swing that was not a whiff.
    in_zone = df_codes['in_zone']
    swung = df_codes['swings'] == 1
    made_contact = swung & (df_codes['whiffs'] == 0)
    df_codes['out_zone'] = ~in_zone
    df_codes['zone_swing'] = in_zone & swung
    df_codes['zone_contact'] = in_zone & made_contact
    df_codes['ozone_swing'] = ~in_zone & swung
    df_codes['ozone_contact'] = ~in_zone & made_contact
    return df_codes
| ### GET COLOURS## | |
def get_color(value, normalize, cmap_sum):
    """Map *value* to a hex colour string.

    ``normalize`` is called on the value first and its result is passed to
    ``cmap_sum``; the colour that comes back is converted with
    ``mcolors.to_hex``.
    """
    rgba = cmap_sum(normalize(value))
    hex_code = mcolors.to_hex(rgba)
    return hex_code
| ### PERCENTILE ### | |
def percentile(n):
    """Return a callable that computes the ``n`` quantile of its argument.

    ``n`` is a fraction (0.9 for the 90th percentile).  The returned function
    calls ``x.quantile(n)`` and is given a distinct ``__name__`` such as
    ``percentile_90``, so several of them can be told apart when used as
    aggregation functions.
    """
    def percentile_(x):
        return x.quantile(n)

    percentile_.__name__ = f'percentile_{n * 100:02.0f}'
    return percentile_
| ### TJ STUFF+ DF CLEAN ### | |
def df_clean(df):
    """Return a cleaned copy of the pitch data with model feature columns.

    The input frame is copied and left untouched.  On the copy this function:

    * mirrors ``hb``, ``x0`` and ``spin_direction`` for left-handed pitchers;
    * adds 0/1 handedness indicators ``pitch_l`` and ``bat_l``;
    * folds the pitch codes FT, SV and FO into SI, SL and FS;
    * merges per-pitcher fastball (FF/FC/SI) reference values and stores each
      pitch's difference from them;
    * adds differences from each pitcher's maximum ``start_speed`` and ``ivb``;
    * recomputes the plate-crossing velocities and the ``vaa`` / ``haa``
      angles (degrees);
    * drops rows without a ``pitch_type`` and fills every remaining missing
      value with 0.
    """
    cleaned = df.copy()

    # Mirror left-handers so every pitcher shares one frame of reference.
    throws_left = cleaned['pitcher_hand'] == 'L'
    cleaned.loc[throws_left, 'hb'] *= -1
    cleaned.loc[throws_left, 'x0'] *= -1
    cleaned.loc[throws_left, 'spin_direction'] = 360 - cleaned.loc[throws_left, 'spin_direction']

    cleaned['pitch_l'] = np.where(cleaned['pitcher_hand'] == 'L', 1, 0)
    cleaned['bat_l'] = np.where(cleaned['batter_hand'] == 'L', 1, 0)

    merged_codes = {'FT': 'SI', 'SV': 'SL', 'FO': 'FS'}
    cleaned['pitch_type'] = cleaned['pitch_type'].replace(merged_codes)

    # Per-pitcher fastball baselines.
    is_fastball = cleaned['pitch_type'].isin(["FF", "FC", "SI"])
    fastball_refs = cleaned[is_fastball].groupby(['pitcher_id']).agg(
        fb_velo=('start_speed', 'mean'),
        fb_max_ivb=('ivb', percentile(0.9)),
        fb_max_x=('hb', percentile(0.9)),
        fb_min_x=('hb', percentile(0.1)),
        fb_max_velo=('start_speed', percentile(0.9)),
        fb_axis=('spin_direction', 'mean'),
    )
    cleaned = cleaned.merge(fastball_refs, left_on='pitcher_id', right_index=True, how='left')

    # (new column, pitch column, fastball reference column)
    fastball_diffs = (
        ('fb_velo_diff', 'start_speed', 'fb_velo'),
        ('fb_max_ivb_diff', 'ivb', 'fb_max_ivb'),
        ('fb_max_hb_diff', 'hb', 'fb_max_x'),
        ('fb_min_hb_diff', 'hb', 'fb_min_x'),
        ('fb_max_velo_diff', 'start_speed', 'fb_max_velo'),
        ('fb_axis_diff', 'spin_direction', 'fb_axis'),
    )
    for new_col, pitch_col, ref_col in fastball_diffs:
        delta = cleaned[pitch_col] - cleaned[ref_col]
        # Only the max-hb feature is stored as a negated absolute distance.
        cleaned[new_col] = -delta.abs() if new_col == 'fb_max_hb_diff' else delta

    # Differences from each pitcher's own maximum velocity and ivb.
    by_pitcher = cleaned.groupby(['pitcher_id'])
    cleaned['max_speed'] = by_pitcher['start_speed'].transform('max')
    cleaned['max_speed_diff'] = cleaned['start_speed'] - cleaned['max_speed']
    cleaned['max_ivb'] = cleaned.groupby(['pitcher_id'])['ivb'].transform('max')
    cleaned['max_ivb_diff'] = cleaned['ivb'] - cleaned['max_ivb']

    # Constant-acceleration kinematics evaluated at y = 17/12.
    to_degrees = 180 / np.pi
    cleaned['vy_f'] = -(cleaned['vy0']**2 - (2 * cleaned['ay'] * (cleaned['y0'] - 17/12)))**0.5
    cleaned['t'] = (cleaned['vy_f'] - cleaned['vy0']) / cleaned['ay']
    cleaned['vz_f'] = cleaned['vz0'] + cleaned['az'] * cleaned['t']
    cleaned['vaa'] = -np.arctan(cleaned['vz_f'] / cleaned['vy_f']) * to_degrees
    cleaned['vx_f'] = cleaned['vx0'] + cleaned['ax'] * cleaned['t']
    cleaned['haa'] = -np.arctan(cleaned['vx_f'] / cleaned['vy_f']) * to_degrees

    return cleaned.dropna(subset=['pitch_type']).fillna(0)
| ### PITCH COLOURS ### | |
# Pitch description -> hex colour used by every plot and table in this file.
# Insertion order is kept as-is.  Trailing "alt" notes carry over the
# alternative shades that were recorded next to the original entries.
pitch_colours = dict([
    ('Four-Seam Fastball', '#FF007D'),  # alt: #BC136F
    ('Sinker', '#98165D'),              # alt: #DC267F
    ('Cutter', '#BE5FA0'),
    ('Changeup', '#F79E70'),            # alt: #F75233
    ('Splitter', '#FE6100'),            # alt: #F75233
    ('Screwball', '#F08223'),
    ('Forkball', '#FFB000'),
    ('Slider', '#67E18D'),              # alt: #1BB999, #785EF0
    ('Sweeper', '#1BB999'),             # alt: #37CD85, #904039
    ('Slurve', '#376748'),              # alt: #785EF0, #549C07, #BEABD8
    ('Knuckle Curve', '#311D8B'),
    ('Curveball', '#3025CE'),
    ('Slow Curve', '#274BFC'),
    ('Eephus', '#648FFF'),
    ('Knuckleball', '#867A08'),
    ('Pitch Out', '#472C30'),
    ('Other', '#9C8975'),
])
| ### PITCH ELLIPSE ### | |
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data.  Both must expose ``.size``.
    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.
    n_std : float
        The number of standard deviations to determine the ellipse's radii.
    facecolor : colour
        Fill colour of the ellipse.
    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`.  ``linewidth`` and
        ``linestyle`` default to ``2`` and ``'--'`` but may be overridden.

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to *ax*.  ``None`` is returned, and nothing is drawn,
        when no finite ellipse exists (fewer than two points, zero variance in
        either coordinate, non-finite covariance) or when building the ellipse
        raises ``ValueError``.

    Raises
    ------
    ValueError
        If *x* and *y* differ in size.
    """
    if x.size != y.size:
        raise ValueError("x and y must be the same size")
    # A covariance needs at least two observations.
    if x.size < 2:
        return None

    try:
        cov = np.cov(x, y)
        var_x, var_y = cov[0, 0], cov[1, 1]
        # Degenerate clouds would otherwise yield a NaN-sized patch that is
        # still added to the axes.
        if not (np.isfinite(cov).all() and var_x > 0 and var_y > 0):
            return None

        # Clamp so rounding error cannot push |r| past 1 and make a radius NaN.
        pearson = float(np.clip(cov[0, 1] / np.sqrt(var_x * var_y), -1.0, 1.0))

        # Using a special case to obtain the eigenvalues of this
        # two-dimensional dataset.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)

        # Defaults rather than hard-coded arguments, so a caller may pass its
        # own linewidth/linestyle without a duplicate-keyword TypeError.
        kwargs.setdefault('linewidth', 2)
        kwargs.setdefault('linestyle', '--')
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, **kwargs)

        # Scale the unit ellipse by n_std standard deviations per axis and
        # move it onto the data mean.
        scale_x = np.sqrt(var_x) * n_std
        scale_y = np.sqrt(var_y) * n_std
        mean_x = np.mean(x)
        mean_y = np.mean(y)

        transf = (transforms.Affine2D()
                  .rotate_deg(45)
                  .scale(scale_x, scale_y)
                  .translate(mean_x, mean_y))
        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        return None
    return ax.add_patch(ellipse)
| # DEFINE STRIKE ZONE | |
# Closed rectangular strike-zone outline: five (side, height) corner points,
# the last repeating the first so the path closes.
strike_zone = pd.DataFrame(
    [(-0.9, 1.5), (-0.9, 3.5), (0.9, 3.5), (0.9, 1.5), (-0.9, 1.5)],
    columns=['PlateLocSide', 'PlateLocHeight'],
)


### STRIKE ZONE ###
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """Draw the strike zone and a home-plate outline on *axis*.

    Parameters
    ----------
    axis : object with a matplotlib-style ``plot`` method
        Target axes.
    alpha_spot : float
        Opacity applied to every line drawn.
    catcher_p : bool
        Selects which of the two home-plate outlines is drawn: when true, the
        one whose point sits at (0, 0.5) above a flat edge at height 0.15;
        otherwise the one whose point sits at (0, -0.35) below a flat edge at
        height 0.4.
        NOTE(review): the name suggests catcher's vs. pitcher's perspective -
        confirm.
    """
    axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'],
              color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Each entry is one straight plate edge as ([x0, x1], [y0, y1]).
    if catcher_p:
        plate_edges = (
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        )
    else:
        plate_edges = (
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        )

    for edge_x, edge_y in plate_edges:
        axis.plot(edge_x, edge_y, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
| ### FANGRAPHS STATS DICT ### | |
# FanGraphs stat key -> how fangraphs_table() presents it:
#   'table_header' - bold mathtext column header,
#   'format'       - format spec applied to the raw value.
# Headers are raw strings: '\/' and '\%' are not valid Python escapes, so the
# non-raw spelling triggers an invalid-escape warning on newer interpreters.
# The runtime text of every header is unchanged.
# OBP and SLG are rate stats and use '.3f' like AVG, BABIP and wOBA; the
# previous '.0f' rendered e.g. a .312 OBP as "0".
fangraphs_stats_dict = {
    'IP': {'table_header': r'$\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': r'$\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': r'$\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': r'$\bf{K\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': r'$\bf{BB\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': r'$\bf{K\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': r'$\bf{HR\/9}$', 'format': '.2f'},
    'K%': {'table_header': r'$\bf{K\%}$', 'format': '.1%'},
    'BB%': {'table_header': r'$\bf{BB\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': r'$\bf{K-BB\%}$', 'format': '.1%'},
    'WHIP': {'table_header': r'$\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': r'$\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': r'$\bf{LOB\%}$', 'format': '.1%'},
    'xFIP': {'table_header': r'$\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': r'$\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': r'$\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': r'$\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': r'$\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': r'$\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': r'$\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': r'$\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': r'$\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': r'$\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': r'$\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': r'$\bf{SO}$', 'format': '.0f'},
    'OBP': {'table_header': r'$\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': r'$\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': r'$\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': r'$\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': r'$\bf{G}$', 'format': '.0f'},
}
| ## Fangraphs Table | |
| ### FANGRAPHS SPLITS SCRAPE ### | |
# Split name -> list of codes sent as "strSplitArr" in the FanGraphs splits
# request; an empty list applies no split.
# NOTE(review): '5' / '6' presumably select batter handedness - confirm
# against the FanGraphs splits API.
split_dict = dict(
    all=[],
    left=['5'],
    right=['6'],
)
def _fangraphs_splits_row(fg_player_id, split_codes, start_date, end_date, stat_type):
    """POST one FanGraphs splits-leaderboard query and return its first data row."""
    payload = {
        "strPlayerId": fg_player_id,
        "strSplitArr": split_codes,
        "strGroup": "season",
        "strPosition": "P",
        "strType": stat_type,
        "strStartDate": start_date,
        "strEndDate": end_date,
        "strSplitTeams": False,
        "dctFilters": [],
        "strStatType": "player",
        "strAutoPt": False,
        "arrPlayerId": [],
        "strSplitArrPitch": [],
        "arrWxTemperature": None,
        "arrWxPressure": None,
        "arrWxAirDensity": None,
        "arrWxElevation": None,
        "arrWxWindSpeed": None,
    }
    response = requests.post(
        "https://www.fangraphs.com/api/leaders/splits/splits-leaders",
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'},
        # Without a timeout a stalled connection blocks the caller forever.
        timeout=30,
    )
    # Report an HTTP failure as such rather than as a JSON decoding error.
    response.raise_for_status()
    return response.json()['data'][0]


def fangraphs_scrape(pitcher_id=808967,
                     split='all',
                     start_date='2024-03-20',
                     end_date='2024-09-29'):
    """Fetch one pitcher's FanGraphs split stats for a date range.

    Two queries are sent to the splits-leaders endpoint, identical except for
    ``strType`` ("2", then "1").  The first row of each response is merged
    into one dict, with values from the "1" query winning on shared keys.
    NOTE(review): the original code labelled the "1" query "advanced"; the
    meaning of "2" is not visible here - confirm against the API.

    Parameters
    ----------
    pitcher_id : int
        MLBAM id; translated to a FanGraphs id through ``mlb_fg_dicts``.
    split : str
        Key of ``split_dict`` ('all', 'left' or 'right').
    start_date, end_date : str or datetime-like
        Range bounds; anything ``pd.to_datetime`` accepts.

    Returns
    -------
    dict
        Stat name -> value for the requested pitcher and split.

    Raises
    ------
    KeyError
        Unknown ``pitcher_id`` or ``split``.
    IndexError
        A response contained no data rows.
    requests.HTTPError
        The API answered with an error status.
    """
    # Resolve every input before touching the network so bad arguments fail fast.
    fg_player_id = str(mlb_fg_dicts[pitcher_id])
    split_codes = split_dict[split]
    start = pd.to_datetime(start_date).strftime('%Y-%m-%d')
    end = pd.to_datetime(end_date).strftime('%Y-%m-%d')

    data_pull = _fangraphs_splits_row(fg_player_id, split_codes, start, end, "2")
    data_pull_advanced = _fangraphs_splits_row(fg_player_id, split_codes, start, end, "1")
    data_pull.update(data_pull_advanced)
    return data_pull
| ### FANGRAPHS TABLE PLOT ### | |
def fangraphs_table(data,
                    stats,
                    ax):
    """Render a one-row table of FanGraphs stats on *ax*.

    Parameters
    ----------
    data : dict
        Stat name -> raw value, e.g. the result of ``fangraphs_scrape``.
    stats : sequence of str
        Stats to show, in column order.  Each stat present in ``data`` must
        have an entry in ``fangraphs_stats_dict``.
    ax : matplotlib.axes.Axes
        Axes to draw into; its frame and ticks are switched off.

    Returns
    -------
    matplotlib.table.Table
        The table that was added to *ax*.

    Notes
    -----
    A stat absent from ``data`` gets ``'---'`` as both header and value.  A
    stat whose value is ``None`` keeps its header and shows ``'---'``.

    The cell strings are built as plain Python lists.  Formatting them inside
    a DataFrame row would write strings into numeric columns, which pandas
    deprecates, and would collapse repeated stat names into a single column.
    """
    headers = []
    cells = []
    for stat in stats:
        if stat not in data:
            headers.append('---')
            cells.append('---')
            continue
        spec = fangraphs_stats_dict[stat]
        headers.append(spec['table_header'])
        value = data[stat]
        if value is None or (isinstance(value, str) and value == '---'):
            cells.append('---')
        else:
            cells.append(format(value, spec['format']))

    table_fg = ax.table(cellText=[cells], colLabels=headers, cellLoc='center',
                        bbox=[0.04, 0.2, 0.92, 0.8])
    table_fg.set_fontsize(20)
    ax.axis('off')
    return table_fg
| ### VELOCITY KDES ### | |
def velocity_kdes(df,
                  ax,
                  gs,
                  gs_list,
                  fig):
    """Draw one stacked velocity-distribution panel per pitch type.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch rows with ``pitch_type``, ``start_speed`` and
        ``pitch_description`` (the latter keys into ``pitch_colours``).
    ax : matplotlib.axes.Axes
        Outer axes; it is switched off and only carries the title.
    gs : matplotlib.gridspec.GridSpec
        Parent grid.  The panels are nested in ``gs[2, gs_list]``.
    gs_list : int or slice
        Column selector inside row 2 of *gs*.
    fig : matplotlib.figure.Figure
        Figure the panels are added to.

    Notes
    -----
    Panels are ordered from most to least thrown pitch type and share one
    x-range, rounded outwards to multiples of 5.  A pitch type with a single
    distinct velocity is drawn as a vertical line instead of a KDE.  When
    *df* holds no pitches only the title is drawn; this previously crashed.
    """
    pitch_order = df['pitch_type'].value_counts().sort_values(ascending=False).index.tolist()

    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'Calibri', 'size': 20})

    # Nothing to plot: a zero-row grid cannot be created.
    if not pitch_order:
        return

    import matplotlib.gridspec as gridspec
    inner_grid = gridspec.GridSpecFromSubplotSpec(len(pitch_order), 1, subplot_spec=gs[2, gs_list])
    panels = [fig.add_subplot(cell) for cell in inner_grid]

    # Shared x-range and tick positions, computed once for all panels.
    x_low = math.floor(df['start_speed'].min() / 5) * 5
    x_high = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = list(range(x_low, x_high, 5))
    last_panel = len(pitch_order) - 1

    for position, (pitch, panel) in enumerate(zip(pitch_order, panels)):
        pitch_rows = df[df['pitch_type'] == pitch]
        speeds = pitch_rows['start_speed']
        colour = pitch_colours[pitch_rows['pitch_description'].values[0]]

        distinct_speeds = np.unique(speeds)
        if distinct_speeds.size == 1:
            # A KDE is undefined for a single value; mark it with a line.
            only_speed = distinct_speeds[0]
            panel.plot([only_speed, only_speed], [0, 1], linewidth=4,
                       color=colour, zorder=20)
        else:
            sns.kdeplot(speeds, ax=panel, fill=True,
                        clip=(speeds.min(), speeds.max()),
                        color=colour)

        panel.set_xlim(x_low, x_high)
        panel.set_xlabel('')
        panel.set_ylabel('')
        for side in ('top', 'right', 'left'):
            panel.spines[side].set_visible(False)
        # Only the bottom panel keeps visible x tick marks and labels.
        if position < last_panel:
            panel.tick_params(axis='x', colors='none')
        panel.set_xticks(x_ticks)
        panel.set_yticks([])
        panel.grid(axis='x', linestyle='--')
        panel.text(-0.01, 0.5, pitch, transform=panel.transAxes,
                   fontsize=14, va='center', ha='right')

    panels[-1].set_xlabel('Velocity (mph)')
| ### TJ STUFF+ ROLLING ### | |
def tj_stuff_roling(df,
                    window,
                    ax):
    """Plot a rolling tjStuff+ line for each pitch type on *ax*.

    For every pitch type, most thrown first, whose largest
    ``pitch_type_count_each`` reaches *window*, one line is drawn: x runs from
    1 to that count and y is the rolling sum of ``tj_stuff_plus`` over
    *window* rows divided by *window*.  Line colours come from
    ``pitch_colours`` through the first ``pitch_description`` of the type.

    The x-axis spans from *window* to the largest ``pitch_type_count_each``
    in *df*, the y-axis is fixed to 70-130, and x ticks are forced to
    integers.
    """
    usage = df['pitch_type'].value_counts().sort_values(ascending=False)

    for pitch in usage.index.tolist():
        pitch_rows = df[df['pitch_type'] == pitch]
        thrown = max(pitch_rows['pitch_type_count_each'])
        if thrown >= window:
            rolling_stuff = pitch_rows['tj_stuff_plus'].rolling(window).sum() / window
            line_colour = pitch_colours[pitch_rows['pitch_description'].values[0]]
            sns.lineplot(x=range(1, thrown + 1),
                         y=rolling_stuff,
                         color=line_colour,
                         ax=ax, linewidth=3)

    # The first full window ends at pitch number `window`, so start there.
    ax.set_xlim(window, max(df['pitch_type_count_each']))
    ax.set_ylim(70, 130)

    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
| ### BREAK PLOT ### | |
def break_plot(df,
               ax):
    """Scatter pitch movement (hb vs. ivb) with a shaded ellipse per pitch.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitches of a single pitcher with the columns ``prop``,
        ``pitch_type``, ``pitch_description``, ``pitcher_hand``, ``hb`` and
        ``ivb``.  The hand is read from the first row.
    ax : matplotlib.axes.Axes
        Axes to draw into.

    Notes
    -----
    * A 2-standard-deviation ellipse is drawn for every pitch description
      with more than four pitches.  If one ellipse cannot be built it is
      skipped; previously a ``ValueError`` there made the function return
      early and left the axes half drawn.
    * Both axes are fixed to +/-25 with an equal aspect ratio.  For a
      left-handed pitcher the x-axis is inverted, and the "Arm Side" /
      "Glove Side" captions are placed to match.
    """
    pitcher_hand = df['pitcher_hand'].values[0]

    descriptions = df.sort_values(
        by=['prop', 'pitch_type'], ascending=[False, True]).pitch_description.unique()
    for label in descriptions:
        subset = df[df['pitch_description'] == label]
        if len(subset) <= 4:
            continue
        try:
            confidence_ellipse(subset['hb'], subset['ivb'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=2,
                               facecolor=pitch_colours[label], alpha=0.2)
        except ValueError:
            # Skip only this ellipse and keep drawing the rest of the plot.
            continue

    # The scatter call is the same for either hand.
    sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_description'],
                    palette=pitch_colours, ec='black', alpha=1, zorder=2)

    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    # seaborn may not have created a legend; guard before removing it.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    caption_box = dict(facecolor='white', edgecolor='black')
    if pitcher_hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=caption_box, fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=caption_box, fontsize=12, zorder=3)
    if pitcher_hand == 'L':
        # With the axis inverted, x = 24.5 is the left edge of the plot.
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=caption_box, fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=caption_box, fontsize=12, zorder=3)

    ax.set_aspect('equal', adjustable='box')
    # Show tick values as whole numbers.
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
| ### TABLE SUMMARY ### | |
| def table_summary(df, | |
| pitcher_id, | |
| ax, | |
| df_group, | |
| df_group_all, | |
| statcast_pitch_summary): | |
| cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
| ax.axis('off') | |
| df_group['spin_direction_adj'] = [(x + 180) for x in df_group['spin_direction']] | |
| #(((df_group.groupby('pitch_description').mean()[['spin_direction_adj']] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) | |
| clock_time = ((df_group.groupby('pitch_description').mean()['spin_direction_adj']) %360 // 30 )+ (((df_group.groupby('pitch_description').mean()['spin_direction_adj'] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) | |
| # print('Clocks') | |
| # print(clock_time) | |
| clock_time = (clock_time.astype(int) + clock_time%1*60/100).round(2).astype(str).str.replace('.',':').str.replace(':0',':00').str.replace(':3',':30').str.replace('0:','12:').str.replace('112:','10:').to_frame() | |
| df_group = df_group.merge(right=clock_time,left_on='pitch_description',right_index=True,suffixes=['','_clock']) | |
| plot_table = df_group[df_group['pitcher_id']==pitcher_id].sort_values( | |
| by=['pitches'],ascending=False)[['pitch_description','pitches','start_speed','ivb', | |
| 'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release', | |
| 'extension','tj_stuff_plus','spin_direction_adj_clock','zone_percent','chase_percent','whiff_rate']] | |
| # if df['pitcher_hand'].values[0] == 'L': | |
| # plot_table['hb'] = plot_table['hb']*-1 | |
| #if df['pitcher_hand'].values[0] == 'R': | |
| plot_table['horizontal_release'] = plot_table['horizontal_release']*-1 | |
| plot_table['pitch_percent'] = plot_table['pitches'] / plot_table['pitches'].sum() | |
| plot_table = plot_table[['pitch_description','pitches','pitch_percent','start_speed','ivb', | |
| 'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release', | |
| 'extension','spin_direction_adj_clock','tj_stuff_plus','zone_percent','chase_percent','whiff_rate']] | |
| plot_table_all = pd.DataFrame(data={'pitch_description': 'All', | |
| 'pitches': plot_table['pitches'].sum(), | |
| 'pitch_percent': 1.0, | |
| 'start_speed': '—', | |
| 'ivb': '—', | |
| 'hb': '—', | |
| 'spin_rate': '—', | |
| 'vaa': '—', | |
| 'haa': '—', | |
| 'vertical_release': '—', | |
| 'horizontal_release': '—', | |
| 'extension': df['extension'].mean(), | |
| 'spin_direction_adj_clock': '—', | |
| 'tj_stuff_plus': df[df['pitcher_id']==pitcher_id]['tj_stuff_plus'].mean(), | |
| 'zone_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['zone_percent'].values[0], | |
| 'chase_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['chase_percent'].values[0], | |
| 'whiff_rate': df_group_all[df_group_all['pitcher_id']==pitcher_id]['whiff_rate'].values[0], | |
| },index=[0] | |
| ) | |
| plot_table = pd.concat([plot_table,plot_table_all]).fillna('—') | |
| plt.rcParams['font.family'] = 'Calibri' | |
| table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center', | |
| colWidths=[2.3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], bbox=[0.04, 0, 0.92, 0.8]) | |
| min_font_size = 14 | |
| # Set table properties | |
| table.auto_set_font_size(False) | |
| #table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10))) | |
| table.set_fontsize(min_font_size) | |
| table.scale(1, 0.5) | |
| min_font_size = 18 | |
| # Set font size for values | |
| # Adjust the font size as needed | |
| for i in range(len(plot_table)+1): | |
| for j in range(len(plot_table.columns)): | |
| if i > 0: # Skip the header row | |
| cell = table.get_celld()[i, j] | |
| cell.set_fontsize(min_font_size) | |
| for i in range(len(plot_table)): | |
| if table.get_celld()[(i+1, 0)].get_text().get_text() != 'All': | |
| table.get_celld()[(i+1, 0)].set_facecolor(pitch_colours[table.get_celld()[(i+1, 0)].get_text().get_text()]) # Header cell color | |
| if table.get_celld()[(i+1, 0)].get_text().get_text() in ['Split-Finger','Slider','Changeup']: | |
| table.get_celld()[(i+1, 0)].set_text_props(color='#000000',fontweight='bold') | |
| else: | |
| table.get_celld()[(i+1, 0)].set_text_props(color='#ffffff',fontweight='bold') | |
| if table.get_celld()[(i+1, 0)].get_text().get_text() == 'Four-Seam Fastball': | |
| table.get_celld()[(i+1, 0)].get_text().set_text('4-Seam') | |
| select_df = statcast_pitch_summary[statcast_pitch_summary['pitch_description'] == plot_table['pitch_description'].values[i]] | |
| normalize = mcolors.Normalize(vmin=select_df['start_speed'].mean()-select_df.pitch_velocity_std.mean(), | |
| vmax=select_df['start_speed'].mean()+select_df.pitch_velocity_std.mean()) # Define the range of values | |
| if table.get_celld()[(i+1, 3)].get_text().get_text() != '—': | |
| table.get_celld()[(i+1, 3)].set_facecolor(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text()),normalize,cmap_sum)) # Header cell color | |
| cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
| normalize = mcolors.Normalize(vmin=select_df['extension'].mean()*0.9, vmax=select_df['extension'].mean()*1.1) | |
| if table.get_celld()[(i+1,11)].get_text().get_text() != '—': | |
| table.get_celld()[(i+1,11)].set_facecolor(get_color(float(table.get_celld()[(i+1, 11)].get_text().get_text()),normalize,cmap_sum)) # Header cell color | |
| cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
| normalize = mcolors.Normalize(vmin=80, vmax=120) | |
| print(normalize) | |
| if table.get_celld()[(i+1,13)].get_text().get_text() != '—': | |
| table.get_celld()[(i+1,13)].set_facecolor(get_color(float(table.get_celld()[(i+1, 13)].get_text().get_text()),normalize,cmap_sum)) # Header cell color | |
| cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
| normalize = mcolors.Normalize(vmin=select_df['zone_percent'].mean()*0.7, vmax=select_df['zone_percent'].mean()*1.3) | |
| if table.get_celld()[(i+1,14)].get_text().get_text() != '—': | |
| table.get_celld()[(i+1,14)].set_facecolor(get_color(float(table.get_celld()[(i+1, 14)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color | |
| cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
| normalize = mcolors.Normalize(vmin=select_df['chase_percent'].mean()*0.7, vmax=select_df['chase_percent'].mean()*1.3) | |
| if table.get_celld()[(i+1,15)].get_text().get_text() != '—': | |
| table.get_celld()[(i+1,15)].set_facecolor(get_color(float(table.get_celld()[(i+1, 15)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color | |
| cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
| normalize = mcolors.Normalize(vmin=select_df['whiff_rate'].mean()*0.7, vmax=select_df['whiff_rate'].mean()*1.3) | |
| if table.get_celld()[(i+1,16)].get_text().get_text() != '—': | |
| table.get_celld()[(i+1,16)].set_facecolor(get_color(float(table.get_celld()[(i+1, 16)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color | |
| table.get_celld()[(len(plot_table), 0)].set_text_props(color='#000000',fontweight='bold') | |
| new_column_names = ['$\\bf{Pitch\ Name}$', | |
| '$\\bf{Count}$', | |
| '$\\bf{Pitch\%}$', | |
| '$\\bf{Velocity}$', | |
| '$\\bf{iVB}$', | |
| '$\\bf{HB}$', | |
| '$\\bf{Spin}$', | |
| '$\\bf{VAA}$', | |
| '$\\bf{HAA}$', | |
| '$\\bf{vRel}$', | |
| '$\\bf{hRel}$', | |
| '$\\bf{Ext.}$', | |
| '$\\bf{Axis}$', | |
| '$\\bf{tjStuff+}$', | |
| '$\\bf{Zone\%}$', | |
| '$\\bf{Chase\%}$', | |
| '$\\bf{Whiff\%}$', | |
| ] | |
| for i, col_name in enumerate(new_column_names): | |
| table.get_celld()[(0, i)].get_text().set_text(col_name) | |
| float_list = ['start_speed','ivb', | |
| 'hb', 'vaa', 'haa', 'vertical_release','horizontal_release', 'extension'] | |
| for fl in float_list: | |
| # Subset of column names | |
| subset_columns = [fl] | |
| # Get the list of column indices | |
| column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] | |
| # # print(column_indices) | |
| for row_l in range(1,len(plot_table)+1): | |
| # print(row_l) | |
| if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': | |
| # print() | |
| # print(fl) | |
| table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) | |
| percent_list = ['pitch_percent','zone_percent','chase_percent','whiff_rate'] | |
| for fl in percent_list: | |
| # Subset of column names | |
| subset_columns = [fl] | |
| # Get the list of column indices | |
| column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] | |
| # # print(column_indices) | |
| for row_l in range(1,len(plot_table)+1): | |
| # print(row_l) | |
| if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': | |
| # print(fl) | |
| table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1%}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) | |
| int_list = ['tj_stuff_plus','spin_rate'] | |
| for fl in int_list: | |
| # Subset of column names | |
| subset_columns = [fl] | |
| # Get the list of column indices | |
| column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] | |
| # # print(column_indices) | |
| for row_l in range(1,len(plot_table)+1): | |
| # print(row_l) | |
| if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': | |
| # print(fl) | |
| table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.0f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) | |
| return table | |
| ### GROUPED IVB CREATION ### | |
def group_ivb_update(df, agg_list=None):
    """Aggregate pitch-level rows into per-group averages, counts and rates.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pitch. Must contain every column named in ``agg_list``
        plus the columns aggregated below (``start_speed``, ``ivb``, ``hb``,
        ``spin_rate``, ``vaa``, ``haa``, ``x0``, ``z0``, ``extension``,
        ``spin_direction``, ``tj_stuff_plus``, ``swings``, ``in_zone``,
        ``out_zone``, ``whiffs``, ``zone_swing``, ``zone_contact``,
        ``ozone_swing``, ``ozone_contact``).
    agg_list : list of str, optional
        Columns to group by. Defaults to pitcher id/name/hand plus pitch
        type/description.

    Returns
    -------
    pandas.DataFrame
        One row per group with the aggregated columns and the derived rate
        columns. A rate is NaN wherever its denominator is zero.
    """
    # Build the default inside the call so no list object is shared between
    # invocations.
    if agg_list is None:
        agg_list = ['pitcher_id', 'pitcher_name', 'pitcher_hand',
                    'pitch_type', 'pitch_description']

    grouped_ivb = df.groupby(agg_list).agg(
        pitches=('start_speed', 'count'),
        start_speed=('start_speed', 'mean'),
        ivb=('ivb', 'mean'),
        hb=('hb', 'mean'),
        spin_rate=('spin_rate', 'mean'),
        vaa=('vaa', 'mean'),
        haa=('haa', 'mean'),
        horizontal_release=('x0', 'mean'),
        vertical_release=('z0', 'mean'),
        extension=('extension', 'mean'),
        spin_direction=('spin_direction', 'mean'),
        tj_stuff_plus=('tj_stuff_plus', 'mean'),
        swings=('swings', 'sum'),
        in_zone=('in_zone', 'sum'),
        out_zone=('out_zone', 'sum'),
        whiffs=('whiffs', 'sum'),
        zone_swing=('zone_swing', 'sum'),
        zone_contact=('zone_contact', 'sum'),
        ozone_swing=('ozone_swing', 'sum'),
        ozone_contact=('ozone_contact', 'sum'),
    ).reset_index()

    def _rate(numerator, denominator):
        # Vectorized division; rows whose denominator is zero become NaN
        # instead of inf/NaN produced by dividing by zero.
        return (numerator / denominator).where(denominator != 0)

    pitches = grouped_ivb['pitches']
    in_zone = grouped_ivb['in_zone']
    # The out-of-zone count is derived from pitches - in_zone (as before)
    # rather than read from the aggregated out_zone column.
    not_in_zone = pitches - in_zone

    grouped_ivb['zone_contact_percent'] = _rate(grouped_ivb['zone_contact'], grouped_ivb['zone_swing'])
    # Guard on in_zone (the actual denominator); the previous guard checked
    # pitches, which let a zero in_zone through to the division.
    grouped_ivb['zone_swing_percent'] = _rate(grouped_ivb['zone_swing'], in_zone)
    grouped_ivb['zone_percent'] = _rate(in_zone, pitches)
    grouped_ivb['chase_percent'] = _rate(grouped_ivb['ozone_swing'], not_in_zone)
    grouped_ivb['chase_contact'] = _rate(grouped_ivb['ozone_contact'], grouped_ivb['ozone_swing'])
    grouped_ivb['swing_percent'] = _rate(grouped_ivb['swings'], pitches)
    grouped_ivb['whiff_rate'] = _rate(grouped_ivb['whiffs'], grouped_ivb['swings'])
    grouped_ivb['swstr_rate'] = _rate(grouped_ivb['whiffs'], pitches)

    return grouped_ivb
| ####LHH | |
def location_plot(df,ax,hand):
    """Draw the pitch-location panel for one batter handedness onto ``ax``.

    For every pitch description in ``df`` (ordered by ``prop`` descending,
    then ``pitch_type`` ascending) the rows thrown to ``hand``-handed batters
    are selected; when at least five exist, ``confidence_ellipse`` is called
    on their ``px``/``pz`` values with ``n_std=1.5``. Afterwards the mean
    ``px``/``pz`` per pitch description is scattered with marker size
    proportional to that pitch's share of the pitches to that hand.

    df   : pitch-level frame; the columns read here are ``prop``,
           ``pitch_type``, ``pitch_description``, ``batter_hand``, ``px``,
           ``pz`` and ``start_speed``.
    ax   : matplotlib axes to draw on.
    hand : value matched against ``batter_hand``; also interpolated into the
           title as ``{hand}HB``.
    """
    # Rows thrown to the requested batter side; reused for both stages.
    vs_hand = df['batter_hand'] == hand

    ordered = df.sort_values(by=['prop','pitch_type'],ascending=[False,True])
    for label in ordered.pitch_description.unique():
        pitches_of_type = df[(df['pitch_description'] == label) & vs_hand]
        print(label)
        # Fewer than five pitches: no ellipse is drawn for this pitch type.
        if len(pitches_of_type) < 5:
            continue
        colour = pitch_colours[label]
        confidence_ellipse(pitches_of_type['px'], pitches_of_type['pz'],
                           ax=ax,
                           edgecolor=colour,
                           n_std=1.5,
                           facecolor=colour,
                           alpha=0.3)

    # Per-pitch-description count and mean location against this hand.
    pitch_location_group = (
        df[vs_hand]
        .groupby(['pitch_description'])
        .agg(pitches=('start_speed','count'),
             px=('px','mean'),
             pz=('pz','mean'))
        .reset_index()
    )
    usage_share = pitch_location_group['pitches']/pitch_location_group['pitches'].sum()
    pitch_location_group['pitch_percent'] = usage_share

    ## Location Plot
    sns.scatterplot(ax=ax,
                    x=pitch_location_group['px'],
                    y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_description'],
                    palette=pitch_colours,
                    ec='black',
                    s=pitch_location_group['pitch_percent']*750,
                    linewidth=2,
                    zorder=2)

    ax.axis('square')
    draw_line(ax,alpha_spot=0.75,catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75,2.75))
    ax.set_ylim((-0.5,5))

    # The legend is removed only when at least one group was plotted.
    if len(pitch_location_group['px']) > 0:
        ax.get_legend().remove()

    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches",fontdict=font_properties_titles)