import polars as pl import numpy as np import pandas as pd import api_scraper scrape = api_scraper.MLB_Scrape() from functions import df_update from functions import pitch_summary_functions update = df_update.df_update() from stuff_model import feature_engineering as fe from stuff_model import stuff_apply import requests import joblib from matplotlib.gridspec import GridSpec from shiny import App, reactive, ui, render from shiny.ui import h2, tags import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import seaborn as sns from functions.pitch_summary_functions import * from shiny import App, reactive, ui, render from shiny.ui import h2, tags import datetime import matplotlib.colors import pandas as pd from stuff_model import calculate_arm_angles as caa cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#648FFF","#FFFFFF","#FFB000"]) colour_palette = ['#FFB000','#648FFF','#785EF0', '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED'] year_list = [2017,2018,2019,2020,2021,2022,2023,2024] level_dict = {'1':'MLB', '11':'AAA', '12':'AA', '13':'A+', '14':'A', '16':'ROK', '17':'AFL', '22':'College', '21':'Prospects', '51':'International' } function_dict={ 'velocity_kdes':'Velocity Distributions', 'break_plot':'Pitch Movement', 'break_plot_rhh':'Pitch Movement LHH', 'break_plot_lhh':'Pitch Movement RHH', 'tj_stuff_roling':'Rolling tjStuff+ by Pitch', 'tj_stuff_roling_game':'Rolling tjStuff+ by Game', 'location_plot_lhb':'Locations vs LHB', 'location_plot_rhb':'Locations vs RHB', } split_dict = {'all':'All', 'left':'LHH', 'right':'RHH'} split_dict_hand = {'all':['L','R'], 'left':['L'], 'right':['R']} # List of MLB teams and their corresponding ESPN logo URLs mlb_teams = [ {"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"}, {"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"}, {"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"}, {"team": "BAL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bal.png&h=500&w=500"}, {"team": "BOS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bos.png&h=500&w=500"}, {"team": "CHC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chc.png&h=500&w=500"}, {"team": "CWS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chw.png&h=500&w=500"}, {"team": "CIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cin.png&h=500&w=500"}, {"team": "CLE", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cle.png&h=500&w=500"}, {"team": "COL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/col.png&h=500&w=500"}, {"team": "DET", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/det.png&h=500&w=500"}, {"team": "HOU", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/hou.png&h=500&w=500"}, {"team": "KC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/kc.png&h=500&w=500"}, {"team": "LAA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/laa.png&h=500&w=500"}, {"team": "LAD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/lad.png&h=500&w=500"}, {"team": "MIA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mia.png&h=500&w=500"}, {"team": "MIL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mil.png&h=500&w=500"}, {"team": "MIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/min.png&h=500&w=500"}, {"team": "NYM", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nym.png&h=500&w=500"}, {"team": "NYY", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nyy.png&h=500&w=500"}, {"team": "PHI", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/phi.png&h=500&w=500"}, {"team": "PIT", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/pit.png&h=500&w=500"}, {"team": "SD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sd.png&h=500&w=500"}, {"team": "SF", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sf.png&h=500&w=500"}, {"team": "SEA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sea.png&h=500&w=500"}, {"team": "STL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/stl.png&h=500&w=500"}, {"team": "TB", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tb.png&h=500&w=500"}, {"team": "TEX", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tex.png&h=500&w=500"}, {"team": "TOR", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tor.png&h=500&w=500"}, {"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"}, {"team": "ZZZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"} ] df_image = pd.DataFrame(mlb_teams) image_dict = df_image.set_index('team')['logo_url'].to_dict() image_dict_flip = df_image.set_index('logo_url')['team'].to_dict() import requests import requests import os CAMPAIGN_ID = os.getenv("CAMPAIGN_ID") ACCESS_TOKEN = os.getenv("ACCESS_TOKEN") BACKUP_PW = os.getenv("BACKUP_PW") ADMIN_PW = os.getenv("ADMIN_PW") url = f"https://www.patreon.com/api/oauth2/v2/campaigns/{CAMPAIGN_ID}/members" headers = { "Authorization": f"Bearer {ACCESS_TOKEN}" } # Simple parameters, requesting the member's email and currently entitled tiers params = { "fields[member]": "full_name,email", # Request the member's email "include": "currently_entitled_tiers", # Include the currently entitled tiers "page[size]": 10000 # Fetch up to 1000 patrons per request } response = requests.get(url, headers=headers, params=params) VALID_PASSWORDS = [] if response.status_code == 200: data = response.json() for patron in data['data']: try: tiers = patron['relationships']['currently_entitled_tiers']['data'] if any(tier['id'] == '9078921' for tier in tiers): full_name = patron['attributes']['email'] VALID_PASSWORDS.append(full_name) except KeyError: continue VALID_PASSWORDS.append(BACKUP_PW) VALID_PASSWORDS.append(ADMIN_PW) from shiny import App, reactive, ui, render from shiny.ui import h2, tags # Define the login UI login_ui = ui.page_fluid( ui.card( ui.h2([ "TJStats Daily Pitching Summary App ", ui.tags.a("(@TJStats)", href="https://twitter.com/TJStats", target="_blank") ]), ui.p( "This App is available to Superstar Patrons. Please enter your Patreon email address in the box below. If you're having trouble, please refer to the ", ui.tags.a("Patreon post", href="https://www.patreon.com/posts/122860440", target="_blank"), "." ), ui.input_password("password", "Enter Patreon Email (or Password from Link):", width="25%"), ui.tags.input( type="checkbox", id="authenticated", value=False, disabled=True ), ui.input_action_button("login", "Login", class_="btn-primary"), ui.output_text("login_message"), ) ) # Define the UI layout for the app main_ui = ui.page_sidebar( ui.sidebar( # Row for selecting season and level ui.row( ui.column(6, ui.input_date('date_input', 'Select Date')), ui.column(6, ui.input_select('level_input', 'Select Level', level_dict)) ), ui.row(ui.input_action_button("game_button", "Get Games", class_="btn-primary")), ui.row( ui.row(ui.column(12, ui.output_ui('game_select_ui', 'Select Game'))), ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))), ui.row(ui.column(12, ui.output_data_frame('season_stats'))), ), # Rows for selecting plots and split options ui.row( ui.column(4, ui.input_select('plot_id_1', 'Plot Left', function_dict, multiple=False, selected='location_plot_lhb')), ui.column(4, ui.input_select('plot_id_2', 'Plot Middle', function_dict, multiple=False, selected='break_plot')), ui.column(4, ui.input_select('plot_id_3', 'Plot Right', function_dict, multiple=False, selected='location_plot_rhb')) ), ui.row( ui.column(6, ui.input_select('split_id', 'Select Split', split_dict, multiple=False)), ui.column(6, ui.input_numeric('rolling_window', 'Rolling Window (for tjStuff+ Plot)', min=1, value=50)) ), ui.row( ui.column(6, ui.input_switch("switch", "Custom Team?", False)), ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False)) ), # Row for the action button to generate plot ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")) ,width='500px'), # Main content area with tabs (placed directly in page_sidebar) ui.navset_tab( ui.nav_panel("Pitching Summary", ui.output_text("status"), ui.output_plot('plot', width='2100px', height='2100px') ), ui.nav_panel("PBP Data", ui.output_data_frame("grid_pbp")), ui.nav_panel("Table Summary", ui.output_data_frame("grid_summary")), ui.nav_panel("Daily Table", ui.output_data_frame("grid")), ui.nav_panel("Daily Table Style", ui.input_numeric('head', 'Table Limit', min=0, value=10), ui.input_numeric('pitch_min', 'Pitch Min.', min=0, value=10), ui.card( {"style": "width: 1560px;"}, ui.head_content( ui.tags.script(src="https://html2canvas.hertzen.com/dist/html2canvas.min.js"), ui.tags.script(""" async function downloadPNG() { const content = document.getElementById('capture-section'); try { // Create a wrapper div with right margin only const wrapper = document.createElement('div'); wrapper.style.paddingRight = '20px'; wrapper.style.paddingLeft = '20px'; wrapper.style.paddingTop = '20px'; wrapper.style.backgroundColor = 'white'; // Clone the content const clonedContent = content.cloneNode(true); wrapper.appendChild(clonedContent); // Add wrapper to document temporarily document.body.appendChild(wrapper); const canvas = await html2canvas(wrapper, { backgroundColor: 'white', scale: 2, useCORS: true, logging: false, width: content.offsetWidth + 20, height: content.offsetHeight + 50 }); // Remove temporary wrapper document.body.removeChild(wrapper); // Convert canvas to blob canvas.toBlob(function(blob) { const url = URL.createObjectURL(blob); const link = document.createElement('a'); link.href = url; link.download = 'stats_card.png'; document.body.appendChild(link); link.click(); document.body.removeChild(link); URL.revokeObjectURL(url); }, 'image/png'); } catch (error) { console.error('Error generating PNG:', error); } } $(document).on('click', '#capture_png_btn', function() { downloadPNG(); }); """) ), ui.div( { "id": "capture-section", "style": "background-color: white; padding: 0; margin-left: 20px; margin-right: 20px; margin-top: 20px; margin-bottom: 20px;" }, ui.div({"style": "font-size:3em;"}, ui.output_text("style_title")), ui.div({"style": "font-size:1.5em;"}, ui.output_text("min_title")), ui.br(), ui.output_table("grid_style"), ui.br(), ui.row( ui.column(8, ui.div( {"style": "text-align: left;"}, ui.markdown("### By: @TJStats"), ui.markdown("### Data: MLB"), ) ), ui.column(4, ui.div( {"style": "text-align: left; height: 86px; display: flex; justify-content: flex-end;"}, ui.output_image("logo", height="86px") ) ) ), ui.div({"style": "height: 20px;"}) ), ui.div( {"style": "display: flex; gap: 10px;"}, ui.input_action_button("capture_png_btn", "Save as PNG", class_="btn-success"), ), ) ), ui.nav_panel("Whiffs Table", ui.output_table("whiff_table")), ) ) # Combined UI with conditional panel app_ui = ui.page_fluid( ui.tags.head( ui.tags.script(src="script.js") ), ui.panel_conditional( "!input.authenticated", login_ui ), ui.panel_conditional( "input.authenticated", main_ui ) ) def server(input, output, session): @reactive.Effect @reactive.event(input.login) def check_password(): if input.password() in VALID_PASSWORDS: ui.update_checkbox("authenticated", value=True) ui.update_text("login_message", value="") else: ui.update_text("login_message", value="Invalid password!") ui.update_text("password", value="") @output @render.text def login_message(): return "" @render.image def logo(): # You'll need to provide the actual image path or URL here return {"src": "tjstats_logo.jpg"} @render.text def style_title(): return f"Daily {level_dict[input.level_input()]} tjStuff+ Leaders - {str(input.date_input())}" @render.text def min_title(): return f"(Min. {int(input.pitch_min())} Pitches)" @render.ui @reactive.event(input.game_button,input.date_input,input.level_input, ignore_none=False) def game_select_ui(): df = (scrape.get_schedule(year_input=[int(str(input.date_input())[:4])], sport_id=[int(input.level_input())], game_type=['S','R','P','E','A','I','W','F','L','D']) .filter((pl.col('gameday_type').is_in(['P', 'E'])) | (pl.col('venue_id').is_in([7250,2532]))) .filter(pl.col('state').is_in(['I','M','N','O','F','T','U','Q','R','D'])) .with_columns(pl.col('date').cast(pl.Utf8)) .filter(pl.col('date') == str(input.date_input()))).with_columns( (pl.col('away')+' @ '+pl.col('home')+' - '+pl.col('state')).alias('matchup')).sort('time') game_dict = dict(zip(df['game_id'], df['matchup'])) print('GAMES') print(game_dict) return ui.input_select("game_id", "Select Game", game_dict) @render.ui @reactive.event(input.game_id) def player_select_ui(): try: # Get the list of pitchers for the selected level and season data_list = scrape.get_data(game_list_input = [int(input.game_id())]) print('DATALIST') print(input.game_id()) # print(data_list) # test = scrape.get_data_df(data_list = data_list) # test.write_csv('cubs_dodgers.csv') df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter( (pl.col("is_pitch") == True)& (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()])) )))).with_columns( pl.col('pitch_type').count().over('pitch_type').alias('pitch_count') ).with_columns( (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name')) ) # print('DATAFRAME') # print(df) pitcher_dict = dict(zip(df['pitcher_id'], df['pitcher_name'])) print('PITCHERS') print(pitcher_dict) return ui.input_select("pitcher_id", "Select Pitcher", pitcher_dict) except Exception as e: print(e) return ui.output_text('pitcher_id',"No pitchers available for this game") @output @render.text def status(): # Only show status when generating if input.generate == 0: return "" return "" @output @render.data_frame def season_stats(): if int(input.level_input()) == 1: season = int(str(input.date_input())[:4]) url_season = url = f""" https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&season={season}&season1={season} &startdate={season}-01-01&enddate={season}-12-01&ind=0&qual=0&type=8&month=33&pageitems=500000 """ url_day = f""" https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=y&season={season}&season1={season} &startdate={season}-03-01&enddate={season}-11-01 &month=34&hand=&team=0&pageitems=500000&ind=0&rost=0&players=&type=8 &sortstat=WAR """ data_season = requests.get(url_season).json() df_season = pl.DataFrame(data=data_season['data'], infer_schema_length=1000) df_season = df_season.with_columns(pl.lit('Season').alias('Time')) data_day = requests.get(url_day).json() if data_day['dateRange'][:10] == str(input.date_input()): df_day = pl.DataFrame(data=data_day['data'], infer_schema_length=1000) df_day = df_day.with_columns(pl.lit('Today').alias('Time')) df_all = pl.DataFrame(pd.concat([df_day.to_pandas(),df_season.to_pandas()])) df_player = df_all.filter(pl.col('xMLBAMID')==int(input.pitcher_id())) else: df_player = df_season.filter(pl.col('xMLBAMID')==int(input.pitcher_id())) return render.DataGrid( df_player.select(['Time','IP','TBF','R','ER','SO','BB','ERA','FIP','WHIP']).to_pandas().round(2), row_selection_mode='multiple', height='700px', width='fit-content', ) else: d = scrape.get_data([int(input.game_id())]) player_id = f'ID{int(input.pitcher_id())}' home_players = d[0]['liveData']['boxscore']['teams']['home']['players'] away_players = d[0]['liveData']['boxscore']['teams']['away']['players'] if player_id in home_players: batters_faced = home_players[player_id]['stats']['pitching']['battersFaced'] team_side = 'home' elif player_id in away_players: batters_faced = away_players[player_id]['stats']['pitching']['battersFaced'] team_side = 'away' else: batters_faced = None team_side = None pitches = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['pitchesThrown'] innings = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['inningsPitched'] battersFaced = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['battersFaced'] runs = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['runs'] earnedRuns = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['earnedRuns'] strikeOuts = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['strikeOuts'] baseOnBalls = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['baseOnBalls'] df_player = pl.DataFrame({ 'Pitches': int(pitches), 'IP': float(innings), 'PA': int(battersFaced), 'R': int(runs), 'ER': int(earnedRuns), 'K': int(strikeOuts), 'BB': int(baseOnBalls) }) return render.DataGrid( df_player.to_pandas().round(2), row_selection_mode='multiple', height='700px', width='fit-content', ) @output @render.plot @reactive.event(input.generate_plot, ignore_none=False) def plot(): # Show progress/loading notification with ui.Progress(min=0, max=1) as p: p.set(message="Generating plot", detail="This may take a while...") p.set(0.3, "Gathering data...") data_list = scrape.get_data(game_list_input = [int(input.game_id())]) # df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter( # (pl.col("pitcher_id") == int(input.pitcher_id()))& # (pl.col("is_pitch") == True)& # (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()])) # )))).with_columns( # pl.col('pitch_type').count().over('pitch_type').alias('pitch_count') # )) df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter( (pl.col("pitcher_id") == int(input.pitcher_id()))& (pl.col("is_pitch") == True)& (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()])) ))).with_columns( pl.col("extension").fill_null(6.2) )).with_columns( pl.col('pitch_type').count().over('pitch_type').alias('pitch_count') )) df = df.clone() # df = df.with_columns( # pl.lit(6.2).alias("extension") # ) p.set(0.6, "Creating plot...") #plt.rcParams["figure.figsize"] = [10,10] fig = plt.figure(figsize=(26,26)) plt.rcParams.update({'figure.autolayout': True}) fig.set_facecolor('white') sns.set_theme(style="whitegrid", palette=colour_palette) print('this is the one plot') gs = gridspec.GridSpec(6, 8, height_ratios=[6,20,12,36,36,6], width_ratios=[4,18,18,18,18,18,18,4]) gs.update(hspace=0.2, wspace=0.5) # Define the positions of each subplot in the grid ax_headshot = fig.add_subplot(gs[1,1:3]) ax_bio = fig.add_subplot(gs[1,3:5]) ax_logo = fig.add_subplot(gs[1,5:7]) ax_season_table = fig.add_subplot(gs[2,1:7]) ax_plot_1 = fig.add_subplot(gs[3,1:3]) ax_plot_2 = fig.add_subplot(gs[3,3:5]) ax_plot_3 = fig.add_subplot(gs[3,5:7]) ax_table = fig.add_subplot(gs[4,1:7]) ax_footer = fig.add_subplot(gs[-1,1:7]) ax_header = fig.add_subplot(gs[0,1:7]) ax_left = fig.add_subplot(gs[:,0]) ax_right = fig.add_subplot(gs[:,-1]) # Hide axes for footer, header, left, and right ax_footer.axis('off') ax_header.axis('off') ax_left.axis('off') ax_right.axis('off') sns.set_theme(style="whitegrid", palette=colour_palette) fig.set_facecolor('white') df_teams = scrape.get_teams() year_input = int(str(input.date_input())[:4]) sport_id = int(input.level_input()) player_input = int(input.pitcher_id()) team_id = df['pitcher_team_id'][0] player_headshot(player_input=player_input, ax=ax_headshot,sport_id=sport_id,season=year_input) player_bio(pitcher_id=player_input, ax=ax_bio,sport_id=sport_id,year_input=year_input) # plot_logo(pitcher_id=player_input, ax=ax_logo, df_team=df_teams,df_players=scrape.get_players(sport_id,year_input)) if input.switch(): # Get the logo URL from the image dictionary using the team abbreviation logo_url = input.logo_select() # Send a GET request to the logo URL response = requests.get(logo_url) # Open the image from the response content img = Image.open(BytesIO(response.content)) # Display the image on the axis ax_logo.set_xlim(0, 1.3) ax_logo.set_ylim(0, 1) ax_logo.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper') # Turn off the axis ax_logo.axis('off') else: plot_logo(pitcher_id=player_input, ax=ax_logo, df_team=df_teams,df_players=scrape.get_players(sport_id,year_input),team_id=team_id) # stat_summary_table(df=df, # ax=ax_season_table, # player_input=player_input, # split=input.split_id(), # sport_id=sport_id) stat_daily_summary(df=df, data=data_list, player_input=int(input.pitcher_id()), sport_id=int(input.level_input()), ax=ax_season_table) # break_plot(df=df_plot,ax=ax2) for x,y,z in zip([input.plot_id_1(),input.plot_id_2(),input.plot_id_3()],[ax_plot_1,ax_plot_2,ax_plot_3],[1,3,5]): if x == 'velocity_kdes': velocity_kdes(df, ax=y, gs=gs, gs_x=[3,4], gs_y=[z,z+2], fig=fig) if x == 'tj_stuff_roling': tj_stuff_roling(df=df, window=int(input.rolling_window()), ax=y) if x == 'tj_stuff_roling_game': tj_stuff_roling_game(df=df, window=int(input.rolling_window()), ax=y) if x == 'break_plot': break_plot(df = df,ax=y) if x == 'location_plot_lhb': location_plot(df = df,ax=y,hand='L') if x == 'location_plot_rhb': location_plot(df = df,ax=y,hand='R') if x == 'break_plot_rhh': break_plot(df = df.filter(pl.col('batter_hand')=='R'),ax=y) if x == 'break_plot_lhh': break_plot(df = df.filter(pl.col('batter_hand')=='L'),ax=y) summary_table(df=df, ax=ax_table) plot_footer(ax_footer) ax_watermark = fig.add_subplot(gs[1:-1,1:-1],zorder=-1) # Hide axes ticks and labels ax_watermark.set_xticks([]) ax_watermark.set_yticks([]) ax_watermark.set_frame_on(False) # Optional: Hide border img = Image.open('tj stats circle-01_new.jpg') img = img.convert("LA") # Display the image ax_watermark.imshow(img, extent=[0, 1, 0, 1], origin='upper',zorder=-1, alpha=0.06) ax_watermark2 = fig.add_subplot(gs[-2:,1:4],zorder=1) ax_watermark2.set_xlim(0,1) ax_watermark2.set_ylim(0,1) # Hide axes ticks and labels ax_watermark2.set_xticks([]) ax_watermark2.set_yticks([]) ax_watermark2.set_frame_on(False) # Optional: Hide border # Open the image img = Image.open('tj stats circle-01_new.jpg') # Get the original size width, height = img.size # Calculate the new size (50% larger) new_width = int(width * 0.5) new_height = int(height * 0.5) # Resize the image img_resized = img.resize((new_width, new_height)) # Display the image ax_watermark2.imshow(img, extent=[0.26, 0.46, 0.0,0.2], origin='upper',zorder=-1, alpha=1) # fig.set_facecolor('#fff0f9') fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01) @output @render.data_frame @reactive.event(input.generate_plot, ignore_none=False) def grid_summary(): data_list = scrape.get_data(game_list_input = [int(input.game_id())]) df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter( (pl.col("pitcher_id") == int(input.pitcher_id()))& (pl.col("is_pitch") == True)& (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()])) ))).with_columns( pl.col("extension").fill_null(6.2) )).with_columns( pl.col('pitch_type').count().over('pitch_type').alias('pitch_count') )) df = df.clone() features_table = ['start_speed', 'spin_rate', 'extension', 'ivb', 'hb', 'x0', 'z0'] selection = ['game_id','pitcher_id','pitcher_name','batter_id','batter_name','pitcher_hand', 'batter_hand','balls','strikes','play_code','event_type','pitch_type','vaa','haa']+features_table+['tj_stuff_plus','pitch_grade'] return render.DataGrid( df.select(selection).to_pandas().round(1), row_selection_mode='multiple', height='700px', width='fit-content', filters=True, ) @output @render.data_frame @reactive.event(input.generate_plot, ignore_none=False) def grid_pbp(): data_list = scrape.get_data(game_list_input = [int(input.game_id())]) df = scrape.get_data_df(data_list = data_list) # df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter( # (pl.col("is_pitch") == True)& # (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()])) # )))).with_columns( # pl.col('pitch_type').count().over('pitch_type').alias('pitch_count') # ).with_columns( # (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name')) # ) #df = caa.calculate_arm_angles(df=df,pitcher_id=int(input.pitcher_id())).clone() return render.DataGrid( df.to_pandas(), row_selection_mode='multiple', height='700px', width='fit-content', filters=True, ) @output @render.data_frame @reactive.event(input.generate_plot, ignore_none=False) def grid(): df_games = (scrape.get_schedule(year_input=[int(str(input.date_input())[:4])], sport_id=[int(input.level_input())], game_type=['S','R','P','E','A','I','W','F','L']).with_columns(pl.col('date').cast(pl.Utf8)). filter(pl.col('date') == str(input.date_input()))).with_columns( (pl.col('away')+' @ '+pl.col('home')).alias('matchup')) game_list = df_games['game_id'].unique().to_list() # Get the list of pitchers for the selected level and season data_list = scrape.get_data(game_list) df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter( (pl.col("is_pitch") == True)& (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()])) )))).with_columns( pl.col('pitch_type').count().over('pitch_type').alias('pitch_count') ).with_columns( (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name')) ) # game_list = game_list_df['game_id'].unique().to_list() data = scrape.get_data(game_list[:]) df = scrape.get_data_df(data) pitcher_team_dict = dict(zip(df['pitcher_id'], df['pitcher_team'])) df_test = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(df).filter( (pl.col("is_pitch") == True))))) df_test = df_test.with_columns( (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name') ) # Aggregate tj_stuff_plus by pitcher_id and year df_agg_2024_pitch = df_test.group_by(['pitcher_id','pitcher_name','pitch_type']).agg( pl.col('tj_stuff_plus').len().alias('count'), pl.col('tj_stuff_plus').mean() ) # Calculate the weighted average of 'tj_stuff_plus' for each pitcher df_weighted_avg = df_agg_2024_pitch.with_columns( (pl.col('tj_stuff_plus') * pl.col('count')).alias('weighted_tj_stuff_plus') ).group_by(['pitcher_id', 'pitcher_name']).agg( pl.col('count').sum().alias('total_count'), pl.col('weighted_tj_stuff_plus').sum().alias('total_weighted_tj_stuff_plus') ).with_columns( (pl.col('total_weighted_tj_stuff_plus') / pl.col('total_count')).alias('tj_stuff_plus') ).select(['pitcher_id', 'pitcher_name', 'tj_stuff_plus', 'total_count']) # Add the 'pitch_type' column with value "All" df_weighted_avg = df_weighted_avg.with_columns( pl.lit("All").alias('pitch_type') ) # Select and rename columns to match the original DataFrame df_weighted_avg = df_weighted_avg.select([ 'pitcher_id', 'pitcher_name', 'pitch_type', pl.col('total_count').alias('count'), 'tj_stuff_plus' ]) # Concatenate the new rows with the original DataFrame df_agg_2024_pitch = pl.concat([df_agg_2024_pitch, df_weighted_avg]) df_small = df_agg_2024_pitch.select(['pitcher_id','pitcher_name','pitch_type','count','tj_stuff_plus']) count_dict = dict(zip(df_small.filter(pl.col('pitch_type')=='All')['pitcher_id'], df_small.filter(pl.col('pitch_type')=='All')['count'])) # Check if 'FS' column exists, if not create it and fill with None df_small_pivot = (df_small.pivot(index=['pitcher_id','pitcher_name'], columns='pitch_type', values='tj_stuff_plus').with_columns( pl.col("pitcher_id").replace_strict(count_dict, default=None).alias("count"))) # Check if 'FS' column exists, if not create it and fill with None for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']: if col not in df_small_pivot.columns: df_small_pivot = df_small_pivot.with_columns(pl.lit(None).alias(col)) df_small_pivot = df_small_pivot.with_columns( pl.col("pitcher_id").replace_strict(pitcher_team_dict, default=None).alias("pitcher_team")) df_small_pivot = df_small_pivot.select(['pitcher_id','pitcher_name','pitcher_team','count','CH','CU','FC','FF','FS','SI','SL','ST','All']).sort('All',descending=True)#.head(10)#.write_clipboard() df_small_pivot = df_small_pivot.with_columns( pl.col(col).cast(pl.Int32, strict=False) for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All'] ) return render.DataGrid( df_small_pivot, row_selection_mode='multiple', height='700px', width='fit-content', filters=True, ) @output @render.table @reactive.event(input.generate_plot, input.pitch_min,input.head,ignore_none=False) def grid_style(): row_limit = int(input.head()) pitch_limit = int(input.pitch_min()) df_games = (scrape.get_schedule(year_input=[int(str(input.date_input())[:4])], sport_id=[int(input.level_input())], game_type=['S','R','P','E','A','I','W','F','L']).with_columns(pl.col('date').cast(pl.Utf8)). filter(pl.col('date') == str(input.date_input()))).with_columns( (pl.col('away')+' @ '+pl.col('home')).alias('matchup')) game_list = df_games['game_id'].unique().to_list() # Get the list of pitchers for the selected level and season data_list = scrape.get_data(game_list) df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter( (pl.col("is_pitch") == True)& (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()])) )))).with_columns( pl.col('pitch_type').count().over('pitch_type').alias('pitch_count') ).with_columns( (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name')) ) # game_list = game_list_df['game_id'].unique().to_list() data = scrape.get_data(game_list[:]) df = scrape.get_data_df(data) pitcher_team_dict = dict(zip(df['pitcher_id'], df['pitcher_team'])) df_test = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(df).filter( (pl.col("is_pitch") == True))))) # df_test = df_test.with_columns( # (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name') # ) # Aggregate tj_stuff_plus by pitcher_id and year df_agg_2024_pitch = df_test.group_by(['pitcher_id','pitcher_name','pitch_type']).agg( pl.col('tj_stuff_plus').len().alias('count'), pl.col('tj_stuff_plus').mean() ) # Calculate the weighted average of 'tj_stuff_plus' for each pitcher df_weighted_avg = df_agg_2024_pitch.with_columns( (pl.col('tj_stuff_plus') * pl.col('count')).alias('weighted_tj_stuff_plus') ).group_by(['pitcher_id', 'pitcher_name']).agg( pl.col('count').sum().alias('total_count'), pl.col('weighted_tj_stuff_plus').sum().alias('total_weighted_tj_stuff_plus') ).with_columns( (pl.col('total_weighted_tj_stuff_plus') / pl.col('total_count')).alias('tj_stuff_plus') ).select(['pitcher_id', 'pitcher_name', 'tj_stuff_plus', 'total_count']) # Add the 'pitch_type' column with value "All" df_weighted_avg = df_weighted_avg.with_columns( pl.lit("All").alias('pitch_type') ) # Select and rename columns to match the original DataFrame df_weighted_avg = df_weighted_avg.select([ 'pitcher_id', 'pitcher_name', 'pitch_type', pl.col('total_count').alias('count'), 'tj_stuff_plus' ]) # Concatenate the new rows with the original DataFrame df_agg_2024_pitch = pl.concat([df_agg_2024_pitch, df_weighted_avg]) df_small = df_agg_2024_pitch.select(['pitcher_id','pitcher_name','pitch_type','count','tj_stuff_plus']) count_dict = dict(zip(df_small.filter(pl.col('pitch_type')=='All')['pitcher_id'], df_small.filter(pl.col('pitch_type')=='All')['count'])) # Check if 'FS' column exists, if not create it and fill with None df_small_pivot = (df_small.pivot(index=['pitcher_id','pitcher_name'], columns='pitch_type', values='tj_stuff_plus').with_columns( pl.col("pitcher_id").replace_strict(count_dict, default=None).alias("count"))) # Check if 'FS' column exists, if not create it and fill with None for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']: if col not in df_small_pivot.columns: df_small_pivot = df_small_pivot.with_columns(pl.lit(None).alias(col)) df_small_pivot = df_small_pivot.with_columns( pl.col("pitcher_id").replace_strict(pitcher_team_dict, default=None).alias("pitcher_team")) df_small_pivot = df_small_pivot.select(['pitcher_name','pitcher_team','count','CH','CU','FC','FF','FS','SI','SL','ST','All']).sort('All',descending=True)#.head(10)#.write_clipboard() df_small_pivot = df_small_pivot.with_columns( pl.col(col).cast(pl.Int32, strict=False) for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All'] ) df_export = df_small_pivot.filter(pl.col('count')>=pitch_limit).to_pandas().head(row_limit) df_export.columns = ['Name', 'Team', 'Pitches', 'CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All'] df_style = df_export.style df_style = df_style.set_properties(**{'border': '1.0 px'},overwrite=False).set_table_styles([{'selector' :'th', 'props':[('text-align', 'center'),('font-size', '22px'),('Height','30px'),('border', '1px black solid !important')]}, {'selector' :'td', 'props':[('text-align', 'center'),('font-size', '22px')]}],overwrite=False).set_table_styles( [{'selector': 'tr', 'props': [('line-height', '1px')]}],overwrite=False).set_properties( **{'Height': '60px'},**{'text-align': 'center'},overwrite=False).hide_index() #cmap_sum_2 = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#FFFFFF","#F0E442"]) df_style = df_style.format('{:.0f}',subset=df_export.columns[3:], na_rep='') # df_style df_style = df_style.background_gradient(cmap=cmap_sum,subset = ((list(df_export.index[:]),df_export.columns[3:])),vmin=80,vmax=120)#.applymap(lambda x: 'color: white' if pd.isnull(x) else '') #df_style = df_style.applymap(background_gradient_ignore_nan) #df_style = df_style df_style = df_style.applymap(lambda x: 'color: transparent; background-color: transparent' if pd.isnull(x) else '') df_style = df_style.set_properties( **{'border': '1px black solid !important'},subset = ((list(df_style.index[:-1]),df_style.columns[:]))).set_properties( **{'min-width':'325px'},subset = ((list(df_style.index[:-1]),df_style.columns[0])),overwrite=False).set_properties( **{'min-width':'100px'},subset = ((list(df_style.index[:-1]),df_style.columns[1:3])),overwrite=False).set_properties( **{'min-width':'100px'},subset = ((list(df_style.index[:-1]),df_style.columns[3:])),overwrite=False).set_properties( # **{'min-width':'125px'},subset = ((list(df_style.index[:-1]),df_style.columns[-1])),overwrite=False).set_properties( **{'border': '1px black solid !important'},subset = ((list(df_style.index[:]),df_style.columns[:]))) # df_style = df_style.set_table_styles([{'selector' :'th', # 'props':[('text-align', 'center'),('font-size', '22px'),('Height','30px'),('border', '1px black solid !important')]}, # {'selector' :'td', 'props':[('text-align', 'center'),('font-size', '22px')]}], overwrite=False) return df_style @output @render.table @reactive.event(input.generate_plot, ignore_none=False) def whiff_table(): time_delta_fill = 8 teams = requests.get(url='https://statsapi.mlb.com/api/v1/teams/').json() #Select only teams that are at the MLB level # mlb_teams_city = [x['franchiseName'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball'] # mlb_teams_name = [x['teamName'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball'] # mlb_teams_franchise = [x['name'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball'] # mlb_teams_id = [x['id'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball'] # mlb_teams_abb = [x['abbreviation'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball'] mlb_teams_city = [x['franchiseName'] if 'franchiseName' in x else None for x in teams['teams']] mlb_teams_name = [x['teamName'] if 'franchiseName' in x else None for x in teams['teams']] mlb_teams_franchise = [x['name'] if 'franchiseName' in x else None for x in teams['teams']] mlb_teams_id = [x['id'] if 'franchiseName' in x else None for x in teams['teams']] mlb_teams_abb = [x['abbreviation'] if 'franchiseName' in x else None for x in teams['teams']] mlb_teams_parent = [x['parentOrgName'] if 'parentOrgName' in x else None for x in teams['teams']] #Create a dataframe of all the teams mlb_teams_df = pd.DataFrame(data={'team_id':mlb_teams_id,'city':mlb_teams_franchise,'name':mlb_teams_name,'franchise':mlb_teams_franchise,'abbreviation':mlb_teams_abb,'parent_org':mlb_teams_parent}).drop_duplicates() ##Create a dataframe of all players in the database #Make an api call to get a dictionary of all players player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{int(input.level_input())}/players').json() #Select relevant data that will help distinguish players from one another fullName_list = [x['fullName'] for x in player_data['people']] id_list = [x['id'] for x in player_data['people']] position_list = [x['primaryPosition']['abbreviation'] for x in player_data['people']] team_list = [x['currentTeam']['id']for x in player_data['people']] df_games = (scrape.get_schedule(year_input=[int(str(input.date_input())[:4])], sport_id=[int(input.level_input())], game_type=['S','R','P','E','A','I','W','F','L']).with_columns(pl.col('date').cast(pl.Utf8)). filter(pl.col('date') == str(input.date_input()))).with_columns( (pl.col('away')+' @ '+pl.col('home')).alias('matchup')) game_list = df_games['game_id'].unique().to_list() # Get the list of pitchers for the selected level and season data_list = scrape.get_data(game_list) df = scrape.get_data_df(data_list = data_list).filter( (pl.col("is_pitch") == True)& (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()])) ).with_columns( pl.col('pitch_type').count().over('pitch_type').alias('pitch_count') ).with_columns( (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name')) # game_list = game_list_df['game_id'].unique().to_list() data = scrape.get_data(game_list[:]) df = scrape.get_data_df(data) player_id = [] team_id = [] for y in range(0,len(data_list)): #print(game_df_filter.game_id.reset_index(drop=True)[y]) player_id.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['person']['id'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']]) player_id.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['person']['id'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']]) # player_name.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['person']['fullName'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']]) # player_name.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['person']['fullName'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']]) team_id.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['parentTeamId'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']]) team_id.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['parentTeamId'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']]) player_id_final = [item for sublist in player_id for item in sublist] #player_name_final = [item for sublist in player_name for item in sublist] team_id_final = [item for sublist in team_id for item in sublist] player_list = pd.DataFrame(data={'player_id':player_id_final,'team_id':team_id_final}) player_list = player_list.drop_duplicates(subset=['player_id'],keep='last') player_df_all = player_list.merge(right=mlb_teams_df[['team_id','abbreviation']],left_on = 'team_id',right_on='team_id',how='left').drop_duplicates(keep='last') mlb_teams_df = mlb_teams_df.merge(right=mlb_teams_df[['abbreviation','franchise']],left_on='parent_org',right_on='franchise',how='left').drop_duplicates().reset_index(drop=True) mlb_teams_df = mlb_teams_df[mlb_teams_df.columns[:-1]] mlb_teams_df.columns = ['team_id', 'city', 'name', 'franchise', 'abbreviation', 'parent_org', 'parent_org_abb'] pk_list = [] pitcher_id_list = [] summary_list = [] hit_list = [] k_list = [] bb_list = [] pa_list = [] test_list = [] game_pk_list = [] for y in range(0,len(data_list)): pk_list.append([data_list[y]['gameData']['game']['pk'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']]) pk_list.append([data_list[y]['gameData']['game']['pk'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']]) pitcher_id_list.append([x for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']]) pitcher_id_list.append([x for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']]) summary_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['summary'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']]) summary_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['summary'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']]) hit_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['hits'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']]) hit_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['hits'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']]) k_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['strikeOuts'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']]) k_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['strikeOuts'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']]) bb_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['baseOnBalls'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']]) bb_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['baseOnBalls'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']]) pa_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['battersFaced'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']]) pa_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['battersFaced'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']]) test_list.append([x for x in data_list[y]['liveData']['plays']['allPlays']]) game_pk_list.append([data_list[y]['gameData']['game']['pk'] for x in data_list[y]['liveData']['plays']['allPlays']]) flat_list = [item for sublist in test_list for item in sublist] flat_list_games = [item for sublist in game_pk_list for item in sublist] test_list_2 = [x['playEvents'] for x in flat_list] test_list_3 = [x['matchup'] for x in flat_list] flat_list_pk = [item for sublist in pk_list for item in sublist] flat_list_pitcher_id = [item for sublist in pitcher_id_list for item in sublist] flat_list_summary = [item for sublist in summary_list for item in sublist] flat_list_hits = [item for sublist in hit_list for item in sublist] flat_list_k = [item for sublist in k_list for item in sublist] flat_list_bb = [item for sublist in bb_list for item in sublist] flat_list_pa = [item for sublist in pa_list for item in sublist] pitcher_summary_df = pd.DataFrame(data={'game_id':flat_list_pk,'pitcher_id':flat_list_pitcher_id,'summary':flat_list_summary,'hits':flat_list_hits,'k':flat_list_k,'bb':flat_list_bb,'pa':flat_list_pa}) pitcher_summary_df.summary = pitcher_summary_df.summary + ', ' + pitcher_summary_df.hits.astype(str) + ' H' pitcher_summary_df['k_bb_percent'] = (pitcher_summary_df.k - pitcher_summary_df.bb) / (pitcher_summary_df.pa) game_id_list = [] bat_play_id = [] batter_id_list = [] batter_name_list = [] date_list = [] desc_list = [] code_list = [] pitch_list = [] pitch_type_code_list = [] pitch_type_list = [] pitch_velo_list = [] pitch_break_length_list = [] pitch_break_angle_list = [] pitch_ivb_list = [] pitch_spin_list = [] pitch_spin_direction_list = [] launchSpeed_list = [] launchAngle_list = [] #totalDistance_list = [] trajectory_list = [] hardness_list = [] pitcher_id_list = [] pitcher_name_list = [] k_zone_top = [] k_zone_bottom = [] pitch_x = [] pitch_z = [] zone_list = [] pitch_x_what = [] pitch_y_what = [] from datetime import datetime from datetime import timedelta for i in range(0,len(test_list_2)): #n = n + 10000 for j in range(0,len(test_list_2[i])): if 'playId' in test_list_2[i][j]: #print('test') #n = n + 1 game_id_list.append(flat_list_games[i]) bat_play_id.append(str(flat_list_games[i])+str(1000+flat_list[i]['about']['atBatIndex'])+str(1000+test_list_2[i][j]['index'])) batter_id_list.append(test_list_3[i]['batter']['id']) batter_name_list.append(test_list_3[i]['batter']['fullName']) pitcher_id_list.append(test_list_3[i]['pitcher']['id']) pitcher_name_list.append(test_list_3[i]['pitcher']['fullName']) date_list.append((datetime.strptime(test_list_2[i][j]['startTime'][0:16], '%Y-%m-%dT%H:%M') - timedelta(hours=8)).date()) desc_list.append(test_list_2[i][j]['details']['description'] if 'description' in test_list_2[i][j]['details'] else np.nan) code_list.append(test_list_2[i][j]['details']['code'] if 'code' in test_list_2[i][j]['details'] else np.nan) # if 'hitData' in test_list_2[i][j]: if 'pitchData' in test_list_2[i][j]: #print(i,j) #pitch_abb_list.append(test_list_2[i][j]['details']['type']['code']) #pitch_name_list.append(test_list_2[i][j]['details']['type']['description']) pitch_type_code_list.append(test_list_2[i][j]['details']['type']['description'] if 'type' in test_list_2[i][j]['details'] else np.nan) pitch_type_list.append(test_list_2[i][j]['details']['type']['code'] if 'type' in test_list_2[i][j]['details'] else np.nan) pitch_velo_list.append(test_list_2[i][j]['pitchData']['startSpeed'] if 'startSpeed' in test_list_2[i][j]['pitchData'] else np.nan) pitch_break_length_list.append(test_list_2[i][j]['pitchData']['coordinates']['pfxX'] if 'pfxX' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan) pitch_break_angle_list.append(test_list_2[i][j]['pitchData']['breaks']['breakAngle'] if 'breakAngle' in test_list_2[i][j]['pitchData']['breaks'] else np.nan) pitch_ivb_list.append(test_list_2[i][j]['pitchData']['coordinates']['pfxZ'] if 'pfxZ' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan) pitch_spin_list.append(test_list_2[i][j]['pitchData']['breaks']['spinRate'] if 'spinRate' in test_list_2[i][j]['pitchData']['breaks'] else np.nan) pitch_spin_direction_list.append(test_list_2[i][j]['pitchData']['breaks']['spinDirection'] if 'spinDirection' in test_list_2[i][j]['pitchData']['breaks'] else np.nan) k_zone_top.append(test_list_2[i][j]['pitchData']['strikeZoneTop'] if 'strikeZoneTop' in test_list_2[i][j]['pitchData'] else np.nan) k_zone_bottom.append(test_list_2[i][j]['pitchData']['strikeZoneBottom'] if 'strikeZoneBottom' in test_list_2[i][j]['pitchData'] else np.nan) pitch_x.append(test_list_2[i][j]['pitchData']['coordinates']['pX'] if 'pX' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan) pitch_z.append(test_list_2[i][j]['pitchData']['coordinates']['pZ'] if 'pZ' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan) zone_list.append(test_list_2[i][j]['pitchData']['zone'] if 'zone' in test_list_2[i][j]['pitchData'] else np.nan) pitch_x_what.append(test_list_2[i][j]['pitchData']['coordinates']['x'] if 'x' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan) pitch_y_what.append(test_list_2[i][j]['pitchData']['coordinates']['y'] if 'y' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan) #totalDistance_list.append(test_list_2[i][j]['hitData']['totalDistance']) #trajectory_list.append(test_list_2[i][j]['hitData']['trajectory']) #hardness_list.append(test_list_2[i][j]['hitData']['hardness']) # #print(i,j) else: pitch_velo_list.append(np.nan) pitch_type_code_list.append(np.nan) pitch_type_list.append(np.nan) pitch_break_length_list.append(np.nan) pitch_break_angle_list.append(np.nan) pitch_ivb_list.append(np.nan) pitch_spin_list.append(np.nan) pitch_spin_direction_list.append(np.nan) k_zone_top.append(np.nan) k_zone_bottom.append(np.nan) pitch_x.append(np.nan) pitch_z.append(np.nan) zone_list.append(np.nan) pitch_x_what.append(np.nan) pitch_y_what.append(np.nan) if 'hitData' in test_list_2[i][j]: # print(i,j) #pitch_abb_list.append(test_list_2[i][j]['details']['type']['code']) #pitch_name_list.append(test_list_2[i][j]['details']['type']['description']) launchSpeed_list.append(test_list_2[i][j]['hitData']['launchSpeed'] if 'launchSpeed' in test_list_2[i][j]['hitData'] else np.nan) launchAngle_list.append(test_list_2[i][j]['hitData']['launchAngle'] if 'launchAngle' in test_list_2[i][j]['hitData'] else np.nan) #totalDistance_list.append(test_list_2[i][j]['hitData']['totalDistance']) #trajectory_list.append(test_list_2[i][j]['hitData']['trajectory']) #hardness_list.append(test_list_2[i][j]['hitData']['hardness']) else: launchSpeed_list.append(np.nan) launchAngle_list.append(np.nan) exit_velo_df = pd.DataFrame(data={'play_id':bat_play_id, 'game_id':game_id_list, 'date':date_list, 'pitcher_id':pitcher_id_list, 'pitcher':pitcher_name_list, 'pitch_code':pitch_type_code_list, 'pitch_type':pitch_type_list, 'pitch_velocity':pitch_velo_list, 'break_length':pitch_break_length_list, 'break_angle':pitch_break_angle_list, 'break_ivb':pitch_ivb_list, 'spin_rate':pitch_spin_list, 'spin_direction':pitch_spin_direction_list, 'batter_id':batter_id_list, 'batter':batter_name_list, 'code':code_list, 'description':desc_list, 'launch_speed':launchSpeed_list, 'launch_angle':launchAngle_list, 'k_zone_top':k_zone_top, 'k_zone_bottom':k_zone_bottom, 'pitch_x':pitch_x, 'pitch_z':pitch_z, 'zone':zone_list, 'pitch_x_what':pitch_x_what, 'pitch_y_what':pitch_y_what, }) exit_velo_df['plate_negative'] = -17/12/2 exit_velo_df['plate_positive'] = 17/12/2 exit_velo_df[['k_zone_top','k_zone_bottom']] = exit_velo_df.groupby('batter_id')[['k_zone_top','k_zone_bottom']].transform('mean') exit_velo_df.play_id = exit_velo_df.play_id.astype(float) exit_velo_df = exit_velo_df.drop_duplicates(subset=['play_id'],keep='last').reset_index(drop=True) # exit_velo_df['pitch_x_what'] = ((exit_velo_df['pitch_x_what'] - 117)*-1)/38.11904466310226 # exit_velo_df['pitch_y_what'] = ((exit_velo_df['pitch_y_what'] - 238.8)*-1)/27.008296321000604 # from joblib import load # knn = load('knn_model.joblib') # exit_velo_df['in_zone'] = exit_velo_df['zone'] < 10 # exit_velo_df['in_zone_what'] = knn.predict(exit_velo_df[['pitch_x_what','pitch_y_what']].fillna(0)) # #exit_velo_df['in_zone_what'] = (exit_velo_df.pitch_x_what > exit_velo_df.plate_negative-1.4/12)&(exit_velo_df.pitch_x_what < exit_velo_df.plate_positive+1.4/12)&(exit_velo_df.pitch_y_what > exit_velo_df.k_zone_bottom-1.4/12)&(exit_velo_df.pitch_y_what < exit_velo_df.k_zone_top+1.4/12) # import math # exit_velo_df['in_zone'] = [exit_velo_df['in_zone'][x] if not math.isnan(exit_velo_df['zone'][x]) else np.nan for x in range(len(exit_velo_df))] # exit_velo_df['in_zone_what'] =[exit_velo_df['in_zone_what'][x] if not math.isnan(exit_velo_df['pitch_x_what'][x]) else np.nan for x in range(len(exit_velo_df))] #exit_velo_df['in_zone_what'] = [exit_velo_df['in_zone_what'][x] if not math.isnan(exit_velo_df['pitch_x_what'][x]) else np.nan for x in range(len(exit_velo_df))] # if os.path.isfile('exit_velo_df.csv'): # exit_velo_df_full.play_id = exit_velo_df_full.play_id.astype(float) # exit_velo_df = pd.concat([exit_velo_df_full,exit_velo_df])# # print('check the length') # print(len(exit_velo_df)) # exit_velo_df = exit_velo_df.drop_duplicates(subset='play_id',keep='last') exit_velo_df = exit_velo_df.drop_duplicates(subset='play_id',keep='last') print('check the length') print(len(exit_velo_df)) player_id = [] team_id = [] for y in range(0,len(data_list)): #print(game_df_filter.game_id.reset_index(drop=True)[y]) player_id.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['person']['id'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']]) player_id.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['person']['id'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']]) # player_name.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['person']['fullName'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']]) # player_name.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['person']['fullName'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']]) team_id.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['parentTeamId'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']]) team_id.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['parentTeamId'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']]) game_id_pk_list = flat_list_games play_id_list_full = [str(game_id_pk_list[i])+str(1000+flat_list[i]['about']['atBatIndex'])+str(1000+flat_list[i]['playEvents'][-1]['index']) if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'startTime' in flat_list[i]['playEvents'][-1]) else np.nan for i in range(len(flat_list))] date_list = [(datetime.strptime(flat_list[i]['about']['startTime'][0:16], '%Y-%m-%dT%H:%M') - timedelta(hours=8)).date() if 'startTime' in flat_list[i]['about'] else np.nan for i in range(len(flat_list))] time_list = [flat_list[i]['playEvents'][-1]['startTime'] if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'startTime' in flat_list[i]['playEvents'][-1]) else np.nan for i in range(len(flat_list))] type_list = [flat_list[i]['result']['type'] if 'type' in flat_list[i]['result'] else np.nan for i in range(len(flat_list))] eventType_list = [flat_list[i]['result']['eventType'] if 'eventType' in flat_list[i]['result'] else np.nan for i in range(len(flat_list))] batter_id_type_list = [flat_list[i]['matchup']['batter']['id'] if 'id' in flat_list[i]['matchup']['batter'] else np.nan for i in range(len(flat_list))] batter_name_type_list = [flat_list[i]['matchup']['batter']['fullName'] if 'fullName' in flat_list[i]['matchup']['batter'] else np.nan for i in range(len(flat_list))] pitcher_id_type_list = [flat_list[i]['matchup']['pitcher']['id'] if 'id' in flat_list[i]['matchup']['pitcher'] else np.nan for i in range(len(flat_list))] pitcher_name_type_list = [flat_list[i]['matchup']['pitcher']['fullName'] if 'fullName' in flat_list[i]['matchup']['pitcher'] else np.nan for i in range(len(flat_list))] #play_id_type_list = [flat_list[i]['playEvents'][-1]['index'] if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'index' in flat_list[i]['playEvents'][-1]) else np.nan for i in range(len(flat_list))] is_out_id_type_list = [flat_list[i]['playEvents'][-1]['details']['isInPlay'] if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'isInPlay' in flat_list[i]['playEvents'][-1]['details']) else np.nan for i in range(len(flat_list))] is_in_play_id_type_list = [flat_list[i]['playEvents'][-1]['details']['isOut'] if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'isInPlay' in flat_list[i]['playEvents'][-1]['details']) else np.nan for i in range(len(flat_list))] complete_id_type_list = [flat_list[i]['about']['isComplete'] if 'isComplete' in flat_list[i]['about'] else np.nan for i in range(len(flat_list))] pa_df = pd.DataFrame(data={'game_id':game_id_pk_list,'play_id':play_id_list_full,'date':date_list,'start_time':time_list,'type':type_list,'event_type':eventType_list,'batter_id':batter_id_type_list, 'batter_name':batter_name_type_list,'pitcher_id':pitcher_id_type_list,'pitcher_name':pitcher_name_type_list, 'is_out':is_out_id_type_list,'is_in_play':is_in_play_id_type_list,'complete_id':complete_id_type_list}) pa_df = pa_df.dropna(subset=['play_id']).drop_duplicates(subset='play_id',keep='last').reset_index(drop=True) if os.path.isfile('pa_df.csv'): pa_df_full = pd.read_csv('pa_df.csv',index_col=[0]) pa_df = pd.concat([pa_df_full,pa_df]).dropna(subset=['play_id']).drop_duplicates(subset=['play_id'],keep='last').reset_index(drop=True) player_id_final = [item for sublist in player_id for item in sublist] #player_name_final = [item for sublist in player_name for item in sublist] team_id_final = [item for sublist in team_id for item in sublist] player_list = pd.DataFrame(data={'player_id':player_id_final,'team_id':team_id_final}) player_list = player_list.drop_duplicates(subset=['player_id'],keep='last') player_df_all = player_list.merge(right=mlb_teams_df[['team_id','abbreviation']],left_on = 'team_id',right_on='team_id',how='left').drop_duplicates(keep='last') # player_df_all = player_df_all.merge(right=player_list,left_on='team_id',right_on='team_id',how='left',suffixes=['','_y']) # player_df_all = player_df_all.drop(columns='player_id_y') codes_in = ['In play, out(s)', 'Swinging Strike', 'Ball', 'Foul', 'In play, no out', 'Called Strike', 'Foul Tip', 'In play, run(s)', 'Hit By Pitch', 'Ball In Dirt', 'Pitchout', 'Swinging Strike (Blocked)', 'Foul Bunt', 'Missed Bunt', 'Foul Pitchout', 'Intent Ball', 'Swinging Pitchout'] # import pygsheets # gc = pygsheets.authorize(service_file='stunning-hue-363921-db5ac144d947.json') # sh = gc.open_by_key('1hlPJ-oL60cqsXqZ4aGmU0L4HsFl-KJTSgR2Oi039jTY') exit_velo_df_copy = exit_velo_df.copy() exit_velo_df_copy = exit_velo_df_copy[exit_velo_df_copy['description'].isin(codes_in)] exit_velo_df_copy['pitch'] = exit_velo_df_copy.groupby('pitcher_id').cumcount() + 1 exit_velo_df_copy['pitch_type_count'] = exit_velo_df_copy.groupby(['pitcher_id','pitch_type']).cumcount() + 1 # if len(exit_velo_df_copy[exit_velo_df_copy['description'].isin(codes_in)].reset_index(drop=True)[len(sh[0].get_as_df()):].reset_index(drop=True).fillna('')) != 0: # sh[0].set_dataframe(exit_velo_df_copy[['game_id','date','pitcher_id','pitcher','pitch_type','pitch_velocity','code','pitch','pitch_type_count']][len(sh[0].get_as_df()):],(len(sh[0].get_as_df())+1,1), copy_head=False) exit_velo_df_batter = exit_velo_df.copy() exit_velo_df_batter = exit_velo_df_batter.merge(right=player_df_all,left_on='batter_id',right_on='player_id',how='left',suffixes=('','_y')) exit_velo_df = exit_velo_df.merge(right=player_df_all,left_on='pitcher_id',right_on='player_id',how='left',suffixes=('','_y')).merge(right=player_df_all,left_on='batter_id',right_on='player_id',how='left',suffixes=('','_batter')) codes_in = ['In play, out(s)', 'Swinging Strike', 'Ball', 'Foul', 'In play, no out', 'Called Strike', 'Foul Tip', 'In play, run(s)', 'Hit By Pitch', 'Ball In Dirt', 'Pitchout', 'Swinging Strike (Blocked)', 'Foul Bunt', 'Missed Bunt'] exit_velo_df.date = pd.to_datetime(exit_velo_df.date).dt.date pitch_df = exit_velo_df[exit_velo_df['description'].isin(codes_in)].groupby(['pitcher_id','pitcher','game_id','abbreviation','abbreviation_batter','date']).agg( pitches = ('pitcher_id','count'), #pitch_velocity = ('pitch_velocity','mean'), # pitch_velocity = ('launch_speed',percentile(95)), # launch_speed = ('launch_speed','mean'), # launch_angle = ('launch_angle','mean'), ).reset_index().sort_values(by='pitches',ascending=False) whiff_df = exit_velo_df[((exit_velo_df.code == 'S')|(exit_velo_df.code == 'W')|(exit_velo_df.code =='T'))].groupby(['pitcher_id','pitcher','game_id','abbreviation','abbreviation_batter','date']).agg( whiffs = ('pitcher_id','count'), #pitch_velocity = ('pitch_velocity','mean'), # pitch_velocity = ('launch_speed',percentile(95)), # launch_angle = ('launch_angle','mean'), ).reset_index().sort_values(by='whiffs',ascending=False) csw_df = exit_velo_df[((exit_velo_df.code == 'S')|(exit_velo_df.code == 'W')|(exit_velo_df.code =='T')|(exit_velo_df.code =='C'))].groupby(['pitcher_id','pitcher','game_id','abbreviation','abbreviation_batter','date']).agg( csw = ('pitcher_id','count'), #pitch_velocity = ('pitch_velocity','mean'), # pitch_velocity = ('launch_speed',percentile(95)), # launch_speed = ('launch_speed','mean'), # launch_angle = ('launch_angle','mean'), ).reset_index().sort_values(by='csw',ascending=False) pitch_df_batter = exit_velo_df[exit_velo_df['description'].isin(codes_in)].groupby(['abbreviation_batter']).agg( pitches = ('pitcher_id','count'), #pitch_velocity = ('pitch_velocity','mean'), # pitch_velocity = ('launch_speed',percentile(95)), # launch_speed = ('launch_speed','mean'), # launch_angle = ('launch_angle','mean'), ).reset_index().sort_values(by='abbreviation_batter',ascending=True) whiff_df_batter = exit_velo_df[((exit_velo_df.code == 'S')|(exit_velo_df.code == 'W')|(exit_velo_df.code =='T'))].groupby(['abbreviation_batter']).agg( whiffs = ('pitcher_id','count'), #pitch_velocity = ('pitch_velocity','mean'), # pitch_velocity = ('launch_speed',percentile(95)), # launch_angle = ('launch_angle','mean'), ).reset_index().sort_values(by='abbreviation_batter',ascending=True) csw_df_batter = exit_velo_df[((exit_velo_df.code == 'S')|(exit_velo_df.code == 'W')|(exit_velo_df.code =='T')|(exit_velo_df.code =='C'))].groupby(['abbreviation_batter']).agg( csw = ('pitcher_id','count'), #pitch_velocity = ('pitch_velocity','mean'), # pitch_velocity = ('launch_speed',percentile(95)), # launch_speed = ('launch_speed','mean'), # launch_angle = ('launch_angle','mean'), ).reset_index().sort_values(by='abbreviation_batter',ascending=True) pitch_df_batter = pitch_df_batter.merge(whiff_df_batter,how='left').merge(csw_df_batter,how='left').fillna(0) pitch_df_batter['whiffs'] = pitch_df_batter['whiffs'].astype(int) pitch_df_batter['csw'] = pitch_df_batter['csw'].astype(int) pitch_df_batter['whiff_rate'] = pitch_df_batter.whiffs/pitch_df_batter.pitches pitch_df_batter['csw_rate'] = pitch_df_batter.csw/pitch_df_batter.pitches # pitch_df_batter = pitch_df_batter.sort_values(by='csw_rate',ascending=False).reset_index(drop=True) # pitch_df_batter['rank'] = pitch_df_batter['csw_rate'].rank(ascending=False,method='min').astype(int) # #pitch_df_batter = pitch_df_batter[pitch_df_batter.pitches>=50] # pitch_df_batter = pitch_df_batter[['rank','pitcher','abbreviation','abbreviation_batter','pitches','whiffs','whiff_rate','csw','csw_rate']] # pitch_df_batter.columns = ['Rank','Pitcher','Team','Opp.','Pitches','Whiffs','SwStr%','CSW','CSW%'] exit_velo_df.date = pd.to_datetime(exit_velo_df.date).dt.date starter_list = exit_velo_df[['pitcher_id','pitcher','game_id','abbreviation']].drop_duplicates(subset=['game_id','abbreviation']).reset_index(drop=True) #pitcher_40_list = pitch_df[pitch_df['pitches']>=50][['pitcher_id','pitcher','parent_org_abb_pitcher']].reset_index(drop=True) #starter_list = starter_list.append(pitcher_40_list).drop_duplicates(subset='pitcher_id') print('Creating df:') print('Creating df:') pitch_whiff_df = pitch_df.merge(whiff_df,how='left').merge(csw_df,how='left').fillna(0) #pitch_whiff_df = pitch_whiff_df[((pitch_whiff_df['pitcher_id'].isin(starter_list.pitcher_id))&(pitch_whiff_df['pitches']>=0))|(pitch_whiff_df['pitches']>=0)] pitch_whiff_df.date = pd.to_datetime(pitch_whiff_df.date).dt.date # pitch_whiff_df = pitch_whiff_df[pitch_whiff_df.date == (datetime.today()-timedelta(hours=time_delta_fill)).date()] pitch_whiff_df['whiffs'] = pitch_whiff_df['whiffs'].astype(int) pitch_whiff_df['csw'] = pitch_whiff_df['csw'].astype(int) pitch_whiff_df['whiff_rate'] = pitch_whiff_df.whiffs/pitch_whiff_df.pitches pitch_whiff_df['csw_rate'] = pitch_whiff_df.csw/pitch_whiff_df.pitches pitch_whiff_df = pitch_whiff_df.sort_values(by='whiffs',ascending=False).reset_index(drop=True) pitch_whiff_df['rank'] = pitch_whiff_df['whiffs'].rank(ascending=False,method='min').astype(int) pitch_whiff_df = pitch_whiff_df.merge(right=pitcher_summary_df,left_on=['pitcher_id','game_id'],right_on=['pitcher_id','game_id'],how='left') #pitch_whiff_df = pitch_whiff_df[pitch_whiff_df.pitches>=50] pitch_whiff_df = pitch_whiff_df[['rank','pitcher','abbreviation','abbreviation_batter','pitches','whiffs','whiff_rate','csw','csw_rate']] pitch_whiff_df.columns = ['Rank','Pitcher','Team','Opp.','Pitches','Whiffs','SwStr%','CSW','CSW%'] print(pitch_whiff_df)#.sort_values(by='whiffs',ascending=False) # import matplotlib.pyplot as plt # import matplotlib.colors # cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#FFFFFF","#F0E442"]) top_d_score = pitch_df.merge(whiff_df,how='left').merge(csw_df,how='left').fillna(0) top_d_score = pitch_df.merge(whiff_df,how='left').merge(csw_df,how='left').fillna(0) #top_d_score = top_d_score[(top_d_score['pitcher_id'].isin(starter_list.pitcher_id))|(top_d_score['pitches']>=50)] #top_d_score = top_d_score[((top_d_score['pitcher_id'].isin(starter_list.pitcher_id))&(top_d_score['pitches']>=50))|(top_d_score['pitches']>=50)] top_d_score.date = pd.to_datetime(top_d_score.date).dt.date # top_d_score = top_d_score[top_d_score.date == (datetime.today()-timedelta(hours=time_delta_fill)).date()] top_d_score['whiffs'] = top_d_score['whiffs'].astype(int) top_d_score['csw'] = top_d_score['csw'].astype(int) top_d_score['whiff_rate'] = top_d_score.whiffs/top_d_score.pitches top_d_score['csw_rate'] = top_d_score.csw/top_d_score.pitches top_d_score = top_d_score.merge(right=pitcher_summary_df,left_on=['pitcher_id','game_id'],right_on=['pitcher_id','game_id']) top_d_score.summary = top_d_score.summary.str.replace(', ER', ', 1 ER') top_d_score.summary = top_d_score.summary.str.replace(', K', ', 1 K') top_d_score.summary = top_d_score.summary.str.replace(', BB', ', 1 BB') top_d_score = top_d_score.merge(right=pitch_df_batter[['abbreviation_batter','csw_rate']],left_on='abbreviation_batter',right_on='abbreviation_batter',how='left',suffixes=['','_opp']) top_d_score['diff'] = top_d_score.csw_rate - top_d_score.csw_rate_opp top_d_score = top_d_score.sort_values(by=['whiffs','csw_rate'],ascending=False).reset_index(drop=True) top_d_score['rank'] = top_d_score['whiffs'].rank(ascending=False,method='min').astype(int) pitcher_to_select = top_d_score['pitcher_id'].values[0] top_d_score = top_d_score[['rank','pitcher','abbreviation','abbreviation_batter','pitches','whiffs','whiff_rate','csw','csw_rate','k_bb_percent','summary']]#.head(30) top_d_score.columns = ['Rank','Pitcher','Team','Opp.','Pitches','Whiffs','SwStr%','CSW','CSW%','K-BB%','Summary'] cols = top_d_score.columns.tolist(); #top_d_score = top_d_score[cols[:3] + cols[3:9]]; #top_d_score = top_d_score[cols[:3] + cols[3:9]]; df_combined_t_style = top_d_score.style.set_properties(**{'border': '0.4 px'},overwrite=False).set_caption('MLB Daily Whiff Leaders - ' + str((datetime.today()-timedelta(hours=time_delta_fill)).date())).set_table_styles([{ 'selector': 'caption', 'props': [ ('color', ''), ('fontname', 'Century Gothic'), ('font-size', '24px'), ('font-style', 'italic'), ('font-weight', ''), ('text-align', 'centre'), ] },{'selector' :'th', 'props':[('text-align', 'center'),('Height','24px'),('border', '0.4px black solid !important'),('font-size', '16px')]},{'selector' :'td', 'props':[('text-align', 'center'),('font-size', '16px')]}],overwrite=False).set_table_styles( [{'selector': 'tr', 'props': [('line-height', '0.4px')]}],overwrite=False).set_properties( **{'Height': '24px'},**{'text-align': 'center'},overwrite=False).hide_index() cmap_sum_2 = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#FFFFFF","#F0E442"]) ##df_combined_t_style = df_combined_t_style.format({df_combined_t_style.columns[-1]:"{0:+g}"}) df_combined_t_style = df_combined_t_style.background_gradient(cmap=cmap_sum_2,subset = ((list(top_d_score.index[:]),top_d_score.columns[5]))) df_combined_t_style = df_combined_t_style.background_gradient(cmap=cmap_sum,subset = ((list(top_d_score.index[:]),top_d_score.columns[8])),vmax=.40,vmin=.20) df_combined_t_style = df_combined_t_style.background_gradient(cmap=cmap_sum,subset = ((list(top_d_score.index[:]),top_d_score.columns[6])),vmax=.20,vmin=.05) #df_combined_t_style = df_combined_t_style.background_gradient(cmap=cmap_sum,subset = ((list(top_d_score.index[:]),top_d_score.columns[-3])),vmax=.1,vmin=-0.1) df_combined_t_style = df_combined_t_style.background_gradient(cmap=cmap_sum,subset = ((list(top_d_score.index[:]),top_d_score.columns[-2])),vmax=0.4,vmin=-.1) #df_combined_t_style = df_combined_t_style.background_gradient(cmap=['#d4af37'],subset = ((list(top_d_score[top_d_score['CSW%']>.40].index[:]),top_d_score.columns[-4]))) def cond_formatting_csw(x): if x > 0.45: return 'background-color: #d4af37' else: return None def cond_formatting_whiff(x): if x > 0.25: return 'background-color: #d4af37' else: return None def cond_formatting_diff(x): if x > 0.15: return 'background-color: #d4af37' else: return None df_combined_t_style = df_combined_t_style.applymap(cond_formatting_csw,subset=((list(top_d_score.index[:]),top_d_score.columns[8]))) df_combined_t_style = df_combined_t_style.applymap(cond_formatting_whiff,subset=((list(top_d_score.index[:]),top_d_score.columns[6]))) #df_combined_t_style = df_combined_t_style.applymap(cond_formatting_diff,subset=((list(top_d_score.index[:]),top_d_score.columns[-3]))) df_combined_t_style = df_combined_t_style.format( {df_combined_t_style.columns[6]: '{:,.1%}'.format, df_combined_t_style.columns[5]: '{:,.0f}'.format, df_combined_t_style.columns[8]: '{:,.1%}'.format, df_combined_t_style.columns[7]: '{:,.0f}'.format, #df_combined_t_style.columns[-3]: '{:,.1%}'.format, #df_combined_t_style.columns[-4]: '{:,.1%}'.format, df_combined_t_style.columns[-2]:'{0:.1%}'.format, # df_combined_t_style.columns[-3]:'{0:+.1%}' }) df_combined_t_style = df_combined_t_style.set_properties( **{'border': '0.4px black solid !important'},subset = ((list(top_d_score.index[:-1]),top_d_score.columns[:]))).set_properties( **{'min-width':'75px'},subset = ((list(top_d_score.index[:-1]),top_d_score.columns[1])),overwrite=False).set_properties( **{'min-width':'50px'},subset = ((list(top_d_score.index[:-1]),top_d_score.columns[2:])),overwrite=False).set_properties( **{'min-width':'75px'},subset = ((list(top_d_score.index[:-1]),top_d_score.columns[-1])),overwrite=False).set_properties( **{'border': '0.4px black solid !important'},subset = ((list(top_d_score.index[:]),top_d_score.columns[:]))) return df_combined_t_style app = App(app_ui, server)