# nesticot's picture
# Update app.py
# ccb7c2c verified
# --- Imports & shared service objects ---------------------------------------
import polars as pl
import numpy as np
import pandas as pd
import api_scraper
# Single MLB StatsAPI scraper instance shared by every renderer below.
scrape = api_scraper.MLB_Scrape()
from functions import df_update
from functions import pitch_summary_functions
# Helper that normalizes/patches freshly scraped pitch data before scoring.
update = df_update.df_update()
from stuff_model import feature_engineering as fe
from stuff_model import stuff_apply
import requests
import joblib
from matplotlib.gridspec import GridSpec
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
# NOTE(review): this star import appears to supply the plotting helpers used
# later (player_headshot, break_plot, Image, BytesIO, ...) — confirm.
from functions.pitch_summary_functions import *
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags
import datetime
import matplotlib.colors
import pandas as pd
from stuff_model import calculate_arm_angles as caa
# Diverging colormap for styled stat tables: blue (below avg) -> white -> amber (above avg).
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#648FFF","#FFFFFF","#FFB000"])
# Categorical palette used for per-pitch-type plot colours.
colour_palette = ['#FFB000','#648FFF','#785EF0',
'#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
# Seasons the app knows about.
year_list = [2017,2018,2019,2020,2021,2022,2023,2024]
# MLB StatsAPI sport_id (as string) -> human-readable level label for the UI.
level_dict = {'1':'MLB',
'11':'AAA',
'12':'AA',
'13':'A+',
'14':'A',
'16':'ROK',
'17':'AFL',
'22':'College',
'21':'Prospects',
'51':'International' }
# Plot selector: internal plot key -> label shown in the three plot dropdowns.
# Fix vs. original: the RHH/LHH labels were swapped — the '_rhh' key filters
# batter_hand == 'R' in the plot renderer, so it must be labelled "RHH".
# NOTE: the 'tj_stuff_roling*' keys are misspelled on purpose; they must match
# the keys checked inside the plot() renderer, so do not rename them here alone.
function_dict={
'velocity_kdes':'Velocity Distributions',
'break_plot':'Pitch Movement',
'break_plot_rhh':'Pitch Movement RHH',
'break_plot_lhh':'Pitch Movement LHH',
'tj_stuff_roling':'Rolling tjStuff+ by Pitch',
'tj_stuff_roling_game':'Rolling tjStuff+ by Game',
'location_plot_lhb':'Locations vs LHB',
'location_plot_rhb':'Locations vs RHB',
}
# Batter-handedness split: UI value -> display label.
split_dict = {'all':'All',
'left':'LHH',
'right':'RHH'}
# Batter-handedness split: UI value -> list of batter_hand codes to keep.
split_dict_hand = {'all':['L','R'],
'left':['L'],
'right':['R']}
# List of MLB teams and their corresponding ESPN logo URLs
mlb_teams = [
{"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"},
{"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"},
{"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"},
{"team": "BAL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bal.png&h=500&w=500"},
{"team": "BOS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bos.png&h=500&w=500"},
{"team": "CHC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chc.png&h=500&w=500"},
{"team": "CWS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chw.png&h=500&w=500"},
{"team": "CIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cin.png&h=500&w=500"},
{"team": "CLE", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cle.png&h=500&w=500"},
{"team": "COL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/col.png&h=500&w=500"},
{"team": "DET", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/det.png&h=500&w=500"},
{"team": "HOU", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/hou.png&h=500&w=500"},
{"team": "KC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/kc.png&h=500&w=500"},
{"team": "LAA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/laa.png&h=500&w=500"},
{"team": "LAD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/lad.png&h=500&w=500"},
{"team": "MIA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mia.png&h=500&w=500"},
{"team": "MIL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mil.png&h=500&w=500"},
{"team": "MIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/min.png&h=500&w=500"},
{"team": "NYM", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nym.png&h=500&w=500"},
{"team": "NYY", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nyy.png&h=500&w=500"},
{"team": "PHI", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/phi.png&h=500&w=500"},
{"team": "PIT", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/pit.png&h=500&w=500"},
{"team": "SD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sd.png&h=500&w=500"},
{"team": "SF", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sf.png&h=500&w=500"},
{"team": "SEA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sea.png&h=500&w=500"},
{"team": "STL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/stl.png&h=500&w=500"},
{"team": "TB", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tb.png&h=500&w=500"},
{"team": "TEX", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tex.png&h=500&w=500"},
{"team": "TOR", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tor.png&h=500&w=500"},
{"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"},
{"team": "ZZZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"}
]
# Frame form kept for any downstream use; the lookup dicts are built directly.
df_image = pd.DataFrame(mlb_teams)
# team abbreviation -> logo URL, and the reverse lookup for the logo dropdown.
image_dict = dict(zip(df_image['team'], df_image['logo_url']))
image_dict_flip = dict(zip(df_image['logo_url'], df_image['team']))
import requests
import os

# Patreon API credentials and fallback app passwords come from the environment.
CAMPAIGN_ID = os.getenv("CAMPAIGN_ID")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
BACKUP_PW = os.getenv("BACKUP_PW")
ADMIN_PW = os.getenv("ADMIN_PW")

url = f"https://www.patreon.com/api/oauth2/v2/campaigns/{CAMPAIGN_ID}/members"
headers = {
    "Authorization": f"Bearer {ACCESS_TOKEN}"
}
# Request each member's name/email plus their currently entitled tiers.
params = {
    "fields[member]": "full_name,email",
    "include": "currently_entitled_tiers",
    "page[size]": 10000  # fetch up to 10000 patrons per request
}

# Build the login allow-list: the email of every patron entitled to tier
# 9078921, plus the two passwords configured via environment variables.
# Fixes vs. original: the request now has a timeout and a network failure no
# longer crashes app startup; unset env vars (None) are never appended.
VALID_PASSWORDS = []
try:
    response = requests.get(url, headers=headers, params=params, timeout=30)
except requests.RequestException:
    response = None
if response is not None and response.status_code == 200:
    data = response.json()
    for patron in data['data']:
        try:
            tiers = patron['relationships']['currently_entitled_tiers']['data']
            if any(tier['id'] == '9078921' for tier in tiers):
                VALID_PASSWORDS.append(patron['attributes']['email'])
        except KeyError:
            # Patron record without tier data — skip it.
            continue
for pw in (BACKUP_PW, ADMIN_PW):
    if pw:
        VALID_PASSWORDS.append(pw)
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags
# Define the login UI
# --- Login screen ------------------------------------------------------------
# Shown until the hidden "authenticated" checkbox is flipped by the server.
_login_title = ui.h2([
    "TJStats Daily Pitching Summary App ",
    ui.tags.a("(@TJStats)", href="https://twitter.com/TJStats", target="_blank"),
])
_login_blurb = ui.p(
    "This App is available to Superstar Patrons. Please enter your Patreon email address in the box below. If you're having trouble, please refer to the ",
    ui.tags.a("Patreon post", href="https://www.patreon.com/posts/122860440", target="_blank"),
    ".",
)
login_ui = ui.page_fluid(
    ui.card(
        _login_title,
        _login_blurb,
        ui.input_password("password", "Enter Patreon Email (or Password from Link):", width="25%"),
        # Hidden, disabled flag; the conditional panels key off input.authenticated.
        ui.tags.input(
            type="checkbox",
            id="authenticated",
            value=False,
            disabled=True,
        ),
        ui.input_action_button("login", "Login", class_="btn-primary"),
        ui.output_text("login_message"),
    )
)
# Define the UI layout for the app.
# Sidebar: date/level/game/pitcher pickers plus plot configuration.
# Main area: tabs for the summary figure and several data tables.
main_ui = ui.page_sidebar(
ui.sidebar(
# Row for selecting season and level
ui.row(
ui.column(6, ui.input_date('date_input', 'Select Date')),
ui.column(6, ui.input_select('level_input', 'Select Level', level_dict))
),
ui.row(ui.input_action_button("game_button", "Get Games", class_="btn-primary")),
# Server-rendered selectors, populated after "Get Games" is pressed.
# NOTE(review): the second positional argument to ui.output_ui here
# ('Select Game' / 'Select Player') is not a label parameter — confirm intent.
ui.row(
ui.row(ui.column(12, ui.output_ui('game_select_ui', 'Select Game'))),
ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))),
ui.row(ui.column(12, ui.output_data_frame('season_stats'))),
),
# Rows for selecting plots and split options
ui.row(
ui.column(4, ui.input_select('plot_id_1', 'Plot Left', function_dict, multiple=False, selected='location_plot_lhb')),
ui.column(4, ui.input_select('plot_id_2', 'Plot Middle', function_dict, multiple=False, selected='break_plot')),
ui.column(4, ui.input_select('plot_id_3', 'Plot Right', function_dict, multiple=False, selected='location_plot_rhb'))
),
ui.row(
ui.column(6, ui.input_select('split_id', 'Select Split', split_dict, multiple=False)),
ui.column(6, ui.input_numeric('rolling_window', 'Rolling Window (for tjStuff+ Plot)', min=1, value=50))
),
# Optional custom-logo override for the figure's logo slot.
ui.row(
ui.column(6, ui.input_switch("switch", "Custom Team?", False)),
ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False))
),
# Row for the action button to generate plot
ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary"))
,width='500px'),
# Main content area with tabs (placed directly in page_sidebar)
ui.navset_tab(
ui.nav_panel("Pitching Summary",
ui.output_text("status"),
ui.output_plot('plot', width='2100px', height='2100px')
),
ui.nav_panel("PBP Data",
ui.output_data_frame("grid_pbp")),
ui.nav_panel("Table Summary",
ui.output_data_frame("grid_summary")),
ui.nav_panel("Daily Table",
ui.output_data_frame("grid")),
# Styled leaderboard tab with a client-side "save as PNG" helper.
ui.nav_panel("Daily Table Style",
ui.input_numeric('head', 'Table Limit', min=0, value=10),
ui.input_numeric('pitch_min', 'Pitch Min.', min=0, value=10),
ui.card(
{"style": "width: 1560px;"},
ui.head_content(
# html2canvas renders #capture-section into a downloadable PNG on click.
ui.tags.script(src="https://html2canvas.hertzen.com/dist/html2canvas.min.js"),
ui.tags.script("""
async function downloadPNG() {
const content = document.getElementById('capture-section');
try {
// Create a wrapper div with right margin only
const wrapper = document.createElement('div');
wrapper.style.paddingRight = '20px';
wrapper.style.paddingLeft = '20px';
wrapper.style.paddingTop = '20px';
wrapper.style.backgroundColor = 'white';
// Clone the content
const clonedContent = content.cloneNode(true);
wrapper.appendChild(clonedContent);
// Add wrapper to document temporarily
document.body.appendChild(wrapper);
const canvas = await html2canvas(wrapper, {
backgroundColor: 'white',
scale: 2,
useCORS: true,
logging: false,
width: content.offsetWidth + 20,
height: content.offsetHeight + 50
});
// Remove temporary wrapper
document.body.removeChild(wrapper);
// Convert canvas to blob
canvas.toBlob(function(blob) {
const url = URL.createObjectURL(blob);
const link = document.createElement('a');
link.href = url;
link.download = 'stats_card.png';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
URL.revokeObjectURL(url);
}, 'image/png');
} catch (error) {
console.error('Error generating PNG:', error);
}
}
$(document).on('click', '#capture_png_btn', function() {
downloadPNG();
});
""")
),
# Everything inside #capture-section is what gets exported to PNG.
ui.div(
{
"id": "capture-section",
"style": "background-color: white; padding: 0; margin-left: 20px; margin-right: 20px; margin-top: 20px; margin-bottom: 20px;"
},
ui.div({"style": "font-size:3em;"}, ui.output_text("style_title")),
ui.div({"style": "font-size:1.5em;"}, ui.output_text("min_title")),
ui.br(),
ui.output_table("grid_style"),
ui.br(),
ui.row(
ui.column(8,
ui.div(
{"style": "text-align: left;"},
ui.markdown("### By: @TJStats"),
ui.markdown("### Data: MLB"),
)
),
ui.column(4,
ui.div(
{"style": "text-align: left; height: 86px; display: flex; justify-content: flex-end;"},
ui.output_image("logo", height="86px")
)
)
),
ui.div({"style": "height: 20px;"})
),
ui.div(
{"style": "display: flex; gap: 10px;"},
ui.input_action_button("capture_png_btn", "Save as PNG", class_="btn-success"),
),
)
),
ui.nav_panel("Whiffs Table",
ui.output_table("whiff_table")),
)
)
# Top-level page: swaps between the login screen and the main app based on the
# client-side "authenticated" flag (flipped by the server after login).
app_ui = ui.page_fluid(
    ui.tags.head(ui.tags.script(src="script.js")),
    ui.panel_conditional("!input.authenticated", login_ui),
    ui.panel_conditional("input.authenticated", main_ui),
)
def server(input, output, session):
@reactive.Effect
@reactive.event(input.login)
def check_password():
if input.password() in VALID_PASSWORDS:
ui.update_checkbox("authenticated", value=True)
ui.update_text("login_message", value="")
else:
ui.update_text("login_message", value="Invalid password!")
ui.update_text("password", value="")
@output
@render.text
def login_message():
return ""
@render.image
def logo():
# You'll need to provide the actual image path or URL here
return {"src": "tjstats_logo.jpg"}
@render.text
def style_title():
return f"Daily {level_dict[input.level_input()]} tjStuff+ Leaders - {str(input.date_input())}"
@render.text
def min_title():
return f"(Min. {int(input.pitch_min())} Pitches)"
    @render.ui
    @reactive.event(input.game_button,input.date_input,input.level_input, ignore_none=False)
    def game_select_ui():
        """Build the game dropdown for the chosen date/level.

        Re-runs when the Get Games button, the date, or the level changes.
        """
        # Pull the season schedule, then narrow it down:
        #  - game_type codes cover spring/regular/post/exhibition/etc.;
        #  - NOTE(review): the gameday_type P/E OR venue_id 7250/2532 filter
        #    looks like a data-quality allow-list — confirm its intent;
        #  - state codes keep games that are live or finished.
        df = (scrape.get_schedule(year_input=[int(str(input.date_input())[:4])],
        sport_id=[int(input.level_input())],
        game_type=['S','R','P','E','A','I','W','F','L','D'])
        .filter((pl.col('gameday_type').is_in(['P', 'E'])) | (pl.col('venue_id').is_in([7250,2532])))
        .filter(pl.col('state').is_in(['I','M','N','O','F','T','U','Q','R','D']))
        .with_columns(pl.col('date').cast(pl.Utf8))
        .filter(pl.col('date') == str(input.date_input()))).with_columns(
        (pl.col('away')+' @ '+pl.col('home')+' - '+pl.col('state')).alias('matchup')).sort('time')
        # game_id -> "AWAY @ HOME - state" label for the dropdown.
        game_dict = dict(zip(df['game_id'], df['matchup']))
        print('GAMES')
        print(game_dict)
        return ui.input_select("game_id", "Select Game", game_dict)
    @render.ui
    @reactive.event(input.game_id)
    def player_select_ui():
        """Build the pitcher dropdown for the currently selected game."""
        try:
            # Get the list of pitchers for the selected level and season
            data_list = scrape.get_data(game_list_input = [int(input.game_id())])
            print('DATALIST')
            print(input.game_id())
            # Score every pitch (all pitchers) so the dropdown can be built
            # from the scraped frame; the split filter mirrors the renderers.
            df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter(
            (pl.col("is_pitch") == True)&
            (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
            )))).with_columns(
            pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
            ).with_columns(
            (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name'))
            )
            # pitcher_id -> "Name - TEAM" label.
            pitcher_dict = dict(zip(df['pitcher_id'], df['pitcher_name']))
            print('PITCHERS')
            print(pitcher_dict)
            return ui.input_select("pitcher_id", "Select Pitcher", pitcher_dict)
        except Exception as e:
            print(e)
            # NOTE(review): ui.output_text takes an output id, not message
            # text — the second positional argument here is suspect; confirm
            # this fallback actually renders the "No pitchers" message.
            return ui.output_text('pitcher_id',"No pitchers available for this game")
@output
@render.text
def status():
# Only show status when generating
if input.generate == 0:
return ""
return ""
    @output
    @render.data_frame
    def season_stats():
        """Small stat line for the selected pitcher.

        MLB (level 1): season and single-day pitching lines from the
        Fangraphs leaders API. Other levels: the pitcher's line straight
        from the MLB StatsAPI box score.
        """
        if int(input.level_input()) == 1:
            season = int(str(input.date_input())[:4])
            # NOTE(review): this also rebinds a local name `url` (shadowing the
            # module-level Patreon URL) — the double assignment looks accidental.
            # NOTE(review): the triple-quoted URLs embed newlines/whitespace;
            # confirm the Fangraphs endpoint tolerates that.
            url_season = url = f"""
https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&season={season}&season1={season}
&startdate={season}-01-01&enddate={season}-12-01&ind=0&qual=0&type=8&month=33&pageitems=500000
"""
            url_day = f"""
https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=y&season={season}&season1={season}
&startdate={season}-03-01&enddate={season}-11-01
&month=34&hand=&team=0&pageitems=500000&ind=0&rost=0&players=&type=8
&sortstat=WAR
"""
            data_season = requests.get(url_season).json()
            df_season = pl.DataFrame(data=data_season['data'], infer_schema_length=1000)
            df_season = df_season.with_columns(pl.lit('Season').alias('Time'))
            data_day = requests.get(url_day).json()
            # Only add a "Today" row when the daily split actually covers the
            # selected date.
            if data_day['dateRange'][:10] == str(input.date_input()):
                df_day = pl.DataFrame(data=data_day['data'], infer_schema_length=1000)
                df_day = df_day.with_columns(pl.lit('Today').alias('Time'))
                df_all = pl.DataFrame(pd.concat([df_day.to_pandas(),df_season.to_pandas()]))
                df_player = df_all.filter(pl.col('xMLBAMID')==int(input.pitcher_id()))
            else:
                df_player = df_season.filter(pl.col('xMLBAMID')==int(input.pitcher_id()))
            return render.DataGrid(
                df_player.select(['Time','IP','TBF','R','ER','SO','BB','ERA','FIP','WHIP']).to_pandas().round(2),
                row_selection_mode='multiple',
                height='700px',
                width='fit-content',
            )
        else:
            # Minor leagues: read the line from the game feed's box score.
            d = scrape.get_data([int(input.game_id())])
            player_id = f'ID{int(input.pitcher_id())}'
            home_players = d[0]['liveData']['boxscore']['teams']['home']['players']
            away_players = d[0]['liveData']['boxscore']['teams']['away']['players']
            if player_id in home_players:
                batters_faced = home_players[player_id]['stats']['pitching']['battersFaced']
                team_side = 'home'
            elif player_id in away_players:
                batters_faced = away_players[player_id]['stats']['pitching']['battersFaced']
                team_side = 'away'
            else:
                # NOTE(review): if the pitcher is on neither roster, team_side
                # stays None and the lookups below raise KeyError.
                batters_faced = None
                team_side = None
            pitches = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['pitchesThrown']
            innings = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['inningsPitched']
            battersFaced = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['battersFaced']
            runs = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['runs']
            earnedRuns = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['earnedRuns']
            strikeOuts = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['strikeOuts']
            baseOnBalls = d[0]['liveData']['boxscore']['teams'][team_side]['players'][player_id]['stats']['pitching']['baseOnBalls']
            df_player = pl.DataFrame({
                'Pitches': int(pitches),
                'IP': float(innings),
                'PA': int(battersFaced),
                'R': int(runs),
                'ER': int(earnedRuns),
                'K': int(strikeOuts),
                'BB': int(baseOnBalls)
            })
            return render.DataGrid(
                df_player.to_pandas().round(2),
                row_selection_mode='multiple',
                height='700px',
                width='fit-content',
            )
    @output
    @render.plot
    @reactive.event(input.generate_plot, ignore_none=False)
    def plot():
        """Render the full pitching-summary figure for the selected pitcher.

        GridSpec layout: header row; headshot/bio/logo row; daily stat table;
        three user-selected plot panels; pitch-type summary table; footer;
        plus two watermark layers.
        """
        # Show progress/loading notification
        with ui.Progress(min=0, max=1) as p:
            p.set(message="Generating plot", detail="This may take a while...")
            p.set(0.3, "Gathering data...")
            data_list = scrape.get_data(game_list_input = [int(input.game_id())])
            # Score the selected pitcher's pitches for the chosen split;
            # null extension readings are defaulted to 6.2 ft before scoring.
            df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter(
            (pl.col("pitcher_id") == int(input.pitcher_id()))&
            (pl.col("is_pitch") == True)&
            (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
            ))).with_columns(
            pl.col("extension").fill_null(6.2)
            )).with_columns(
            pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
            ))
            df = df.clone()
            p.set(0.6, "Creating plot...")
            fig = plt.figure(figsize=(26,26))
            plt.rcParams.update({'figure.autolayout': True})
            fig.set_facecolor('white')
            sns.set_theme(style="whitegrid", palette=colour_palette)
            print('this is the one plot')
            # 6x8 grid; slim gutter columns (first/last) frame the content.
            gs = gridspec.GridSpec(6, 8,
            height_ratios=[6,20,12,36,36,6],
            width_ratios=[4,18,18,18,18,18,18,4])
            gs.update(hspace=0.2, wspace=0.5)
            # Define the positions of each subplot in the grid
            ax_headshot = fig.add_subplot(gs[1,1:3])
            ax_bio = fig.add_subplot(gs[1,3:5])
            ax_logo = fig.add_subplot(gs[1,5:7])
            ax_season_table = fig.add_subplot(gs[2,1:7])
            ax_plot_1 = fig.add_subplot(gs[3,1:3])
            ax_plot_2 = fig.add_subplot(gs[3,3:5])
            ax_plot_3 = fig.add_subplot(gs[3,5:7])
            ax_table = fig.add_subplot(gs[4,1:7])
            ax_footer = fig.add_subplot(gs[-1,1:7])
            ax_header = fig.add_subplot(gs[0,1:7])
            ax_left = fig.add_subplot(gs[:,0])
            ax_right = fig.add_subplot(gs[:,-1])
            # Hide axes for footer, header, left, and right
            ax_footer.axis('off')
            ax_header.axis('off')
            ax_left.axis('off')
            ax_right.axis('off')
            sns.set_theme(style="whitegrid", palette=colour_palette)
            fig.set_facecolor('white')
            df_teams = scrape.get_teams()
            year_input = int(str(input.date_input())[:4])
            sport_id = int(input.level_input())
            player_input = int(input.pitcher_id())
            team_id = df['pitcher_team_id'][0]
            # Headshot/bio/logo helpers come from the star import of
            # functions.pitch_summary_functions.
            player_headshot(player_input=player_input, ax=ax_headshot,sport_id=sport_id,season=year_input)
            player_bio(pitcher_id=player_input, ax=ax_bio,sport_id=sport_id,year_input=year_input)
            if input.switch():
                # Custom team: draw the user-chosen ESPN logo instead of the
                # pitcher's actual team logo.
                logo_url = input.logo_select()
                response = requests.get(logo_url)
                # NOTE(review): Image/BytesIO are presumably re-exported by the
                # pitch_summary_functions star import — confirm.
                img = Image.open(BytesIO(response.content))
                ax_logo.set_xlim(0, 1.3)
                ax_logo.set_ylim(0, 1)
                ax_logo.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')
                ax_logo.axis('off')
            else:
                plot_logo(pitcher_id=player_input, ax=ax_logo, df_team=df_teams,df_players=scrape.get_players(sport_id,year_input),team_id=team_id)
            stat_daily_summary(df=df,
            data=data_list,
            player_input=int(input.pitcher_id()),
            sport_id=int(input.level_input()),
            ax=ax_season_table)
            # Draw the three user-selected panels; z is the grid column index
            # used by the KDE helper, which re-slices the GridSpec itself.
            for x,y,z in zip([input.plot_id_1(),input.plot_id_2(),input.plot_id_3()],[ax_plot_1,ax_plot_2,ax_plot_3],[1,3,5]):
                if x == 'velocity_kdes':
                    velocity_kdes(df,
                    ax=y,
                    gs=gs,
                    gs_x=[3,4],
                    gs_y=[z,z+2],
                    fig=fig)
                if x == 'tj_stuff_roling':
                    tj_stuff_roling(df=df,
                    window=int(input.rolling_window()),
                    ax=y)
                if x == 'tj_stuff_roling_game':
                    tj_stuff_roling_game(df=df,
                    window=int(input.rolling_window()),
                    ax=y)
                if x == 'break_plot':
                    break_plot(df = df,ax=y)
                if x == 'location_plot_lhb':
                    location_plot(df = df,ax=y,hand='L')
                if x == 'location_plot_rhb':
                    location_plot(df = df,ax=y,hand='R')
                if x == 'break_plot_rhh':
                    break_plot(df = df.filter(pl.col('batter_hand')=='R'),ax=y)
                if x == 'break_plot_lhh':
                    break_plot(df = df.filter(pl.col('batter_hand')=='L'),ax=y)
            summary_table(df=df,
            ax=ax_table)
            plot_footer(ax_footer)
            # Faint full-figure watermark behind the content.
            ax_watermark = fig.add_subplot(gs[1:-1,1:-1],zorder=-1)
            # Hide axes ticks and labels
            ax_watermark.set_xticks([])
            ax_watermark.set_yticks([])
            ax_watermark.set_frame_on(False) # Optional: Hide border
            img = Image.open('tj stats circle-01_new.jpg')
            img = img.convert("LA")
            # Display the image
            ax_watermark.imshow(img, extent=[0, 1, 0, 1], origin='upper',zorder=-1, alpha=0.06)
            # Small opaque logo near the bottom-left corner.
            ax_watermark2 = fig.add_subplot(gs[-2:,1:4],zorder=1)
            ax_watermark2.set_xlim(0,1)
            ax_watermark2.set_ylim(0,1)
            # Hide axes ticks and labels
            ax_watermark2.set_xticks([])
            ax_watermark2.set_yticks([])
            ax_watermark2.set_frame_on(False) # Optional: Hide border
            # Open the image
            img = Image.open('tj stats circle-01_new.jpg')
            # Get the original size
            width, height = img.size
            new_width = int(width * 0.5)
            new_height = int(height * 0.5)
            # NOTE(review): img_resized is computed but never drawn — imshow
            # below uses the full-size img (the extent controls on-screen
            # size), so this resize appears to be dead code.
            img_resized = img.resize((new_width, new_height))
            # Display the image
            ax_watermark2.imshow(img, extent=[0.26, 0.46, 0.0,0.2], origin='upper',zorder=-1, alpha=1)
            fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)
@output
@render.data_frame
@reactive.event(input.generate_plot, ignore_none=False)
def grid_summary():
data_list = scrape.get_data(game_list_input = [int(input.game_id())])
df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter(
(pl.col("pitcher_id") == int(input.pitcher_id()))&
(pl.col("is_pitch") == True)&
(pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
))).with_columns(
pl.col("extension").fill_null(6.2)
)).with_columns(
pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
))
df = df.clone()
features_table = ['start_speed',
'spin_rate',
'extension',
'ivb',
'hb',
'x0',
'z0']
selection = ['game_id','pitcher_id','pitcher_name','batter_id','batter_name','pitcher_hand',
'batter_hand','balls','strikes','play_code','event_type','pitch_type','vaa','haa']+features_table+['tj_stuff_plus','pitch_grade']
return render.DataGrid(
df.select(selection).to_pandas().round(1),
row_selection_mode='multiple',
height='700px',
width='fit-content',
filters=True,
)
@output
@render.data_frame
@reactive.event(input.generate_plot, ignore_none=False)
def grid_pbp():
data_list = scrape.get_data(game_list_input = [int(input.game_id())])
df = scrape.get_data_df(data_list = data_list)
# df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter(
# (pl.col("is_pitch") == True)&
# (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
# )))).with_columns(
# pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
# ).with_columns(
# (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name'))
# )
#df = caa.calculate_arm_angles(df=df,pitcher_id=int(input.pitcher_id())).clone()
return render.DataGrid(
df.to_pandas(),
row_selection_mode='multiple',
height='700px',
width='fit-content',
filters=True,
)
@output
@render.data_frame
@reactive.event(input.generate_plot, ignore_none=False)
def grid():
df_games = (scrape.get_schedule(year_input=[int(str(input.date_input())[:4])],
sport_id=[int(input.level_input())],
game_type=['S','R','P','E','A','I','W','F','L']).with_columns(pl.col('date').cast(pl.Utf8)).
filter(pl.col('date') == str(input.date_input()))).with_columns(
(pl.col('away')+' @ '+pl.col('home')).alias('matchup'))
game_list = df_games['game_id'].unique().to_list()
# Get the list of pitchers for the selected level and season
data_list = scrape.get_data(game_list)
df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter(
(pl.col("is_pitch") == True)&
(pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
)))).with_columns(
pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
).with_columns(
(pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name'))
)
# game_list = game_list_df['game_id'].unique().to_list()
data = scrape.get_data(game_list[:])
df = scrape.get_data_df(data)
pitcher_team_dict = dict(zip(df['pitcher_id'], df['pitcher_team']))
df_test = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(df).filter(
(pl.col("is_pitch") == True)))))
df_test = df_test.with_columns(
(pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name')
)
# Aggregate tj_stuff_plus by pitcher_id and year
df_agg_2024_pitch = df_test.group_by(['pitcher_id','pitcher_name','pitch_type']).agg(
pl.col('tj_stuff_plus').len().alias('count'),
pl.col('tj_stuff_plus').mean()
)
# Calculate the weighted average of 'tj_stuff_plus' for each pitcher
df_weighted_avg = df_agg_2024_pitch.with_columns(
(pl.col('tj_stuff_plus') * pl.col('count')).alias('weighted_tj_stuff_plus')
).group_by(['pitcher_id', 'pitcher_name']).agg(
pl.col('count').sum().alias('total_count'),
pl.col('weighted_tj_stuff_plus').sum().alias('total_weighted_tj_stuff_plus')
).with_columns(
(pl.col('total_weighted_tj_stuff_plus') / pl.col('total_count')).alias('tj_stuff_plus')
).select(['pitcher_id', 'pitcher_name', 'tj_stuff_plus', 'total_count'])
# Add the 'pitch_type' column with value "All"
df_weighted_avg = df_weighted_avg.with_columns(
pl.lit("All").alias('pitch_type')
)
# Select and rename columns to match the original DataFrame
df_weighted_avg = df_weighted_avg.select([
'pitcher_id',
'pitcher_name',
'pitch_type',
pl.col('total_count').alias('count'),
'tj_stuff_plus'
])
# Concatenate the new rows with the original DataFrame
df_agg_2024_pitch = pl.concat([df_agg_2024_pitch, df_weighted_avg])
df_small = df_agg_2024_pitch.select(['pitcher_id','pitcher_name','pitch_type','count','tj_stuff_plus'])
count_dict = dict(zip(df_small.filter(pl.col('pitch_type')=='All')['pitcher_id'],
df_small.filter(pl.col('pitch_type')=='All')['count']))
# Check if 'FS' column exists, if not create it and fill with None
df_small_pivot = (df_small.pivot(index=['pitcher_id','pitcher_name'],
columns='pitch_type',
values='tj_stuff_plus').with_columns(
pl.col("pitcher_id").replace_strict(count_dict, default=None).alias("count")))
# Check if 'FS' column exists, if not create it and fill with None
for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']:
if col not in df_small_pivot.columns:
df_small_pivot = df_small_pivot.with_columns(pl.lit(None).alias(col))
df_small_pivot = df_small_pivot.with_columns(
pl.col("pitcher_id").replace_strict(pitcher_team_dict, default=None).alias("pitcher_team"))
df_small_pivot = df_small_pivot.select(['pitcher_id','pitcher_name','pitcher_team','count','CH','CU','FC','FF','FS','SI','SL','ST','All']).sort('All',descending=True)#.head(10)#.write_clipboard()
df_small_pivot = df_small_pivot.with_columns(
pl.col(col).cast(pl.Int32, strict=False) for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']
)
return render.DataGrid(
df_small_pivot,
row_selection_mode='multiple',
height='700px',
width='fit-content',
filters=True,
)
@output
@render.table
@reactive.event(input.generate_plot, input.pitch_min,input.head,ignore_none=False)
def grid_style():
row_limit = int(input.head())
pitch_limit = int(input.pitch_min())
df_games = (scrape.get_schedule(year_input=[int(str(input.date_input())[:4])],
sport_id=[int(input.level_input())],
game_type=['S','R','P','E','A','I','W','F','L']).with_columns(pl.col('date').cast(pl.Utf8)).
filter(pl.col('date') == str(input.date_input()))).with_columns(
(pl.col('away')+' @ '+pl.col('home')).alias('matchup'))
game_list = df_games['game_id'].unique().to_list()
# Get the list of pitchers for the selected level and season
data_list = scrape.get_data(game_list)
df = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(scrape.get_data_df(data_list = data_list).filter(
(pl.col("is_pitch") == True)&
(pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
)))).with_columns(
pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
).with_columns(
(pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name'))
)
# game_list = game_list_df['game_id'].unique().to_list()
data = scrape.get_data(game_list[:])
df = scrape.get_data_df(data)
pitcher_team_dict = dict(zip(df['pitcher_id'], df['pitcher_team']))
df_test = (stuff_apply.stuff_apply(fe.feature_engineering(update.update(df).filter(
(pl.col("is_pitch") == True)))))
# df_test = df_test.with_columns(
# (pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name')
# )
# Aggregate tj_stuff_plus by pitcher_id and year
df_agg_2024_pitch = df_test.group_by(['pitcher_id','pitcher_name','pitch_type']).agg(
pl.col('tj_stuff_plus').len().alias('count'),
pl.col('tj_stuff_plus').mean()
)
# Calculate the weighted average of 'tj_stuff_plus' for each pitcher
df_weighted_avg = df_agg_2024_pitch.with_columns(
(pl.col('tj_stuff_plus') * pl.col('count')).alias('weighted_tj_stuff_plus')
).group_by(['pitcher_id', 'pitcher_name']).agg(
pl.col('count').sum().alias('total_count'),
pl.col('weighted_tj_stuff_plus').sum().alias('total_weighted_tj_stuff_plus')
).with_columns(
(pl.col('total_weighted_tj_stuff_plus') / pl.col('total_count')).alias('tj_stuff_plus')
).select(['pitcher_id', 'pitcher_name', 'tj_stuff_plus', 'total_count'])
# Add the 'pitch_type' column with value "All"
df_weighted_avg = df_weighted_avg.with_columns(
pl.lit("All").alias('pitch_type')
)
# Select and rename columns to match the original DataFrame
df_weighted_avg = df_weighted_avg.select([
'pitcher_id',
'pitcher_name',
'pitch_type',
pl.col('total_count').alias('count'),
'tj_stuff_plus'
])
# Concatenate the new rows with the original DataFrame
df_agg_2024_pitch = pl.concat([df_agg_2024_pitch, df_weighted_avg])
df_small = df_agg_2024_pitch.select(['pitcher_id','pitcher_name','pitch_type','count','tj_stuff_plus'])
count_dict = dict(zip(df_small.filter(pl.col('pitch_type')=='All')['pitcher_id'],
df_small.filter(pl.col('pitch_type')=='All')['count']))
# Check if 'FS' column exists, if not create it and fill with None
df_small_pivot = (df_small.pivot(index=['pitcher_id','pitcher_name'],
columns='pitch_type',
values='tj_stuff_plus').with_columns(
pl.col("pitcher_id").replace_strict(count_dict, default=None).alias("count")))
# Check if 'FS' column exists, if not create it and fill with None
for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']:
if col not in df_small_pivot.columns:
df_small_pivot = df_small_pivot.with_columns(pl.lit(None).alias(col))
df_small_pivot = df_small_pivot.with_columns(
pl.col("pitcher_id").replace_strict(pitcher_team_dict, default=None).alias("pitcher_team"))
df_small_pivot = df_small_pivot.select(['pitcher_name','pitcher_team','count','CH','CU','FC','FF','FS','SI','SL','ST','All']).sort('All',descending=True)#.head(10)#.write_clipboard()
df_small_pivot = df_small_pivot.with_columns(
pl.col(col).cast(pl.Int32, strict=False) for col in ['CH', 'CU', 'FC', 'FF', 'FS', 'SI', 'SL', 'ST', 'All']
)
df_export = df_small_pivot.filter(pl.col('count')>=pitch_limit).to_pandas().head(row_limit)
df_export.columns = ['Name', 'Team', 'Pitches', 'CH', 'CU', 'FC',
'FF', 'FS', 'SI', 'SL', 'ST', 'All']
df_style = df_export.style
df_style = df_style.set_properties(**{'border': '1.0 px'},overwrite=False).set_table_styles([{'selector' :'th',
'props':[('text-align', 'center'),('font-size', '22px'),('Height','30px'),('border', '1px black solid !important')]},
{'selector' :'td', 'props':[('text-align', 'center'),('font-size', '22px')]}],overwrite=False).set_table_styles(
[{'selector': 'tr', 'props': [('line-height', '1px')]}],overwrite=False).set_properties(
**{'Height': '60px'},**{'text-align': 'center'},overwrite=False).hide_index()
#cmap_sum_2 = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#FFFFFF","#F0E442"])
df_style = df_style.format('{:.0f}',subset=df_export.columns[3:], na_rep='')
# df_style
df_style = df_style.background_gradient(cmap=cmap_sum,subset = ((list(df_export.index[:]),df_export.columns[3:])),vmin=80,vmax=120)#.applymap(lambda x: 'color: white' if pd.isnull(x) else '')
#df_style = df_style.applymap(background_gradient_ignore_nan)
#df_style = df_style
df_style = df_style.applymap(lambda x: 'color: transparent; background-color: transparent' if pd.isnull(x) else '')
df_style = df_style.set_properties(
**{'border': '1px black solid !important'},subset = ((list(df_style.index[:-1]),df_style.columns[:]))).set_properties(
**{'min-width':'325px'},subset = ((list(df_style.index[:-1]),df_style.columns[0])),overwrite=False).set_properties(
**{'min-width':'100px'},subset = ((list(df_style.index[:-1]),df_style.columns[1:3])),overwrite=False).set_properties(
**{'min-width':'100px'},subset = ((list(df_style.index[:-1]),df_style.columns[3:])),overwrite=False).set_properties(
# **{'min-width':'125px'},subset = ((list(df_style.index[:-1]),df_style.columns[-1])),overwrite=False).set_properties(
**{'border': '1px black solid !important'},subset = ((list(df_style.index[:]),df_style.columns[:])))
# df_style = df_style.set_table_styles([{'selector' :'th',
# 'props':[('text-align', 'center'),('font-size', '22px'),('Height','30px'),('border', '1px black solid !important')]},
# {'selector' :'td', 'props':[('text-align', 'center'),('font-size', '22px')]}], overwrite=False)
return df_style
@output
@render.table
@reactive.event(input.generate_plot, ignore_none=False)
def whiff_table():
time_delta_fill = 8
teams = requests.get(url='https://statsapi.mlb.com/api/v1/teams/').json()
#Select only teams that are at the MLB level
# mlb_teams_city = [x['franchiseName'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
# mlb_teams_name = [x['teamName'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
# mlb_teams_franchise = [x['name'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
# mlb_teams_id = [x['id'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
# mlb_teams_abb = [x['abbreviation'] for x in teams['teams'] if x['sport']['name'] == 'Major League Baseball']
mlb_teams_city = [x['franchiseName'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_name = [x['teamName'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_franchise = [x['name'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_id = [x['id'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_abb = [x['abbreviation'] if 'franchiseName' in x else None for x in teams['teams']]
mlb_teams_parent = [x['parentOrgName'] if 'parentOrgName' in x else None for x in teams['teams']]
#Create a dataframe of all the teams
mlb_teams_df = pd.DataFrame(data={'team_id':mlb_teams_id,'city':mlb_teams_franchise,'name':mlb_teams_name,'franchise':mlb_teams_franchise,'abbreviation':mlb_teams_abb,'parent_org':mlb_teams_parent}).drop_duplicates()
##Create a dataframe of all players in the database
#Make an api call to get a dictionary of all players
player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{int(input.level_input())}/players').json()
#Select relevant data that will help distinguish players from one another
fullName_list = [x['fullName'] for x in player_data['people']]
id_list = [x['id'] for x in player_data['people']]
position_list = [x['primaryPosition']['abbreviation'] for x in player_data['people']]
team_list = [x['currentTeam']['id']for x in player_data['people']]
df_games = (scrape.get_schedule(year_input=[int(str(input.date_input())[:4])],
sport_id=[int(input.level_input())],
game_type=['S','R','P','E','A','I','W','F','L']).with_columns(pl.col('date').cast(pl.Utf8)).
filter(pl.col('date') == str(input.date_input()))).with_columns(
(pl.col('away')+' @ '+pl.col('home')).alias('matchup'))
game_list = df_games['game_id'].unique().to_list()
# Get the list of pitchers for the selected level and season
data_list = scrape.get_data(game_list)
df = scrape.get_data_df(data_list = data_list).filter(
(pl.col("is_pitch") == True)&
(pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
).with_columns(
pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
).with_columns(
(pl.col('pitcher_name')+' - '+pl.col('pitcher_team')).alias('pitcher_name'))
# game_list = game_list_df['game_id'].unique().to_list()
data = scrape.get_data(game_list[:])
df = scrape.get_data_df(data)
player_id = []
team_id = []
for y in range(0,len(data_list)):
#print(game_df_filter.game_id.reset_index(drop=True)[y])
player_id.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['person']['id'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']])
player_id.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['person']['id'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']])
# player_name.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['person']['fullName'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']])
# player_name.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['person']['fullName'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']])
team_id.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['parentTeamId'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']])
team_id.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['parentTeamId'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']])
player_id_final = [item for sublist in player_id for item in sublist]
#player_name_final = [item for sublist in player_name for item in sublist]
team_id_final = [item for sublist in team_id for item in sublist]
player_list = pd.DataFrame(data={'player_id':player_id_final,'team_id':team_id_final})
player_list = player_list.drop_duplicates(subset=['player_id'],keep='last')
player_df_all = player_list.merge(right=mlb_teams_df[['team_id','abbreviation']],left_on = 'team_id',right_on='team_id',how='left').drop_duplicates(keep='last')
mlb_teams_df = mlb_teams_df.merge(right=mlb_teams_df[['abbreviation','franchise']],left_on='parent_org',right_on='franchise',how='left').drop_duplicates().reset_index(drop=True)
mlb_teams_df = mlb_teams_df[mlb_teams_df.columns[:-1]]
mlb_teams_df.columns = ['team_id', 'city', 'name', 'franchise', 'abbreviation',
'parent_org', 'parent_org_abb']
pk_list = []
pitcher_id_list = []
summary_list = []
hit_list = []
k_list = []
bb_list = []
pa_list = []
test_list = []
game_pk_list = []
for y in range(0,len(data_list)):
pk_list.append([data_list[y]['gameData']['game']['pk'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']])
pk_list.append([data_list[y]['gameData']['game']['pk'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']])
pitcher_id_list.append([x for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']])
pitcher_id_list.append([x for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']])
summary_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['summary'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']])
summary_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['summary'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']])
hit_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['hits'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']])
hit_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['hits'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']])
k_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['strikeOuts'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']])
k_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['strikeOuts'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']])
bb_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['baseOnBalls'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']])
bb_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['baseOnBalls'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']])
pa_list.append([data_list[y]['liveData']['boxscore']['teams']['away']['players']['ID'+str(x)]['stats']['pitching']['battersFaced'] for x in data_list[y]['liveData']['boxscore']['teams']['away']['pitchers']])
pa_list.append([data_list[y]['liveData']['boxscore']['teams']['home']['players']['ID'+str(x)]['stats']['pitching']['battersFaced'] for x in data_list[y]['liveData']['boxscore']['teams']['home']['pitchers']])
test_list.append([x for x in data_list[y]['liveData']['plays']['allPlays']])
game_pk_list.append([data_list[y]['gameData']['game']['pk'] for x in data_list[y]['liveData']['plays']['allPlays']])
flat_list = [item for sublist in test_list for item in sublist]
flat_list_games = [item for sublist in game_pk_list for item in sublist]
test_list_2 = [x['playEvents'] for x in flat_list]
test_list_3 = [x['matchup'] for x in flat_list]
flat_list_pk = [item for sublist in pk_list for item in sublist]
flat_list_pitcher_id = [item for sublist in pitcher_id_list for item in sublist]
flat_list_summary = [item for sublist in summary_list for item in sublist]
flat_list_hits = [item for sublist in hit_list for item in sublist]
flat_list_k = [item for sublist in k_list for item in sublist]
flat_list_bb = [item for sublist in bb_list for item in sublist]
flat_list_pa = [item for sublist in pa_list for item in sublist]
pitcher_summary_df = pd.DataFrame(data={'game_id':flat_list_pk,'pitcher_id':flat_list_pitcher_id,'summary':flat_list_summary,'hits':flat_list_hits,'k':flat_list_k,'bb':flat_list_bb,'pa':flat_list_pa})
pitcher_summary_df.summary = pitcher_summary_df.summary + ', ' + pitcher_summary_df.hits.astype(str) + ' H'
pitcher_summary_df['k_bb_percent'] = (pitcher_summary_df.k - pitcher_summary_df.bb) / (pitcher_summary_df.pa)
game_id_list = []
bat_play_id = []
batter_id_list = []
batter_name_list = []
date_list = []
desc_list = []
code_list = []
pitch_list = []
pitch_type_code_list = []
pitch_type_list = []
pitch_velo_list = []
pitch_break_length_list = []
pitch_break_angle_list = []
pitch_ivb_list = []
pitch_spin_list = []
pitch_spin_direction_list = []
launchSpeed_list = []
launchAngle_list = []
#totalDistance_list = []
trajectory_list = []
hardness_list = []
pitcher_id_list = []
pitcher_name_list = []
k_zone_top = []
k_zone_bottom = []
pitch_x = []
pitch_z = []
zone_list = []
pitch_x_what = []
pitch_y_what = []
from datetime import datetime
from datetime import timedelta
for i in range(0,len(test_list_2)):
#n = n + 10000
for j in range(0,len(test_list_2[i])):
if 'playId' in test_list_2[i][j]:
#print('test')
#n = n + 1
game_id_list.append(flat_list_games[i])
bat_play_id.append(str(flat_list_games[i])+str(1000+flat_list[i]['about']['atBatIndex'])+str(1000+test_list_2[i][j]['index']))
batter_id_list.append(test_list_3[i]['batter']['id'])
batter_name_list.append(test_list_3[i]['batter']['fullName'])
pitcher_id_list.append(test_list_3[i]['pitcher']['id'])
pitcher_name_list.append(test_list_3[i]['pitcher']['fullName'])
date_list.append((datetime.strptime(test_list_2[i][j]['startTime'][0:16], '%Y-%m-%dT%H:%M') - timedelta(hours=8)).date())
desc_list.append(test_list_2[i][j]['details']['description'] if 'description' in test_list_2[i][j]['details'] else np.nan)
code_list.append(test_list_2[i][j]['details']['code'] if 'code' in test_list_2[i][j]['details'] else np.nan)
# if 'hitData' in test_list_2[i][j]:
if 'pitchData' in test_list_2[i][j]:
#print(i,j)
#pitch_abb_list.append(test_list_2[i][j]['details']['type']['code'])
#pitch_name_list.append(test_list_2[i][j]['details']['type']['description'])
pitch_type_code_list.append(test_list_2[i][j]['details']['type']['description'] if 'type' in test_list_2[i][j]['details'] else np.nan)
pitch_type_list.append(test_list_2[i][j]['details']['type']['code'] if 'type' in test_list_2[i][j]['details'] else np.nan)
pitch_velo_list.append(test_list_2[i][j]['pitchData']['startSpeed'] if 'startSpeed' in test_list_2[i][j]['pitchData'] else np.nan)
pitch_break_length_list.append(test_list_2[i][j]['pitchData']['coordinates']['pfxX'] if 'pfxX' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan)
pitch_break_angle_list.append(test_list_2[i][j]['pitchData']['breaks']['breakAngle'] if 'breakAngle' in test_list_2[i][j]['pitchData']['breaks'] else np.nan)
pitch_ivb_list.append(test_list_2[i][j]['pitchData']['coordinates']['pfxZ'] if 'pfxZ' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan)
pitch_spin_list.append(test_list_2[i][j]['pitchData']['breaks']['spinRate'] if 'spinRate' in test_list_2[i][j]['pitchData']['breaks'] else np.nan)
pitch_spin_direction_list.append(test_list_2[i][j]['pitchData']['breaks']['spinDirection'] if 'spinDirection' in test_list_2[i][j]['pitchData']['breaks'] else np.nan)
k_zone_top.append(test_list_2[i][j]['pitchData']['strikeZoneTop'] if 'strikeZoneTop' in test_list_2[i][j]['pitchData'] else np.nan)
k_zone_bottom.append(test_list_2[i][j]['pitchData']['strikeZoneBottom'] if 'strikeZoneBottom' in test_list_2[i][j]['pitchData'] else np.nan)
pitch_x.append(test_list_2[i][j]['pitchData']['coordinates']['pX'] if 'pX' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan)
pitch_z.append(test_list_2[i][j]['pitchData']['coordinates']['pZ'] if 'pZ' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan)
zone_list.append(test_list_2[i][j]['pitchData']['zone'] if 'zone' in test_list_2[i][j]['pitchData'] else np.nan)
pitch_x_what.append(test_list_2[i][j]['pitchData']['coordinates']['x'] if 'x' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan)
pitch_y_what.append(test_list_2[i][j]['pitchData']['coordinates']['y'] if 'y' in test_list_2[i][j]['pitchData']['coordinates'] else np.nan)
#totalDistance_list.append(test_list_2[i][j]['hitData']['totalDistance'])
#trajectory_list.append(test_list_2[i][j]['hitData']['trajectory'])
#hardness_list.append(test_list_2[i][j]['hitData']['hardness'])
# #print(i,j)
else:
pitch_velo_list.append(np.nan)
pitch_type_code_list.append(np.nan)
pitch_type_list.append(np.nan)
pitch_break_length_list.append(np.nan)
pitch_break_angle_list.append(np.nan)
pitch_ivb_list.append(np.nan)
pitch_spin_list.append(np.nan)
pitch_spin_direction_list.append(np.nan)
k_zone_top.append(np.nan)
k_zone_bottom.append(np.nan)
pitch_x.append(np.nan)
pitch_z.append(np.nan)
zone_list.append(np.nan)
pitch_x_what.append(np.nan)
pitch_y_what.append(np.nan)
if 'hitData' in test_list_2[i][j]:
# print(i,j)
#pitch_abb_list.append(test_list_2[i][j]['details']['type']['code'])
#pitch_name_list.append(test_list_2[i][j]['details']['type']['description'])
launchSpeed_list.append(test_list_2[i][j]['hitData']['launchSpeed'] if 'launchSpeed' in test_list_2[i][j]['hitData'] else np.nan)
launchAngle_list.append(test_list_2[i][j]['hitData']['launchAngle'] if 'launchAngle' in test_list_2[i][j]['hitData'] else np.nan)
#totalDistance_list.append(test_list_2[i][j]['hitData']['totalDistance'])
#trajectory_list.append(test_list_2[i][j]['hitData']['trajectory'])
#hardness_list.append(test_list_2[i][j]['hitData']['hardness'])
else:
launchSpeed_list.append(np.nan)
launchAngle_list.append(np.nan)
exit_velo_df = pd.DataFrame(data={'play_id':bat_play_id,
'game_id':game_id_list,
'date':date_list,
'pitcher_id':pitcher_id_list,
'pitcher':pitcher_name_list,
'pitch_code':pitch_type_code_list,
'pitch_type':pitch_type_list,
'pitch_velocity':pitch_velo_list,
'break_length':pitch_break_length_list,
'break_angle':pitch_break_angle_list,
'break_ivb':pitch_ivb_list,
'spin_rate':pitch_spin_list,
'spin_direction':pitch_spin_direction_list,
'batter_id':batter_id_list,
'batter':batter_name_list,
'code':code_list,
'description':desc_list,
'launch_speed':launchSpeed_list,
'launch_angle':launchAngle_list,
'k_zone_top':k_zone_top,
'k_zone_bottom':k_zone_bottom,
'pitch_x':pitch_x,
'pitch_z':pitch_z,
'zone':zone_list,
'pitch_x_what':pitch_x_what,
'pitch_y_what':pitch_y_what,
})
exit_velo_df['plate_negative'] = -17/12/2
exit_velo_df['plate_positive'] = 17/12/2
exit_velo_df[['k_zone_top','k_zone_bottom']] = exit_velo_df.groupby('batter_id')[['k_zone_top','k_zone_bottom']].transform('mean')
exit_velo_df.play_id = exit_velo_df.play_id.astype(float)
exit_velo_df = exit_velo_df.drop_duplicates(subset=['play_id'],keep='last').reset_index(drop=True)
# exit_velo_df['pitch_x_what'] = ((exit_velo_df['pitch_x_what'] - 117)*-1)/38.11904466310226
# exit_velo_df['pitch_y_what'] = ((exit_velo_df['pitch_y_what'] - 238.8)*-1)/27.008296321000604
# from joblib import load
# knn = load('knn_model.joblib')
# exit_velo_df['in_zone'] = exit_velo_df['zone'] < 10
# exit_velo_df['in_zone_what'] = knn.predict(exit_velo_df[['pitch_x_what','pitch_y_what']].fillna(0))
# #exit_velo_df['in_zone_what'] = (exit_velo_df.pitch_x_what > exit_velo_df.plate_negative-1.4/12)&(exit_velo_df.pitch_x_what < exit_velo_df.plate_positive+1.4/12)&(exit_velo_df.pitch_y_what > exit_velo_df.k_zone_bottom-1.4/12)&(exit_velo_df.pitch_y_what < exit_velo_df.k_zone_top+1.4/12)
# import math
# exit_velo_df['in_zone'] = [exit_velo_df['in_zone'][x] if not math.isnan(exit_velo_df['zone'][x]) else np.nan for x in range(len(exit_velo_df))]
# exit_velo_df['in_zone_what'] =[exit_velo_df['in_zone_what'][x] if not math.isnan(exit_velo_df['pitch_x_what'][x]) else np.nan for x in range(len(exit_velo_df))]
#exit_velo_df['in_zone_what'] = [exit_velo_df['in_zone_what'][x] if not math.isnan(exit_velo_df['pitch_x_what'][x]) else np.nan for x in range(len(exit_velo_df))]
# if os.path.isfile('exit_velo_df.csv'):
# exit_velo_df_full.play_id = exit_velo_df_full.play_id.astype(float)
# exit_velo_df = pd.concat([exit_velo_df_full,exit_velo_df])#
# print('check the length')
# print(len(exit_velo_df))
# exit_velo_df = exit_velo_df.drop_duplicates(subset='play_id',keep='last')
exit_velo_df = exit_velo_df.drop_duplicates(subset='play_id',keep='last')
print('check the length')
print(len(exit_velo_df))
player_id = []
team_id = []
for y in range(0,len(data_list)):
#print(game_df_filter.game_id.reset_index(drop=True)[y])
player_id.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['person']['id'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']])
player_id.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['person']['id'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']])
# player_name.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['person']['fullName'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']])
# player_name.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['person']['fullName'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']])
team_id.append([data_list[y]['liveData']['boxscore']['teams']['away']['players'][x]['parentTeamId'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['away']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['away']['players']])
team_id.append([data_list[y]['liveData']['boxscore']['teams']['home']['players'][x]['parentTeamId'] if 'parentTeamId' in data_list[y]['liveData']['boxscore']['teams']['home']['players'][x] else None for x in data_list[y]['liveData']['boxscore']['teams']['home']['players']])
game_id_pk_list = flat_list_games
play_id_list_full = [str(game_id_pk_list[i])+str(1000+flat_list[i]['about']['atBatIndex'])+str(1000+flat_list[i]['playEvents'][-1]['index']) if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'startTime' in flat_list[i]['playEvents'][-1]) else np.nan for i in range(len(flat_list))]
date_list = [(datetime.strptime(flat_list[i]['about']['startTime'][0:16], '%Y-%m-%dT%H:%M') - timedelta(hours=8)).date() if 'startTime' in flat_list[i]['about'] else np.nan for i in range(len(flat_list))]
time_list = [flat_list[i]['playEvents'][-1]['startTime'] if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'startTime' in flat_list[i]['playEvents'][-1]) else np.nan for i in range(len(flat_list))]
type_list = [flat_list[i]['result']['type'] if 'type' in flat_list[i]['result'] else np.nan for i in range(len(flat_list))]
eventType_list = [flat_list[i]['result']['eventType'] if 'eventType' in flat_list[i]['result'] else np.nan for i in range(len(flat_list))]
batter_id_type_list = [flat_list[i]['matchup']['batter']['id'] if 'id' in flat_list[i]['matchup']['batter'] else np.nan for i in range(len(flat_list))]
batter_name_type_list = [flat_list[i]['matchup']['batter']['fullName'] if 'fullName' in flat_list[i]['matchup']['batter'] else np.nan for i in range(len(flat_list))]
pitcher_id_type_list = [flat_list[i]['matchup']['pitcher']['id'] if 'id' in flat_list[i]['matchup']['pitcher'] else np.nan for i in range(len(flat_list))]
pitcher_name_type_list = [flat_list[i]['matchup']['pitcher']['fullName'] if 'fullName' in flat_list[i]['matchup']['pitcher'] else np.nan for i in range(len(flat_list))]
#play_id_type_list = [flat_list[i]['playEvents'][-1]['index'] if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'index' in flat_list[i]['playEvents'][-1]) else np.nan for i in range(len(flat_list))]
is_out_id_type_list = [flat_list[i]['playEvents'][-1]['details']['isInPlay'] if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'isInPlay' in flat_list[i]['playEvents'][-1]['details']) else np.nan for i in range(len(flat_list))]
is_in_play_id_type_list = [flat_list[i]['playEvents'][-1]['details']['isOut'] if (len(flat_list[i]['playEvents'])>0 and 'playEvents' in flat_list[i] and 'isInPlay' in flat_list[i]['playEvents'][-1]['details']) else np.nan for i in range(len(flat_list))]
complete_id_type_list = [flat_list[i]['about']['isComplete'] if 'isComplete' in flat_list[i]['about'] else np.nan for i in range(len(flat_list))]
pa_df = pd.DataFrame(data={'game_id':game_id_pk_list,'play_id':play_id_list_full,'date':date_list,'start_time':time_list,'type':type_list,'event_type':eventType_list,'batter_id':batter_id_type_list,
'batter_name':batter_name_type_list,'pitcher_id':pitcher_id_type_list,'pitcher_name':pitcher_name_type_list,
'is_out':is_out_id_type_list,'is_in_play':is_in_play_id_type_list,'complete_id':complete_id_type_list})
pa_df = pa_df.dropna(subset=['play_id']).drop_duplicates(subset='play_id',keep='last').reset_index(drop=True)
if os.path.isfile('pa_df.csv'):
pa_df_full = pd.read_csv('pa_df.csv',index_col=[0])
pa_df = pd.concat([pa_df_full,pa_df]).dropna(subset=['play_id']).drop_duplicates(subset=['play_id'],keep='last').reset_index(drop=True)
player_id_final = [item for sublist in player_id for item in sublist]
#player_name_final = [item for sublist in player_name for item in sublist]
team_id_final = [item for sublist in team_id for item in sublist]
player_list = pd.DataFrame(data={'player_id':player_id_final,'team_id':team_id_final})
player_list = player_list.drop_duplicates(subset=['player_id'],keep='last')
player_df_all = player_list.merge(right=mlb_teams_df[['team_id','abbreviation']],left_on = 'team_id',right_on='team_id',how='left').drop_duplicates(keep='last')
# player_df_all = player_df_all.merge(right=player_list,left_on='team_id',right_on='team_id',how='left',suffixes=['','_y'])
# player_df_all = player_df_all.drop(columns='player_id_y')
codes_in = ['In play, out(s)',
'Swinging Strike',
'Ball',
'Foul',
'In play, no out',
'Called Strike',
'Foul Tip',
'In play, run(s)',
'Hit By Pitch',
'Ball In Dirt',
'Pitchout',
'Swinging Strike (Blocked)',
'Foul Bunt',
'Missed Bunt',
'Foul Pitchout',
'Intent Ball',
'Swinging Pitchout']
# import pygsheets
# gc = pygsheets.authorize(service_file='stunning-hue-363921-db5ac144d947.json')
# sh = gc.open_by_key('1hlPJ-oL60cqsXqZ4aGmU0L4HsFl-KJTSgR2Oi039jTY')
exit_velo_df_copy = exit_velo_df.copy()
exit_velo_df_copy = exit_velo_df_copy[exit_velo_df_copy['description'].isin(codes_in)]
exit_velo_df_copy['pitch'] = exit_velo_df_copy.groupby('pitcher_id').cumcount() + 1
exit_velo_df_copy['pitch_type_count'] = exit_velo_df_copy.groupby(['pitcher_id','pitch_type']).cumcount() + 1
# if len(exit_velo_df_copy[exit_velo_df_copy['description'].isin(codes_in)].reset_index(drop=True)[len(sh[0].get_as_df()):].reset_index(drop=True).fillna('')) != 0:
# sh[0].set_dataframe(exit_velo_df_copy[['game_id','date','pitcher_id','pitcher','pitch_type','pitch_velocity','code','pitch','pitch_type_count']][len(sh[0].get_as_df()):],(len(sh[0].get_as_df())+1,1), copy_head=False)
exit_velo_df_batter = exit_velo_df.copy()
exit_velo_df_batter = exit_velo_df_batter.merge(right=player_df_all,left_on='batter_id',right_on='player_id',how='left',suffixes=('','_y'))
exit_velo_df = exit_velo_df.merge(right=player_df_all,left_on='pitcher_id',right_on='player_id',how='left',suffixes=('','_y')).merge(right=player_df_all,left_on='batter_id',right_on='player_id',how='left',suffixes=('','_batter'))
codes_in = ['In play, out(s)',
'Swinging Strike',
'Ball',
'Foul',
'In play, no out',
'Called Strike',
'Foul Tip',
'In play, run(s)',
'Hit By Pitch',
'Ball In Dirt',
'Pitchout',
'Swinging Strike (Blocked)',
'Foul Bunt',
'Missed Bunt']
exit_velo_df.date = pd.to_datetime(exit_velo_df.date).dt.date
pitch_df = exit_velo_df[exit_velo_df['description'].isin(codes_in)].groupby(['pitcher_id','pitcher','game_id','abbreviation','abbreviation_batter','date']).agg(
pitches = ('pitcher_id','count'),
#pitch_velocity = ('pitch_velocity','mean'),
# pitch_velocity = ('launch_speed',percentile(95)),
# launch_speed = ('launch_speed','mean'),
# launch_angle = ('launch_angle','mean'),
).reset_index().sort_values(by='pitches',ascending=False)
whiff_df = exit_velo_df[((exit_velo_df.code == 'S')|(exit_velo_df.code == 'W')|(exit_velo_df.code =='T'))].groupby(['pitcher_id','pitcher','game_id','abbreviation','abbreviation_batter','date']).agg(
whiffs = ('pitcher_id','count'),
#pitch_velocity = ('pitch_velocity','mean'),
# pitch_velocity = ('launch_speed',percentile(95)),
# launch_angle = ('launch_angle','mean'),
).reset_index().sort_values(by='whiffs',ascending=False)
csw_df = exit_velo_df[((exit_velo_df.code == 'S')|(exit_velo_df.code == 'W')|(exit_velo_df.code =='T')|(exit_velo_df.code =='C'))].groupby(['pitcher_id','pitcher','game_id','abbreviation','abbreviation_batter','date']).agg(
csw = ('pitcher_id','count'),
#pitch_velocity = ('pitch_velocity','mean'),
# pitch_velocity = ('launch_speed',percentile(95)),
# launch_speed = ('launch_speed','mean'),
# launch_angle = ('launch_angle','mean'),
).reset_index().sort_values(by='csw',ascending=False)
pitch_df_batter = exit_velo_df[exit_velo_df['description'].isin(codes_in)].groupby(['abbreviation_batter']).agg(
pitches = ('pitcher_id','count'),
#pitch_velocity = ('pitch_velocity','mean'),
# pitch_velocity = ('launch_speed',percentile(95)),
# launch_speed = ('launch_speed','mean'),
# launch_angle = ('launch_angle','mean'),
).reset_index().sort_values(by='abbreviation_batter',ascending=True)
whiff_df_batter = exit_velo_df[((exit_velo_df.code == 'S')|(exit_velo_df.code == 'W')|(exit_velo_df.code =='T'))].groupby(['abbreviation_batter']).agg(
whiffs = ('pitcher_id','count'),
#pitch_velocity = ('pitch_velocity','mean'),
# pitch_velocity = ('launch_speed',percentile(95)),
# launch_angle = ('launch_angle','mean'),
).reset_index().sort_values(by='abbreviation_batter',ascending=True)
csw_df_batter = exit_velo_df[((exit_velo_df.code == 'S')|(exit_velo_df.code == 'W')|(exit_velo_df.code =='T')|(exit_velo_df.code =='C'))].groupby(['abbreviation_batter']).agg(
csw = ('pitcher_id','count'),
#pitch_velocity = ('pitch_velocity','mean'),
# pitch_velocity = ('launch_speed',percentile(95)),
# launch_speed = ('launch_speed','mean'),
# launch_angle = ('launch_angle','mean'),
).reset_index().sort_values(by='abbreviation_batter',ascending=True)
pitch_df_batter = pitch_df_batter.merge(whiff_df_batter,how='left').merge(csw_df_batter,how='left').fillna(0)
pitch_df_batter['whiffs'] = pitch_df_batter['whiffs'].astype(int)
pitch_df_batter['csw'] = pitch_df_batter['csw'].astype(int)
pitch_df_batter['whiff_rate'] = pitch_df_batter.whiffs/pitch_df_batter.pitches
pitch_df_batter['csw_rate'] = pitch_df_batter.csw/pitch_df_batter.pitches
# Normalize game dates to plain date objects for joining/filtering.
exit_velo_df.date = pd.to_datetime(exit_velo_df.date).dt.date
# One row per (game, team): the pitcher credited as that team's starter.
# NOTE(review): not referenced below — retained because earlier revisions
# filtered the leaderboard to starters with it; confirm before removing.
starter_list = exit_velo_df[['pitcher_id','pitcher','game_id','abbreviation']].drop_duplicates(subset=['game_id','abbreviation']).reset_index(drop=True)
print('Creating df:')
# Per-pitcher pitch totals joined with whiff and CSW event counts.
# Left joins: a pitcher with no whiff/CSW rows simply had zero events,
# so the resulting NaNs are filled with 0.
pitch_whiff_df = pitch_df.merge(whiff_df,how='left').merge(csw_df,how='left').fillna(0)
pitch_whiff_df.date = pd.to_datetime(pitch_whiff_df.date).dt.date
# fillna(0) left the count columns as floats; restore integer dtype.
pitch_whiff_df['whiffs'] = pitch_whiff_df['whiffs'].astype(int)
pitch_whiff_df['csw'] = pitch_whiff_df['csw'].astype(int)
# Rates per pitch thrown (SwStr% and CSW%).
pitch_whiff_df['whiff_rate'] = pitch_whiff_df.whiffs/pitch_whiff_df.pitches
pitch_whiff_df['csw_rate'] = pitch_whiff_df.csw/pitch_whiff_df.pitches
# Rank by raw whiff count; ties share the lowest (best) rank.
pitch_whiff_df = pitch_whiff_df.sort_values(by='whiffs',ascending=False).reset_index(drop=True)
pitch_whiff_df['rank'] = pitch_whiff_df['whiffs'].rank(ascending=False,method='min').astype(int)
# Attach the per-game pitcher summary (brings in abbreviation_batter etc.).
pitch_whiff_df = pitch_whiff_df.merge(right=pitcher_summary_df,on=['pitcher_id','game_id'],how='left')
# Final display columns and user-facing headers.
pitch_whiff_df = pitch_whiff_df[['rank','pitcher','abbreviation','abbreviation_batter','pitches','whiffs','whiff_rate','csw','csw_rate']]
pitch_whiff_df.columns = ['Rank','Pitcher','Team','Opp.','Pitches','Whiffs','SwStr%','CSW','CSW%']
print(pitch_whiff_df)
# Daily whiff leaderboard: pitch totals joined with whiff and CSW counts.
# Left joins so pitchers with zero events keep their rows (NaN -> 0).
top_d_score = pitch_df.merge(whiff_df,how='left').merge(csw_df,how='left').fillna(0)
top_d_score.date = pd.to_datetime(top_d_score.date).dt.date
# fillna(0) left the count columns as floats; restore integer dtype.
top_d_score['whiffs'] = top_d_score['whiffs'].astype(int)
top_d_score['csw'] = top_d_score['csw'].astype(int)
top_d_score['whiff_rate'] = top_d_score.whiffs/top_d_score.pitches
top_d_score['csw_rate'] = top_d_score.csw/top_d_score.pitches
# Attach the per-game pitcher summary line (K-BB%, text summary, ...).
top_d_score = top_d_score.merge(right=pitcher_summary_df,on=['pitcher_id','game_id'])
# Summary strings omit the count when it is 1 (e.g. ", ER"); re-insert it.
top_d_score.summary = top_d_score.summary.str.replace(', ER', ', 1 ER')
top_d_score.summary = top_d_score.summary.str.replace(', K', ', 1 K')
top_d_score.summary = top_d_score.summary.str.replace(', BB', ', 1 BB')
# Opposing team's CSW% (joined column gets the '_opp' suffix) and the
# pitcher-vs-opponent CSW% differential.
top_d_score = top_d_score.merge(right=pitch_df_batter[['abbreviation_batter','csw_rate']],left_on='abbreviation_batter',right_on='abbreviation_batter',how='left',suffixes=['','_opp'])
top_d_score['diff'] = top_d_score.csw_rate - top_d_score.csw_rate_opp
# Order by whiffs (CSW% breaks ties); ties share the lowest rank.
top_d_score = top_d_score.sort_values(by=['whiffs','csw_rate'],ascending=False).reset_index(drop=True)
top_d_score['rank'] = top_d_score['whiffs'].rank(ascending=False,method='min').astype(int)
# Default pitcher selection: the day's whiff leader.
pitcher_to_select = top_d_score['pitcher_id'].values[0]
# Final display columns and user-facing headers.
top_d_score = top_d_score[['rank','pitcher','abbreviation','abbreviation_batter','pitches','whiffs','whiff_rate','csw','csw_rate','k_bb_percent','summary']]
top_d_score.columns = ['Rank','Pitcher','Team','Opp.','Pitches','Whiffs','SwStr%','CSW','CSW%','K-BB%','Summary']
# Base Styler: dated caption, centred headers/cells, thin borders.
# (Fixed invalid CSS value 'centre' -> 'center' for the caption.)
df_combined_t_style = top_d_score.style.set_properties(**{'border': '0.4 px'},overwrite=False).set_caption('MLB Daily Whiff Leaders - ' + str((datetime.today()-timedelta(hours=time_delta_fill)).date())).set_table_styles([{
    'selector': 'caption',
    'props': [
        ('color', ''),
        ('fontname', 'Century Gothic'),
        ('font-size', '24px'),
        ('font-style', 'italic'),
        ('font-weight', ''),
        ('text-align', 'center'),
    ]
},{'selector' :'th', 'props':[('text-align', 'center'),('Height','24px'),('border', '0.4px black solid !important'),('font-size', '16px')]},{'selector' :'td', 'props':[('text-align', 'center'),('font-size', '16px')]}],overwrite=False).set_table_styles(
[{'selector': 'tr', 'props': [('line-height', '0.4px')]}],overwrite=False).set_properties(
**{'Height': '24px'},**{'text-align': 'center'},overwrite=False).hide_index()
# White -> yellow ramp for the raw Whiffs column; the shared diverging
# cmap_sum (blue -> white -> amber) shades the rate columns with fixed
# vmin/vmax so colours are comparable day to day.
cmap_sum_2 = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#FFFFFF","#F0E442"])
df_combined_t_style = df_combined_t_style.background_gradient(cmap=cmap_sum_2,subset = ((list(top_d_score.index[:]),top_d_score.columns[5])))
df_combined_t_style = df_combined_t_style.background_gradient(cmap=cmap_sum,subset = ((list(top_d_score.index[:]),top_d_score.columns[8])),vmax=.40,vmin=.20)
df_combined_t_style = df_combined_t_style.background_gradient(cmap=cmap_sum,subset = ((list(top_d_score.index[:]),top_d_score.columns[6])),vmax=.20,vmin=.05)
df_combined_t_style = df_combined_t_style.background_gradient(cmap=cmap_sum,subset = ((list(top_d_score.index[:]),top_d_score.columns[-2])),vmax=0.4,vmin=-.1)
def cond_formatting_csw(x):
    """Gold-highlight a CSW% cell when it exceeds 0.45; otherwise no style."""
    return 'background-color: #d4af37' if x > 0.45 else None
def cond_formatting_whiff(x):
    """Gold-highlight a SwStr% cell when it exceeds 0.25; otherwise no style."""
    return 'background-color: #d4af37' if x > 0.25 else None
def cond_formatting_diff(x):
    """Gold-highlight a CSW% differential cell when it exceeds 0.15; otherwise no style."""
    return 'background-color: #d4af37' if x > 0.15 else None
# Per-cell "gold" highlight for elite single-game marks (CSW% > 0.45,
# SwStr% > 0.25), layered on top of the gradients applied above.
# NOTE(review): Styler.applymap is deprecated in pandas 2.1+ in favour of
# Styler.map — left unchanged to match whatever pandas version is pinned.
df_combined_t_style = df_combined_t_style.applymap(cond_formatting_csw,subset=((list(top_d_score.index[:]),top_d_score.columns[8])))
df_combined_t_style = df_combined_t_style.applymap(cond_formatting_whiff,subset=((list(top_d_score.index[:]),top_d_score.columns[6])))
#df_combined_t_style = df_combined_t_style.applymap(cond_formatting_diff,subset=((list(top_d_score.index[:]),top_d_score.columns[-3])))
# Number formats: rate columns (SwStr%, CSW%, K-BB%) as one-decimal
# percentages; count columns (Whiffs, CSW) as plain integers.
df_combined_t_style = df_combined_t_style.format(
{df_combined_t_style.columns[6]: '{:,.1%}'.format,
df_combined_t_style.columns[5]: '{:,.0f}'.format,
df_combined_t_style.columns[8]: '{:,.1%}'.format,
df_combined_t_style.columns[7]: '{:,.0f}'.format,
#df_combined_t_style.columns[-3]: '{:,.1%}'.format,
#df_combined_t_style.columns[-4]: '{:,.1%}'.format,
df_combined_t_style.columns[-2]:'{0:.1%}'.format,
# df_combined_t_style.columns[-3]:'{0:+.1%}'
})
# Cell borders plus minimum column widths (wider for the Pitcher and
# Summary columns) so the rendered HTML table doesn't collapse.
df_combined_t_style = df_combined_t_style.set_properties(
**{'border': '0.4px black solid !important'},subset = ((list(top_d_score.index[:-1]),top_d_score.columns[:]))).set_properties(
**{'min-width':'75px'},subset = ((list(top_d_score.index[:-1]),top_d_score.columns[1])),overwrite=False).set_properties(
**{'min-width':'50px'},subset = ((list(top_d_score.index[:-1]),top_d_score.columns[2:])),overwrite=False).set_properties(
**{'min-width':'75px'},subset = ((list(top_d_score.index[:-1]),top_d_score.columns[-1])),overwrite=False).set_properties(
**{'border': '0.4px black solid !important'},subset = ((list(top_d_score.index[:]),top_d_score.columns[:])))
# Hand the fully styled leaderboard back to the caller for rendering.
return df_combined_t_style
# Instantiate the Shiny application, wiring the UI definition to the
# server callback defined above.
app = App(app_ui, server)