# OwenStOnge's picture
# Update app.R
# ffc5941 verified
library(shiny)
library(tidyverse)
library(patchwork)
library(gt)
library(tidymodels)
library(conflicted)
library(arrow)
library(xgboost)
conflicts_prefer(shiny::observe)
conflicts_prefer(dplyr::filter)
conflicts_prefer(dplyr::select)
# Load the three artifacts the app depends on, all read from the working
# directory: the pitch-level table, the recipe object passed to bake(), and
# the XGBoost booster used for prediction.
pbp <- read_parquet(file = "created_pbp.parquet")
recipe_obj <- readRDS(file = "recipe4.rds")
booster <- xgb.load(modelfile = "model4.json")
# =============================================
# Model prediction helper
# =============================================
# Score rows of pitch features with the loaded booster.
#
# new_data: data frame carrying the columns `recipe_obj` expects.
# Returns the result of predict() on the booster for the baked rows.
predict_rv <- function(new_data) {
  features <- bake(recipe_obj, new_data = new_data)
  # The outcome is not a predictor; drop it if the recipe kept it around.
  features <- select(features, -any_of("run_value"))
  feature_matrix <- xgb.DMatrix(data = as.matrix(features))
  predict(booster, feature_matrix)
}
# =============================================
# Name mappings
# =============================================
# =============================================
# Pre-compute pitcher arsenal profiles
# =============================================
# Average pitch characteristics per pitcher and pitch group.
#
# Rows with a missing pitch type, or one of the excluded codes (PO, FA, EP,
# KN, UN, CS, SC), are removed first. The remaining pitch types are collapsed
# into four groups: FF, FA2 (FC/SI), OS (CH/FS) and BR (ST/CU/SV/SL/KC).
# Groups thrown fewer than 20 times by a pitcher are dropped.
arsenal_profiles <- pbp %>%
  filter(
    !pitch_type %in% c("PO", "FA", "EP", "KN", "UN", "CS", "SC"),
    !is.na(pitch_type)
  ) %>%
  mutate(pitch_group = case_when(
    pitch_type == "FF" ~ "FF",
    pitch_type %in% c("FC", "SI") ~ "FA2",
    pitch_type %in% c("CH", "FS") ~ "OS",
    pitch_type %in% c("ST", "CU", "SV", "SL", "KC") ~ "BR"
  )) %>%
  # case_when() has no default, so a pitch code that is neither excluded nor
  # mapped above gets an NA group. Left in, that NA group shows up as an
  # unlabeled arsenal entry and cannot be looked up again with `==`, which
  # breaks the follow-up comparison. Drop those rows before aggregating.
  filter(!is.na(pitch_group)) %>%
  group_by(pitcher, player_name, pitch_group) %>%
  summarize(
    release_speed = mean(release_speed, na.rm = TRUE),
    pfx_x = mean(pfx_x, na.rm = TRUE),
    pfx_z = mean(pfx_z, na.rm = TRUE),
    arm_angle = mean(arm_angle, na.rm = TRUE),
    release_pos_x = mean(release_pos_x, na.rm = TRUE),
    release_pos_z = mean(release_pos_z, na.rm = TRUE),
    release_extension = mean(release_extension, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  # Require a minimum sample so the averages are not driven by a few pitches.
  filter(n >= 20)
# Per-pitcher fastball baseline: mean velocity and mean pfx_z over that
# pitcher's "FF" and "SI" pitches. A pitcher with neither type still gets a
# row; the means over the empty selection come out as NaN.
pitcher_primaries <- pbp %>%
  filter(!(pitch_type %in% c("PO", "FA", "EP", "KN", "UN", "CS", "SC"))) %>%
  mutate(is_primary = pitch_type %in% c("FF", "SI")) %>%
  group_by(pitcher) %>%
  summarize(
    primary_velo = mean(release_speed[is_primary], na.rm = TRUE),
    primary_pfxz = mean(pfx_z[is_primary], na.rm = TRUE),
    .groups = "drop"
  )
# Attach each pitcher's fastball baseline to every pitch-group row, then
# express the group's vertical movement and velocity relative to it.
arsenal_profiles <- left_join(
  arsenal_profiles,
  pitcher_primaries,
  by = "pitcher"
)
arsenal_profiles <- mutate(
  arsenal_profiles,
  pfxz_diff = pfx_z - primary_pfxz,
  primary_velo_diff = release_speed - primary_velo
)
# Lookup of the nine strike-zone cells: numeric id, display label, and the
# plate_x / plate_z used as the cell's center. Cells are numbered row by row,
# top row first, left to right.
zone_centers <- data.frame(
  zone = 1:9,
  zone_label = as.character(1:9),
  plate_x = rep(c(-0.55, 0, 0.55), times = 3),
  plate_z = rep(c(3.1, 2.5, 1.9), each = 3)
)
# Pitchers offered in the dropdown: those with at least two distinct pitch
# groups in arsenal_profiles, ordered by name.
valid_pitchers <- arsenal_profiles %>%
  distinct(pitcher, player_name, pitch_group) %>%
  count(pitcher, player_name, name = "n_groups") %>%
  filter(n_groups >= 2) %>%
  arrange(player_name)
# Display name for each pitch-group code.
pitch_group_labels <- setNames(
  c("Four-Seam Fastball", "Cutter / Sinker", "Offspeed", "Breaking Ball"),
  c("FF", "FA2", "OS", "BR")
)
# Fill color for each display name (used by the optimal-pitch map).
pitch_colors <- setNames(
  c("#d62728", "#ff7f0e", "#2ca02c", "#1f77b4"),
  c("Four-Seam Fastball", "Cutter / Sinker", "Offspeed", "Breaking Ball")
)
# =============================================
# UI
# =============================================
# Page layout: a title bar, a sidebar with the pitcher and Pitch #1 scenario
# controls, and a main panel with the scenario summary, the run-value
# heatmaps, the optimal-pitch map, the zone diagram and the arsenal table.
# The pieces are built as named parts inside local() so no helper names leak
# into the global environment.
ui <- local({
  # Every panel lives in the same "card" container.
  card <- function(...) div(class = "card", ...)

  page_css <- tags$head(tags$style(HTML("
body { font-family: 'Helvetica Neue', sans-serif; background: #f8f8f8; }
.card { background: white; border-radius: 8px; padding: 20px; margin-bottom: 15px;
border: 1px solid #e0e0e0; }
.scenario-label { font-size: 14px; font-weight: 600; color: #1B3A5C; margin-bottom: 8px; }
.title-bar { background: #1B3A5C; color: white; padding: 12px 20px; border-radius: 8px;
margin-bottom: 15px; }
.title-bar h3 { margin: 0; font-weight: 700; letter-spacing: 0.5px; }
.title-bar p { margin: 2px 0 0 0; font-size: 13px; opacity: 0.8; }
")))

  title_bar <- div(
    class = "title-bar",
    tags$h3("Pitch Sequencing Decision Tool"),
    tags$p("Select a pitcher, set the scenario, and compare follow-up pitch options")
  )

  # Sidebar: pitcher picker plus the description of the first pitch.
  controls <- sidebarPanel(
    width = 3,
    card(
      selectizeInput(
        "pitcher_select", "Pitcher:",
        choices = setNames(valid_pitchers$pitcher, valid_pitchers$player_name),
        selected = NULL,
        options = list(placeholder = "Search for a pitcher...")
      ),
      hr(),
      div(class = "scenario-label", "Pitch #1 Setup"),
      # Choices are filled in by the server once a pitcher is selected.
      selectInput("pitch1_type", "Pitch Type:", choices = NULL),
      selectInput(
        "pitch1_zone", "Location:",
        choices = setNames(zone_centers$zone, zone_centers$zone_label)
      ),
      selectInput(
        "pitch1_result", "Swing Result:",
        choices = c(
          "Foul Ball" = "foul",
          "Swinging Strike" = "swinging_strike",
          "Foul Tip" = "foul_tip"
        )
      ),
      sliderInput(
        "timing_resid", "Batter Timing (inches):",
        min = -25, max = 25, value = 0, step = 5
      ),
      helpText("Negative = batter was late | Positive = batter was early"),
      hr(),
      actionButton(
        "generate", "Compare Follow-Up Pitches",
        class = "btn-primary", style = "width: 100%; font-weight: 600;"
      )
    )
  )

  # Main panel, top to bottom: scenario text, heatmaps, then two columns.
  scenario_row <- fluidRow(
    column(12, card(uiOutput("scenario_description")))
  )

  heatmap_row <- fluidRow(
    column(12, card(
      tags$h5("Expected Run Value by Follow-Up Pitch",
              style = "font-weight: 700; color: #1B3A5C;"),
      tags$p("Blue = pitcher-favorable | Red = hitter-favorable",
             style = "font-size: 12px; color: #888; margin-top: -4px;"),
      plotOutput("comparison_grid", height = "500px")
    ))
  )

  detail_row <- fluidRow(
    column(6, card(
      tags$h5("Optimal Pitch by Location", style = "font-weight: 700; color: #1B3A5C;"),
      tags$p("Pitch with lowest expected run value at each location",
             style = "font-size: 12px; color: #888; margin-top: -4px;"),
      plotOutput("best_pitch_zone", height = "450px")
    )),
    column(6, card(
      tags$h5("Pitch #1 Location", style = "font-weight: 700; color: #1B3A5C;"),
      plotOutput("zone_diagram", height = "220px"),
      tags$h5("Arsenal Profile", style = "font-weight: 700; color: #1B3A5C; margin-top: 12px;"),
      gt_output("arsenal_table")
    ))
  )

  displays <- mainPanel(
    width = 9,
    scenario_row,
    heatmap_row,
    detail_row
  )

  fluidPage(
    page_css,
    title_bar,
    sidebarLayout(controls, displays)
  )
})
# =============================================
# Server
# =============================================
server <- function(input, output, session) {
# Keep the Pitch #1 type dropdown in sync with the selected pitcher: offer
# that pitcher's pitch groups, most-thrown first, shown by display name.
observe({
  req(input$pitcher_select)
  groups <- filter(arsenal_profiles, pitcher == input$pitcher_select)
  groups <- arrange(groups, desc(n))
  type_choices <- setNames(
    groups$pitch_group,
    pitch_group_labels[groups$pitch_group]
  )
  updateSelectInput(session, "pitch1_type", choices = type_choices)
})
# Look up one pitcher's averaged profile for a single pitch group.
#
# pitcher_id: value compared against arsenal_profiles$pitcher.
# pg:         pitch-group code compared against arsenal_profiles$pitch_group.
# Returns the first matching row of arsenal_profiles, or a zero-row table
# when the pitcher has no such group.
get_pitch_profile <- function(pitcher_id, pg) {
  matches <- filter(arsenal_profiles, pitcher == pitcher_id, pitch_group == pg)
  # Namespace-qualified on purpose: this file runs under `conflicted`, and
  # xgboost releases that export their own slice() make the bare name
  # ambiguous, which conflicted reports as an error at call time.
  dplyr::slice(matches, 1)
}
# Predict run value for one follow-up pitch over a 50 x 50 grid of locations.
#
# pitch2_prof:   one-row profile of the follow-up pitch (see get_pitch_profile).
# prev_scenario: list with prev_velocity, prev_plate_x and prev_plate_z
#                describing Pitch #1.
# timing_resid:  value passed to the model as prev_y_residual.
# prev_desc:     value passed to the model as prev_description.
# Returns the location grid (plate_x, plate_z) with a run_value column added.
make_zone_pred <- function(pitch2_prof, prev_scenario, timing_resid, prev_desc) {
  locations <- expand.grid(
    plate_x = seq(-1.5, 1.5, length.out = 50),
    plate_z = seq(1, 4, length.out = 50)
  )

  prev_velo <- prev_scenario$prev_velocity
  prev_x <- prev_scenario$prev_plate_x
  prev_z <- prev_scenario$prev_plate_z

  features <- mutate(
    locations,
    # Follow-up pitch characteristics, constant across the grid.
    release_speed = pitch2_prof$release_speed,
    pfx_x = pitch2_prof$pfx_x,
    pfx_z = pitch2_prof$pfx_z,
    arm_angle = pitch2_prof$arm_angle,
    release_pos_x = pitch2_prof$release_pos_x,
    release_pos_z = pitch2_prof$release_pos_z,
    release_extension = pitch2_prof$release_extension,
    pfxz_diff = pitch2_prof$pfxz_diff,
    primary_velo_diff = pitch2_prof$primary_velo_diff,
    # Fixed context: a generic zone and a right-on-right matchup.
    # NOTE(review): p_throws is "R" for every pitcher -- confirm that is
    # intended for left-handed pitchers.
    sz_top = 3.5,
    sz_bot = 1.5,
    p_throws = "R",
    stand = "R",
    # Pitch #1 context and how the follow-up differs from it.
    prev_velocity = prev_velo,
    prev_plate_x = prev_x,
    prev_plate_z = prev_z,
    velo_diff = prev_velo - pitch2_prof$release_speed,
    plate_x_diff = prev_x - plate_x,
    plate_z_diff = prev_z - plate_z,
    # NOTE(review): movement differences are held at 0 rather than derived
    # from the two pitch profiles -- confirm against how the model was fit.
    pfx_x_diff = 0,
    pfx_z_diff = 0,
    prev_y_residual = timing_resid,
    prev_description = prev_desc
  )

  locations$run_value <- predict_rv(features)
  locations
}
# Holds the output of the most recent "generate" click. prev_scenario and
# available are added to it by the handler below.
results <- reactiveValues(grids = NULL, arsenal = NULL)

# On "Compare Follow-Up Pitches": predict a run-value grid for every pitch
# group in the selected pitcher's arsenal, given the Pitch #1 scenario, and
# store the grids plus the arsenal summary for the outputs to render.
observeEvent(input$generate, {
  req(input$pitcher_select, input$pitch1_type)

  pitcher_arsenal <- arsenal_profiles %>%
    filter(pitcher == input$pitcher_select, !is.na(pitch_group))
  # One grid per group; an NA group could never be looked up again with `==`.
  available <- unique(pitcher_arsenal$pitch_group)

  pitch1_prof <- get_pitch_profile(input$pitcher_select, input$pitch1_type)
  zone_row <- zone_centers %>% filter(zone == as.numeric(input$pitch1_zone))
  # The pitch-type dropdown is refreshed asynchronously after a pitcher
  # change, so its value can briefly name a group the new pitcher lacks.
  # A zero-row profile would make mutate() in make_zone_pred() fail inside
  # this observer; stop quietly instead and wait for a consistent selection.
  req(nrow(pitch1_prof) == 1, nrow(zone_row) == 1)

  prev_scenario <- list(
    prev_velocity = pitch1_prof$release_speed,
    prev_plate_x = zone_row$plate_x,
    prev_plate_z = zone_row$plate_z
  )

  all_grids <- lapply(available, function(pg) {
    prof <- get_pitch_profile(input$pitcher_select, pg)
    grid <- make_zone_pred(
      prof, prev_scenario, input$timing_resid, input$pitch1_result
    )
    grid$pitch2 <- pg
    grid$pitch2_label <- pitch_group_labels[[pg]]
    grid$velo <- round(prof$release_speed, 1)
    grid
  })

  results$grids <- bind_rows(all_grids)
  results$prev_scenario <- prev_scenario
  results$available <- available
  # Values are stored unrounded: the arsenal table multiplies pfx_x / pfx_z
  # by 12 and formats to one decimal itself, so rounding here first would
  # quantize the displayed movement to steps of 1.2.
  results$arsenal <- pitcher_arsenal %>%
    mutate(pitch_label = unname(pitch_group_labels[pitch_group])) %>%
    select(pitch_label, release_speed, pfx_x, pfx_z, n)
})
# Text summary of the current scenario: pitcher name, then one line with the
# Pitch #1 type, zone, swing result and batter timing.
output$scenario_description <- renderUI({
  req(input$pitcher_select, input$pitch1_type)

  is_selected <- valid_pitchers$pitcher == input$pitcher_select
  pitcher_name <- valid_pitchers$player_name[is_selected]
  zone_label <- zone_centers$zone_label[
    zone_centers$zone == as.numeric(input$pitch1_zone)
  ]
  pitch1_label <- pitch_group_labels[input$pitch1_type]

  # Input value -> display text; an unknown value yields NA.
  result_names <- c(
    foul = "Foul Ball",
    swinging_strike = "Swinging Strike",
    foul_tip = "Foul Tip"
  )
  result_label <- unname(result_names[input$pitch1_result])

  # Anything within 5 of zero (inclusive) counts as on time.
  timing <- input$timing_resid
  timing_label <- if (isTRUE(timing < -5)) {
    paste0("Late (", timing, " inches)")
  } else if (isTRUE(timing > 5)) {
    paste0("Early (+", timing, " inches)")
  } else {
    "On Time"
  }

  scenario_line <- paste0(
    "Pitch #1: ", pitch1_label, " \u2192 Zone ", zone_label,
    " | Result: ", result_label, " | Batter Timing: ", timing_label
  )

  tags$div(
    tags$h4(style = "margin: 0; color: #1B3A5C; font-weight: 700;", pitcher_name),
    tags$p(style = "margin: 4px 0; color: #555; font-size: 15px;", scenario_line),
    tags$p(style = "margin: 0; color: #888; font-size: 13px;",
           "Comparing expected run value across all follow-up pitch options.")
  )
})
# One heatmap per follow-up pitch group, side by side, sharing a single
# color scale centered on 0. The strike zone is outlined and the Pitch #1
# location is marked with an X.
output$comparison_grid <- renderPlot({
  req(results$grids)

  grids <- results$grids
  prev <- results$prev_scenario
  grids$facet_label <- paste0(grids$pitch2_label, "\n", grids$velo, " mph")

  fill_scale <- scale_fill_gradient2(
    low = "#1f77b4", mid = "white", high = "#d62728", midpoint = 0,
    # Span the observed predictions so every panel is comparable.
    limits = range(grids$run_value), name = "Expected\nRun Value"
  )
  zone_outline <- annotate(
    "rect",
    xmin = -0.83, xmax = 0.83, ymin = 1.5, ymax = 3.5,
    fill = NA, color = "black", linewidth = 1
  )
  pitch1_marker <- annotate(
    "point",
    x = prev$prev_plate_x,
    y = prev$prev_plate_z,
    shape = 4, size = 4, color = "black", stroke = 2
  )

  ggplot(grids, aes(x = plate_x, y = plate_z, fill = run_value)) +
    geom_tile() +
    fill_scale +
    zone_outline +
    pitch1_marker +
    facet_wrap(~facet_label, nrow = 1) +
    coord_fixed() +
    labs(
      x = "Horizontal Location (ft)\n\u2190 Inside | Outside \u2192 (RHH)",
      y = "Pitch Height (ft)"
    ) +
    theme_bw() +
    theme(strip.text = element_text(face = "bold", size = 11))
})
# Map of the follow-up pitch with the lowest predicted run value at each grid
# location. The strike zone is outlined and Pitch #1 is marked with an X.
output$best_pitch_zone <- renderPlot({
  req(results$grids)

  # Keep exactly one row per location: the minimum run value, ties broken by
  # row order.
  by_location <- group_by(results$grids, plate_x, plate_z)
  winners <- ungroup(
    slice_min(by_location, run_value, n = 1, with_ties = FALSE)
  )
  prev <- results$prev_scenario

  zone_outline <- annotate(
    "rect",
    xmin = -0.83, xmax = 0.83, ymin = 1.5, ymax = 3.5,
    fill = NA, color = "black", linewidth = 1
  )
  pitch1_marker <- annotate(
    "point",
    x = prev$prev_plate_x,
    y = prev$prev_plate_z,
    shape = 4, size = 4, color = "white", stroke = 2
  )

  ggplot(winners, aes(x = plate_x, y = plate_z, fill = pitch2_label)) +
    geom_tile() +
    scale_fill_manual(values = pitch_colors, name = "Optimal Pitch") +
    zone_outline +
    pitch1_marker +
    coord_fixed() +
    labs(
      x = "Horizontal Location (ft)\n\u2190 Inside | Outside \u2192 (RHH)",
      y = "Pitch Height (ft)"
    ) +
    theme_bw() +
    theme(
      legend.position = "bottom",
      legend.title = element_text(face = "bold")
    )
})
# Small 3 x 3 strike-zone diagram with the selected Pitch #1 zone highlighted
# in bold red.
output$zone_diagram <- renderPlot({
  req(input$pitch1_zone)
  is_selected <- zone_centers$zone == as.numeric(input$pitch1_zone)

  # The two vertical and two horizontal lines that split the zone in thirds.
  dividers <- data.frame(
    x = c(-0.83 + (1.66/3), -0.83 + 2*(1.66/3), -0.83, -0.83),
    xend = c(-0.83 + (1.66/3), -0.83 + 2*(1.66/3), 0.83, 0.83),
    y = c(1.5, 1.5, 1.5 + (2/3), 1.5 + 2*(2/3)),
    yend = c(3.5, 3.5, 1.5 + (2/3), 1.5 + 2*(2/3))
  )

  ggplot() +
    geom_rect(
      aes(xmin = -0.83, xmax = 0.83, ymin = 1.5, ymax = 3.5),
      fill = NA, color = "black", linewidth = 1
    ) +
    geom_segment(
      data = dividers,
      aes(x = x, xend = xend, y = y, yend = yend),
      color = "gray70"
    ) +
    geom_text(
      data = zone_centers,
      aes(x = plate_x, y = plate_z, label = zone_label),
      size = 3.5,
      color = ifelse(is_selected, "#d62728", "gray50"),
      fontface = ifelse(is_selected, "bold", "plain")
    ) +
    annotate(
      "text",
      x = 0, y = 3.7, label = "Catcher's View (RHH)",
      size = 3, color = "gray50", fontface = "italic"
    ) +
    coord_fixed(xlim = c(-1.5, 1.5), ylim = c(1.2, 3.9)) +
    labs(x = NULL, y = NULL) +
    theme_bw() +
    theme(
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      panel.grid = element_blank()
    )
})
# Table of the selected pitcher's arsenal: one row per pitch group with
# velocity, movement and pitch count.
output$arsenal_table <- render_gt({
  req(results$arsenal)

  # Movement is scaled by 12 before display to match the "in" suffix below.
  # NOTE(review): presumably a feet-to-inches conversion -- confirm the units
  # of pfx_x / pfx_z in the source data.
  arsenal <- mutate(
    results$arsenal,
    pfx_x = pfx_x * 12,
    pfx_z = pfx_z * 12
  )

  tbl <- gt(arsenal)
  tbl <- cols_label(
    tbl,
    pitch_label = "Pitch",
    release_speed = "Velocity",
    pfx_x = "Horizontal Break",
    pfx_z = "Induced Vertical Break",
    n = "Pitches Thrown"
  )
  tbl <- fmt_number(tbl, columns = c(release_speed), decimals = 1, pattern = "{x} mph")
  tbl <- fmt_number(tbl, columns = c(pfx_x, pfx_z), decimals = 1, pattern = "{x} in")
  tbl <- fmt_number(tbl, columns = n, decimals = 0, use_seps = TRUE)
  tab_options(
    tbl,
    table.font.size = 12,
    column_labels.font.weight = "bold"
  )
})
}
# Assemble the app from the UI definition and server function above.
shinyApp(ui = ui, server = server)