library(shiny)
library(tidyverse)
library(patchwork)
library(gt)
library(tidymodels)
library(conflicted)
library(arrow)
library(xgboost)

conflicts_prefer(shiny::observe)
conflicts_prefer(dplyr::filter)
conflicts_prefer(dplyr::select)

pbp <- read_parquet("created_pbp.parquet")
recipe_obj <- readRDS("recipe4.rds")
booster <- xgb.load("model4.json")

# =============================================
# Model prediction helper
# =============================================

# Bake `new_data` with the saved recipe, drop the outcome column if the recipe
# emitted one, and return the booster's predictions (one value per row).
predict_rv <- function(new_data) {
  baked <- bake(recipe_obj, new_data = new_data) %>%
    select(-any_of("run_value"))
  dmat <- xgb.DMatrix(data = as.matrix(baked))
  predict(booster, dmat)
}

# =============================================
# Shared constants
# =============================================

# Pitch types excluded from every arsenal summary.
excluded_pitch_types <- c("PO", "FA", "EP", "KN", "UN", "CS", "SC")

# Display label -> input value for the pitch #1 swing result. Used both for the
# select input and for the scenario description, so the two cannot drift apart.
swing_result_choices <- c(
  "Foul Ball" = "foul",
  "Swinging Strike" = "swinging_strike",
  "Foul Tip" = "foul_tip"
)

# x-axis label shared by the two location heat maps.
plate_x_axis_label <-
  "Horizontal Location (ft)\n\u2190 Inside | Outside \u2192 (RHH)"

# Black outline of the fixed strike zone drawn on every location plot.
strike_zone_outline <- function() {
  annotate(
    "rect",
    xmin = -0.83, xmax = 0.83, ymin = 1.5, ymax = 3.5,
    fill = NA, color = "black", linewidth = 1
  )
}

# =============================================
# Pre-compute pitcher arsenal profiles
# =============================================

# One row per pitcher x pitch group with mean release/movement characteristics.
# Groups with fewer than 20 pitches are dropped.
arsenal_profiles <- pbp %>%
  filter(!pitch_type %in% excluded_pitch_types, !is.na(pitch_type)) %>%
  mutate(pitch_group = case_when(
    pitch_type == "FF" ~ "FF",
    pitch_type %in% c("FC", "SI") ~ "FA2",
    pitch_type %in% c("CH", "FS") ~ "OS",
    pitch_type %in% c("ST", "CU", "SV", "SL", "KC") ~ "BR"
  )) %>%
  # case_when() yields NA for any pitch type not listed above. Such rows must
  # not form their own group: an NA group has no label and cannot be looked up
  # again with `pitch_group == pg`, which breaks the follow-up comparison.
  filter(!is.na(pitch_group)) %>%
  group_by(pitcher, player_name, pitch_group) %>%
  summarize(
    release_speed = mean(release_speed, na.rm = TRUE),
    pfx_x = mean(pfx_x, na.rm = TRUE),
    pfx_z = mean(pfx_z, na.rm = TRUE),
    arm_angle = mean(arm_angle, na.rm = TRUE),
    release_pos_x = mean(release_pos_x, na.rm = TRUE),
    release_pos_z = mean(release_pos_z, na.rm = TRUE),
    release_extension = mean(release_extension, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  filter(n >= 20)

# Per-pitcher mean velocity and vertical movement over FF and SI pitches.
# A pitcher with no FF/SI pitches gets NaN here (mean of an empty vector).
pitcher_primaries <- pbp %>%
  filter(!pitch_type %in% excluded_pitch_types) %>%
  group_by(pitcher) %>%
  summarize(
    primary_velo = mean(
      release_speed[pitch_type %in% c("FF", "SI")],
      na.rm = TRUE
    ),
    primary_pfxz = mean(pfx_z[pitch_type %in% c("FF", "SI")], na.rm = TRUE),
    .groups = "drop"
  )

# Attach each pitch group's difference from the pitcher's primary averages.
arsenal_profiles <- arsenal_profiles %>%
  left_join(pitcher_primaries, by = "pitcher") %>%
  mutate(
    pfxz_diff = pfx_z - primary_pfxz,
    primary_velo_diff = release_speed - primary_velo
  )

# Centre coordinates of the nine selectable pitch #1 locations (3 x 3 grid).
zone_centers <- data.frame(
  zone = 1:9,
  zone_label = c("1", "2", "3", "4", "5", "6", "7", "8", "9"),
  plate_x = c(-0.55, 0, 0.55, -0.55, 0, 0.55, -0.55, 0, 0.55),
  plate_z = c(3.1, 3.1, 3.1, 2.5, 2.5, 2.5, 1.9, 1.9, 1.9)
)

# Pitchers offered in the UI: at least two qualifying pitch groups.
valid_pitchers <- arsenal_profiles %>%
  group_by(pitcher, player_name) %>%
  summarize(n_groups = n_distinct(pitch_group), .groups = "drop") %>%
  filter(n_groups >= 2) %>%
  arrange(player_name)

# =============================================
# Name mappings
# =============================================

# Pitch group code -> display label.
pitch_group_labels <- c(
  "FF" = "Four-Seam Fastball",
  "FA2" = "Cutter / Sinker",
  "OS" = "Offspeed",
  "BR" = "Breaking Ball"
)

# Display label -> fill colour for the "optimal pitch" map.
pitch_colors <- c(
  "Four-Seam Fastball" = "#d62728",
  "Cutter / Sinker" = "#ff7f0e",
  "Offspeed" = "#2ca02c",
  "Breaking Ball" = "#1f77b4"
)

# =============================================
# UI
# =============================================
ui <- fluidPage(
  tags$head(tags$style(HTML("
    body { font-family: 'Helvetica Neue', sans-serif; background: #f8f8f8; }
    .card { background: white; border-radius: 8px; padding: 20px; margin-bottom: 15px; border: 1px solid #e0e0e0; }
    .scenario-label { font-size: 14px; font-weight: 600; color: #1B3A5C; margin-bottom: 8px; }
    .title-bar { background: #1B3A5C; color: white; padding: 12px 20px; border-radius: 8px; margin-bottom: 15px; }
    .title-bar h3 { margin: 0; font-weight: 700; letter-spacing: 0.5px; }
    .title-bar p { margin: 2px 0 0 0; font-size: 13px; opacity: 0.8; }
  "))),
  div(
    class = "title-bar",
    tags$h3("Pitch Sequencing Decision Tool"),
    tags$p(
      "Select a pitcher, set the scenario, and compare follow-up pitch options"
    )
  ),
  sidebarLayout(
    sidebarPanel(
      width = 3,
      div(
        class = "card",
        selectizeInput(
          "pitcher_select", "Pitcher:",
          choices = setNames(valid_pitchers$pitcher, valid_pitchers$player_name),
          selected = NULL,
          options = list(placeholder = "Search for a pitcher...")
        ),
        hr(),
        div(class = "scenario-label", "Pitch #1 Setup"),
        # Choices are filled in by the server once a pitcher is selected.
        selectInput("pitch1_type", "Pitch Type:", choices = NULL),
        selectInput(
          "pitch1_zone", "Location:",
          choices = setNames(zone_centers$zone, zone_centers$zone_label)
        ),
        selectInput(
          "pitch1_result", "Swing Result:",
          choices = swing_result_choices
        ),
        sliderInput(
          "timing_resid", "Batter Timing (inches):",
          min = -25, max = 25, value = 0, step = 5
        ),
        helpText("Negative = batter was late | Positive = batter was early"),
        hr(),
        actionButton(
          "generate", "Compare Follow-Up Pitches",
          class = "btn-primary",
          style = "width: 100%; font-weight: 600;"
        )
      )
    ),
    mainPanel(
      width = 9,
      fluidRow(
        column(12, div(class = "card", uiOutput("scenario_description")))
      ),
      fluidRow(
        column(12, div(
          class = "card",
          tags$h5(
            "Expected Run Value by Follow-Up Pitch",
            style = "font-weight: 700; color: #1B3A5C;"
          ),
          tags$p(
            "Blue = pitcher-favorable | Red = hitter-favorable",
            style = "font-size: 12px; color: #888; margin-top: -4px;"
          ),
          plotOutput("comparison_grid", height = "500px")
        ))
      ),
      fluidRow(
        column(6, div(
          class = "card",
          tags$h5(
            "Optimal Pitch by Location",
            style = "font-weight: 700; color: #1B3A5C;"
          ),
          tags$p(
            "Pitch with lowest expected run value at each location",
            style = "font-size: 12px; color: #888; margin-top: -4px;"
          ),
          plotOutput("best_pitch_zone", height = "450px")
        )),
        column(6, div(
          class = "card",
          tags$h5(
            "Pitch #1 Location",
            style = "font-weight: 700; color: #1B3A5C;"
          ),
          plotOutput("zone_diagram", height = "220px"),
          tags$h5(
            "Arsenal Profile",
            style = "font-weight: 700; color: #1B3A5C; margin-top: 12px;"
          ),
          gt_output("arsenal_table")
        ))
      )
    )
  )
)

# =============================================
# Server
# =============================================
server <- function(input, output, session) {
  # Repopulate the pitch #1 type choices with the selected pitcher's pitch
  # groups, most-thrown first.
  observe({
    req(input$pitcher_select)
    pitcher_arsenal <- arsenal_profiles %>%
      filter(pitcher == input$pitcher_select) %>%
      mutate(label = unname(pitch_group_labels[pitch_group])) %>%
      arrange(desc(n))
    choices <- setNames(pitcher_arsenal$pitch_group, pitcher_arsenal$label)
    updateSelectInput(session, "pitch1_type", choices = choices)
  })

  # Return the (at most one-row) arsenal profile for a pitcher / pitch group.
  # Zero rows come back when the pitcher has no qualifying group `pg`.
  get_pitch_profile <- function(pitcher_id, pg) {
    arsenal_profiles %>%
      filter(pitcher == pitcher_id, pitch_group == pg) %>%
      slice(1)
  }

  # Predict run value for a follow-up pitch over a 50 x 50 location grid.
  #
  # pitch2_prof:   one-row profile of the follow-up pitch.
  # prev_scenario: list with prev_velocity, prev_plate_x, prev_plate_z for
  #                pitch #1.
  # timing_resid:  value passed to the model as prev_y_residual.
  # prev_desc:     value passed to the model as prev_description.
  #
  # Returns the grid (plate_x, plate_z) with a `run_value` column added.
  make_zone_pred <- function(pitch2_prof, prev_scenario, timing_resid,
                             prev_desc) {
    grid <- expand.grid(
      plate_x = seq(-1.5, 1.5, length.out = 50),
      plate_z = seq(1, 4, length.out = 50)
    )
    pred_data <- grid %>%
      mutate(
        release_speed = pitch2_prof$release_speed,
        pfx_x = pitch2_prof$pfx_x,
        pfx_z = pitch2_prof$pfx_z,
        arm_angle = pitch2_prof$arm_angle,
        release_pos_x = pitch2_prof$release_pos_x,
        release_pos_z = pitch2_prof$release_pos_z,
        release_extension = pitch2_prof$release_extension,
        pfxz_diff = pitch2_prof$pfxz_diff,
        primary_velo_diff = pitch2_prof$primary_velo_diff,
        sz_top = 3.5,
        sz_bot = 1.5,
        # NOTE(review): handedness is fixed to R/R for every pitcher, including
        # any left-handers in `valid_pitchers` -- confirm this is intended.
        p_throws = "R",
        stand = "R",
        prev_velocity = prev_scenario$prev_velocity,
        prev_plate_x = prev_scenario$prev_plate_x,
        prev_plate_z = prev_scenario$prev_plate_z,
        velo_diff = prev_scenario$prev_velocity - pitch2_prof$release_speed,
        plate_x_diff = prev_scenario$prev_plate_x - plate_x,
        plate_z_diff = prev_scenario$prev_plate_z - plate_z,
        # NOTE(review): movement differences are fixed at 0 even when pitch #1
        # and the follow-up are different pitch groups -- confirm against how
        # these features were built for training.
        pfx_x_diff = 0,
        pfx_z_diff = 0,
        prev_y_residual = timing_resid,
        prev_description = prev_desc
      )
    grid$run_value <- predict_rv(pred_data)
    grid
  }

  results <- reactiveValues(grids = NULL, arsenal = NULL)

  # Build one prediction grid per pitch group in the pitcher's arsenal and
  # store everything the output renderers need.
  observeEvent(input$generate, {
    req(input$pitcher_select, input$pitch1_type)

    pitcher_arsenal <- arsenal_profiles %>%
      filter(pitcher == input$pitcher_select)
    available <- pitcher_arsenal$pitch_group

    pitch1_prof <- get_pitch_profile(input$pitcher_select, input$pitch1_type)
    # Right after a pitcher change, `pitch1_type` can still hold a group the
    # new pitcher does not throw; a zero-row profile would make mutate() fail.
    req(nrow(pitch1_prof) == 1)

    zone_row <- zone_centers %>%
      filter(zone == as.numeric(input$pitch1_zone))
    prev_scenario <- list(
      prev_velocity = pitch1_prof$release_speed,
      prev_plate_x = zone_row$plate_x,
      prev_plate_z = zone_row$plate_z
    )

    all_grids <- lapply(available, function(pg) {
      prof <- get_pitch_profile(input$pitcher_select, pg)
      grid <- make_zone_pred(
        prof, prev_scenario, input$timing_resid, input$pitch1_result
      )
      grid$pitch2 <- pg
      grid$pitch2_label <- pitch_group_labels[[pg]]
      grid$velo <- round(prof$release_speed, 1)
      grid
    })

    results$grids <- bind_rows(all_grids)
    results$prev_scenario <- prev_scenario
    results$available <- available
    # Movement columns are kept unrounded: the table multiplies them by 12, so
    # rounding here would coarsen the displayed values to 1.2-unit steps.
    results$arsenal <- pitcher_arsenal %>%
      mutate(pitch_label = unname(pitch_group_labels[pitch_group])) %>%
      select(pitch_label, release_speed, pfx_x, pfx_z, n) %>%
      mutate(release_speed = round(release_speed, 1))
  })

  # Headline describing the currently selected scenario (updates live with
  # the inputs, independently of the "generate" button).
  output$scenario_description <- renderUI({
    req(input$pitcher_select, input$pitch1_type)

    pitcher_name <-
      valid_pitchers$player_name[valid_pitchers$pitcher == input$pitcher_select]
    zone_label <-
      zone_centers$zone_label[zone_centers$zone == as.numeric(input$pitch1_zone)]
    pitch1_label <- pitch_group_labels[input$pitch1_type]
    result_label <-
      names(swing_result_choices)[match(input$pitch1_result, swing_result_choices)]

    # Slider values from -5 to 5 inclusive are reported as "On Time".
    timing_label <- if (input$timing_resid < -5) {
      paste0("Late (", input$timing_resid, " inches)")
    } else if (input$timing_resid > 5) {
      paste0("Early (+", input$timing_resid, " inches)")
    } else {
      "On Time"
    }

    tags$div(
      tags$h4(
        style = "margin: 0; color: #1B3A5C; font-weight: 700;",
        pitcher_name
      ),
      tags$p(
        style = "margin: 4px 0; color: #555; font-size: 15px;",
        paste0(
          "Pitch #1: ", pitch1_label, " \u2192 Zone ", zone_label,
          " | Result: ", result_label,
          " | Batter Timing: ", timing_label
        )
      ),
      tags$p(
        style = "margin: 0; color: #888; font-size: 13px;",
        "Comparing expected run value across all follow-up pitch options."
      )
    )
  })

  # Heat map of predicted run value by location, one facet per follow-up
  # pitch group, on a shared colour scale centred at 0.
  output$comparison_grid <- renderPlot({
    req(results$grids)
    rv_range <- range(results$grids$run_value)

    results$grids %>%
      mutate(facet_label = paste0(pitch2_label, "\n", velo, " mph")) %>%
      ggplot(aes(x = plate_x, y = plate_z, fill = run_value)) +
      geom_tile() +
      scale_fill_gradient2(
        low = "#1f77b4", mid = "white", high = "#d62728",
        midpoint = 0, limits = rv_range, name = "Expected\nRun Value"
      ) +
      strike_zone_outline() +
      # Cross marks where pitch #1 was located.
      annotate(
        "point",
        x = results$prev_scenario$prev_plate_x,
        y = results$prev_scenario$prev_plate_z,
        shape = 4, size = 4, color = "black", stroke = 2
      ) +
      facet_wrap(~facet_label, nrow = 1) +
      coord_fixed() +
      labs(x = plate_x_axis_label, y = "Pitch Height (ft)") +
      theme_bw() +
      theme(strip.text = element_text(face = "bold", size = 11))
  })

  # Map of which follow-up pitch group has the lowest predicted run value at
  # each grid location (ties resolved to a single row).
  output$best_pitch_zone <- renderPlot({
    req(results$grids)
    best <- results$grids %>%
      group_by(plate_x, plate_z) %>%
      slice_min(run_value, n = 1, with_ties = FALSE) %>%
      ungroup()

    ggplot(best, aes(x = plate_x, y = plate_z, fill = pitch2_label)) +
      geom_tile() +
      scale_fill_manual(values = pitch_colors, name = "Optimal Pitch") +
      strike_zone_outline() +
      annotate(
        "point",
        x = results$prev_scenario$prev_plate_x,
        y = results$prev_scenario$prev_plate_z,
        shape = 4, size = 4, color = "white", stroke = 2
      ) +
      coord_fixed() +
      labs(x = plate_x_axis_label, y = "Pitch Height (ft)") +
      theme_bw() +
      theme(
        legend.position = "bottom",
        legend.title = element_text(face = "bold")
      )
  })

  # Small 3 x 3 strike-zone diagram highlighting the selected pitch #1 zone.
  output$zone_diagram <- renderPlot({
    req(input$pitch1_zone)
    selected <- as.numeric(input$pitch1_zone)
    is_selected <- zone_centers$zone == selected

    # Interior grid lines split the zone into thirds in each direction.
    col_width <- 1.66 / 3
    row_height <- 2 / 3

    ggplot() +
      strike_zone_outline() +
      annotate(
        "segment",
        x = -0.83 + col_width, xend = -0.83 + col_width,
        y = 1.5, yend = 3.5, color = "gray70"
      ) +
      annotate(
        "segment",
        x = -0.83 + 2 * col_width, xend = -0.83 + 2 * col_width,
        y = 1.5, yend = 3.5, color = "gray70"
      ) +
      annotate(
        "segment",
        x = -0.83, xend = 0.83,
        y = 1.5 + row_height, yend = 1.5 + row_height, color = "gray70"
      ) +
      annotate(
        "segment",
        x = -0.83, xend = 0.83,
        y = 1.5 + 2 * row_height, yend = 1.5 + 2 * row_height,
        color = "gray70"
      ) +
      geom_text(
        data = zone_centers,
        aes(x = plate_x, y = plate_z, label = zone_label),
        size = 3.5,
        color = ifelse(is_selected, "#d62728", "gray50"),
        fontface = ifelse(is_selected, "bold", "plain")
      ) +
      annotate(
        "text",
        x = 0, y = 3.7, label = "Catcher's View (RHH)",
        size = 3, color = "gray50", fontface = "italic"
      ) +
      coord_fixed(xlim = c(-1.5, 1.5), ylim = c(1.2, 3.9)) +
      labs(x = NULL, y = NULL) +
      theme_bw() +
      theme(
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.grid = element_blank()
      )
  })

  # Summary table of the selected pitcher's arsenal. Movement is multiplied by
  # 12 for display with an "in" suffix; fmt_number() does the rounding.
  output$arsenal_table <- render_gt({
    req(results$arsenal)
    results$arsenal %>%
      mutate(pfx_x = pfx_x * 12, pfx_z = pfx_z * 12) %>%
      gt() %>%
      cols_label(
        pitch_label = "Pitch",
        release_speed = "Velocity",
        pfx_x = "Horizontal Break",
        pfx_z = "Induced Vertical Break",
        n = "Pitches Thrown"
      ) %>%
      fmt_number(
        columns = c(release_speed), decimals = 1, pattern = "{x} mph"
      ) %>%
      fmt_number(
        columns = c(pfx_x, pfx_z), decimals = 1, pattern = "{x} in"
      ) %>%
      fmt_number(columns = n, decimals = 0, use_seps = TRUE) %>%
      tab_options(
        table.font.size = 12,
        column_labels.font.weight = "bold"
      )
  })
}

shinyApp(ui, server)