# DataProcess / app.R
# OwenStOnge's picture
# Update app.R
# f3ab272 verified
Sys.setenv(RETICULATE_PYTHON = "/usr/bin/python3")
library(reticulate)
library(shiny)
library(shinydashboard)
library(shinyBS)
library(DT)
library(dplyr)
library(readr)
library(stringr)
library(jsonlite)
library(httr)
library(progressr)
library(RCurl)
library(curl)
library(xgboost)
library(recipes)
library(arrow)
library(base64enc)
# Shared login password, read from the `password` environment variable.
PASSWORD <- Sys.getenv("password")
# Fail closed: Sys.getenv() returns "" for an unset variable, and the login
# check compares the submitted password to PASSWORD with `==`, so an empty
# value would let a blank password through.
if (!nzchar(PASSWORD)) {
  stop(
    "Environment variable 'password' is not set; refusing to start without a login password.",
    call. = FALSE
  )
}
# Run values keyed by event_type; joined onto pitch data in clean_college_data().
rv <- read_csv("non_context_run_values.csv")
# Pre-trained Stuff+ model and its preprocessing recipe, used by
# predict_stuffplus().
stuffplus_model <- xgb.load("stuffplus_xgb.json")
stuffplus_recipe <- readRDS("stuffplus_recipe.rds")
# Default set of columns offered for removal from an uploaded file (only the
# ones actually present get dropped). The order is significant: this vector
# also supplies the checkbox choices shown in the UI.
columns_to_remove <- c(
  # Spin-axis summary fields
  "SpinAxis3dTransverseAngle",
  "SpinAxis3dLongitudinalAngle",
  "SpinAxis3dActiveSpinRate",
  "SpinAxis3dSpinEfficiency",
  "SpinAxis3dTilt",
  "SpinAxis3dVectorX",
  "SpinAxis3dVectorY",
  "SpinAxis3dVectorZ",
  # Seam-orientation rotation
  "SpinAxis3dSeamOrientationRotationX",
  "SpinAxis3dSeamOrientationRotationY",
  "SpinAxis3dSeamOrientationRotationZ",
  # Seam-orientation ball position / angle candidates (Amb1-Amb4)
  "SpinAxis3dSeamOrientationBallYAmb1",
  "SpinAxis3dSeamOrientationBallAngleHorizontalAmb1",
  "SpinAxis3dSeamOrientationBallZAmb1",
  "SpinAxis3dSeamOrientationBallAngleVerticalAmb2",
  "SpinAxis3dSeamOrientationBallZAmb2",
  "SpinAxis3dSeamOrientationBallXAmb4",
  "SpinAxis3dSeamOrientationBallYAmb4",
  "SpinAxis3dSeamOrientationBallAngleHorizontalAmb2",
  "SpinAxis3dSeamOrientationBallAngleVerticalAmb1",
  "SpinAxis3dSeamOrientationBallXAmb1",
  "SpinAxis3dSeamOrientationBallYAmb2",
  "SpinAxis3dSeamOrientationBallAngleHorizontalAmb4",
  "SpinAxis3dSeamOrientationBallAngleVerticalAmb4",
  "SpinAxis3dSeamOrientationBallXAmb2",
  "SpinAxis3dSeamOrientationBallAngleVerticalAmb3",
  "SpinAxis3dSeamOrientationBallAngleHorizontalAmb3",
  "SpinAxis3dSeamOrientationBallXAmb3",
  "SpinAxis3dSeamOrientationBallYAmb3",
  "SpinAxis3dSeamOrientationBallZAmb3",
  "SpinAxis3dSeamOrientationBallZAmb4",
  # Non-spin column
  "GameDate"
)
# Pitch colors for visualization (Coastal Carolina theme).
# Named by pitch type. Both spellings of the changeup are listed because the
# UI pickers use "ChangeUp" while clean_college_data() renames it to
# "Changeup"; without the alias the renamed type has no color entry.
pitch_colors <- c(
  "Fastball" = "#FA8072",
  "Four-Seam" = "#FA8072",
  "Sinker" = "#fdae61",
  "Slider" = "#A020F0",
  "Sweeper" = "magenta",
  "Curveball" = "#2c7bb6",
  "ChangeUp" = "#90EE90",
  "Changeup" = "#90EE90",
  "Splitter" = "#90EE32",
  "Cutter" = "red",
  "Knuckleball" = "#FFB4B4",
  "Other" = "#D3D3D3"
)
# Convert a single date value to a normalized string format.
#
# date_string:   scalar date value. Recognized inputs are YYYY-MM-DD,
#                M/D/YYYY (or MM/DD/YYYY) and M/D/YY (or MM/DD/YY).
# output_format: "mdyy" for M/D/YY (no leading zeros on month/day, 2-digit
#                year); any other value (default "yyyy") gives YYYY-MM-DD.
#
# Returns NA for NA or empty input, the reformatted string when the input
# matches a recognized pattern and parses to a real date, and otherwise the
# input unchanged (as character).
convert_date_format <- function(date_string, output_format = "yyyy") {
  if (is.na(date_string) || date_string == "") {
    return(NA)
  }
  date_string <- as.character(date_string)

  # Anchored pattern -> strptime format. The patterns are mutually exclusive,
  # so at most one parse is attempted.
  known_formats <- c(
    "^\\d{4}-\\d{2}-\\d{2}$" = "%Y-%m-%d",
    "^\\d{1,2}/\\d{1,2}/\\d{4}$" = "%m/%d/%Y",
    "^\\d{1,2}/\\d{1,2}/\\d{2}$" = "%m/%d/%y"
  )

  parsed_date <- NULL
  for (pattern in names(known_formats)) {
    if (grepl(pattern, date_string)) {
      parsed_date <- tryCatch(
        as.Date(date_string, format = known_formats[[pattern]]),
        error = function(e) NULL
      )
      break
    }
  }

  # Unrecognized layout or impossible calendar date: hand the input back.
  if (is.null(parsed_date) || is.na(parsed_date)) {
    return(date_string)
  }

  if (output_format == "mdyy") {
    # Build M/D/YY from components. Stripping zeros with regexes on the
    # formatted string also removed the year's leading zero (e.g. 2005 -> "5").
    month <- as.integer(format(parsed_date, "%m"))
    day <- as.integer(format(parsed_date, "%d"))
    return(paste(month, day, format(parsed_date, "%y"), sep = "/"))
  }

  format(parsed_date, "%Y-%m-%d")
}
# Normalize the known date columns of a data frame.
#
# df:            data frame; only the columns "Date", "GameDate", "UTCDate"
#                and "LocalDateTime" that are present are touched.
# output_format: forwarded to convert_date_format() ("yyyy" or "mdyy").
#
# Returns df with each present date column replaced by a character vector of
# converted values (NA where the input was NA or empty).
convert_date_columns <- function(df, output_format = "yyyy") {
  date_columns <- c("Date", "GameDate", "UTCDate", "LocalDateTime")
  for (col in intersect(date_columns, names(df))) {
    # vapply() pins the result to character even for a zero-row column, where
    # sapply() would return an empty list and turn the column into a list.
    # as.character() maps the logical NA returned for blank input to
    # NA_character_.
    df[[col]] <- vapply(
      df[[col]],
      function(x) as.character(convert_date_format(x, output_format)),
      character(1),
      USE.NAMES = FALSE
    )
  }
  df
}
# Parse a bat tracking JSON file into a data frame of plays.
#
# json_path: path to the JSON file. The top-level object is read for
#            GameReference, SessionId and a Plays array.
#
# Returns a list. On success: success = TRUE, data (one row per play with
# PitchUID, BatSpeed_Sensor, VerticalAttackAngle_Sensor,
# HorizontalAttackAngle_Sensor, BatTracking_PlayId, BatTracking_Time; NULL when
# Plays is empty), game_reference, session_id, plays_count and message.
# On any error: success = FALSE, data = NULL and message with the error text.
parse_bat_tracking_json <- function(json_path) {
  # Pull one scalar field out of every play, substituting NA when the field is
  # absent, null or not a single atomic value. This guarantees one value per
  # play, so a single incomplete play no longer makes data.frame() fail and
  # reject the whole file.
  play_field <- function(plays, field) {
    values <- lapply(plays, function(play) {
      value <- play[[field]]
      if (is.atomic(value) && length(value) == 1L) value else NA
    })
    unlist(values, use.names = FALSE)
  }

  tryCatch({
    json_data <- fromJSON(json_path, simplifyVector = FALSE)

    # Extract metadata
    game_reference <- json_data$GameReference
    session_id <- json_data$SessionId

    # Extract plays
    plays <- json_data$Plays
    if (length(plays) == 0) {
      return(list(
        success = TRUE,
        data = NULL,
        game_reference = game_reference,
        session_id = session_id,
        plays_count = 0L,
        message = "JSON parsed but contains no bat tracking plays (empty Plays array)"
      ))
    }

    # Build data frame from plays
    bat_tracking_df <- data.frame(
      PitchUID = play_field(plays, "PitchUID"),
      BatSpeed_Sensor = play_field(plays, "BatSpeed"),
      VerticalAttackAngle_Sensor = play_field(plays, "VerticalAttackAngle"),
      HorizontalAttackAngle_Sensor = play_field(plays, "HorizontalAttackAngle"),
      BatTracking_PlayId = play_field(plays, "PlayId"),
      BatTracking_Time = play_field(plays, "Time"),
      stringsAsFactors = FALSE
    )

    list(
      success = TRUE,
      data = bat_tracking_df,
      game_reference = game_reference,
      session_id = session_id,
      plays_count = length(plays),
      message = paste("Successfully parsed", length(plays), "bat tracking play(s)")
    )
  }, error = function(e) {
    list(
      success = FALSE,
      data = NULL,
      message = paste("Error parsing JSON:", e$message)
    )
  })
}
# Attach bat tracking columns to pitch-level CSV data, keyed on PitchUID.
#
# csv_data:          data frame of pitches; must contain PitchUID to merge.
# bat_tracking_data: data frame of bat tracking plays (PitchUID plus sensor
#                    columns), or NULL.
#
# Returns a list with:
#   data      - csv_data with every non-key bat tracking column added (NA
#               where a pitch has no match); row count and order are unchanged
#   matched   - number of pitch rows that found a bat tracking record
#   total_bat - number of bat tracking records supplied
#   message   - human-readable outcome
# Existing BatSpeed / VerticalAttackAngle / HorizontalAttackAngle columns are
# filled from the matching *_Sensor column only where they are NA.
merge_with_bat_tracking <- function(csv_data, bat_tracking_data) {
  if (is.null(bat_tracking_data) || nrow(bat_tracking_data) == 0) {
    return(list(
      data = csv_data,
      matched = 0,
      total_bat = 0,
      message = "No bat tracking data to merge"
    ))
  }

  # Check if PitchUID exists in CSV
  if (!"PitchUID" %in% names(csv_data)) {
    return(list(
      data = csv_data,
      matched = 0,
      total_bat = nrow(bat_tracking_data),
      message = "CSV does not contain PitchUID column - cannot merge"
    ))
  }

  if (!"PitchUID" %in% names(bat_tracking_data)) {
    return(list(
      data = csv_data,
      matched = 0,
      total_bat = nrow(bat_tracking_data),
      message = "Bat tracking data does not contain PitchUID column - cannot merge"
    ))
  }

  # Left join by lookup: each pitch takes the FIRST bat tracking record with
  # its PitchUID. Unlike a plain join this cannot duplicate pitch rows when
  # the bat data repeats a key. Keys are compared as character so a type
  # difference between the two sources does not matter.
  csv_keys <- as.character(csv_data$PitchUID)
  bat_keys <- as.character(bat_tracking_data$PitchUID)
  match_idx <- match(csv_keys, bat_keys)
  # A missing key must never match another missing key.
  match_idx[is.na(csv_keys)] <- NA_integer_

  merged_data <- csv_data
  for (col in setdiff(names(bat_tracking_data), "PitchUID")) {
    merged_data[[col]] <- bat_tracking_data[[col]][match_idx]
  }

  # Count matches by key rather than by non-missing bat speed, so a matched
  # record whose speed is missing is still reported as matched.
  matched_count <- sum(!is.na(match_idx))

  # If an original measurement column exists and is empty, fill with sensor data
  for (target in c("BatSpeed", "VerticalAttackAngle", "HorizontalAttackAngle")) {
    sensor <- paste0(target, "_Sensor")
    if (target %in% names(merged_data) && sensor %in% names(merged_data)) {
      fill <- is.na(merged_data[[target]]) & !is.na(merged_data[[sensor]])
      merged_data[[target]][fill] <- merged_data[[sensor]][fill]
    }
  }

  list(
    data = merged_data,
    matched = matched_count,
    total_bat = nrow(bat_tracking_data),
    message = paste("Merged successfully:", matched_count, "of", nrow(bat_tracking_data), "bat tracking records matched")
  )
}
# Clean raw TrackMan pitch-level data and derive per-pitch outcome flags.
#
# data:  data frame of pitches. The code reads PlayResult, Batter, Pitcher,
#        Catcher, TaggedPitchType, PitchCall, KorBB, Strikes, OutsOnPlay,
#        ExitSpeed, PlateLocHeight and PlateLocSide.
# teams: accepted for interface compatibility; not used in this function.
#
# Steps: normalize home-run spelling and "Last, First" names; rename
# `Top/Bottom`; coerce known numeric columns; scale plate location by 12;
# standardize pitch type labels; add indicator / value columns (is_csw,
# is_swing, is_whiff, in_zone, chase, in_zone_whiff, is_hit, slg, on_base,
# is_hard_hit, woba, wobacon, is_plate_appearance, is_at_bat, is_walk, is_k,
# is_put_away); add event_type and join the global `rv` table on it; drop
# trajectory / confidence / bat-tracking columns.
#
# Returns the transformed data frame.
clean_college_data <- function(data, teams = NA) {
  hit_results <- c("Single", "Double", "Triple", "Homerun", "HomeRun")
  strikeout_results <- c("StrikeoutSwinging", "StrikeoutLooking")

  data <- data %>%
    mutate(
      PlayResult = ifelse(PlayResult %in% c("HomeRun", "homerun"), "Homerun", PlayResult),
      # "Last, First" -> "First Last"
      Batter = sub("(.*),\\s*(.*)", "\\2 \\1", Batter),
      Pitcher = sub("(.*),\\s*(.*)", "\\2 \\1", Pitcher),
      Catcher = sub("(.*),\\s*(.*)", "\\2 \\1", Catcher)
    )

  if ("Top/Bottom" %in% colnames(data)) {
    data <- data %>%
      rename(`Top.Bottom` = `Top/Bottom`)
  }

  numeric_columns <- c(
    "PitchNo", "PAofInning", "PitchofPA", "PitcherId", "BatterId", "Inning", "Outs", "Balls",
    "Strikes", "OutsOnPlay", "RunsScored", "RelSpeed", "VertRelAngle", "HorzRelAngle", "SpinRate",
    "SpinAxis", "RelHeight", "RelSide", "Extension", "VertBreak", "InducedVertBreak", "HorzBreak",
    "PlateLocHeight", "PlateLocSide", "ZoneSpeed", "VertApprAngle", "HorzApprAngle", "ZoneTime",
    "ExitSpeed", "Angle", "Direction", "HitSpinRate", "Distance", "Bearing", "HangTime",
    "LastTrackedDistance", "pfxx", "pfxz", "x0", "y0", "z0", "vx0", "vz0", "vy0", "ax0", "ay0",
    "az0", "EffectiveVelo", "MaxHeight", "SpeedDrop", "ContactPositionX", "ContactPositionY",
    "ContactPositionZ", "HomeTeamForeignID", "AwayTeamForeignID", "CatcherId", "ThrowSpeed",
    "PopTime", "ExchangeTime", "TimeToBase"
  )

  data <- data %>%
    mutate(across(any_of(numeric_columns), as.numeric)) %>%
    # Scale plate location by 12. any_of() applies this only to the columns
    # that exist; the earlier `if (...) else PlateLocHeight` form referenced
    # the very column it had just found to be missing.
    mutate(across(any_of(c("PlateLocHeight", "PlateLocSide")), function(x) 12 * x))

  data <- data %>%
    mutate(TaggedPitchType = case_when(
      TaggedPitchType == "FourSeamFastBall" ~ "Fastball",
      TaggedPitchType %in% c("TwoSeamFastBall", "OneSeamFastBall") ~ "Sinker",
      TaggedPitchType == "ChangeUp" ~ "Changeup",
      TaggedPitchType == "Undefined" ~ "Other",
      TRUE ~ TaggedPitchType
    ))

  data <- data %>%
    mutate(
      is_csw = case_when(
        PitchCall %in% c("StrikeSwinging", "StrikeCalled") ~ 1,
        TRUE ~ 0
      ),
      is_swing = case_when(
        PitchCall %in% c("StrikeSwinging", "FoulBallNotFieldable", "InPlay",
                         "FoulBallFieldable", "FoulBall") ~ 1,
        TRUE ~ 0
      ),
      is_whiff = case_when(
        PitchCall == "StrikeSwinging" & is_swing == 1 ~ 1,
        PitchCall != "StrikeSwinging" & is_swing == 1 ~ 0,
        TRUE ~ NA_real_
      ),
      in_zone = case_when(
        # An untracked location is unknown, not a strike: without this branch
        # the NA comparisons fell through to the default and counted as
        # in-zone.
        is.na(PlateLocSide) | is.na(PlateLocHeight) ~ NA_real_,
        PlateLocSide > 9.975 | PlateLocSide < -9.975 |
          PlateLocHeight > 40 | PlateLocHeight < 20 ~ 0,
        TRUE ~ 1
      ),
      chase = case_when(
        is_swing == 1 & in_zone == 0 ~ 1,
        is_swing == 0 & in_zone == 0 ~ 0,
        TRUE ~ NA_real_
      ),
      in_zone_whiff = case_when(
        is_swing == 1 & in_zone == 1 & is_whiff == 1 ~ 1,
        is_swing == 1 & in_zone == 1 & is_whiff == 0 ~ 0,
        TRUE ~ NA_real_
      ),
      is_hit = case_when(
        PlayResult %in% hit_results & PitchCall == "InPlay" ~ 1,
        !PlayResult %in% hit_results & PitchCall == "InPlay" ~ 0,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% strikeout_results ~ 0,
        TRUE ~ NA_real_
      ),
      slg = case_when(
        PitchCall == "InPlay" & PlayResult == "Single" ~ 1,
        PitchCall == "InPlay" & PlayResult == "Double" ~ 2,
        PitchCall == "InPlay" & PlayResult == "Triple" ~ 3,
        PitchCall == "InPlay" & PlayResult %in% c("Homerun", "HomeRun") ~ 4,
        !PlayResult %in% hit_results & PitchCall == "InPlay" ~ 0,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% strikeout_results ~ 0,
        TRUE ~ NA_real_
      ),
      on_base = case_when(
        PitchCall == "InPlay" & PlayResult %in% hit_results ~ 1,
        PitchCall %in% c("HitByPitch") | KorBB == "Walk" ~ 1,
        # "Sacrifice" is not in this set, so no separate exclusion is needed.
        PitchCall == "InPlay" & PlayResult %in% c("Out", "Error", "FieldersChoice") ~ 0,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% strikeout_results ~ 0,
        TRUE ~ NA_real_
      ),
      is_hard_hit = case_when(
        ExitSpeed >= 95 & PitchCall == "InPlay" ~ 1,
        ExitSpeed < 95 & PitchCall == "InPlay" ~ 0,
        TRUE ~ NA_real_
      ),
      woba = case_when(
        PitchCall == "InPlay" & PlayResult == "Single" ~ 0.95,
        PitchCall == "InPlay" & PlayResult == "Double" ~ 1.24,
        PitchCall == "InPlay" & PlayResult == "Triple" ~ 1.47,
        PitchCall == "InPlay" & PlayResult %in% c("Homerun", "HomeRun") ~ 1.71,
        KorBB == "Walk" ~ 0.82,
        PitchCall %in% c("HitByPitch") ~ 0.85,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% strikeout_results ~ 0,
        PitchCall == "InPlay" & !PlayResult %in% hit_results ~ 0,
        TRUE ~ NA_real_
      ),
      wobacon = case_when(
        PitchCall == "InPlay" & PlayResult == "Single" ~ 0.95,
        PitchCall == "InPlay" & PlayResult == "Double" ~ 1.24,
        PitchCall == "InPlay" & PlayResult == "Triple" ~ 1.47,
        PitchCall == "InPlay" & PlayResult %in% c("Homerun", "HomeRun") ~ 1.71,
        PitchCall == "InPlay" & !PlayResult %in% hit_results ~ 0,
        TRUE ~ NA_real_
      ),
      is_plate_appearance = ifelse(
        PitchCall %in% c("InPlay", "HitByPitch") |
          KorBB %in% c("Strikeout", "Walk") |
          PlayResult %in% strikeout_results,
        1, 0
      ),
      is_at_bat = case_when(
        PitchCall == "InPlay" & !PlayResult %in% c("StolenBase", "Sacrifice", "CaughtStealing", "Undefined") ~ 1,
        KorBB == "Strikeout" ~ 1,
        PlayResult %in% strikeout_results ~ 1,
        TRUE ~ 0
      ),
      is_walk = case_when(
        is_plate_appearance == 1 & KorBB == "Walk" ~ 1,
        is_plate_appearance == 1 & KorBB != "Walk" ~ 0,
        TRUE ~ NA_real_
      ),
      is_k = case_when(
        # Checked first: a strikeout recorded only in PlayResult (KorBB set
        # to some other value) used to hit the `KorBB != "Strikeout" ~ 0`
        # branch and be scored as a non-strikeout.
        PlayResult %in% strikeout_results ~ 1,
        is_at_bat == 1 & KorBB == "Strikeout" ~ 1,
        is_at_bat == 1 & KorBB != "Strikeout" ~ 0,
        TRUE ~ NA_real_
      ),
      is_put_away = case_when(
        # Same ordering fix as is_k: either strikeout marker wins on a
        # two-strike pitch.
        Strikes == 2 & (KorBB %in% "Strikeout" | PlayResult %in% strikeout_results) ~ 1,
        Strikes == 2 & KorBB != "Strikeout" ~ 0,
        TRUE ~ NA_real_
      ),
      # %in% is never NA, so a missing KorBB no longer blanks out OutsOnPlay.
      OutsOnPlay = ifelse(
        KorBB %in% "Strikeout" | PlayResult %in% strikeout_results,
        OutsOnPlay + 1,
        OutsOnPlay
      )
    )

  data <- data %>%
    mutate(event_type = case_when(
      # The misspelled variants below are matched deliberately, as in the
      # original mapping.
      PitchCall %in% c("StrikeSwinging", "StrkeSwinging") ~ "Whiff",
      PitchCall %in% c("StriekC", "StrikeCalled") ~ "Called Strike",
      PitchCall %in% c("FoulBallFieldable", "FoulBall", "FoulBallNotFieldable",
                       "FouldBallNotFieldable") ~ "Foul Ball",
      PitchCall %in% c("BallCalled", "BallinDirt", "BallIntentional", "BalIntentional") ~ "Ball",
      PitchCall == "HitByPitch" ~ "HBP",
      PitchCall == "InPlay" & PlayResult %in% c("Out", "FieldersChoice",
                                                "Error", "error",
                                                "Sacrifice") ~ "Field Out",
      PitchCall == "InPlay" & PlayResult == "Single" ~ "Single",
      PitchCall == "InPlay" & PlayResult == "Double" ~ "Double",
      PitchCall == "InPlay" & PlayResult == "Triple" ~ "Triple",
      PitchCall == "InPlay" & PlayResult == "Homerun" ~ "Home Run",
      TRUE ~ NA_character_
    )) %>%
    left_join(rv, by = "event_type")

  # any_of() drops whichever of these columns are present instead of failing
  # when an export lacks one of them.
  drop_columns <- c(
    "PitchLastMeasuredX", "PitchLastMeasuredY", "PitchLastMeasuredZ",
    "HitSpinAxis",
    "PitchReleaseConfidence", "PitchLocationConfidence", "PitchMovementConfidence",
    "HitLaunchConfidence", "HitLandingConfidence",
    "CatcherThrowCatchConfidence", "CatcherThrowReleaseConfidence", "CatcherThrowLocationConfidence",
    "PositionAt110X", "PositionAt110Y", "PositionAt110Z",
    "BatSpeed", "VerticalAttackAngle", "HorizontalAttackAngle"
  )

  data %>%
    dplyr::select(
      -any_of(drop_columns),
      -starts_with("PitchTrajectory"),
      -starts_with("HitTrajectory")
    )
}
# Score every pitch with the Stuff+ model.
#
# data: data frame of pitches. The code reads PitcherThrows, RelSide, ax0,
#       az0, RelSpeed, PlateLocHeight, PlateLocSide, Pitcher, GameID and
#       TaggedPitchType; any further columns needed are whatever the global
#       `stuffplus_recipe` expects.
#
# Returns `data` with two added columns: raw_stuff (the model prediction) and
# stuff_plus (raw_stuff rescaled as below). The feature engineering is done
# on a copy (`predict_data`); the other columns of `data` are returned
# unmodified.
# Relies on the globals `stuffplus_recipe` and `stuffplus_model`.
predict_stuffplus <- function(data) {
predict_data <- data %>%
# Handedness normalization: keep right-handers as-is, negate left-handers.
# For "Both"/"Undefined" the absolute value is used; a value of exactly 0, or
# any other/missing PitcherThrows, matches no branch and becomes NA.
mutate(RelSide = case_when(
PitcherThrows == "Right" ~ RelSide,
PitcherThrows == "Left" ~ -RelSide,
PitcherThrows %in% c("Both", "Undefined") & RelSide > 0 ~ RelSide,
PitcherThrows %in% c("Both", "Undefined") & RelSide < 0 ~ -RelSide),
ax0 = case_when(
PitcherThrows == "Right" ~ ax0,
PitcherThrows == "Left" ~ -ax0,
PitcherThrows %in% c("Both", "Undefined") & ax0 > 0 ~ ax0,
PitcherThrows %in% c("Both", "Undefined") & ax0 < 0 ~ -ax0),
# NOTE(review): clean_college_data() also multiplies the plate location by
# 12; if its output is passed here the values are scaled twice - confirm the
# expected input units.
PlateLocHeight = PlateLocHeight*12,
PlateLocSide = PlateLocSide*12,
# mutate() evaluates sequentially, so this negates the handedness-adjusted
# ax0 computed just above.
ax0 = -ax0) %>%
# Primary pitch per pitcher and game: Fastball if thrown at all, else Sinker,
# else the most frequently tagged pitch type.
group_by(Pitcher, GameID) %>%
mutate(
primary_pitch = case_when(
any(TaggedPitchType == "Fastball") ~ "Fastball",
any(TaggedPitchType == "Sinker") ~ "Sinker",
TRUE ~ names(sort(table(TaggedPitchType), decreasing = TRUE))[1]
)
) %>%
# Mean az0 and release speed of that primary pitch within the same group.
group_by(Pitcher, GameID, primary_pitch) %>%
mutate(
primary_az0 = mean(az0[TaggedPitchType == primary_pitch], na.rm = TRUE),
primary_velo = mean(RelSpeed[TaggedPitchType == primary_pitch], na.rm = TRUE)
) %>%
ungroup() %>%
# Each pitch's difference from the pitcher's primary pitch.
mutate(az0_diff = az0 - primary_az0,
velo_diff = RelSpeed - primary_velo)
# Apply the stored preprocessing recipe, then predict on the resulting matrix.
df_processed <- bake(stuffplus_recipe, new_data = predict_data)
df_matrix <- as.matrix(df_processed)
raw_stuff <- predict(stuffplus_model, df_matrix)
# Assumes bake() returns exactly one row per input row, in the same order -
# TODO confirm against the recipe's steps.
data$raw_stuff <- raw_stuff
data <- data %>%
# Rescale to a 100-centered index with 10 points per 0.01010482 of raw score;
# the two constants are presumably the reference mean and standard deviation
# of raw_stuff - verify against the model training code.
mutate(stuff_plus = ((raw_stuff - 0.004424894) / 0.01010482) * 10 + 100)
return(data)
}
# Login page: inline CSS plus a centered card holding the logo, a password
# field (input id "password"), a login button (input id "login") and a text
# output ("wrong_pass") for the failed-login message.
login_ui <- fluidPage(
# Page-level styles for the login card, button and password field.
tags$style(HTML("
body {
background-color: #f0f4f8;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
color: #006F71;
}
.login-container {
max-width: 360px;
margin: 120px auto;
background: #A27752;
padding: 30px 25px;
border-radius: 8px;
box-shadow: 0 4px 15px #A1A1A4;
text-align: center;
color: white;
}
.login-message {
margin-bottom: 20px;
font-size: 14px;
color: #ffffff;
font-weight: 600;
}
.btn-primary {
background-color: #006F71 !important;
border-color: #006F71 !important;
color: white !important;
font-weight: bold;
width: 100%;
margin-top: 10px;
box-shadow: 0 2px 5px #006F71;
transition: background-color 0.3s ease;
}
.btn-primary:hover {
background-color: #006F71 !important;
border-color: #A27752 !important;
}
.form-control {
border-radius: 4px;
border: 1.5px solid #006F71 !important;
color: #006F71;
font-weight: 600;
}
")),
# Login card contents.
div(class = "login-container",
tags$img(src = "https://upload.wikimedia.org/wikipedia/en/thumb/e/ef/Coastal_Carolina_Chanticleers_logo.svg/1200px-Coastal_Carolina_Chanticleers_logo.svg.png", height = "150px"),
passwordInput("password", "Password:"),
actionButton("login", "Login"),
textOutput("wrong_pass")
)
)
# UI
# Main application page, shown in place of login_ui once the user is logged
# in. Layout, top to bottom:
#   - <head> styles (fonts, header, pill-style tabs, tables, upload/merge boxes)
#   - header bar with three logos
#   - tabset "main_tabs": Upload & Process, Bat Tracking Data, Preview Data,
#     Pitch Movement Chart, Download, Scraping
#   - a modal ("pitchEditModal") for editing a single pitch's type
app_ui <- fluidPage(
tags$head(
tags$style(HTML("
body, table, .gt_table {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto,
Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji',
'Segoe UI Symbol';
}
/* Header styling */
.app-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 20px 40px;
background: #ffffff;
border-bottom: 3px solid darkcyan;
margin-bottom: 20px;
}
.header-logo-left, .header-logo-right {
width: 120px;
height: auto;
}
.header-logo-center {
max-width: 400px;
height: auto;
}
@media (max-width: 768px) {
.app-header {
flex-direction: column;
padding: 15px 20px;
}
.header-logo-left, .header-logo-right {
width: 80px;
}
.header-logo-center {
max-width: 250px;
margin: 10px 0;
}
}
/* Gradient pill tabs styling */
.nav-tabs {
border: none !important;
border-radius: 50px;
padding: 6px 12px;
margin: 20px auto 0;
max-width: 100%;
background: linear-gradient(135deg, #d4edeb 0%, #e8ddd0 50%, #d4edeb 100%);
box-shadow: 0 4px 16px rgba(0,139,139,.12), inset 0 2px 4px rgba(255,255,255,.6);
border: 1px solid rgba(0,139,139,.2);
position: relative;
overflow-x: auto;
-webkit-overflow-scrolling: touch;
display: flex;
justify-content: center;
align-items: center;
flex-wrap: wrap;
gap: 6px;
}
.nav-tabs::-webkit-scrollbar {
height: 0;
}
.nav-tabs::before {
content: '';
position: absolute;
inset: 0;
pointer-events: none;
border-radius: 50px;
background: linear-gradient(135deg, rgba(255,255,255,.4), transparent);
}
.nav-tabs > li > a {
color: darkcyan !important;
border: none !important;
border-radius: 50px !important;
background: transparent !important;
font-weight: 700;
font-size: 14.5px;
padding: 10px 22px;
white-space: nowrap;
letter-spacing: 0.2px;
transition: all 0.2s ease;
}
.nav-tabs > li > a:hover {
color: #006666 !important;
background: rgba(255,255,255,.5) !important;
transform: translateY(-1px);
}
.nav-tabs > li.active > a,
.nav-tabs > li.active > a:focus,
.nav-tabs > li.active > a:hover {
background: linear-gradient(135deg, #008b8b 0%, #20b2aa 30%, #00ced1 50%, #20b2aa 70%, #008b8b 100%) !important;
color: #fff !important;
text-shadow: 0 1px 2px rgba(0,0,0,.2);
box-shadow: 0 4px 16px rgba(0,139,139,.4), inset 0 2px 8px rgba(255,255,255,.4), inset 0 -2px 6px rgba(0,0,0,.2);
border: 1px solid rgba(255,255,255,.3) !important;
}
.nav-tabs > li > a:focus {
outline: 3px solid rgba(205,133,63,.6);
outline-offset: 2px;
}
.tab-content {
background: linear-gradient(135deg, rgba(255,255,255,.95), rgba(248,249,250,.95));
border-radius: 20px;
padding: 25px;
margin-top: 14px;
box-shadow: 0 15px 40px rgba(0,139,139,.1);
backdrop-filter: blur(15px);
border: 1px solid rgba(0,139,139,.1);
position: relative;
overflow: hidden;
}
.tab-content::before {
content: '';
position: absolute;
left: 0;
right: 0;
top: 0;
height: 4px;
background: linear-gradient(90deg, darkcyan, peru, darkcyan);
background-size: 200% 100%;
animation: shimmer 3s linear infinite;
}
@keyframes shimmer {
0% { background-position: -200% 0; }
100% { background-position: 200% 0; }
}
#name {
font-size: 10px;
font-weight: 500;
text-align: right;
margin-bottom: 8px;
color: #6C757D;
letter-spacing: 0.5px;
}
h3 {
color: black;
font-weight: 600;
margin-top: 25px;
margin-bottom: 15px;
padding-bottom: 8px;
border-bottom: 2px solid #007BA7;
}
h4 {
color: darkcyan;
font-weight: 500;
margin-top: 20px;
margin-bottom: 12px;
}
h1 {
color: #007BA7;
font-weight: 700;
margin-bottom: 20px;
text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
}
label {
font-weight: 500;
color: peru;
margin-bottom: 5px;
}
.plot-title {
text-align: center;
font-weight: 600;
color: #2C3E50;
margin-bottom: 10px;
}
.dataTables_wrapper .dataTables_length,
.dataTables_wrapper .dataTables_filter,
.dataTables_wrapper .dataTables_info,
.dataTables_wrapper .dataTables_paginate {
color: #2C3E50;
}
thead th {
background-color: #F8F9FA;
color: #2C3E50;
font-weight: 600;
text-align: center !important;
padding: 10px !important;
}
.brand-teal { color: darkcyan; }
.brand-bronze { color: peru; }
/* Bat tracking upload box styling */
.bat-tracking-box {
background: linear-gradient(135deg, #e8f4f8 0%, #f0e6d3 100%);
border: 2px dashed darkcyan;
border-radius: 15px;
padding: 20px;
margin-top: 15px;
}
.merge-status-box {
background: #f8f9fa;
border-left: 4px solid darkcyan;
padding: 15px;
border-radius: 0 10px 10px 0;
margin-top: 15px;
}
.merge-success {
border-left-color: #28a745;
background: #d4edda;
}
.merge-warning {
border-left-color: #ffc107;
background: #fff3cd;
}
.merge-error {
border-left-color: #dc3545;
background: #f8d7da;
}
"))
),
# Header with three logos
div(class = "app-header",
tags$img(src = "https://i.imgur.com/7vx5Ci8.png", class = "header-logo-left", alt = "Logo Left"),
tags$img(src = "https://i.imgur.com/c3zCSg6.png", class = "header-logo-center", alt = "Main Logo"),
tags$img(src = "https://i.imgur.com/VbrN5WV.png", class = "header-logo-right", alt = "Logo Right")
),
tabsetPanel(id = "main_tabs",
# Upload & Process Tab
# Inputs: CSV file and its read options, output date format, optional bat
# tracking JSON, and the checklist of columns to remove.
tabPanel(
"Upload & Process",
fluidRow(
column(6,
h3("1. Upload TrackMan CSV"),
fileInput("file", "Choose CSV File", accept = c(".csv")),
fluidRow(
column(3,
checkboxInput("header", "Header", TRUE)
),
column(3,
radioButtons("sep", "Separator",
choices = c(Comma = ",", Semicolon = ";", Tab = "\t"),
selected = ",", inline = TRUE)
),
column(3,
radioButtons("quote", "Quote",
choices = c(None = "", "Double Quote" = '"', "Single Quote" = "'"),
selected = '"', inline = TRUE)
),
column(3,
radioButtons("date_format", "Date Output Format",
choices = c("YYYY-MM-DD" = "yyyy", "M/D/YY" = "mdyy"),
selected = "yyyy")
)
),
verbatimTextOutput("csv_status")
),
column(6,
div(class = "bat-tracking-box",
h3("2. Upload Bat Tracking JSON (Optional)", style = "margin-top: 0;"),
fileInput("json_file", "Choose Bat Tracking JSON File", accept = c(".json")),
p(style = "color: #666; font-size: 12px;",
"Upload the corresponding _battracking.json file to merge bat speed and attack angle data."),
verbatimTextOutput("json_status"),
uiOutput("merge_status_ui")
)
)
),
hr(),
fluidRow(
column(8,
h3("3. Columns to Remove"),
p("Select which columns to remove from your dataset:"),
# Choices and default selection both come from the global columns_to_remove.
checkboxGroupInput("columns_to_remove", "Remove These Columns:",
choices = columns_to_remove,
selected = columns_to_remove)
),
column(4,
h3("Quick Actions"),
br(),
actionButton("select_all_cols", "Select All", class = "btn-primary"),
br(), br(),
actionButton("deselect_all_cols", "Deselect All", class = "btn-default"),
br(), br(),
actionButton("select_spinaxis", "Select SpinAxis3d Columns", class = "btn-info"),
br(), br(),
h4("Processing Summary"),
verbatimTextOutput("process_summary")
)
)
),
# Bat Tracking Details Tab
tabPanel(
"Bat Tracking Data",
fluidRow(
column(12,
h3("Bat Tracking Merge Details"),
uiOutput("bat_tracking_details"),
hr(),
h4("Pitches with Bat Tracking Data"),
DT::dataTableOutput("bat_tracking_table")
)
)
),
# Preview Data Tab
tabPanel(
"Preview Data",
fluidRow(
column(12,
h3("Data Preview"),
DT::dataTableOutput("preview")
)
)
),
# Pitch Movement Chart Tab
# The pitcher choices start empty (NULL) and are filled in by the server.
tabPanel(
"Pitch Movement Chart",
fluidRow(
column(3,
selectInput("pitcher_select", "Select Pitcher:",
choices = NULL, selected = NULL)
),
column(3,
h4("Selection Mode:"),
radioButtons("selection_mode", "",
choices = list("Single Click" = "single", "Drag Select" = "drag"),
selected = "single", inline = TRUE)
),
column(6,
# Bulk-edit controls are only shown in drag-select mode.
conditionalPanel(
condition = "input.selection_mode == 'drag'",
h4("Bulk Edit:"),
fluidRow(
column(8,
selectInput("bulk_pitch_type", "Change all selected to:",
choices = c("Fastball", "Sinker", "Cutter", "Slider",
"Curveball", "ChangeUp", "Splitter", "Knuckleball", "Other"),
selected = "Fastball")
),
column(4,
br(),
actionButton("apply_bulk_change", "Apply to Selected", class = "btn-success")
)
)
)
)
),
fluidRow(
column(8,
h3("Interactive Pitch Movement Analysis"),
# The plot reports click, brush and hover events as input$plot_click,
# input$plot_brush and input$plot_hover.
plotOutput("movement_plot", height = "600px",
click = "plot_click",
brush = brushOpts(id = "plot_brush"),
hover = hoverOpts(id = "plot_hover", delay = 100)),
h4("Instructions:"),
p(strong("Single Click Mode:"), "Click on any point to edit one pitch type at a time via popup modal."),
p(strong("Drag Select Mode:"), "Click and drag to select multiple points, then use the dropdown to change them all at once."),
conditionalPanel(
condition = "input.selection_mode == 'drag'",
div(style = "background-color: #f0f8ff; padding: 10px; border-radius: 5px; margin: 10px 0; border-left: 4px solid darkcyan;",
h4("Selected Points:", style = "margin-top: 0; color: darkcyan;"),
textOutput("selection_info")
)
),
verbatimTextOutput("hover_info"),
verbatimTextOutput("click_info")
),
column(4,
h3("Pitch Metrics Summary"),
DT::dataTableOutput("movement_stats")
)
)
),
# Download Tab
tabPanel(
"Download",
fluidRow(
column(12,
h3("Download Processed Data"),
h4("Your processed data is ready for download!"),
br(),
downloadButton("downloadData", "Download CSV", class = "btn-success btn-lg"),
br(), br(),
h4("Data Summary:"),
verbatimTextOutput("data_summary")
)
)
),
#Scrape Tab
# Source picker, date range (both default to yesterday), server-rendered
# extra options ("scrape_options"), and scrape / download / upload actions.
tabPanel(
"Scraping",
fluidRow(
column(2,
h4("Data Source", style = "color: darkcyan; border-bottom: 2px solid darkcyan; padding-bottom: 6px;"),
radioButtons("scrape_source", NULL,
choices = c("TrackMan PBP" = "pbp",
"TrackMan Positional" = "pos",
"NCAA Scoreboard" = "ncaa"),
selected = "pbp")
),
column(4,
h3("Controls"),
dateInput("start_date", "Start Date:", value = Sys.Date() - 1),
dateInput("end_date", "End Date:", value = Sys.Date() - 1),
uiOutput("scrape_options"),
br(),
actionButton("scrape_btn", "Scrape Data", class = "btn-primary"),
br(), br(),
downloadButton("download_scrape", "Download CSV"),
actionButton("upload_hf_btn", "Upload to HF Dataset", class = "btn-download")
),
column(6,
h3("Progress"),
verbatimTextOutput("scrape_status"),
hr(),
h3("Data Preview"),
DT::dataTableOutput("scrape_preview")
)
)
)
),
# Modal for editing pitch type
# Declared with trigger id "triggerModal"; no element with that id is defined
# in this UI, so it is presumably opened from the server - verify.
bsModal("pitchEditModal", "Edit Pitch Type", "triggerModal", size = "medium",
div(style = "padding: 20px;",
h4("Selected Pitch Details:", style = "color: darkcyan;"),
verbatimTextOutput("selected_pitch_info"),
br(),
selectInput("modal_new_pitch_type", "Change Pitch Type To:",
choices = c("Fastball", "Sinker", "Cutter", "Slider",
"Curveball", "ChangeUp", "Splitter", "Knuckleball", "Other"),
selected = "Fastball"),
br(),
actionButton("update_pitch", "Update Pitch Type", class = "btn-primary btn-lg"),
actionButton("cancel_edit", "Cancel", class = "btn-default")
)
)
)
# Root UI: a single dynamic slot ("page") whose content is rendered by the
# server.
ui <- fluidPage(uiOutput("page"))
# Server
server <- function(input, output, session) {
logged_in <- reactiveVal(FALSE)
output$page <- renderUI({
if (logged_in()) {
app_ui
} else {
login_ui
}
})
observeEvent(input$login, {
if (input$password == PASSWORD) {
logged_in(TRUE)
output$wrong_pass <- renderText("")
} else {
output$wrong_pass <- renderText("Incorrect password, please try again.")
}
})
# Reactive values
processed_data <- reactiveVal(NULL)
plot_data <- reactiveVal(NULL)
selected_pitch <- reactiveVal(NULL)
selected_points <- reactiveVal(NULL)
csv_data_raw <- reactiveVal(NULL)
bat_tracking_parsed <- reactiveVal(NULL)
merge_result <- reactiveVal(NULL)
scraped_data <- reactiveVal(NULL)
scrape_polling <- reactiveVal(FALSE)
scrape_status_msg <- reactiveVal("Ready.")
# Handle column selection buttons
observeEvent(input$select_all_cols, {
updateCheckboxGroupInput(session, "columns_to_remove",
selected = columns_to_remove)
})
observeEvent(input$deselect_all_cols, {
updateCheckboxGroupInput(session, "columns_to_remove", selected = character(0))
})
observeEvent(input$select_spinaxis, {
spinaxis_cols <- columns_to_remove[grepl("SpinAxis3d", columns_to_remove)]
updateCheckboxGroupInput(session, "columns_to_remove", selected = spinaxis_cols)
})
# Re-read and re-process the uploaded CSV whenever the date-format choice changes.
# Skipped at startup (ignoreInit) and until a CSV has been uploaded.
observeEvent(input$date_format, {
  req(input$file)
  tryCatch(
    {
      # Parse the file again so dates are converted with the newly chosen format
      raw_df <- read.csv(
        input$file$datapath,
        header = input$header,
        sep = input$sep,
        quote = input$quote,
        stringsAsFactors = FALSE
      )
      raw_df <- convert_date_columns(raw_df, input$date_format)
      csv_data_raw(raw_df)

      # Merge bat tracking again when a parsed JSON with data is already loaded
      working_df <- raw_df
      bat <- bat_tracking_parsed()
      if (!is.null(bat) && !is.null(bat$data)) {
        merged <- merge_with_bat_tracking(raw_df, bat$data)
        merge_result(merged)
        working_df <- merged$data
      }

      # Drop whichever of the ticked columns are actually present, then de-duplicate
      drop_cols <- intersect(
        names(working_df),
        input$columns_to_remove %||% character(0)
      )
      if (length(drop_cols) > 0) {
        working_df <- working_df %>% select(-all_of(drop_cols))
      }
      working_df <- working_df %>% distinct()
      processed_data(working_df)
      plot_data(working_df)

      format_label <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"
      showNotification(
        paste("Date format updated to:", format_label),
        type = "message", duration = 3
      )
    },
    error = function(e) {
      showNotification(paste("Error updating date format:", e$message), type = "error")
    }
  )
}, ignoreInit = TRUE)
# Handle a newly uploaded CSV: parse it, convert dates, merge bat tracking when
# available, drop the ticked columns, and refresh the pitcher selector.
observeEvent(input$file, {
  req(input$file)
  tryCatch(
    {
      raw_df <- read.csv(
        input$file$datapath,
        header = input$header,
        sep = input$sep,
        quote = input$quote,
        stringsAsFactors = FALSE
      )
      # Date columns are converted according to the user's format choice
      raw_df <- convert_date_columns(raw_df, input$date_format)
      csv_data_raw(raw_df)

      # Merge with bat tracking data when a parsed JSON with data already exists
      working_df <- raw_df
      bat <- bat_tracking_parsed()
      if (!is.null(bat) && !is.null(bat$data)) {
        merged <- merge_with_bat_tracking(raw_df, bat$data)
        merge_result(merged)
        working_df <- merged$data
      }

      # Remove the ticked columns that exist in the data, then de-duplicate rows
      drop_cols <- intersect(
        names(working_df),
        input$columns_to_remove %||% character(0)
      )
      if (length(drop_cols) > 0) {
        working_df <- working_df %>% select(-all_of(drop_cols))
      }
      working_df <- working_df %>% distinct()
      processed_data(working_df)
      plot_data(working_df)

      # Offer the non-missing pitcher names, sorted, with the first one selected
      if ("Pitcher" %in% names(working_df)) {
        pitcher_names <- working_df$Pitcher
        pitcher_choices <- sort(unique(pitcher_names[!is.na(pitcher_names)]))
        updateSelectInput(
          session, "pitcher_select",
          choices = pitcher_choices,
          selected = pitcher_choices[1]
        )
      }
    },
    error = function(e) {
      showNotification(paste("Error processing CSV:", e$message), type = "error")
    }
  )
})
# Handle a newly uploaded bat tracking JSON: parse it and, when a CSV is already
# loaded and the parse produced data, merge and re-process the combined table.
observeEvent(input$json_file, {
  req(input$json_file)
  tryCatch(
    {
      parsed <- parse_bat_tracking_json(input$json_file$datapath)
      bat_tracking_parsed(parsed)

      can_merge <- !is.null(csv_data_raw()) && parsed$success && !is.null(parsed$data)
      if (can_merge) {
        merged <- merge_with_bat_tracking(csv_data_raw(), parsed$data)
        merge_result(merged)

        # Apply the same column removal and de-duplication as the CSV upload path
        merged_df <- merged$data
        drop_cols <- intersect(
          names(merged_df),
          input$columns_to_remove %||% character(0)
        )
        if (length(drop_cols) > 0) {
          merged_df <- merged_df %>% select(-all_of(drop_cols))
        }
        merged_df <- merged_df %>% distinct()
        processed_data(merged_df)
        plot_data(merged_df)
        showNotification(merged$message, type = "message", duration = 5)
      }
    },
    error = function(e) {
      showNotification(paste("Error processing JSON:", e$message), type = "error")
    }
  )
})
# Text status for the CSV upload: game ID, dimensions and chosen date format.
output$csv_status <- renderText({
  if (is.null(input$file)) {
    return("No CSV file uploaded yet.")
  }
  raw_df <- csv_data_raw()
  if (is.null(raw_df)) {
    return("Processing CSV...")
  }
  # First GameID value found in the file, or "Unknown" when the column is absent
  game_id <- "Unknown"
  if ("GameID" %in% names(raw_df)) {
    game_id <- unique(raw_df$GameID)[1]
  }
  date_fmt <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"
  status_lines <- c(
    "✓ CSV loaded successfully!",
    paste(" Game ID:", game_id),
    paste(" Rows:", nrow(raw_df)),
    paste(" Columns:", ncol(raw_df)),
    paste("✓ Date format:", date_fmt)
  )
  paste(status_lines, collapse = "\n")
})
# Text status for the bat tracking JSON upload.
output$json_status <- renderText({
  if (is.null(input$json_file)) {
    return("No JSON file uploaded yet.")
  }
  parsed <- bat_tracking_parsed()
  if (is.null(parsed)) {
    return("Processing JSON...")
  }
  if (!parsed$success) {
    # Parse failed: show the parser's own message
    return(paste("✗", parsed$message))
  }
  reference_line <- paste(" Game Reference:", parsed$game_reference)
  plays_line <- paste(" Plays found:", parsed$plays_count %||% 0)
  paste("✓ JSON parsed successfully!", reference_line, plays_line, sep = "\n")
})
# Status box describing the CSV / bat tracking merge.
# Renders nothing until both a CSV and a parsed JSON exist; otherwise shows a
# parse error, a "nothing to merge" / game mismatch warning, or the match counts.
output$merge_status_ui <- renderUI({
  result <- merge_result()
  parsed <- bat_tracking_parsed()
  csv <- csv_data_raw()
  if (is.null(parsed) || is.null(csv)) {
    return(NULL)
  }
  if (!parsed$success) {
    return(div(
      class = "merge-status-box merge-error",
      h4("Merge Status", style = "margin-top: 0; color: #721c24;"),
      p(parsed$message)
    ))
  }

  # Compare the first GameID in the CSV with the JSON game reference.
  # isTRUE() treats a missing (NULL) or NA game ID on either side as "no known
  # mismatch", so an NA GameID cannot make the `if` conditions below error.
  csv_game <- if ("GameID" %in% names(csv)) unique(csv$GameID)[1] else NULL
  json_game <- parsed$game_reference
  games_differ <- isTRUE(csv_game != json_game)

  if (is.null(parsed$data) || is.null(result)) {
    if (games_differ) {
      return(div(
        class = "merge-status-box merge-warning",
        h4("⚠ Game ID Mismatch", style = "margin-top: 0; color: #856404;"),
        p(paste("CSV Game:", csv_game)),
        p(paste("JSON Game:", json_game)),
        p("Files may be from different games!")
      ))
    }
    return(div(
      class = "merge-status-box merge-warning",
      h4("No Data to Merge", style = "margin-top: 0; color: #856404;"),
      p(parsed$message)
    ))
  }

  if (result$matched > 0) {
    div(
      class = "merge-status-box merge-success",
      h4("✓ Merge Successful!", style = "margin-top: 0; color: #155724;"),
      p(paste("Matched:", result$matched, "of", result$total_bat, "bat tracking records")),
      if (games_differ) p(style = "color: #856404;", "⚠ Note: Game IDs differ but PitchUIDs matched")
    )
  } else {
    div(
      class = "merge-status-box merge-warning",
      h4("⚠ No Matches Found", style = "margin-top: 0; color: #856404;"),
      p(paste("0 of", result$total_bat, "bat tracking records matched")),
      if (games_differ) p(paste("Game ID mismatch: CSV =", csv_game, ", JSON =", json_game))
    )
  }
})
# Summary cards for the bat tracking file: game reference, swing count and
# number of records matched to the CSV.
output$bat_tracking_details <- renderUI({
  parsed <- bat_tracking_parsed()
  result <- merge_result()

  if (is.null(parsed)) {
    return(div(
      p("No bat tracking JSON file uploaded."),
      p("Upload a _battracking.json file in the 'Upload & Process' tab to see bat tracking data here.")
    ))
  }
  if (!parsed$success) {
    return(div(class = "alert alert-danger", parsed$message))
  }
  if (is.null(parsed$data)) {
    return(div(
      class = "alert alert-warning",
      h4("Empty Bat Tracking File"),
      p(parsed$message),
      p("The JSON file was valid but contained no swing data in the Plays array.")
    ))
  }

  # One third-width "well" holding a heading and a body element
  summary_card <- function(title, body) {
    div(class = "col-md-4", div(class = "well", h4(title), body))
  }
  matched_value <- if (!is.null(result)) result$matched else "N/A"

  div(
    div(
      class = "row",
      summary_card("Game Reference", p(parsed$game_reference)),
      summary_card(
        "Total Swings Tracked",
        p(style = "font-size: 24px; font-weight: bold; color: darkcyan;", parsed$plays_count)
      ),
      summary_card(
        "Matched to CSV",
        p(style = "font-size: 24px; font-weight: bold; color: #28a745;", matched_value)
      )
    )
  )
})
# Table of processed rows that carry a bat speed sensor reading.
# Renders nothing when there is no data, no BatSpeed_Sensor column, or no such rows.
output$bat_tracking_table <- DT::renderDataTable({
  df <- processed_data()
  if (is.null(df)) {
    return(NULL)
  }
  if (!("BatSpeed_Sensor" %in% names(df))) {
    return(NULL)
  }

  # Columns shown when present in the data
  display_cols <- c(
    "PitchNo", "Time", "Pitcher", "Batter", "TaggedPitchType", "PitchCall",
    "RelSpeed", "ExitSpeed", "Angle",
    "BatSpeed", "BatSpeed_Sensor",
    "VerticalAttackAngle", "VerticalAttackAngle_Sensor",
    "HorizontalAttackAngle", "HorizontalAttackAngle_Sensor"
  )
  bat_rows <- df %>%
    filter(!is.na(BatSpeed_Sensor)) %>%
    select(any_of(display_cols))
  if (nrow(bat_rows) == 0) {
    return(NULL)
  }

  # Numeric columns displayed with one decimal place
  rounded_cols <- intersect(
    names(bat_rows),
    c("BatSpeed_Sensor", "VerticalAttackAngle_Sensor",
      "HorizontalAttackAngle_Sensor", "RelSpeed",
      "ExitSpeed", "Angle")
  )
  bat_table <- DT::datatable(
    bat_rows,
    options = list(scrollX = TRUE, pageLength = 10),
    rownames = FALSE
  )
  bat_table %>% DT::formatRound(columns = rounded_cols, digits = 1)
})
# Text summary of the processing step: column counts before/after, row count,
# number of removed columns, bat tracking merge outcome and date format.
output$process_summary <- renderText({
  if (is.null(input$file)) {
    return("No file uploaded yet.")
  }
  if (is.null(processed_data())) {
    return("Processing...")
  }
  df <- processed_data()
  original_df <- csv_data_raw()
  result <- merge_result()

  # Only ticked columns that exist in the uploaded CSV count as removed
  selected_cols_to_remove <- input$columns_to_remove %||% character(0)
  removed_cols <- intersect(selected_cols_to_remove, names(original_df))
  removed_cols_text <- paste("✓ Removed columns:", length(removed_cols))

  bat_tracking_text <- if (!is.null(result) && result$matched > 0) {
    paste("✓ Bat tracking merged:", result$matched, "pitches")
  } else if (!is.null(bat_tracking_parsed())) {
    "⚠ Bat tracking: No matches"
  } else {
    "○ Bat tracking: Not uploaded"
  }

  paste(
    "✓ File processed successfully!",
    paste("✓ Original columns:", ncol(original_df)),
    paste("✓ Final columns:", ncol(df)),
    paste("✓ Rows processed:", nrow(df)),
    removed_cols_text,
    bat_tracking_text,
    "✓ Duplicates removed",
    paste("✓ Date format:", if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"),
    sep = "\n"
  )
})
# Filterable preview of the processed data (rendered once data exists).
output$preview <- DT::renderDataTable({
  preview_df <- processed_data()
  req(preview_df)
  DT::datatable(
    preview_df,
    filter = "top",
    options = list(scrollX = TRUE, pageLength = 10)
  )
})
# Pitch movement chart (horizontal vs induced vertical break) for the selected
# pitcher, coloured by tagged pitch type, with brushed points ringed in red.
output$movement_plot <- renderPlot({
  req(plot_data(), input$pitcher_select)
  # Colour used for pitch types that have no entry in pitch_colors
  fallback_color <- "#D3D3D3"

  pitcher_data <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(TaggedPitchType), TaggedPitchType != "Other",
           !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed)) %>%
    mutate(pitch_id = row_number())
  if (nrow(pitcher_data) == 0) {
    plot.new()
    text(0.5, 0.5, "No data available for selected pitcher", cex = 1.5)
    return()
  }

  pitcher_data$color <- pitch_colors[pitcher_data$TaggedPitchType]
  pitcher_data$color[is.na(pitcher_data$color)] <- fallback_color

  # Wide right margin plus xpd so the legend can sit outside the plot region
  par(mar = c(5, 5, 4, 8), xpd = TRUE)
  plot(pitcher_data$HorzBreak, pitcher_data$InducedVertBreak,
       col = pitcher_data$color,
       pch = 19, cex = 1.5,
       xlim = c(-25, 25), ylim = c(-25, 25),
       xlab = "Horizontal Break (inches)",
       ylab = "Induced Vertical Break (inches)",
       main = paste("Pitch Movement Chart -", input$pitcher_select))
  grid(nx = NULL, ny = NULL, col = "lightgray", lty = 1, lwd = 0.5)
  abline(h = 0, col = "gray", lty = 2, lwd = 1)
  abline(v = 0, col = "gray", lty = 2, lwd = 1)

  # Reference rings every 6 inches; the angle grid is shared by all rings
  theta <- seq(0, 2 * pi, length.out = 100)
  for (r in c(6, 12, 18, 24)) {
    lines(r * cos(theta), r * sin(theta), col = "lightgray", lty = 3)
  }

  if (input$selection_mode == "drag" && !is.null(selected_points())) {
    sel_points <- selected_points()
    points(sel_points$HorzBreak, sel_points$InducedVertBreak,
           pch = 21, cex = 2, col = "red", lwd = 3)
  }

  # Legend colours use the same fallback as the points, so a pitch type missing
  # from pitch_colors still gets a visible symbol instead of an NA colour.
  unique_pitches <- unique(pitcher_data$TaggedPitchType)
  unique_colors <- pitch_colors[unique_pitches]
  unique_colors[is.na(unique_colors)] <- fallback_color
  legend("topright", inset = c(-0.15, 0),
         legend = unique_pitches,
         col = unique_colors,
         pch = 19,
         cex = 0.8,
         title = "Pitch Type")
})
# Single-click selection: find the plotted pitch nearest the click and, when it
# lies within 2 plot units, open a modal for changing its pitch type.
observeEvent(input$plot_click, {
  req(plot_data(), input$pitcher_select, input$plot_click)
  if (input$selection_mode != "single") return()

  # Same filtering as the chart, so candidates are exactly the drawn points
  candidates <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(TaggedPitchType), TaggedPitchType != "Other",
           !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed)) %>%
    mutate(pitch_id = row_number())
  if (nrow(candidates) == 0) return()

  dx <- candidates$HorzBreak - input$plot_click$x
  dy <- candidates$InducedVertBreak - input$plot_click$y
  distances <- sqrt(dx^2 + dy^2)
  nearest <- which.min(distances)
  if (!(min(distances) <= 2)) return()

  clicked_pitch <- candidates[nearest, ]

  # Position of the first row with identical movement and velocity among all of
  # this pitcher's rows (unfiltered)
  pitcher_rows <- plot_data() %>% filter(Pitcher == input$pitcher_select)
  same_pitch <- pitcher_rows$HorzBreak == clicked_pitch$HorzBreak &
    pitcher_rows$InducedVertBreak == clicked_pitch$InducedVertBreak &
    pitcher_rows$RelSpeed == clicked_pitch$RelSpeed
  original_row <- which(same_pitch)[1]

  selected_pitch(list(
    pitcher = input$pitcher_select,
    row_in_pitcher_data = original_row,
    data = clicked_pitch,
    original_type = clicked_pitch$TaggedPitchType
  ))
  updateSelectInput(session, "modal_new_pitch_type",
                    selected = clicked_pitch$TaggedPitchType)

  pitch_type_choices <- c("Fastball", "Sinker", "Cutter", "Slider",
                          "Curveball", "ChangeUp", "Splitter", "Knuckleball", "Other")
  modal_body <- div(
    style = "padding: 20px;",
    h4("Selected Pitch Details:", style = "color: darkcyan;"),
    verbatimTextOutput("selected_pitch_info"),
    br(),
    selectInput("modal_new_pitch_type", "Change Pitch Type To:",
                choices = pitch_type_choices,
                selected = clicked_pitch$TaggedPitchType),
    br(),
    actionButton("update_pitch", "Update Pitch Type", class = "btn-primary btn-lg"),
    actionButton("cancel_edit", "Cancel", class = "btn-default")
  )
  showModal(modalDialog(
    title = "Edit Pitch Type",
    modal_body,
    footer = NULL,
    size = "m",
    easyClose = TRUE
  ))
})
# Drag selection: store the plotted pitches that fall inside the brushed rectangle.
observeEvent(input$plot_brush, {
  req(plot_data(), input$pitcher_select, input$plot_brush)
  if (input$selection_mode != "drag") return()

  # Same filtering as the chart, so only drawn points can be selected
  plotted <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(TaggedPitchType), TaggedPitchType != "Other",
           !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed))
  if (nrow(plotted) == 0) return()

  box <- input$plot_brush
  inside <- plotted %>%
    filter(
      HorzBreak >= box$xmin & HorzBreak <= box$xmax &
        InducedVertBreak >= box$ymin & InducedVertBreak <= box$ymax
    )
  # An empty brush clears the selection
  selected_points(if (nrow(inside) > 0) inside else NULL)
})
# Apply the chosen pitch type to every brushed point.
# Rows are matched on pitcher plus movement/velocity within 0.01 of a selected point.
observeEvent(input$apply_bulk_change, {
  req(selected_points(), input$bulk_pitch_type)
  sel_points <- selected_points()
  if (nrow(sel_points) == 0) {
    showNotification("No points selected", type = "warning")
    return()
  }

  current_data <- plot_data()
  # %in% is used for the pitcher test so a missing Pitcher counts as "no match"
  is_target_pitcher <- current_data$Pitcher %in% input$pitcher_select

  # Accumulate one logical mask over all selected points, then assign once.
  # Comparisons against missing HorzBreak / InducedVertBreak / RelSpeed yield NA;
  # `%in% TRUE` turns those into FALSE so such rows keep their existing
  # TaggedPitchType instead of being overwritten with NA.
  to_update <- rep(FALSE, nrow(current_data))
  for (i in seq_len(nrow(sel_points))) {
    point <- sel_points[i, ]
    near_point <- abs(current_data$HorzBreak - point$HorzBreak) < 0.01 &
      abs(current_data$InducedVertBreak - point$InducedVertBreak) < 0.01 &
      abs(current_data$RelSpeed - point$RelSpeed) < 0.01
    to_update <- to_update | (is_target_pitcher & (near_point %in% TRUE))
  }
  current_data$TaggedPitchType[to_update] <- input$bulk_pitch_type

  plot_data(current_data)
  processed_data(current_data)
  selected_points(NULL)
  showNotification(
    paste("Updated", nrow(sel_points), "pitches to", input$bulk_pitch_type),
    type = "message", duration = 3
  )
})
# Drag-mode selection summary: total points and a count per pitch type.
output$selection_info <- renderText({
  has_selection <- input$selection_mode == "drag" && !is.null(selected_points())
  if (!has_selection) {
    return("No points selected. Click and drag to select multiple pitches.")
  }
  picked <- selected_points()
  type_counts <- table(picked$TaggedPitchType)
  breakdown <- paste(names(type_counts), "(", type_counts, ")", collapse = ", ")
  paste(nrow(picked), "points selected:", breakdown)
})
# Hover read-out: details of the plotted pitch nearest the cursor, shown only
# when that pitch lies within 2 plot units; otherwise an empty string.
output$hover_info <- renderText({
  req(input$plot_hover, plot_data(), input$pitcher_select)
  pitcher_data <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(TaggedPitchType), TaggedPitchType != "Other",
           !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed))
  if (nrow(pitcher_data) == 0) return("")

  hover_x <- input$plot_hover$x
  hover_y <- input$plot_hover$y
  distances <- sqrt((pitcher_data$HorzBreak - hover_x)^2 +
                      (pitcher_data$InducedVertBreak - hover_y)^2)
  if (min(distances) > 2) return("")

  hover_pitch <- pitcher_data[which.min(distances), ]
  info_parts <- c(
    "Hovering over:",
    paste("Type:", hover_pitch$TaggedPitchType),
    paste("Velocity:", round(hover_pitch$RelSpeed, 1), "mph"),
    paste("HB:", round(hover_pitch$HorzBreak, 1), "in"),
    paste("IVB:", round(hover_pitch$InducedVertBreak, 1), "in")
  )
  # Bat speed is appended as its own part only when a reading exists, so the
  # joined text has no trailing or doubled " | " separator.
  if ("BatSpeed_Sensor" %in% names(hover_pitch) && !is.na(hover_pitch$BatSpeed_Sensor)) {
    info_parts <- c(
      info_parts,
      paste("Bat Speed:", round(hover_pitch$BatSpeed_Sensor, 1), "mph")
    )
  }
  paste(info_parts, collapse = " | ")
})
# Per-pitch-type summary table for the selected pitcher: usage, velocity,
# movement, spin, bat speed, zone rate and whiff rate.
output$movement_stats <- DT::renderDataTable({
  req(plot_data(), input$pitcher_select)
  pitcher_rows <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(TaggedPitchType))

  # Optional columns: precomputed indicators are used when present
  has_zone_flag <- "StrikeZoneIndicator" %in% names(pitcher_rows)
  has_whiff_flag <- "WhiffIndicator" %in% names(pitcher_rows)
  has_spin <- "SpinRate" %in% names(pitcher_rows)
  has_bat_speed <- "BatSpeed_Sensor" %in% names(pitcher_rows)

  # A scalar `if` selects the whole column. (ifelse() with a length-1 test
  # returns a length-1 result, which would be recycled to every row.)
  movement_stats <- pitcher_rows %>%
    mutate(
      pitch_group = case_when(
        TaggedPitchType %in% c("Fastball", "FourSeamFastBall", "FourSeamFastB", "Four-Seam", "4-Seam") ~ "Fastball",
        TaggedPitchType %in% c("OneSeamFastBall", "TwoSeamFastBall", "Sinker", "Two-Seam", "One-Seam") ~ "Sinker",
        TaggedPitchType %in% c("ChangeUp", "Changeup") ~ "Changeup",
        TRUE ~ TaggedPitchType
      ),
      in_zone = if (has_zone_flag) {
        StrikeZoneIndicator
      } else {
        # Fallback zone test on plate location; missing locations count as 0
        ifelse(!is.na(PlateLocSide) & !is.na(PlateLocHeight) &
                 PlateLocSide >= -0.95 & PlateLocSide <= 0.95 &
                 PlateLocHeight >= 1.6 & PlateLocHeight <= 3.5, 1, 0)
      },
      is_whiff = if (has_whiff_flag) {
        WhiffIndicator
      } else {
        ifelse(!is.na(PitchCall) & PitchCall == "StrikeSwinging", 1, 0)
      }
    )
  total_pitches <- nrow(movement_stats)

  summary_stats <- movement_stats %>%
    group_by(`Pitch Type` = pitch_group) %>%
    summarise(
      Count = n(),
      `Usage%` = sprintf("%.1f%%", (n() / total_pitches) * 100),
      `Avg Velo` = sprintf("%.1f", mean(RelSpeed, na.rm = TRUE)),
      `Max Velo` = sprintf("%.1f", max(RelSpeed, na.rm = TRUE)),
      `Avg IVB` = sprintf("%.1f", mean(InducedVertBreak, na.rm = TRUE)),
      `Avg HB` = sprintf("%.1f", mean(HorzBreak, na.rm = TRUE)),
      `Avg Spin` = if (has_spin) sprintf("%.0f", mean(SpinRate, na.rm = TRUE)) else "—",
      `Avg Bat Speed` = if (has_bat_speed) {
        bat_vals <- BatSpeed_Sensor[!is.na(BatSpeed_Sensor)]
        if (length(bat_vals) > 0) sprintf("%.1f", mean(bat_vals)) else "—"
      } else "—",
      `Zone%` = sprintf("%.1f%%", round(mean(in_zone, na.rm = TRUE) * 100, 1)),
      `Whiff%` = sprintf("%.1f%%", round(mean(is_whiff, na.rm = TRUE) * 100, 1)),
      .groups = "drop"
    ) %>%
    arrange(desc(Count))

  DT::datatable(summary_stats,
                options = list(pageLength = 15, dom = 't', scrollX = TRUE),
                rownames = FALSE) %>%
    DT::formatStyle(columns = names(summary_stats), fontSize = '12px')
})
# Details of the clicked pitch, shown inside the edit modal.
output$selected_pitch_info <- renderText({
  pitch_info <- selected_pitch()
  if (is.null(pitch_info)) {
    return("No pitch selected")
  }
  pitch <- pitch_info$data

  # Fields that are always shown
  info_lines <- c(
    paste("Pitcher:", pitch_info$pitcher),
    paste("Current Type:", pitch$TaggedPitchType),
    paste("Velocity:", round(pitch$RelSpeed, 1), "mph"),
    paste("Horizontal Break:", round(pitch$HorzBreak, 1), "inches"),
    paste("Induced Vertical Break:", round(pitch$InducedVertBreak, 1), "inches")
  )

  # Optional fields are appended only when the column exists and has a value
  has_value <- function(col) col %in% names(pitch) && !is.na(pitch[[col]])
  if (has_value("SpinRate")) {
    info_lines <- c(info_lines, paste("Spin Rate:", round(pitch$SpinRate, 0), "rpm"))
  }
  if (has_value("BatSpeed_Sensor")) {
    info_lines <- c(
      info_lines,
      paste("Bat Speed:", round(pitch$BatSpeed_Sensor, 1), "mph"),
      paste("Vertical Attack Angle:", round(pitch$VerticalAttackAngle_Sensor, 1), "°"),
      paste("Horizontal Attack Angle:", round(pitch$HorizontalAttackAngle_Sensor, 1), "°")
    )
  }
  if (has_value("Date")) {
    info_lines <- c(info_lines, paste("Date:", pitch$Date))
  }
  paste(info_lines, collapse = "\n")
})
# Apply the pitch type chosen in the modal to the clicked pitch.
# Rows are matched on pitcher plus movement/velocity within 0.01 of the clicked pitch.
observeEvent(input$update_pitch, {
  pitch_info <- selected_pitch()
  if (is.null(pitch_info)) return()

  current_data <- plot_data()
  target <- pitch_info$data
  is_match <- current_data$Pitcher == pitch_info$pitcher &
    abs(current_data$HorzBreak - target$HorzBreak) < 0.01 &
    abs(current_data$InducedVertBreak - target$InducedVertBreak) < 0.01 &
    abs(current_data$RelSpeed - target$RelSpeed) < 0.01
  # Rows with missing pitcher/movement/velocity compare as NA; treat them as
  # non-matches so their existing TaggedPitchType is kept rather than set to NA.
  is_match <- is_match %in% TRUE
  current_data$TaggedPitchType[is_match] <- input$modal_new_pitch_type

  plot_data(current_data)
  processed_data(current_data)
  removeModal()
  showNotification(
    paste("Updated pitch from", pitch_info$original_type, "to", input$modal_new_pitch_type),
    type = "message", duration = 3
  )
  selected_pitch(NULL)
})
# "Cancel" in the edit modal: close it and forget the clicked pitch.
observeEvent(input$cancel_edit, {
  removeModal()
  selected_pitch(NULL)
})
# One-line description of the most recently clicked pitch.
output$click_info <- renderText({
  pitch_info <- selected_pitch()
  if (is.null(pitch_info)) {
    return("No point selected yet. Click on a point in the chart above to edit its pitch type.")
  }
  hb <- round(pitch_info$data$HorzBreak, 1)
  ivb <- round(pitch_info$data$InducedVertBreak, 1)
  paste("Last selected pitch:", pitch_info$original_type,
        "| Position: (", hb, ",", ivb, ")")
})
# Summary shown on the download page: size, date range, pitchers, pitch types,
# bat tracking coverage and the chosen date format.
output$data_summary <- renderText({
  req(processed_data())
  df <- processed_data()
  result <- merge_result()

  bat_tracking_summary <- "Bat tracking data: None"
  if (!is.null(result) && result$matched > 0) {
    bat_tracking_summary <- paste("Bat tracking data:", result$matched, "pitches with swing metrics")
  }

  # Each detail falls back to a placeholder when its column is unavailable
  date_range <- if ("Date" %in% names(df) && !all(is.na(df$Date))) {
    paste(min(as.Date(df$Date), na.rm = TRUE), "to", max(as.Date(df$Date), na.rm = TRUE))
  } else {
    "Date column not available"
  }
  pitcher_count <- if ("Pitcher" %in% names(df)) {
    length(unique(df$Pitcher[!is.na(df$Pitcher)]))
  } else {
    "Pitcher column not available"
  }
  pitch_types <- if ("TaggedPitchType" %in% names(df)) {
    paste(sort(unique(df$TaggedPitchType[!is.na(df$TaggedPitchType)])), collapse = ", ")
  } else {
    "TaggedPitchType column not available"
  }
  date_format_label <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"

  paste(
    paste("Total rows:", nrow(df)),
    paste("Total columns:", ncol(df)),
    paste("Date range:", date_range),
    paste("Unique pitchers:", pitcher_count),
    paste("Pitch types:", pitch_types),
    bat_tracking_summary,
    paste("Date format:", date_format_label),
    sep = "\n"
  )
})
# Download the processed data as a CSV named with today's date.
output$downloadData <- downloadHandler(
  filename = function() {
    paste0("app_ready_COA_", Sys.Date(), ".csv")
  },
  content = function(file) {
    export_df <- processed_data()
    write.csv(export_df, file, row.names = FALSE)
  }
)
# Scraper section ----
# Middle column of the scraper tab: its content depends on the source chosen in
# the left column. An unrecognised source renders nothing.
output$scrape_options <- renderUI({
  source_panels <- list(
    "pbp" = tagList(
      p("Scrapes TrackMan play-by-play data from FTP.")
    ),
    "pos" = tagList(
      p("Scrapes TrackMan player positioning data from FTP.")
    ),
    "ncaa" = tagList(
      selectInput("ncaa_division", "Division:", choices = c("D1", "D2", "D3")),
      p("Scrapes NCAA scoreboard data via API.")
    )
  )
  source_panels[[input$scrape_source]]
})
# "Scrape" button: dispatch the scrape.yml GitHub Actions workflow with the
# chosen date range and source, then start polling when GitHub accepts it (204).
observeEvent(input$scrape_btn, {
  scrape_status_msg("Triggering scrape on GitHub...")
  gh_token <- Sys.getenv("GITHUB_TOKEN")
  gh_repo <- Sys.getenv("GITHUB_REPO")

  result <- tryCatch(
    {
      dispatch_url <- paste0(
        "https://api.github.com/repos/", gh_repo,
        "/actions/workflows/scrape.yml/dispatches"
      )
      workflow_inputs <- list(
        start_date = as.character(input$start_date),
        end_date = as.character(input$end_date),
        data_type = input$scrape_source
      )
      payload <- jsonlite::toJSON(
        list(ref = "main", inputs = workflow_inputs),
        auto_unbox = TRUE
      )
      httr::POST(
        dispatch_url,
        httr::add_headers(
          Authorization = paste("Bearer", gh_token),
          Accept = "application/vnd.github.v3+json"
        ),
        body = payload,
        encode = "raw"
      )
    },
    error = function(e) {
      # Request could not be built or sent: report it and stop here
      scrape_status_msg(paste("Failed:", e$message))
      NULL
    }
  )
  if (is.null(result)) return()

  response_code <- httr::status_code(result)
  if (response_code == 204) {
    scrape_status_msg("Scrape triggered! Waiting for GitHub to finish...")
    scrape_polling(TRUE)
  } else {
    scrape_status_msg(paste("GitHub API error:", httr::status_code(result)))
  }
})
# While scrape_polling() is TRUE, check the most recent GitHub Actions run every
# 15 seconds. When it completes successfully, download the scraped .csv.gz from
# the repo's data/ folder, run the pbp processing steps when applicable, and
# store the result in scraped_data().
# NOTE(review): the newest run in the repo (per_page=1) is assumed to be the one
# just dispatched — confirm no other workflow can run in between.
observe({
  req(scrape_polling())
  invalidateLater(15000, session)
  gh_token <- Sys.getenv("GITHUB_TOKEN")
  gh_repo <- Sys.getenv("GITHUB_REPO")
  auth_header <- paste("Bearer", gh_token)

  resp <- tryCatch(
    httr::GET(
      paste0("https://api.github.com/repos/", gh_repo, "/actions/runs?per_page=1"),
      httr::add_headers(
        Authorization = auth_header,
        Accept = "application/vnd.github.v3+json"
      )
    ),
    error = function(e) NULL
  )
  # Network failure: stay quiet and try again on the next tick
  if (is.null(resp)) return()
  if (httr::status_code(resp) != 200) {
    # Surface API errors instead of polling silently; keep polling because the
    # failure may be temporary
    scrape_status_msg(paste("GitHub API error while polling:", httr::status_code(resp)))
    return()
  }

  # Guard the parse so an unexpected body cannot raise an error out of the observer
  runs <- tryCatch(
    jsonlite::fromJSON(httr::content(resp, as = "text", encoding = "UTF-8")),
    error = function(e) NULL
  )
  if (!is.list(runs) || length(runs$workflow_runs) == 0) return()

  latest <- runs$workflow_runs[1, ]
  status <- latest$status
  conclusion <- latest$conclusion

  # identical() is used so a missing status/conclusion cannot break the `if`
  if (!identical(status, "completed")) {
    scrape_status_msg(paste0("GitHub is running... (status: ", status, ")"))
    return()
  }
  scrape_polling(FALSE)
  if (!identical(conclusion, "success")) {
    scrape_status_msg(paste("GitHub Action failed:", conclusion))
    return()
  }

  scrape_status_msg("GitHub finished! Fetching data...")
  # Auto-fetch the CSV written by the workflow
  filename <- paste0(input$scrape_source, "_", input$start_date, "_to_", input$end_date, ".csv.gz")
  url <- paste0("https://api.github.com/repos/", gh_repo, "/contents/data/", filename)
  tmp <- tempfile(fileext = ".csv.gz")
  data <- tryCatch(
    {
      file_resp <- httr::GET(
        url,
        httr::add_headers(
          Authorization = auth_header,
          Accept = "application/vnd.github.v3.raw"
        )
      )
      if (httr::status_code(file_resp) == 200) {
        writeBin(httr::content(file_resp, as = "raw"), tmp)
        read_csv(gzfile(tmp))
      } else {
        NULL
      }
    },
    error = function(e) NULL,
    # The downloaded copy is no longer needed once it has been read (or failed)
    finally = unlink(tmp)
  )
  if (is.null(data) || nrow(data) == 0) {
    scrape_status_msg("Scrape finished but couldn't fetch the file. Try 'Fetch Results' manually.")
    return()
  }

  # Play-by-play data gets cleaned and scored; on failure the raw data is kept
  processing_error <- NULL
  if (input$scrape_source == "pbp") {
    scrape_status_msg("Processing data...")
    processed <- tryCatch(
      predict_stuffplus(clean_college_data(data)),
      error = function(e) e
    )
    if (inherits(processed, "error")) {
      processing_error <- conditionMessage(processed)
    } else {
      data <- processed
    }
  }

  scraped_data(data)
  size_text <- paste0(nrow(data), " rows × ", ncol(data), " columns")
  if (is.null(processing_error)) {
    scrape_status_msg(paste0("Done! ", size_text, "."))
  } else {
    # Keep the failure visible rather than replacing it with a success message
    scrape_status_msg(paste0(
      "Processing error: ", processing_error,
      " — showing unprocessed data (", size_text, ")."
    ))
  }
})
# Current scraper status message.
output$scrape_status <- renderText({
  scrape_status_msg()
})
# Preview of the scraped data (rendered once data exists).
output$scrape_preview <- DT::renderDataTable({
  scraped_df <- scraped_data()
  req(scraped_df)
  DT::datatable(
    scraped_df,
    options = list(scrollX = TRUE, pageLength = 10)
  )
})
# Download the scraped data as a CSV named after the source and date range.
output$download_scrape <- downloadHandler(
  filename = function() {
    label <- switch(input$scrape_source, "pbp" = "pbp", "pos" = "positional", "ncaa" = "ncaa")
    date_span <- paste0(
      format(input$start_date, "%Y%m%d"), "_to_",
      format(input$end_date, "%Y%m%d")
    )
    paste0("trackman_", label, "_", date_span, ".csv")
  },
  content = function(file) {
    req(scraped_data())
    export_df <- scraped_data()
    write.csv(export_df, file, row.names = FALSE)
  }
)
# Upload the scraped data to the Hugging Face dataset repo.
# For each destination folder, rows whose PitchUID already appears in that
# folder's UID index are skipped, the new rows are uploaded as a timestamped
# parquet file, and the index is re-uploaded with the new UIDs appended.
observeEvent(input$upload_hf_btn, {
  req(scraped_data())
  hf_token <- Sys.getenv("HF_WRITE_TOKEN")
  repo_id <- "CoastalBaseball/2026MasterDataset"
  timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")

  # Upload `new_data` into `folder`, de-duplicating against `index_file`.
  # Returns a one-line summary string prefixed with `label`.
  upload_to_hf <- function(new_data, folder, index_file, label) {
    # Temp files are removed on exit, including when an upload step errors
    tmp_files <- character(0)
    on.exit(unlink(tmp_files), add = TRUE)

    scrape_status_msg(paste0("Checking existing UIDs for ", label, "..."))
    tmp_existing <- tempfile(fileext = ".csv.gz")
    tmp_files <- c(tmp_files, tmp_existing)
    # NOTE(review): any failure to download the index (not only "index does not
    # exist yet") is treated as an empty index, so every row is then uploaded
    # as new and the index is rewritten — confirm this is intended.
    existing_uids <- tryCatch({
      resp <- httr::GET(
        paste0("https://huggingface.co/datasets/", repo_id, "/resolve/main/", index_file),
        httr::add_headers(Authorization = paste("Bearer", hf_token)),
        httr::write_disk(tmp_existing, overwrite = TRUE)
      )
      if (httr::status_code(resp) == 200) {
        read.csv(gzfile(tmp_existing), stringsAsFactors = FALSE)$PitchUID
      } else {
        character(0)
      }
    }, error = function(e) { character(0) })

    scraped_rows <- nrow(new_data)
    if (length(existing_uids) > 0 && "PitchUID" %in% names(new_data)) {
      new_only <- new_data %>% filter(!PitchUID %in% existing_uids)
    } else {
      new_only <- new_data
    }
    new_rows <- nrow(new_only)
    total_after <- length(existing_uids) + new_rows
    if (new_rows == 0) {
      return(paste0(label, ": ", scraped_rows, " rows scraped, 0 new rows added (", length(existing_uids), " total)"))
    }

    scrape_status_msg(paste0("Uploading ", new_rows, " new rows for ", label, "..."))
    hf <- reticulate::import("huggingface_hub")
    api <- hf$HfApi()
    tmp_data <- tempfile(fileext = ".parquet")
    tmp_files <- c(tmp_files, tmp_data)
    arrow::write_parquet(new_only, tmp_data)
    api$upload_file(
      path_or_fileobj = tmp_data,
      path_in_repo = paste0(folder, "/", timestamp, ".parquet"),
      repo_id = repo_id,
      repo_type = "dataset",
      token = hf_token
    )

    scrape_status_msg(paste0("Updating ", label, " index..."))
    all_uids <- data.frame(PitchUID = c(existing_uids, new_only$PitchUID))
    tmp_index <- tempfile(fileext = ".csv.gz")
    tmp_files <- c(tmp_files, tmp_index)
    gz <- gzfile(tmp_index, "w")
    write.csv(all_uids, gz, row.names = FALSE)
    close(gz)
    api$upload_file(
      path_or_fileobj = tmp_index,
      path_in_repo = index_file,
      repo_id = repo_id,
      repo_type = "dataset",
      token = hf_token
    )
    rm(new_only, all_uids); gc()
    paste0(label, ": ", scraped_rows, " rows scraped, ", new_rows, " new rows added (", total_after, " total)")
  }

  # Any failure (Python import, network, missing team columns) is reported in
  # the status text instead of propagating out of the observer.
  tryCatch(
    {
      if (input$scrape_source == "pbp") {
        msg1 <- upload_to_hf(scraped_data(), "pbp", "pbp_uid_index.csv.gz", "Master Dataset")
        gc()
        # Coastal subsets are uploaded to their own folders in addition to the master set
        cp <- scraped_data() %>% filter(PitcherTeam == "COA_CHA")
        msg2 <- if (nrow(cp) > 0) {
          upload_to_hf(cp, "coastal_pitchers", "coastal_pitchers_uid_index.csv.gz", "Coastal Pitchers")
        } else { "Coastal Pitchers: No matching rows" }
        rm(cp); gc()
        ch <- scraped_data() %>% filter(BatterTeam == "COA_CHA")
        msg3 <- if (nrow(ch) > 0) {
          upload_to_hf(ch, "coastal_hitters", "coastal_hitters_uid_index.csv.gz", "Coastal Hitters")
        } else { "Coastal Hitters: No matching rows" }
        rm(ch); gc()
        scrape_status_msg(paste(msg1, msg2, msg3, sep = "\n"))
      } else if (input$scrape_source == "pos") {
        msg1 <- upload_to_hf(scraped_data(), "pos", "pos_uid_index.csv.gz", "Positional Dataset")
        scrape_status_msg(msg1)
      } else if (input$scrape_source == "ncaa") {
        msg1 <- upload_to_hf(scraped_data(), "ncaa_pbp", "ncaa_pbp_uid_index.csv.gz", "NCAA PBP Dataset")
        scrape_status_msg(msg1)
      }
    },
    error = function(e) {
      scrape_status_msg(paste("Upload failed:", conditionMessage(e)))
    }
  )
})
}
# Launch the Shiny application from the `ui` and `server` objects.
shinyApp(ui = ui, server = server)