# DataProcess / app.R
# OwenStOnge's picture
# Update app.R
# 5895511 verified
# raw
# history blame
# 87.5 kB
library(shiny)
library(shinydashboard)
library(shinyBS)
library(DT)
library(dplyr)
library(readr)
library(stringr)
library(jsonlite)
# Columns that are dropped from the data whenever they are present
columns_to_remove <- c(
  "SpinAxis3dTransverseAngle",
  "SpinAxis3dLongitudinalAngle",
  "SpinAxis3dActiveSpinRate",
  "SpinAxis3dSpinEfficiency",
  "SpinAxis3dTilt",
  "SpinAxis3dVectorX",
  "SpinAxis3dVectorY",
  "SpinAxis3dVectorZ",
  "SpinAxis3dSeamOrientationRotationX",
  "SpinAxis3dSeamOrientationRotationY",
  "SpinAxis3dSeamOrientationRotationZ",
  "SpinAxis3dSeamOrientationBallYAmb1",
  "SpinAxis3dSeamOrientationBallAngleHorizontalAmb1",
  "SpinAxis3dSeamOrientationBallZAmb1",
  "SpinAxis3dSeamOrientationBallAngleVerticalAmb2",
  "SpinAxis3dSeamOrientationBallZAmb2",
  "SpinAxis3dSeamOrientationBallXAmb4",
  "SpinAxis3dSeamOrientationBallYAmb4",
  "SpinAxis3dSeamOrientationBallAngleHorizontalAmb2",
  "SpinAxis3dSeamOrientationBallAngleVerticalAmb1",
  "SpinAxis3dSeamOrientationBallXAmb1",
  "SpinAxis3dSeamOrientationBallYAmb2",
  "SpinAxis3dSeamOrientationBallAngleHorizontalAmb4",
  "SpinAxis3dSeamOrientationBallAngleVerticalAmb4",
  "SpinAxis3dSeamOrientationBallXAmb2",
  "SpinAxis3dSeamOrientationBallAngleVerticalAmb3",
  "SpinAxis3dSeamOrientationBallAngleHorizontalAmb3",
  "SpinAxis3dSeamOrientationBallXAmb3",
  "SpinAxis3dSeamOrientationBallYAmb3",
  "SpinAxis3dSeamOrientationBallZAmb3",
  "SpinAxis3dSeamOrientationBallZAmb4",
  "GameDate"
)
# Pitch colors for visualization (Coastal Carolina theme).
# Named character vector: pitch type -> color (hex code or R color name).
pitch_colors <- c(
  "Fastball"    = "#FA8072",
  "Four-Seam"   = "#FA8072",
  "Sinker"      = "#fdae61",
  "Slider"      = "#A020F0",
  "Sweeper"     = "magenta",
  "Curveball"   = "#2c7bb6",
  "ChangeUp"    = "#90EE90",
  "Splitter"    = "#90EE32",
  "Cutter"      = "red",
  "Knuckleball" = "#FFB4B4",
  "Other"       = "#D3D3D3"
)
# Convert a single date value between the date layouts handled by this app.
#
# date_string:   one date value. Recognised layouts are YYYY-MM-DD,
#                M/D/YYYY (or MM/DD/YYYY) and M/D/YY (or MM/DD/YY).
# output_format: "mdyy" for M/D/YY (no leading zeros on month or day,
#                2-digit year); any other value (default "yyyy") gives
#                YYYY-MM-DD.
#
# Returns NA for NA or empty input, the reformatted string when the input has
# a recognised layout and is a valid date, and otherwise the input unchanged
# (as character).
convert_date_format <- function(date_string, output_format = "yyyy") {
  if (is.na(date_string)) {
    return(NA)
  }
  # Coerce before comparing with "" so non-character inputs are compared as text
  date_string <- as.character(date_string)
  if (date_string == "") {
    return(NA)
  }

  # Layout regex -> strptime format; the three layouts are mutually exclusive
  input_formats <- c(
    "^\\d{4}-\\d{2}-\\d{2}$"     = "%Y-%m-%d",
    "^\\d{1,2}/\\d{1,2}/\\d{4}$" = "%m/%d/%Y",
    "^\\d{1,2}/\\d{1,2}/\\d{2}$" = "%m/%d/%y"
  )

  parsed_date <- as.Date(NA)
  for (pattern in names(input_formats)) {
    if (grepl(pattern, date_string)) {
      parsed_date <- tryCatch(
        as.Date(date_string, format = input_formats[[pattern]]),
        error = function(e) as.Date(NA)
      )
      break
    }
  }

  # Unrecognised layout or impossible date (e.g. month 13): hand the text back
  if (is.na(parsed_date)) {
    return(date_string)
  }

  if (output_format == "mdyy") {
    # Build M/D/YY from components. Stripping "/0" with a regex would also
    # eat the leading zero of the year (2005-03-04 must be 3/4/05, not 3/4/5).
    return(paste(
      as.integer(format(parsed_date, "%m")),
      as.integer(format(parsed_date, "%d")),
      format(parsed_date, "%y"),
      sep = "/"
    ))
  }

  format(parsed_date, "%Y-%m-%d")
}
# Reformat the known date columns of a data frame.
#
# df:            data frame; the columns Date, GameDate, UTCDate and
#                LocalDateTime are rewritten when present, all other columns
#                are left untouched.
# output_format: passed through to convert_date_format().
#
# Returns df with each of those columns replaced by a character vector.
convert_date_columns <- function(df, output_format = "yyyy") {
  # Common date column names in TrackMan data
  date_columns <- c("Date", "GameDate", "UTCDate", "LocalDateTime")

  for (col in intersect(date_columns, names(df))) {
    # Convert to text first: iterating a Date column directly drops its class
    # and would hand day counts to the converter.
    values <- as.character(df[[col]])
    # vapply keeps the result a character vector even for zero rows, where
    # sapply would return an empty list.
    df[[col]] <- vapply(
      values,
      function(x) as.character(convert_date_format(x, output_format)),
      character(1),
      USE.NAMES = FALSE
    )
  }

  df
}
# Parse a bat tracking JSON file into a data frame with one row per play.
#
# json_path: passed to fromJSON(); the document is expected to carry
#            GameReference, SessionId and a Plays array whose entries have
#            PitchUID, BatSpeed, VerticalAttackAngle, HorizontalAttackAngle,
#            PlayId and Time.
#
# Returns a list. On success: success = TRUE, data (data frame, or NULL when
# Plays is empty), game_reference, message, and - when plays exist -
# session_id and plays_count. On any error: success = FALSE, data = NULL and
# a message carrying the error text.
parse_bat_tracking_json <- function(json_path) {
  tryCatch({
    json_data <- fromJSON(json_path, simplifyVector = FALSE)

    # Extract metadata
    game_reference <- json_data$GameReference
    session_id <- json_data$SessionId

    # Extract plays
    plays <- json_data$Plays
    if (length(plays) == 0) {
      return(list(
        success = TRUE,
        data = NULL,
        game_reference = game_reference,
        message = "JSON parsed but contains no bat tracking plays (empty Plays array)"
      ))
    }

    # One value per play. A missing or null field becomes NA so every column
    # has exactly length(plays) entries; without this a single incomplete play
    # turns the column into a list and data.frame() fails.
    play_field <- function(field) {
      values <- lapply(plays, function(p) {
        value <- p[[field]]
        if (is.null(value) || length(value) == 0) NA else value[[1]]
      })
      unlist(values, use.names = FALSE)
    }

    # Build data frame from plays
    bat_tracking_df <- data.frame(
      PitchUID = play_field("PitchUID"),
      BatSpeed_Sensor = play_field("BatSpeed"),
      VerticalAttackAngle_Sensor = play_field("VerticalAttackAngle"),
      HorizontalAttackAngle_Sensor = play_field("HorizontalAttackAngle"),
      BatTracking_PlayId = play_field("PlayId"),
      BatTracking_Time = play_field("Time"),
      stringsAsFactors = FALSE
    )

    return(list(
      success = TRUE,
      data = bat_tracking_df,
      game_reference = game_reference,
      session_id = session_id,
      plays_count = length(plays),
      message = paste("Successfully parsed", length(plays), "bat tracking play(s)")
    ))
  }, error = function(e) {
    return(list(
      success = FALSE,
      data = NULL,
      message = paste("Error parsing JSON:", e$message)
    ))
  })
}
# Left-join bat tracking sensor data onto TrackMan CSV rows by PitchUID.
#
# csv_data:          data frame of CSV rows; must contain PitchUID to merge.
# bat_tracking_data: data frame as produced by parse_bat_tracking_json()
#                    (PitchUID plus *_Sensor / BatTracking_* columns), or NULL.
#
# Returns a list: data (csv_data, with the sensor columns added when a merge
# happened), matched (number of bat tracking records whose PitchUID occurs in
# csv_data), total_bat (rows supplied in bat_tracking_data) and message.
# Where csv_data already has BatSpeed / VerticalAttackAngle /
# HorizontalAttackAngle, their NA entries are filled from the sensor columns.
merge_with_bat_tracking <- function(csv_data, bat_tracking_data) {
  if (is.null(bat_tracking_data) || nrow(bat_tracking_data) == 0) {
    return(list(
      data = csv_data,
      matched = 0,
      total_bat = 0,
      message = "No bat tracking data to merge"
    ))
  }

  total_bat <- nrow(bat_tracking_data)

  # Check if PitchUID exists in CSV
  if (!"PitchUID" %in% names(csv_data)) {
    return(list(
      data = csv_data,
      matched = 0,
      total_bat = total_bat,
      message = "CSV does not contain PitchUID column - cannot merge"
    ))
  }

  # Keep one bat tracking record per non-missing PitchUID so the join can
  # never duplicate CSV rows.
  usable <- !is.na(bat_tracking_data$PitchUID) &
    !duplicated(bat_tracking_data$PitchUID)
  bat_unique <- bat_tracking_data[usable, , drop = FALSE]

  # Perform left join
  merged_data <- left_join(csv_data, bat_unique, by = "PitchUID")

  # Count matches on the join key itself; a matched play whose sensor bat
  # speed is missing still counts as matched.
  matched_count <- sum(bat_unique$PitchUID %in% csv_data$PitchUID)

  # Fill gaps in existing CSV columns from the corresponding sensor columns
  for (field in c("BatSpeed", "VerticalAttackAngle", "HorizontalAttackAngle")) {
    sensor_field <- paste0(field, "_Sensor")
    if (field %in% names(merged_data) && sensor_field %in% names(merged_data)) {
      current <- merged_data[[field]]
      sensor <- merged_data[[sensor_field]]
      merged_data[[field]] <- ifelse(is.na(current) & !is.na(sensor), sensor, current)
    }
  }

  list(
    data = merged_data,
    matched = matched_count,
    total_bat = total_bat,
    message = paste("Merged successfully:", matched_count, "of", total_bat, "bat tracking records matched")
  )
}
##SCRAPER FUNCTIONS
# TrackMan FTP connection settings used by the scraper functions.
# Credentials are read from the FTP_USER / FTP_PASSWORD environment variables
# so they do not have to live in the source; when a variable is not set the
# previous placeholder value is used unchanged.
ftp_server <- "ftp.trackmanbaseball.com"
username <- Sys.getenv("FTP_USER", unset = "FTP_USER")
password <- Sys.getenv("FTP_PASSWORD", unset = "FTP_PASSWORD")
ftp_base_dir <- "/v3"
# Shared worker for the TrackMan FTP scrapers.
#
# For every day from start_date to end_date it lists
# <ftp_base_dir>/YYYY/MM/DD/CSV/ on the FTP server, selects the files matching
# pattern_verified plus those matching pattern_unverified for which no
# verified file of the same game exists, downloads each one into memory and
# keeps the tables that contain a PlayResult column.
#
# Both patterns must capture (date)(stadium)(game number) as groups 1-3.
# Returns all kept rows bound together (every column read as character); an
# empty data frame when nothing was found. Days whose listing cannot be
# fetched are skipped; files that fail to download or parse are reported with
# message() and skipped.
# NOTE(review): getURL() is not provided by any package attached at the top of
# this file (presumably RCurl) - confirm it is attached elsewhere.
.scrape_trackman_csv <- function(start_date, end_date,
                                 pattern_verified, pattern_unverified) {
  dates <- as.character(seq.Date(as.Date(start_date), as.Date(end_date), by = "day"))

  # Identify a game by date + stadium + game number. Column 1 of str_match()
  # is the whole match, so the capture groups live in columns 2-4.
  game_key <- function(files, pattern) {
    parts <- str_match(files, pattern)
    paste(parts[, 2], parts[, 3], parts[, 4], sep = "-")
  }

  # Collect tables in a list and bind once at the end. The leading empty
  # data.frame keeps the result a data.frame even when nothing is read.
  chunks <- list(data.frame())

  for (date in dates) {
    day <- as.Date(date)
    ftp_dir <- paste0(
      ftp_base_dir, "/", format(day, "%Y"), "/", format(day, "%m"), "/",
      format(day, "%d"), "/CSV"
    )
    ftp_url <- paste0("ftp://", username, ":", password, "@", ftp_server, ftp_dir, "/")

    # Get list of files for this day
    listing <- tryCatch({
      getURL(ftp_url, ftp.use.epsv = FALSE, dirlistonly = TRUE)
    }, error = function(e) { NULL })
    if (is.null(listing)) next

    # Trim here so a trailing carriage return cannot defeat the "$" anchors
    file_list <- trimws(unlist(strsplit(listing, "\n")))

    # Filter to verified/unverified CSVs (prefer verified)
    verified <- grep(pattern_verified, file_list, value = TRUE)
    unverified <- grep(pattern_unverified, file_list, value = TRUE)
    verified_games <- unique(game_key(verified, pattern_verified))
    unverified_games <- game_key(unverified, pattern_unverified)
    files_to_read <- c(
      verified,
      unverified[!unverified_games %in% verified_games]
    )

    # Read each file directly into memory
    for (file in files_to_read) {
      file_url <- paste0(ftp_url, URLencode(file))
      tryCatch({
        csv_text <- getURL(file_url)
        rows <- read_csv(I(csv_text), col_types = cols(.default = "c"), show_col_types = FALSE)
        if ("PlayResult" %in% names(rows)) {
          chunks[[length(chunks) + 1]] <- rows
        }
      }, error = function(e) {
        message("Failed: ", file, " - ", e$message)
      })
    }
    message("Done with ", date)
  }

  bind_rows(chunks)
}

#Trackman pitch by pitch
# Download the TrackMan pitch-by-pitch CSVs for a date range.
# start_date, end_date: anything as.Date() accepts; both ends inclusive.
# Returns the combined rows. De-duplication by PitchUID stays disabled, as it
# was commented out in the previous version of this function.
scrape_trackman_pbp <- function(start_date, end_date) {
  .scrape_trackman_csv(
    start_date, end_date,
    pattern_verified = "(\\d{8})-(.+)-(\\d+)\\.csv$",
    pattern_unverified = "(\\d{8})-(.+)-(\\d+)_unverified\\.csv$"
  )
}

#trackman positional
# Download the TrackMan player-positioning CSVs for a date range.
# start_date, end_date: anything as.Date() accepts; both ends inclusive.
# Returns the combined rows with one row kept per PitchUID.
# NOTE(review): files are only kept when they contain a PlayResult column,
# the same test the pitch-by-pitch scraper uses - confirm positioning files
# actually carry that column.
scrape_trackman_positional <- function(start_date, end_date) {
  all_data <- .scrape_trackman_csv(
    start_date, end_date,
    pattern_verified = "(\\d{8})-(.+)-(\\d+)_playerpositioning_FHC\\.csv$",
    pattern_unverified = "(\\d{8})-(.+)-(\\d+)_unverified_playerpositioning_FHC\\.csv$"
  )

  # Nothing downloaded (or no PitchUID column): distinct() would fail on the
  # missing column, so return the data as is.
  if (!"PitchUID" %in% names(all_data)) {
    return(all_data)
  }

  # Deduplicate
  all_data %>%
    distinct(PitchUID, .keep_all = TRUE)
}
# The next section is a large set of functions from the pbp parser GitHub
# project, used to parse NCAA play-by-play data and derive base states.

# Remove leading and trailing whitespace from every element of x.
stripwhite <- function(x) {
  without_leading <- gsub("^\\s*", "", x)
  gsub("\\s*$", "", without_leading)
}
# Trim whitespace from x; any element whose trimmed text ends in a period has
# every period in it removed, other elements are returned trimmed only.
strip_punc <- function(x) {
  trimmed <- stripwhite(x)
  ends_with_period <- str_sub(trimmed, -1) == '.'
  ifelse(ends_with_period, gsub("\\.", "", trimmed), trimmed)
}
##########################################################
# Functions for parsing

# Flag the last play of each half inning.
#
# top_inn: half-inning indicator per play, in play order.
#
# Returns a numeric vector of the same length: 1 where the next play has a
# different indicator, 0 otherwise; the final play is always 1. Empty input
# gives an empty result.
inn_end <- function(top_inn) {
  m <- length(top_inn)
  if (m == 0) {
    return(numeric(0))
  }
  # Compare every play with its successor in one vectorised step
  changes_next <- top_inn[-m] != top_inn[-1]
  c(ifelse(changes_next, 1, 0), 1)
}
# Flag the last play of each game.
#
# game_id: game identifier per play, in play order.
#
# Returns a numeric vector of the same length: 1 where the next play belongs
# to a different game, 0 otherwise; the final play is always 1. Works for
# single-row input and returns an empty vector for empty input.
game_end <- function(game_id) {
  m <- length(game_id)
  if (m == 0) {
    return(numeric(0))
  }
  # A play ends a game when the following play carries another id
  changes_next <- game_id[-1] != game_id[-m]
  c(ifelse(changes_next, 1, 0), 1)
}
# Runs scored on each play, derived from running score columns.
#
# a_txt:   away-side play text per play; an empty string selects the home
#          score difference, anything else the away score difference.
# h_txt:   home-side play text (accepted for symmetry; not used).
# a_score: away score per play.
# h_score: home score per play.
#
# Returns a vector of the same length. Element 1 is a_score[1]; every later
# element is the change in the relevant score versus the previous play,
# truncated to integer. Empty input gives an empty result.
runs_on_play <- function(a_txt, h_txt, a_score, h_score) {
  m <- length(a_txt)
  if (m == 0) {
    return(integer(0))
  }
  runs <- integer(m)
  runs[1] <- a_score[1]
  if (m > 1) {
    idx <- 2:m
    home_delta <- h_score[idx] - h_score[idx - 1]
    away_delta <- a_score[idx] - a_score[idx - 1]
    runs[idx] <- as.integer(ifelse(a_txt[idx] == '', home_delta, away_delta))
  }
  runs
}
# Build, play by play, the name carried in the first-base runner slot before
# each play (presumably the runner on first; the result feeds base_cd_before
# in ncaa_parse).
#
# bat_text, r1_text: batter text and first runner text of each play.
# bat_name:          batter name of each play.
# r1_name:           ignored - it is overwritten with an empty character
#                    vector before it is ever read.
# inn_end, game_end: per-play flags; a row is only filled when the previous
#                    play ended neither an inning nor a game.
# sub_in, sub_out:   names of the substitute and the replaced player.
#
# Row i is derived from row i-1 (text, names and the value already computed
# for row i-1), so the loop order matters. Rows that match no rule get NA
# from case_when; row 1 and rows after an inning/game end stay ''.
# Returns the vector with surrounding whitespace removed.
r1_name = function(bat_text, bat_name, r1_text, r1_name, inn_end, game_end, sub_in, sub_out){
m=length(bat_text)
r1_name= character(m)
for (i in 2:m){
if (isTRUE(inn_end[i-1]==0 & game_end[i-1]==0)) {
r1_name[i]=case_when(
# Previous row substituted out the current occupant: the substitute takes over
sub_out[i-1]!=''&sub_out[i-1]==stripwhite(r1_name[i-1])~sub_in[i-1],
# Previous batter singled/walked/was hit/reached with no further movement in the batter text
(str_detect(bat_text[i-1], '(singled|walked|hit by pitch|reached)') == TRUE) & (str_detect(bat_text[i-1], '(doubled|tripled|homered|advanced|scored|out|stole)') == FALSE) ~ bat_name[i-1],
# Previous batter struck out but reached first
(str_detect(bat_text[i-1], '(reached first)') == TRUE) & (str_detect(bat_text[i-1], '(struck out)') == TRUE) ~ bat_name[i-1],
# Remaining rules carry the previous value forward when the texts show no movement for it
(r1_text[i-1]==''|(str_detect(r1_text[i-1], '(advanced to second|stole second|advanced to third|stole third|scored|out)') == FALSE)) & (str_detect(bat_text[i-1], '(double play|advanced to second|stole second|advanced to third|stole third|scored|caught stealing|picked off|homered)') == FALSE) ~ r1_name[i-1],
(str_detect(bat_text[i-1], '(singled|doubled|tripled|advanced to second|stole second|advanced to third|stole third|scored|homered|out at second c to)') == FALSE) & (str_detect(r1_text[i-1], '(advanced to third|stole third|scored|out at third)') == TRUE) & stripwhite(gsub('((advanced to second|stole second|stole third|advanced to third|scored|out).*$)', '', r1_text[i-1]))!=stripwhite(gsub('((singled|reached).*$)', '', r1_name[i-1])) ~ r1_name[i-1],
r1_text[i-1]=='' & stripwhite(gsub('((advanced to second|stole second|stole third|advanced to third|scored|out|failed|Failed|picked off).*$)', '', bat_text[i-1]))!=stripwhite(r1_name[i-1]) ~ r1_name[i-1]
)}}
return(stripwhite(r1_name))
}
# Build, play by play, the name carried in the second-base runner slot before
# each play (presumably the runner on second; the result feeds base_cd_before
# in ncaa_parse).
#
# bat_text, r1_text, r2_text: batter / first runner / second runner text.
# bat_name, r1_name:          accepted but not read in the body.
# r2_name:                    ignored - overwritten with an empty character
#                             vector before it is ever read.
# inn_end, game_end:          per-play flags; a row is only filled when the
#                             previous play ended neither an inning nor a game.
# sub_in, sub_out:            names of the substitute and the replaced player.
#
# Row i is derived from row i-1, so the loop order matters. After each row is
# set, any trailing '(singled|reached)...' text is cut from the name.
# Returns the vector with surrounding whitespace removed.
r2_name = function(bat_text, bat_name, r1_text, r1_name, r2_text, r2_name, inn_end, game_end, sub_in, sub_out){
m=length(bat_text)
r2_name= character(m)
for (i in 2:m){
if (isTRUE(inn_end[i-1]==0 & game_end[i-1]==0)) {
r2_name[i]=case_when(
# Previous row substituted out the current occupant: the substitute takes over
sub_out[i-1]!=''&sub_out[i-1]==stripwhite(r2_name[i-1])~sub_in[i-1],
# Name taken from the batter text: doubled / advanced to second / stole second, with nothing beyond second
((str_detect(bat_text[i-1], '(doubled|advanced to second|stole second)') == TRUE) & (str_detect(bat_text[i-1], '(advanced to third|scored|out|stole third)') == FALSE)) ~ stripwhite(gsub('((doubled|advanced to second|stole second).*$)', '', bat_text[i-1])),
# Name taken from the first runner text: advanced to / stole second, with nothing beyond second
((str_detect(r1_text[i-1], '(advanced to second|stole second)') == TRUE) & (str_detect(r1_text[i-1], '(advanced to third|scored|out|stole third)') == FALSE)) ~ stripwhite(gsub('((advanced to second|stole second).*$)', '', r1_text[i-1])),
# Remaining rules carry the previous value forward when the texts show no movement for it
r2_text[i-1]=='' & stripwhite(gsub('((stole third|advanced to third|scored|out).*$)', '', r1_text[i-1]))!=stripwhite(r2_name[i-1]) & (str_detect(bat_text[i-1], '(advanced to third|stole third|scored|picked off|caught stealing)') == FALSE) ~ r2_name[i-1],
r2_text[i-1]=='' & stripwhite(gsub('((out on the play).*$)', '', r1_text[i-1]))!=stripwhite(r2_name[i-1]) & (str_detect(bat_text[i-1], '(double play)') == TRUE) ~ r2_name[i-1],
r1_text[i-1]=='' & (str_detect(bat_text[i-1], '(stole third|advanced to third|scored|picked off|homered|caught stealing)') == FALSE) ~ r2_name[i-1],
# Same condition as the first rule; it can only be reached when that rule did not match
sub_out[i-1]!=''&sub_out[i-1]==stripwhite(r2_name[i-1])~sub_in[i-1]
)
r2_name[i]=stripwhite(gsub('((singled|reached).*$)', '', r2_name[i]))
}
}
return(stripwhite(r2_name))
}
# Build, play by play, the name carried in the third-base runner slot before
# each play (presumably the runner on third; the result feeds base_cd_before
# in ncaa_parse).
#
# bat_text, r1_text, r2_text, r3_text: batter and runner texts of each play.
# bat_name, r1_name, r2_name:          accepted but not read in the body.
# r3_name:                             ignored - overwritten with an empty
#                                      character vector before it is read.
# inn_end, game_end:                   per-play flags; a row is only filled
#                                      when the previous play ended neither an
#                                      inning nor a game.
# sub_in, sub_out:                     substitute and replaced player names.
#
# Row i is derived from row i-1, so the loop order matters. After each row is
# set, trailing event text (singled, doubled, reached, ...) is cut from it.
# Returns the vector with surrounding whitespace removed.
r3_name = function(bat_text, bat_name, r1_text, r1_name, r2_text, r2_name, r3_text, r3_name, inn_end, game_end, sub_in, sub_out){
m=length(bat_text)
r3_name= character(m)
for (i in 2:m){
if (isTRUE(inn_end[i-1]==0 & game_end[i-1]==0)) {
r3_name[i]=case_when(
# Previous row substituted out the current occupant: the substitute takes over
sub_out[i-1]!=''&sub_out[i-1]==stripwhite(r3_name[i-1])~sub_in[i-1],
# Name taken from whichever text shows a move to third without scoring or an out
((str_detect(bat_text[i-1], '(tripled|advanced to third|stole third)') == TRUE) & (str_detect(bat_text[i-1], '(scored|out)') == FALSE)) ~ stripwhite(gsub('((tripled|advanced to third|stole third).*$)', '', bat_text[i-1])),
((str_detect(r1_text[i-1], '(advanced to third|stole third)') == TRUE) & (str_detect(r1_text[i-1], '(scored|out)') == FALSE)) ~ stripwhite(gsub('((advanced to third|stole third).*$)', '', r1_text[i-1])),
((str_detect(r2_text[i-1], '(advanced to third|stole third)') == TRUE) & (str_detect(r2_text[i-1], '(scored|out)') == FALSE)) ~ stripwhite(gsub('((advanced to third|stole third).*$)', '', r2_text[i-1])),
# Remaining rules carry the previous value forward when the texts show no scoring or out for it
r1_text[i-1]=='' & (str_detect(bat_text[i-1], '(scored|stole home|homered)') == FALSE) ~ r3_name[i-1],
r2_text[i-1]=='' & stripwhite(gsub('((scored|stole home|out).*$)', '', r1_text[i-1]))!=stripwhite(r3_name[i-1]) & (str_detect(bat_text[i-1], '(scored|stole home)') == FALSE) ~ r3_name[i-1],
r3_text[i-1]=='' & (str_detect(r2_text[i-1], '(scored|stole home|out)') == FALSE) & (str_detect(r1_text[i-1], '(scored|stole home|out)') == FALSE) & (str_detect(bat_text[i-1], '(scored|stole home)') == FALSE) ~ r3_name[i-1])
r3_name[i]=stripwhite(gsub('((singled|doubled|reached|advanced|stole|failed|Failed|picked off).*$)', '', r3_name[i]))
}
}
return(stripwhite(r3_name))
}
# Flag the first play of each game.
#
# game_end: per-play flag marking the last play of a game.
#
# Returns a numeric vector of the same length: 1 for the first play, and for
# every later play the game_end value of the play before it. Empty input
# gives an empty result.
new_game <- function(game_end) {
  m <- length(game_end)
  if (m == 0) {
    return(numeric(0))
  }
  # Shift the end-of-game flags down by one row
  c(1, game_end[-m])
}
# Flag the first play of each half inning.
#
# inn_end: per-play flag marking the last play of a half inning.
#
# Returns a numeric vector of the same length: 1 for the first play, and for
# every later play the inn_end value of the play before it. Empty input gives
# an empty result.
new_inn <- function(inn_end) {
  m <- length(inn_end)
  if (m == 0) {
    return(numeric(0))
  }
  # Shift the end-of-inning flags down by one row
  c(1, inn_end[-m])
}
# Number of outs in the half inning before each play.
#
# outs_on_play: outs recorded on each play.
# new_game:     1 on the first play of a game, else 0.
# new_inn:      1 on the first play of a half inning, else 0.
#
# Returns a vector of the same length. A play that starts a game or an inning
# (or whose flags are NA) gets 0; any other play gets the previous play's
# outs-before plus the previous play's outs, modulo 3.
outs_before <- function(outs_on_play, new_game, new_inn) {
  m <- length(outs_on_play)
  inn_outs <- integer(m)
  # seq_len(m)[-1] is empty for m < 2, so short inputs never index row 0
  for (i in seq_len(m)[-1]) {
    if (isTRUE(new_game[i] == 0 & new_inn[i] == 0)) {
      inn_outs[i] <- (inn_outs[i - 1] + outs_on_play[i - 1]) %% 3
    }
  }
  inn_outs
}
# Score of one team before each play, accumulated from runs_on_play.
#
# new_game:     1 on the first play of a game, else 0.
# runs_on_play: runs scored on each play.
# top_inning:   1 when the play is in the top half (away batting), 0 when in
#               the bottom half (home batting).
# home_team:    1 (default) to return the home score, anything else to return
#               the away score.
#
# Returns a numeric vector of the same length. Both scores are 0 on the first
# row and on every row where new_game is not 0; otherwise the previous play's
# runs are added to the side that was batting on the previous play. A row
# whose previous top_inning is neither 0 nor 1 resets both scores to 0.
score_before <- function(new_game, runs_on_play, top_inning, home_team = 1) {
  m <- length(new_game)
  home_score_before <- numeric(m)
  away_score_before <- numeric(m)

  # seq_len(m)[-1] is empty for m < 2, so short inputs are returned as zeros
  for (i in seq_len(m)[-1]) {
    same_game <- isTRUE(new_game[i] == 0)
    previous_half <- top_inning[i - 1]

    if (same_game && isTRUE(previous_half == 0)) {
      # Home batted on the previous play
      home_score_before[i] <- as.numeric(home_score_before[i - 1] + runs_on_play[i - 1])
      away_score_before[i] <- as.numeric(away_score_before[i - 1])
    } else if (same_game && isTRUE(previous_half == 1)) {
      # Away batted on the previous play
      home_score_before[i] <- as.numeric(home_score_before[i - 1])
      away_score_before[i] <- as.numeric(away_score_before[i - 1] + runs_on_play[i - 1])
    } else {
      home_score_before[i] <- 0
      away_score_before[i] <- 0
    }
  }

  if (home_team == 1) {
    return(home_score_before)
  }
  away_score_before
}
# Runs scored on each play, from the score after and before the play.
#
# home_score, away_score:               scores after each play.
# home_score_before, away_score_before: scores before each play.
# top_inn:                              0 when the home side is batting;
#                                       any other value uses the away scores.
#
# Returns a vector of the same length. The first element is always 0 (the
# computation starts at the second play); later elements are the batting
# side's score after the play minus its score before the play.
runs_play <- function(home_score, away_score, home_score_before, away_score_before, top_inn) {
  n <- length(home_score)
  runs <- integer(n)
  if (n > 1) {
    idx <- 2:n
    runs[idx] <- ifelse(
      top_inn[idx] == 0,
      home_score[idx] - home_score_before[idx],
      away_score[idx] - away_score_before[idx]
    )
  }
  runs
}
# Total runs scored in the half inning that each play belongs to.
#
# end_inn:      1 on the last play of a half inning, else 0.
# runs_on_play: runs scored on each play.
#
# Returns a vector of the same length where every play of an inning carries
# that inning's run total. Plays after the last flagged inning end (or all
# plays, when no end is flagged) are left at 0.
runs_this_inn <- function(end_inn, runs_on_play) {
  m <- length(end_inn)
  runs <- integer(m)

  inning_ends <- which(end_inn == 1)
  # Each inning starts on the row after the previous inning's end
  inning_starts <- c(0, head(inning_ends, -1)) + 1

  for (j in seq_along(inning_ends)) {
    rows <- inning_starts[j]:inning_ends[j]
    # One sum per inning, assigned to all of its rows
    runs[rows] <- sum(runs_on_play[rows])
  }
  runs
}
# Runs scored from each play (inclusive) to the end of its half inning.
#
# end_inn:       1 on the last play of a half inning, else 0.
# runs_on_play:  runs scored on each play.
# runs_this_inn: inning run total carried on each play.
#
# Returns a vector of the same length: inning total minus the runs scored in
# the inning up to and including the play, plus the play's own runs. Plays
# after the last flagged inning end get just their own runs_on_play.
runs_rest_of_inn <- function(end_inn, runs_on_play, runs_this_inn) {
  m <- length(end_inn)
  runs <- integer(m)

  inning_ends <- which(end_inn == 1)
  # Each inning starts on the row after the previous inning's end
  inning_starts <- c(0, head(inning_ends, -1)) + 1

  for (j in seq_along(inning_ends)) {
    rows <- inning_starts[j]:inning_ends[j]
    # Running total within the inning replaces a fresh sum for every row
    runs[rows] <- runs_this_inn[rows] - cumsum(runs_on_play[rows])
  }
  runs + runs_on_play
}
# Assign a batting-order slot (1-9, cycling) to every play that has a batter.
#
# new_game: 1 on the first play of a game, else 0.
# top_inn:  1 for top-half plays, 0 for bottom-half plays.
# bat_name: batter name per play ('' or NA when the play has no batter).
#
# Returns a character vector of the same length. Within each game the top-half
# and bottom-half batters are counted separately and numbered "1".."9"
# repeatedly in order of appearance; plays without a usable batter get ''.
# Plays before the first flagged game start stay NA.
bat_order_id <- function(new_game, top_inn, bat_name) {
  m <- length(top_inn)
  batorder <- rep(NA_character_, m)

  game_starts <- which(new_game == 1)
  # Each game runs up to the row before the next game start (or the last row)
  game_stops <- c(game_starts[-1], m + 1) - 1

  for (g in seq_along(game_starts)) {
    top_count <- 0
    bottom_count <- 0
    for (i in game_starts[g]:game_stops[g]) {
      has_batter <- !is.na(top_inn[i]) && !is.na(bat_name[i]) && bat_name[i] != ''
      if (has_batter && top_inn[i] == 1) {
        batorder[i] <- (top_count %% 9) + 1
        top_count <- top_count + 1
      } else if (has_batter && top_inn[i] == 0) {
        batorder[i] <- (bottom_count %% 9) + 1
        bottom_count <- bottom_count + 1
      } else {
        batorder[i] <- '' # leave empty if NA or no name
      }
    }
  }
  batorder
}
# Fill NA batting-order entries from neighbouring plays.
#
# bat_order: batting-order value per play, NA where unknown.
# end_game:  1 on the last play of a game, else 0.
#
# First pass (bottom to top): an NA entry takes the value of the following
# play unless the NA play itself ends a game. Second pass (top to bottom): any
# remaining NA takes the value of the preceding play. Inputs shorter than two
# elements are returned unchanged.
bat_order_fill <- function(bat_order, end_game) {
  m <- length(bat_order)
  if (m < 2) {
    return(bat_order)
  }

  for (i in m:2) {
    # isTRUE() treats an NA end_game flag as "do not fill" instead of failing
    if (is.na(bat_order[i - 1]) && isTRUE(end_game[i - 1] == 0)) {
      bat_order[i - 1] <- bat_order[i]
    }
  }
  for (i in 2:m) {
    if (is.na(bat_order[i])) {
      bat_order[i] <- bat_order[i - 1]
    }
  }
  bat_order
}
##########################################################
# Parse NCAA play-by-play text into per-play event and game-state columns.
#
# pbp_data_frame: data frame with one row per play, in play order. The body
#                 reads the columns away_text, home_text and game_id.
#
# Returns pbp_data_frame with these columns added (computed in this order,
# later ones depend on earlier ones): tmp_text, sub_fl, bat_text, r1_text,
# r2_text, r3_text, event_cd, bat_name, sub_in, sub_out, game_end, new_game,
# top_inning, inn_end, r1_name, r2_name, r3_name, outs_on_play, new_inn,
# outs_before, outs_after, base_cd_before, bat_order, hit_type, runs_on_play,
# away_score_before, home_score_before, away_score_after, home_score_after,
# runs_this_inn, runs_roi, int_bb_fl, sh_fl, sf_fl.
# Several columns share their name with the helper function that computes
# them (game_end, new_game, inn_end, r1_name, ...); the call on the right-hand
# side resolves to the function.
ncaa_parse=function(pbp_data_frame){
pbp_data_frame=pbp_data_frame%>%
mutate(
# Away and home text joined by a space; one of the two is normally empty
tmp_text=paste(away_text, home_text),
# Substitution flag: 1 for position changes and pinch hit/run lines, 0 for plays
sub_fl=case_when(
str_detect(tmp_text, '(singled|doubled|tripled|homered|walked|reached|struck out|grounded|flied|lined|popped| hit|infield fly|infield fly|out|double play|triple play)')==TRUE & str_detect(tmp_text, c('pinch hit'))==FALSE ~ 0,
str_detect(tmp_text, c('to (p|c|1b|2b|3b|ss|lf|rf|cf|dh)'))==TRUE ~ 1,
str_detect(tmp_text, c('pinch hit'))==TRUE ~ 1,
str_detect(tmp_text, c('pinch ran'))==TRUE ~ 1,
TRUE ~ 0),
# Split the text up
# bat_text is everything before the first separator (';', '3a' or ':');
# r1_text, r2_text and r3_text are the following separated segments
bat_text=gsub('(;|3a|:).*$','', tmp_text),
r1_text=case_when(
str_detect(tmp_text, '(;|3a|:)')==TRUE ~ stripwhite(gsub('^.*?(;|3a|:)','',tmp_text)),
TRUE~''),
r2_text=case_when(
str_detect(r1_text, '(;|3a|:)')==TRUE ~ stripwhite(gsub('^.*?(;|3a|:)','',r1_text)),
TRUE~''),
r3_text=case_when(
str_detect(r2_text, '(;|3a|:)')==TRUE ~ stripwhite(gsub('^.*?(;|3a|:)','',r2_text)),
TRUE~''),
# Cut each segment at its own first separator once the later ones are extracted
r2_text=stripwhite(gsub('(;|3a|:).*$','',r2_text)),
r1_text=stripwhite(gsub('(;|3a|:).*$','',r1_text)),
# Event code: same as retrosheet
# The first matching rule wins, so the order of the rules matters
event_cd=case_when(
sub_fl==1 ~ 1,
str_sub(stripwhite(tmp_text),1,1)=='(' ~ 1,
str_detect(tmp_text, '(hitting out of turn| for |No play|halted|delay|postponed|ejected|suspended|coach|sunny|review|challenged|HC|\\*\\*)') == TRUE ~ 1,
str_detect(tmp_text,'struck out') == TRUE ~ 3,
str_detect(tmp_text,'stole') == TRUE ~ 4,
(str_detect(tmp_text,'(caught stealing|out at second c to|out at third c to)') == TRUE) & (str_detect(tmp_text,'(bunt|grounded)') == FALSE) ~ 6,
str_detect(tmp_text,'picked off') == TRUE ~ 8,
str_detect(tmp_text,'wild pitch') == TRUE ~ 9,
str_detect(tmp_text,'passed ball') == TRUE ~ 10,
str_detect(tmp_text,'balk') == TRUE ~ 11,
str_detect(tmp_text,'Dropped foul') == TRUE ~ 13,
str_detect(tmp_text,'walked') == TRUE ~ 14,
str_detect(tmp_text,'hit by pitch') == TRUE ~ 16,
str_detect(tmp_text,'interference') == TRUE ~ 17,
str_detect(tmp_text,'error') == TRUE ~ 18,
str_detect(tmp_text,'muffed') == TRUE ~ 18,
str_detect(tmp_text,'dropped') == TRUE ~ 18,
str_detect(tmp_text,'fielder\'s choice') == TRUE ~ 19,
str_detect(tmp_text,'singled') == TRUE ~ 20,
str_detect(tmp_text,'doubled') == TRUE ~ 21,
str_detect(tmp_text,'tripled') == TRUE ~ 22,
str_detect(tmp_text,'homered') == TRUE ~ 23,
str_detect(tmp_text, '(flied out|grounded out|popped|fouled out|lined out| infield fly|double play|triple play|out at (first|second|third|home))') == TRUE ~ 2,
str_detect(tmp_text, 'advanced') == TRUE ~ 12,
TRUE ~ 0),
# Bat name
# Empty for non-plays and runner-only events; otherwise the text before the first event word
bat_name= case_when(
event_cd %in% c(0,1)~'',
str_detect(bat_text, '(Batter|Runner\'s interference)')==TRUE ~'',
str_detect(bat_text, '(walked|singled|doubled|tripled|reached|struck out|grounded out)')==FALSE & str_detect(bat_text, '(advanced|caught stealing|stole|picked off|out at (first|second|third|home)|tagged out)')==TRUE ~ '',
str_detect(bat_text, '(singled|doubled|tripled|homered|walked|reached|struck out|grounded|flied|lined|popped|hit | out |fouled out|pinch hit|infield fly|intentionally walked|was intentionally walked|fouled into double play)')==TRUE ~ gsub('((singled|doubled|tripled|homered|walked|reached|struck out|grounded|flied|lined|popped|hit | out |fouled out|pinch hit|infield fly|intentionally walked|was intentionally walked|fouled into double play).*$)', '', bat_text),
str_detect(stripwhite(r1_text), 'caught stealing c to (2b|3b), double play.')==TRUE ~ bat_text,
TRUE ~ ''),
# Sub in
# Name of the player entering, taken from the text before the substitution phrase
sub_in= case_when(
sub_fl==1&str_detect(bat_text, 'to (p|c|1b|2b|3b|ss|lf|rf|cf|dh)')==TRUE ~ stripwhite(gsub('(to (p|c|1b|2b|3b|ss|lf|rf|cf|dh).*$)', '', bat_text)),
sub_fl==1&str_detect(bat_text, 'pinch ran for')==TRUE ~ stripwhite(gsub('pinch ran for.*$', '', bat_text)),
sub_fl==1&str_detect(bat_text, 'pinch hit for')==TRUE ~ stripwhite(gsub('pinch hit for.*$', '', bat_text)),
TRUE ~ ''),
# Sub out
# Name of the player leaving, taken from the text after the substitution phrase
sub_out= case_when(
sub_fl==1&str_detect(bat_text, 'to (p|c|1b|2b|3b|ss|lf|rf|cf|dh) for')==TRUE ~ gsub('^.*to (p|c|1b|2b|3b|ss|lf|rf|cf|dh) for', '', bat_text),
sub_fl==1&str_detect(bat_text, 'pinch ran for')==TRUE ~ gsub('^.*pinch ran for', '', bat_text),
sub_fl==1&str_detect(bat_text, 'pinch hit')==TRUE ~ gsub('^.*pinch hit for', '', bat_text),
TRUE ~ ''),
# Clean sub out
sub_out=strip_punc(sub_out),
# Game end
game_end = game_end(game_id),
# New game
new_game=new_game(game_end),
# Top inning
# 1 when the away side has text for the play, 0 when away_text is empty
top_inning=ifelse(away_text=='', 0,1),
# End of inning
inn_end = inn_end(top_inning),
# Runner names
# The r1_name / r2_name / r3_name arguments passed below are overwritten inside the helpers before being read
r1_name=r1_name(bat_text, bat_name, r1_text, r1_name, inn_end, game_end, sub_in, sub_out),
r2_name =r2_name(bat_text, bat_name, r1_text, r1_name, r2_text, r2_name, inn_end, game_end, sub_in, sub_out),
r3_name =r3_name(bat_text, bat_name, r1_text, r1_name, r2_text, r2_name, r3_text, r3_name, inn_end, game_end, sub_in, sub_out),
# Clean runner names
r1_name=replace(r1_name,is.na(r1_name),''),
r2_name=replace(r2_name,is.na(r2_name),''),
r3_name=replace(r3_name,is.na(r3_name),''),
# Fix repeat bat names
# Blank the batter when the same name already occupies one of the runner slots
bat_name=case_when(
bat_name!='' & stripwhite(bat_name)==stripwhite(r1_name)~ '',
bat_name!='' & stripwhite(bat_name)==stripwhite(r2_name)~ '',
bat_name!='' & stripwhite(bat_name)==stripwhite(r3_name)~ '',
TRUE ~ bat_name),
# Outs on play
# Double/triple plays are read from bat_text; otherwise the outs are the number of
# text segments (batter, r1, r2, r3) that mention an out
outs_on_play=case_when(
event_cd %in% c(0,1) ~ 0,
str_count(bat_text, 'triple play') == 1 ~ 3,
str_count(bat_text, 'double play') == 1 ~ 2,
(str_detect(bat_text, '( out|popped)') == TRUE) & (str_detect(bat_text, '(reached)') == TRUE) ~ 0,
# 1 out
((str_detect(bat_text, '( out |popped|infield fly)') == TRUE) & (str_detect(r1_text, '( out |popped)')==FALSE) & (str_detect(r2_text, '( out |popped)')==FALSE) &(str_detect(r3_text, '( out |popped)')==FALSE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == FALSE) & (str_detect(r1_text, '( out |popped)')==TRUE) & (str_detect(r2_text, '( out |popped)')==FALSE) &(str_detect(r3_text, '( out |popped)')==FALSE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == FALSE) & (str_detect(r1_text, '( out |popped)')==FALSE) & (str_detect(r2_text, '( out |popped)')==TRUE) &(str_detect(r3_text, '( out |popped)')==FALSE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == FALSE) & (str_detect(r1_text, '( out |popped)')==FALSE) & (str_detect(r2_text, '( out |popped)')==FALSE) &(str_detect(r3_text, '( out |popped)')==TRUE)) ~ 1,
# 2 outs
((str_detect(bat_text, '( out |popped|infield fly)') == TRUE) & (str_detect(r1_text, '( out |popped)')==TRUE) & (str_detect(r2_text, '( out |popped)')==FALSE) &(str_detect(r3_text, '( out |popped)')==FALSE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == TRUE) & (str_detect(r1_text, '( out |popped)')==FALSE) & (str_detect(r2_text, '( out |popped)')==TRUE) &(str_detect(r3_text, '( out |popped)')==FALSE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == TRUE) & (str_detect(r1_text, '( out |popped)')==FALSE) & (str_detect(r2_text, '( out |popped)')==FALSE) &(str_detect(r3_text, '( out |popped)')==TRUE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == FALSE) & (str_detect(r1_text, '( out |popped)')==TRUE) & (str_detect(r2_text, '( out |popped)')==TRUE) &(str_detect(r3_text, '( out |popped)')==FALSE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == FALSE) & (str_detect(r1_text, '( out |popped)')==TRUE) & (str_detect(r2_text, '( out |popped)')==FALSE) &(str_detect(r3_text, '( out |popped)')==TRUE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == FALSE) & (str_detect(r1_text, '( out |popped)')==FALSE) & (str_detect(r2_text, '( out |popped)')==TRUE) &(str_detect(r3_text, '( out |popped)')==TRUE)) ~ 2,
# 3 outs
((str_detect(bat_text, '( out |popped|infield fly)') == TRUE) & (str_detect(r1_text, '( out |popped)')==TRUE) & (str_detect(r2_text, '( out |popped)')==TRUE) &(str_detect(r3_text, '( out |popped)')==FALSE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == TRUE) & (str_detect(r1_text, '( out |popped)')==FALSE) & (str_detect(r2_text, '( out |popped)')==TRUE) &(str_detect(r3_text, '( out |popped)')==TRUE)) |
((str_detect(bat_text, '( out |popped|infield fly)') == TRUE) & (str_detect(r1_text, '( out |popped)')==TRUE) & (str_detect(r2_text, '( out |popped)')==FALSE) &(str_detect(r3_text, '( out |popped)')==TRUE)) |
((str_detect(bat_text, '( out |popped)') == FALSE) & (str_detect(r1_text, '( out |popped)')==TRUE) & (str_detect(r2_text, '( out |popped)')==TRUE) &(str_detect(r3_text, '( out |popped)')==TRUE)) ~ 3,
TRUE ~ 0),
# New inning
new_inn=new_inn(inn_end),
# Outs before
outs_before=outs_before(outs_on_play, new_game, new_inn),
# Outs after
outs_after=outs_before+outs_on_play,
# Base code
# Bit-style code of occupied slots before the play: first = 1, second = 2, third = 4, summed
base_cd_before=case_when(
stripwhite(r1_name)!='' & r2_name=='' & r3_name=='' ~ 1,
r1_name=='' & r2_name!='' & r3_name=='' ~ 2,
r1_name!='' & r2_name!='' & r3_name=='' ~ 3,
r1_name=='' & r2_name=='' & r3_name!='' ~ 4,
r1_name!='' & r2_name=='' & r3_name!='' ~ 5,
r1_name=='' & r2_name!='' & r3_name!='' ~ 6,
r1_name!='' & r2_name!='' & r3_name!='' ~ 7,
TRUE~0),
# Batting order
bat_order=bat_order_id(new_game, top_inning, bat_name),
# Hit type
# K = strikeout, B = bunt/sacrifice, GO / FO / LO / PO from the batter text, '' otherwise
hit_type=case_when(
event_cd==3 ~ 'K',
str_detect(bat_text,'(bunt)')==TRUE ~ 'B',
str_detect(bat_text, '(bunt)')==FALSE & str_detect(bat_text, '(SAC)')==TRUE & str_detect(bat_text, '(flied|popped)')==FALSE ~ 'B',
str_detect(bat_text,'(grounded out|(p|3b|2b|ss|1b) to (p|3b|2b|ss|1b|c))')==TRUE ~ 'GO',
str_detect(bat_text,'(flied|fouled out to (lf|rf))')==TRUE ~ 'FO',
str_detect(bat_text,'(lined)')==TRUE ~ 'LO',
str_detect(bat_text,'(popped|infield fly|fouled out to (p|3b|2b|ss|1b|c))')==TRUE ~ 'PO',
TRUE ~ '' ),
# Runs on play
# Counted from the text: 'advanced to home' + 'scored' + 'homered' + 'stole home', minus one per 'scored, scored'
runs_on_play=(as.numeric(str_count(tmp_text, '(advanced to home)'))+as.numeric(str_count(tmp_text, '(scored)')) + as.numeric(str_count(tmp_text, '(homered)')) + as.numeric(str_count(tmp_text, '(stole home)'))-as.numeric(str_count(tmp_text, '(scored, scored)'))),
# Away score
away_score_before=score_before(new_game, runs_on_play, top_inning, home_team=0),
# Home score
home_score_before=score_before(new_game, runs_on_play, top_inning, home_team=1),
# Away score after
away_score_after=case_when(
top_inning==1 ~away_score_before+ runs_on_play,
TRUE ~ away_score_before),
# Home score after
home_score_after=case_when(
top_inning==0 ~home_score_before+ runs_on_play,
TRUE ~ home_score_before),
# Runs this inning
runs_this_inn=runs_this_inn(inn_end, runs_on_play),
# Runs rest of inning
runs_roi=runs_rest_of_inn(inn_end,runs_on_play, runs_this_inn),
# Intentional walk
int_bb_fl=case_when(
str_detect(tmp_text,'intentionally ') == TRUE ~ 1,
TRUE ~ 0
),
# Sac bunts
sh_fl=case_when(
str_detect(bat_text, '(SAC)')==TRUE & str_detect(bat_text, '(flied|popped)')==FALSE ~ 1,
TRUE~0),
# Sac flys
sf_fl=case_when(
str_detect(bat_text, '(SAC)')==TRUE & str_detect(bat_text, '(flied|popped)')==TRUE ~ 1,
str_detect(bat_text, '(SAC)')==FALSE & str_detect(bat_text, '(flied|popped)')==TRUE & str_detect(bat_text, '(RBI)')==TRUE~1,
TRUE~0 )
)
# Fill batting-order gaps in a second step, once bat_order and game_end exist
pbp_data_frame=pbp_data_frame%>%
mutate(bat_order=bat_order_fill(bat_order, game_end))
return(pbp_data_frame)
}
# Name prefixes written as regex fragments (note the escaped period in the
# first entry). NOTE(review): presumably used when cleaning player names; the
# code that reads this vector is not in this part of the file.
prefixes <- c(
  "St\\.",
  "Mc",
  "De",
  "Di",
  "Van",
  "Von"
)
#Get NCAA Game IDs
# Fetch the D1 baseball scoreboard for one day and return one row per game.
#
# date: anything as.Date() accepts.
#
# Requests the scoreboard endpoint of ncaa-api.henrygd.me for that day, stops
# on an HTTP error status, unnests the away and home conference lists and
# returns a tibble with Date, GameID (digits following "/game/" in the game
# URL), HomeTeam, AwayTeam, StartTime, HomeScore, AwayScore, HomeRecord,
# AwayRecord, HomeConference and AwayConference.
get_ncaa_schedule <- function(date) {
  date <- as.Date(date)
  year_part <- as.integer(format(date, "%Y"))
  month_part <- as.integer(format(date, "%m"))
  day_part <- as.integer(format(date, "%d"))
  api_url <- sprintf(
    "https://ncaa-api.henrygd.me/scoreboard/baseball/d1/%04d/%02d/%02d/all-conf",
    year_part, month_part, day_part
  )

  res <- GET(api_url, user_agent("Mozilla/5.0"))
  stop_for_status(res)
  dat <- fromJSON(content(res, "text", encoding = "UTF-8"), flatten = TRUE)

  # Away conferences first; conferenceSeo is dropped so the home unnest can
  # reintroduce columns of the same names without a clash
  games <- tidyr::unnest(dat$games, game.away.conferences)
  games <- rename(games, away_conference = conferenceName)
  games <- dplyr::select(games, -conferenceSeo)
  games <- tidyr::unnest(games, game.home.conferences)
  games <- rename(games, home_conference = conferenceName)

  tibble(
    Date = games$game.startDate,
    GameID = str_extract(games$game.url, "(?<=/game/)\\d+"),
    HomeTeam = games$game.home.names.short,
    AwayTeam = games$game.away.names.short,
    StartTime = games$game.startTime,
    HomeScore = games$game.home.score,
    AwayScore = games$game.away.score,
    HomeRecord = games$game.home.description,
    AwayRecord = games$game.away.description,
    HomeConference = games$home_conference,
    AwayConference = games$away_conference
  )
}
# Scrapes NCAA play-by-play for one game ID.
#
# Downloads the play-by-play feed of a single game from ncaa-api.henrygd.me
# and returns one row per play.
#
# game_id: the NCAA game id (inserted into the URL with "%s").
#
# Returns a data frame with columns game_id, Inning, PlayNumber, BatterTeam,
# PitcherTeam, away_team, home_team, away_text, home_text, Score,
# home_name_id and away_name_id. HTTP failures raise an error via
# httr::stop_for_status().
get_ncaa_pbp <- function(game_id) {
  url <- sprintf("https://ncaa-api.henrygd.me/game/%s/play-by-play", game_id)
  # Same user agent as the scoreboard request in get_ncaa_schedule().
  res <- httr::GET(url, httr::user_agent("Mozilla/5.0"))
  httr::stop_for_status(res)
  dat <- fromJSON(httr::content(res, "text", encoding = "UTF-8"), flatten = TRUE)

  teams_df <- dat$teams
  home_row <- teams_df %>% filter(isHome == TRUE)
  visitor_row <- teams_df %>% filter(isHome == FALSE)

  # "<first 3 letters of short name>_<first 3 letters of team name>", upper-cased.
  build_name_id <- function(team_row) {
    paste0(
      toupper(str_sub(team_row$nameShort, 1, 3)),
      "_",
      toupper(str_sub(team_row$teamName, 1, 3))
    )
  }

  team_home <- home_row$nameShort
  team_visitor <- visitor_row$nameShort
  home_id <- home_row$teamId
  visitor_id <- visitor_row$teamId
  home_name_id <- build_name_id(home_row)
  away_name_id <- build_name_id(visitor_row)

  pbp <- dat$periods %>%
    # periods -> per-team stats -> individual plays
    tidyr::unnest(playbyplayStats, names_sep = "_") %>%
    tidyr::unnest(playbyplayStats_plays, names_sep = "_") %>%
    mutate(
      game_id = game_id,
      away_team = team_visitor,
      home_team = team_home,
      PlayNumber = row_number(),
      # The team id attached to a play is treated as the batting side.
      BatterTeam = ifelse(playbyplayStats_teamId == home_id, team_home, team_visitor),
      PitcherTeam = ifelse(playbyplayStats_teamId == home_id, team_visitor, team_home),
      away_text = ifelse(playbyplayStats_teamId == visitor_id, playbyplayStats_plays_playText, ""),
      home_text = ifelse(playbyplayStats_teamId == home_id, playbyplayStats_plays_playText, ""),
      # Score is "<visitor>-<home>", or "" when the play carries no score.
      Score = ifelse(
        is.na(playbyplayStats_plays_homeScore) & is.na(playbyplayStats_plays_visitorScore),
        "",
        paste0(playbyplayStats_plays_visitorScore, "-", playbyplayStats_plays_homeScore)
      ),
      home_name_id = home_name_id,
      away_name_id = away_name_id
    ) %>%
    dplyr::select(
      game_id, Inning = periodNumber, PlayNumber, BatterTeam, PitcherTeam,
      away_team, home_team, away_text, home_text, Score, home_name_id, away_name_id
    )

  pbp
}
# Scrape and parse NCAA play-by-play for every game in a date range.
#
# start_date, end_date: inclusive bounds, anything as.Date() can convert.
#
# Step 1 collects the schedule (game IDs) of each day with
# get_ncaa_schedule(); step 2 downloads the play-by-play of each game with
# get_ncaa_pbp() and tags it with the game's Date; step 3 passes the combined
# rows to ncaa_parse() and returns its result.
# A failing day or game is reported with message() and skipped.
scrape_clean_ncaa_pbp <- function(start_date, end_date) {
  dates_list <- seq(as.Date(start_date), as.Date(end_date), by = "day")
  n_dates <- length(dates_list)

  # Get schedule data for game IDs.
  # Iterate by index: `for (d in dates_list)` would strip the Date class and
  # hand a bare number to get_ncaa_schedule().
  schedules <- vector("list", n_dates)
  for (i in seq_along(dates_list)) {
    current_date <- dates_list[i]
    message(sprintf(
      "%5.1f%% | %d / %d | %s",
      100 * i / n_dates, i, n_dates, format(current_date)
    ))
    day_games <- tryCatch(
      get_ncaa_schedule(current_date),
      error = function(e) {
        message(" -- schedule request failed: ", conditionMessage(e))
        NULL
      }
    )
    if (is.null(day_games) || nrow(day_games) == 0) {
      message(" -- no games, skipping")
      next
    }
    schedules[[i]] <- day_games %>%
      distinct(GameID, .keep_all = TRUE)
    Sys.sleep(0.2)
  }
  schedules <- Filter(Negate(is.null), schedules)
  games_list <- if (length(schedules) > 0) do.call(rbind, schedules) else tibble()

  # Get PBP data
  n_rows <- nrow(games_list)
  n_games <- if (n_rows > 0) length(unique(games_list$GameID)) else 0L
  pbp_parts <- vector("list", n_rows)
  for (i in seq_len(n_rows)) {
    current_id <- games_list$GameID[i]
    game_pbp <- tryCatch(
      get_ncaa_pbp(current_id),
      error = function(e) {
        message(" -- play-by-play request failed: ", conditionMessage(e))
        NULL
      }
    )
    if (is.null(game_pbp) || nrow(game_pbp) == 0) {
      message(" -- no data, skipping")
      next
    }
    pbp_parts[[i]] <- game_pbp %>%
      mutate(Date = games_list$Date[i])
    message(sprintf(
      "%5.1f%% | %d / %d | %s",
      100 * i / n_games, i, n_games, current_id
    ))
    Sys.sleep(0.2)
  }
  pbp_parts <- Filter(Negate(is.null), pbp_parts)
  # Each game used to be prepended to the result, so the last game scraped
  # comes first; rev() keeps that row order.
  df <- if (length(pbp_parts) > 0) do.call(rbind, rev(pbp_parts)) else tibble()

  # Parse the rows collected above (the previous code passed an undefined
  # `df2` here).
  ncaa_parse(df)
}
# UI
# Page layout: inline CSS in <head>, a three-logo header, the main tabset
# ("Upload & Process", "Bat Tracking Data", "Preview Data",
# "Pitch Movement Chart", "Download", "Scraping") and a shinyBS modal for
# editing a pitch type.
ui <- fluidPage(
tags$head(
# Everything inside HTML("...") is CSS sent to the browser verbatim.
tags$style(HTML("
body, table, .gt_table {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto,
Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji',
'Segoe UI Symbol';
}
/* Header styling */
.app-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 20px 40px;
background: #ffffff;
border-bottom: 3px solid darkcyan;
margin-bottom: 20px;
}
.header-logo-left, .header-logo-right {
width: 120px;
height: auto;
}
.header-logo-center {
max-width: 400px;
height: auto;
}
@media (max-width: 768px) {
.app-header {
flex-direction: column;
padding: 15px 20px;
}
.header-logo-left, .header-logo-right {
width: 80px;
}
.header-logo-center {
max-width: 250px;
margin: 10px 0;
}
}
/* Gradient pill tabs styling */
.nav-tabs {
border: none !important;
border-radius: 50px;
padding: 6px 12px;
margin: 20px auto 0;
max-width: 100%;
background: linear-gradient(135deg, #d4edeb 0%, #e8ddd0 50%, #d4edeb 100%);
box-shadow: 0 4px 16px rgba(0,139,139,.12), inset 0 2px 4px rgba(255,255,255,.6);
border: 1px solid rgba(0,139,139,.2);
position: relative;
overflow-x: auto;
-webkit-overflow-scrolling: touch;
display: flex;
justify-content: center;
align-items: center;
flex-wrap: wrap;
gap: 6px;
}
.nav-tabs::-webkit-scrollbar {
height: 0;
}
.nav-tabs::before {
content: '';
position: absolute;
inset: 0;
pointer-events: none;
border-radius: 50px;
background: linear-gradient(135deg, rgba(255,255,255,.4), transparent);
}
.nav-tabs > li > a {
color: darkcyan !important;
border: none !important;
border-radius: 50px !important;
background: transparent !important;
font-weight: 700;
font-size: 14.5px;
padding: 10px 22px;
white-space: nowrap;
letter-spacing: 0.2px;
transition: all 0.2s ease;
}
.nav-tabs > li > a:hover {
color: #006666 !important;
background: rgba(255,255,255,.5) !important;
transform: translateY(-1px);
}
.nav-tabs > li.active > a,
.nav-tabs > li.active > a:focus,
.nav-tabs > li.active > a:hover {
background: linear-gradient(135deg, #008b8b 0%, #20b2aa 30%, #00ced1 50%, #20b2aa 70%, #008b8b 100%) !important;
color: #fff !important;
text-shadow: 0 1px 2px rgba(0,0,0,.2);
box-shadow: 0 4px 16px rgba(0,139,139,.4), inset 0 2px 8px rgba(255,255,255,.4), inset 0 -2px 6px rgba(0,0,0,.2);
border: 1px solid rgba(255,255,255,.3) !important;
}
.nav-tabs > li > a:focus {
outline: 3px solid rgba(205,133,63,.6);
outline-offset: 2px;
}
.tab-content {
background: linear-gradient(135deg, rgba(255,255,255,.95), rgba(248,249,250,.95));
border-radius: 20px;
padding: 25px;
margin-top: 14px;
box-shadow: 0 15px 40px rgba(0,139,139,.1);
backdrop-filter: blur(15px);
border: 1px solid rgba(0,139,139,.1);
position: relative;
overflow: hidden;
}
.tab-content::before {
content: '';
position: absolute;
left: 0;
right: 0;
top: 0;
height: 4px;
background: linear-gradient(90deg, darkcyan, peru, darkcyan);
background-size: 200% 100%;
animation: shimmer 3s linear infinite;
}
@keyframes shimmer {
0% { background-position: -200% 0; }
100% { background-position: 200% 0; }
}
#name {
font-size: 10px;
font-weight: 500;
text-align: right;
margin-bottom: 8px;
color: #6C757D;
letter-spacing: 0.5px;
}
h3 {
color: black;
font-weight: 600;
margin-top: 25px;
margin-bottom: 15px;
padding-bottom: 8px;
border-bottom: 2px solid #007BA7;
}
h4 {
color: darkcyan;
font-weight: 500;
margin-top: 20px;
margin-bottom: 12px;
}
h1 {
color: #007BA7;
font-weight: 700;
margin-bottom: 20px;
text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
}
label {
font-weight: 500;
color: peru;
margin-bottom: 5px;
}
.plot-title {
text-align: center;
font-weight: 600;
color: #2C3E50;
margin-bottom: 10px;
}
.dataTables_wrapper .dataTables_length,
.dataTables_wrapper .dataTables_filter,
.dataTables_wrapper .dataTables_info,
.dataTables_wrapper .dataTables_paginate {
color: #2C3E50;
}
thead th {
background-color: #F8F9FA;
color: #2C3E50;
font-weight: 600;
text-align: center !important;
padding: 10px !important;
}
.brand-teal { color: darkcyan; }
.brand-bronze { color: peru; }
/* Bat tracking upload box styling */
.bat-tracking-box {
background: linear-gradient(135deg, #e8f4f8 0%, #f0e6d3 100%);
border: 2px dashed darkcyan;
border-radius: 15px;
padding: 20px;
margin-top: 15px;
}
.merge-status-box {
background: #f8f9fa;
border-left: 4px solid darkcyan;
padding: 15px;
border-radius: 0 10px 10px 0;
margin-top: 15px;
}
.merge-success {
border-left-color: #28a745;
background: #d4edda;
}
.merge-warning {
border-left-color: #ffc107;
background: #fff3cd;
}
.merge-error {
border-left-color: #dc3545;
background: #f8d7da;
}
"))
),
# Header with three logos
div(class = "app-header",
tags$img(src = "https://i.imgur.com/7vx5Ci8.png", class = "header-logo-left", alt = "Logo Left"),
tags$img(src = "https://i.imgur.com/c3zCSg6.png", class = "header-logo-center", alt = "Main Logo"),
tags$img(src = "https://i.imgur.com/VbrN5WV.png", class = "header-logo-right", alt = "Logo Right")
),
tabsetPanel(id = "main_tabs",
# Upload & Process Tab
# Step 1: CSV upload with read options, step 2: optional bat tracking JSON,
# step 3: choice of columns to drop plus quick-action buttons.
tabPanel(
"Upload & Process",
fluidRow(
column(6,
h3("1. Upload TrackMan CSV"),
fileInput("file", "Choose CSV File", accept = c(".csv")),
# header / sep / quote are passed to read.csv() by the server;
# date_format selects the output format of the converted date columns.
fluidRow(
column(3,
checkboxInput("header", "Header", TRUE)
),
column(3,
radioButtons("sep", "Separator",
choices = c(Comma = ",", Semicolon = ";", Tab = "\t"),
selected = ",", inline = TRUE)
),
column(3,
radioButtons("quote", "Quote",
choices = c(None = "", "Double Quote" = '"', "Single Quote" = "'"),
selected = '"', inline = TRUE)
),
column(3,
radioButtons("date_format", "Date Output Format",
choices = c("YYYY-MM-DD" = "yyyy", "M/D/YY" = "mdyy"),
selected = "yyyy")
)
),
verbatimTextOutput("csv_status")
),
column(6,
div(class = "bat-tracking-box",
h3("2. Upload Bat Tracking JSON (Optional)", style = "margin-top: 0;"),
fileInput("json_file", "Choose Bat Tracking JSON File", accept = c(".json")),
p(style = "color: #666; font-size: 12px;",
"Upload the corresponding _battracking.json file to merge bat speed and attack angle data."),
verbatimTextOutput("json_status"),
uiOutput("merge_status_ui")
)
)
),
hr(),
fluidRow(
column(8,
h3("3. Columns to Remove"),
p("Select which columns to remove from your dataset:"),
# All columns of the global `columns_to_remove` vector start out selected.
checkboxGroupInput("columns_to_remove", "Remove These Columns:",
choices = columns_to_remove,
selected = columns_to_remove)
),
column(4,
h3("Quick Actions"),
br(),
actionButton("select_all_cols", "Select All", class = "btn-primary"),
br(), br(),
actionButton("deselect_all_cols", "Deselect All", class = "btn-default"),
br(), br(),
actionButton("select_spinaxis", "Select SpinAxis3d Columns", class = "btn-info"),
br(), br(),
h4("Processing Summary"),
verbatimTextOutput("process_summary")
)
)
),
# Bat Tracking Details Tab
tabPanel(
"Bat Tracking Data",
fluidRow(
column(12,
h3("Bat Tracking Merge Details"),
uiOutput("bat_tracking_details"),
hr(),
h4("Pitches with Bat Tracking Data"),
DT::dataTableOutput("bat_tracking_table")
)
)
),
# Preview Data Tab
tabPanel(
"Preview Data",
fluidRow(
column(12,
h3("Data Preview"),
DT::dataTableOutput("preview")
)
)
),
# Pitch Movement Chart Tab
# Pitcher picker, selection mode switch and (in drag mode only) bulk edit
# controls on top; interactive plot and per-pitch summary table below.
tabPanel(
"Pitch Movement Chart",
fluidRow(
column(3,
# Choices are filled in by the server after a CSV has been processed.
selectInput("pitcher_select", "Select Pitcher:",
choices = NULL, selected = NULL)
),
column(3,
h4("Selection Mode:"),
radioButtons("selection_mode", "",
choices = list("Single Click" = "single", "Drag Select" = "drag"),
selected = "single", inline = TRUE)
),
column(6,
conditionalPanel(
condition = "input.selection_mode == 'drag'",
h4("Bulk Edit:"),
fluidRow(
column(8,
selectInput("bulk_pitch_type", "Change all selected to:",
choices = c("Fastball", "Sinker", "Cutter", "Slider",
"Curveball", "ChangeUp", "Splitter", "Knuckleball", "Other"),
selected = "Fastball")
),
column(4,
br(),
actionButton("apply_bulk_change", "Apply to Selected", class = "btn-success")
)
)
)
)
),
fluidRow(
column(8,
h3("Interactive Pitch Movement Analysis"),
# Click, brush and hover events are all enabled on the plot; the server
# decides which one to act on from input$selection_mode.
plotOutput("movement_plot", height = "600px",
click = "plot_click",
brush = brushOpts(id = "plot_brush"),
hover = hoverOpts(id = "plot_hover", delay = 100)),
h4("Instructions:"),
p(strong("Single Click Mode:"), "Click on any point to edit one pitch type at a time via popup modal."),
p(strong("Drag Select Mode:"), "Click and drag to select multiple points, then use the dropdown to change them all at once."),
conditionalPanel(
condition = "input.selection_mode == 'drag'",
div(style = "background-color: #f0f8ff; padding: 10px; border-radius: 5px; margin: 10px 0; border-left: 4px solid darkcyan;",
h4("Selected Points:", style = "margin-top: 0; color: darkcyan;"),
textOutput("selection_info")
)
),
verbatimTextOutput("hover_info"),
verbatimTextOutput("click_info")
),
column(4,
h3("Pitch Metrics Summary"),
DT::dataTableOutput("movement_stats")
)
)
),
# Download Tab
tabPanel(
"Download",
fluidRow(
column(12,
h3("Download Processed Data"),
h4("Your processed data is ready for download!"),
br(),
downloadButton("downloadData", "Download CSV", class = "btn-success btn-lg"),
br(), br(),
h4("Data Summary:"),
verbatimTextOutput("data_summary")
)
)
),
# Scrape Tab
# Data source picker, date range and scrape/download buttons on the left,
# progress text and a preview table on the right.
tabPanel(
"Scraping",
fluidRow(
column(2,
h4("Data Source", style = "color: darkcyan; border-bottom: 2px solid darkcyan; padding-bottom: 6px;"),
radioButtons("scrape_source", NULL,
choices = c("TrackMan PBP" = "pbp",
"TrackMan Positional" = "pos",
"NCAA Scoreboard" = "ncaa"),
selected = "pbp")
),
column(4,
h3("Controls"),
# Both dates default to yesterday.
dateInput("start_date", "Start Date:", value = Sys.Date() - 1),
dateInput("end_date", "End Date:", value = Sys.Date() - 1),
uiOutput("scrape_options"),
br(),
actionButton("scrape_btn", "Scrape Data", class = "btn-primary"),
br(), br(),
downloadButton("download_scrape", "Download CSV")
),
column(6,
h3("Progress"),
verbatimTextOutput("scrape_status"),
hr(),
h3("Data Preview"),
DT::dataTableOutput("scrape_preview")
)
)
)
),
# Modal for editing pitch type
# NOTE(review): the server's plot-click handler builds its own modalDialog()
# that reuses the ids below (selected_pitch_info, modal_new_pitch_type,
# update_pitch, cancel_edit), and no element with id "triggerModal" is defined
# in this UI -- confirm whether this bsModal is still needed.
bsModal("pitchEditModal", "Edit Pitch Type", "triggerModal", size = "medium",
div(style = "padding: 20px;",
h4("Selected Pitch Details:", style = "color: darkcyan;"),
verbatimTextOutput("selected_pitch_info"),
br(),
selectInput("modal_new_pitch_type", "Change Pitch Type To:",
choices = c("Fastball", "Sinker", "Cutter", "Slider",
"Curveball", "ChangeUp", "Splitter", "Knuckleball", "Other"),
selected = "Fastball"),
br(),
actionButton("update_pitch", "Update Pitch Type", class = "btn-primary btn-lg"),
actionButton("cancel_edit", "Cancel", class = "btn-default")
)
)
)
# Server
server <- function(input, output, session) {
# Reactive values (all start empty unless noted)
# Table after merge, column removal and distinct(); also receives pitch edits.
processed_data <- reactiveVal(NULL)
# Copy of the processed table that the movement plot and its handlers read.
plot_data <- reactiveVal(NULL)
# Details of the pitch picked in single-click mode (set by the plot click handler).
selected_pitch <- reactiveVal(NULL)
# Rows inside the current brush rectangle in drag mode, or NULL.
selected_points <- reactiveVal(NULL)
# CSV as read by read.csv() after date conversion, before merge/column removal.
csv_data_raw <- reactiveVal(NULL)
# Return value of parse_bat_tracking_json() for the uploaded JSON.
bat_tracking_parsed <- reactiveVal(NULL)
# Return value of merge_with_bat_tracking() for the current CSV/JSON pair.
merge_result <- reactiveVal(NULL)
# NOTE(review): not used in this part of the file -- presumably the result of
# the "Scraping" tab; confirm further down.
scraped_data <- reactiveVal(NULL)
# NOTE(review): presumably the text shown by the "scrape_status" output; confirm.
scrape_status_msg <- reactiveVal("Ready.")
# Quick-action buttons for the "columns_to_remove" checkbox group:
# tick everything, untick everything, or tick only the SpinAxis3d columns.
observeEvent(input$select_all_cols, {
  updateCheckboxGroupInput(
    session, "columns_to_remove",
    selected = columns_to_remove
  )
})
observeEvent(input$deselect_all_cols, {
  updateCheckboxGroupInput(
    session, "columns_to_remove",
    selected = character(0)
  )
})
observeEvent(input$select_spinaxis, {
  updateCheckboxGroupInput(
    session, "columns_to_remove",
    selected = grep("SpinAxis3d", columns_to_remove, value = TRUE)
  )
})
# Re-run the whole CSV pipeline when the date output format is switched.
# Does nothing until a CSV has been uploaded; the initial value of the radio
# button is ignored (ignoreInit).
observeEvent(input$date_format, {
  req(input$file)
  tryCatch(
    {
      # Read the upload again and convert its date columns to the new format
      raw_df <- convert_date_columns(
        read.csv(
          input$file$datapath,
          header = input$header,
          sep = input$sep,
          quote = input$quote,
          stringsAsFactors = FALSE
        ),
        input$date_format
      )
      csv_data_raw(raw_df)

      # Merge bat tracking again when a parsed JSON with data is available
      working_df <- raw_df
      if (!is.null(bat_tracking_parsed()) && !is.null(bat_tracking_parsed()$data)) {
        merged <- merge_with_bat_tracking(raw_df, bat_tracking_parsed()$data)
        merge_result(merged)
        working_df <- merged$data
      }

      # Drop the ticked columns that actually exist, then exact duplicate rows
      requested_drop <- input$columns_to_remove %||% character(0)
      present_drop <- intersect(names(working_df), requested_drop)
      if (length(present_drop) > 0) {
        working_df <- working_df %>% select(-all_of(present_drop))
      }
      working_df <- working_df %>% distinct()

      processed_data(working_df)
      plot_data(working_df)

      format_label <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"
      showNotification(
        paste("Date format updated to:", format_label),
        type = "message", duration = 3
      )
    },
    error = function(e) {
      showNotification(paste("Error updating date format:", e$message), type = "error")
    }
  )
}, ignoreInit = TRUE)
# Handle a newly uploaded CSV: read it, convert dates, merge bat tracking if
# present, drop the ticked columns and duplicates, publish the result and
# refresh the pitcher dropdown.
observeEvent(input$file, {
  req(input$file)
  tryCatch(
    {
      raw_df <- convert_date_columns(
        read.csv(
          input$file$datapath,
          header = input$header,
          sep = input$sep,
          quote = input$quote,
          stringsAsFactors = FALSE
        ),
        input$date_format
      )
      csv_data_raw(raw_df)

      # Merge with bat tracking when a parsed JSON with data is already loaded
      working_df <- raw_df
      if (!is.null(bat_tracking_parsed()) && !is.null(bat_tracking_parsed()$data)) {
        merged <- merge_with_bat_tracking(raw_df, bat_tracking_parsed()$data)
        merge_result(merged)
        working_df <- merged$data
      }

      # Only columns that are both ticked and present can be dropped
      requested_drop <- input$columns_to_remove %||% character(0)
      present_drop <- intersect(names(working_df), requested_drop)
      if (length(present_drop) > 0) {
        working_df <- working_df %>% select(-all_of(present_drop))
      }
      working_df <- working_df %>% distinct()

      processed_data(working_df)
      plot_data(working_df)

      # Offer the non-missing pitcher names, first one (alphabetically) selected
      if ("Pitcher" %in% names(working_df)) {
        known_pitchers <- working_df$Pitcher[!is.na(working_df$Pitcher)]
        pitcher_choices <- sort(unique(known_pitchers))
        updateSelectInput(
          session, "pitcher_select",
          choices = pitcher_choices,
          selected = pitcher_choices[1]
        )
      }
    },
    error = function(e) {
      showNotification(paste("Error processing CSV:", e$message), type = "error")
    }
  )
})
# Handle a newly uploaded bat tracking JSON: parse it and, when a CSV is
# already loaded and the parse produced data, merge and re-publish the table.
observeEvent(input$json_file, {
  req(input$json_file)
  tryCatch(
    {
      parsed <- parse_bat_tracking_json(input$json_file$datapath)
      bat_tracking_parsed(parsed)

      can_merge <- !is.null(csv_data_raw()) && parsed$success && !is.null(parsed$data)
      if (can_merge) {
        merged <- merge_with_bat_tracking(csv_data_raw(), parsed$data)
        merge_result(merged)

        # Same post-processing as for a CSV upload: drop ticked columns that
        # exist, then exact duplicate rows
        merged_df <- merged$data
        requested_drop <- input$columns_to_remove %||% character(0)
        present_drop <- intersect(names(merged_df), requested_drop)
        if (length(present_drop) > 0) {
          merged_df <- merged_df %>% select(-all_of(present_drop))
        }
        merged_df <- merged_df %>% distinct()

        processed_data(merged_df)
        plot_data(merged_df)
        showNotification(merged$message, type = "message", duration = 5)
      }
    },
    error = function(e) {
      showNotification(paste("Error processing JSON:", e$message), type = "error")
    }
  )
})
# Status text under the CSV upload: upload state, first GameID, table size and
# the active date format.
output$csv_status <- renderText({
  if (is.null(input$file)) {
    return("No CSV file uploaded yet.")
  }
  df <- csv_data_raw()
  if (is.null(df)) {
    return("Processing CSV...")
  }

  # First distinct GameID of the file, if the column exists
  game_id <- "Unknown"
  if ("GameID" %in% names(df)) {
    game_id <- unique(df$GameID)[1]
  }
  date_fmt <- "YYYY-MM-DD"
  if (input$date_format == "mdyy") {
    date_fmt <- "M/D/YY"
  }

  status_lines <- c(
    "✓ CSV loaded successfully!",
    paste(" Game ID:", game_id),
    paste(" Rows:", nrow(df)),
    paste(" Columns:", ncol(df)),
    paste("✓ Date format:", date_fmt)
  )
  paste(status_lines, collapse = "\n")
})
# Status text under the JSON upload: upload state, parse failure message, or
# the game reference and number of plays found.
output$json_status <- renderText({
  if (is.null(input$json_file)) {
    return("No JSON file uploaded yet.")
  }
  parsed <- bat_tracking_parsed()
  if (is.null(parsed)) {
    return("Processing JSON...")
  }
  if (!parsed$success) {
    return(paste("✗", parsed$message))
  }

  status_lines <- c(
    "✓ JSON parsed successfully!",
    paste(" Game Reference:", parsed$game_reference),
    # A missing plays count is shown as 0
    paste(" Plays found:", parsed$plays_count %||% 0)
  )
  paste(status_lines, collapse = "\n")
})
# Merge status UI
# Coloured status box below the JSON upload. Shows nothing until both a CSV
# and a parsed JSON exist; otherwise one of: parse error, game id mismatch,
# nothing to merge, merge successful, or no matches found.
output$merge_status_ui <- renderUI({
  result <- merge_result()
  parsed <- bat_tracking_parsed()
  csv <- csv_data_raw()
  if (is.null(parsed) || is.null(csv)) {
    return(NULL)
  }
  if (!parsed$success) {
    return(div(class = "merge-status-box merge-error",
      h4("Merge Status", style = "margin-top: 0; color: #721c24;"),
      p(parsed$message)
    ))
  }

  # Compare the first GameID of the CSV with the JSON's game reference.
  csv_game <- if ("GameID" %in% names(csv)) unique(csv$GameID)[1] else NULL
  json_game <- parsed$game_reference
  # isTRUE() keeps an NA id (e.g. a CSV without rows) or a zero-length
  # reference from reaching if(), which would abort the render with
  # "missing value where TRUE/FALSE needed". Unknown ids count as "no mismatch".
  ids_differ <- !is.null(csv_game) && !is.null(json_game) &&
    isTRUE(csv_game != json_game)
  game_match <- !ids_differ

  if (is.null(parsed$data) || is.null(result)) {
    if (ids_differ) {
      return(div(class = "merge-status-box merge-warning",
        h4("⚠ Game ID Mismatch", style = "margin-top: 0; color: #856404;"),
        p(paste("CSV Game:", csv_game)),
        p(paste("JSON Game:", json_game)),
        p("Files may be from different games!")
      ))
    }
    return(div(class = "merge-status-box merge-warning",
      h4("No Data to Merge", style = "margin-top: 0; color: #856404;"),
      p(parsed$message)
    ))
  }

  if (result$matched > 0) {
    div(class = "merge-status-box merge-success",
      h4("✓ Merge Successful!", style = "margin-top: 0; color: #155724;"),
      p(paste("Matched:", result$matched, "of", result$total_bat, "bat tracking records")),
      if (!game_match) p(style = "color: #856404;", "⚠ Note: Game IDs differ but PitchUIDs matched")
    )
  } else {
    div(class = "merge-status-box merge-warning",
      h4("⚠ No Matches Found", style = "margin-top: 0; color: #856404;"),
      p(paste("0 of", result$total_bat, "bat tracking records matched")),
      if (!game_match) p(paste("Game ID mismatch: CSV =", csv_game, ", JSON =", json_game))
    )
  }
})
# Summary shown on the "Bat Tracking Data" tab: a hint when no JSON was
# uploaded, an alert for a failed or empty parse, otherwise three cards with
# the game reference, the number of swings and the number matched to the CSV.
output$bat_tracking_details <- renderUI({
  parsed <- bat_tracking_parsed()
  result <- merge_result()

  if (is.null(parsed)) {
    return(div(
      p("No bat tracking JSON file uploaded."),
      p("Upload a _battracking.json file in the 'Upload & Process' tab to see bat tracking data here.")
    ))
  }
  if (!parsed$success) {
    return(div(class = "alert alert-danger", parsed$message))
  }
  if (is.null(parsed$data)) {
    return(div(class = "alert alert-warning",
      h4("Empty Bat Tracking File"),
      p(parsed$message),
      p("The JSON file was valid but contained no swing data in the Plays array.")
    ))
  }

  # One third-width "well" with a heading and a body element
  summary_card <- function(heading, body) {
    div(class = "col-md-4",
      div(class = "well",
        h4(heading),
        body
      )
    )
  }
  matched_value <- if (!is.null(result)) result$matched else "N/A"

  div(
    div(class = "row",
      summary_card(
        "Game Reference",
        p(parsed$game_reference)
      ),
      summary_card(
        "Total Swings Tracked",
        p(style = "font-size: 24px; font-weight: bold; color: darkcyan;", parsed$plays_count)
      ),
      summary_card(
        "Matched to CSV",
        p(style = "font-size: 24px; font-weight: bold; color: #28a745;", matched_value)
      )
    )
  )
})
# Table of the pitches that carry a sensor bat speed. Renders nothing when no
# data is loaded, the BatSpeed_Sensor column is absent, or no row has a value.
output$bat_tracking_table <- DT::renderDataTable({
  df <- processed_data()
  if (is.null(df)) {
    return(NULL)
  }
  if (!("BatSpeed_Sensor" %in% names(df))) {
    return(NULL)
  }

  # Columns to show, in display order; missing ones are skipped by any_of()
  display_cols <- c(
    "PitchNo", "Time", "Pitcher", "Batter", "TaggedPitchType", "PitchCall",
    "RelSpeed", "ExitSpeed", "Angle",
    "BatSpeed", "BatSpeed_Sensor",
    "VerticalAttackAngle", "VerticalAttackAngle_Sensor",
    "HorizontalAttackAngle", "HorizontalAttackAngle_Sensor"
  )
  # Columns rounded to one decimal when present
  rounded_cols <- c(
    "BatSpeed_Sensor", "VerticalAttackAngle_Sensor",
    "HorizontalAttackAngle_Sensor", "RelSpeed",
    "ExitSpeed", "Angle"
  )

  bat_rows <- df %>%
    filter(!is.na(BatSpeed_Sensor)) %>%
    select(any_of(display_cols))
  if (nrow(bat_rows) == 0) {
    return(NULL)
  }

  bat_table <- DT::datatable(
    bat_rows,
    options = list(scrollX = TRUE, pageLength = 10),
    rownames = FALSE
  )
  DT::formatRound(
    bat_table,
    columns = intersect(names(bat_rows), rounded_cols),
    digits = 1
  )
})
# Processing summary
# Multi-line text for the "Processing Summary" box: column counts before and
# after processing, row count, number of removed columns, bat tracking merge
# state and the active date format.
output$process_summary <- renderText({
  if (is.null(input$file)) {
    return("No file uploaded yet.")
  }
  if (is.null(processed_data())) {
    return("Processing...")
  }
  df <- processed_data()
  original_df <- csv_data_raw()

  # Ticked columns that exist in the uploaded CSV
  selected_cols_to_remove <- input$columns_to_remove %||% character(0)
  removed_cols <- intersect(selected_cols_to_remove, names(original_df))
  # Only the count is reported (an unused preview of the names was dropped)
  removed_cols_text <- paste("✓ Removed columns:", length(removed_cols))

  result <- merge_result()
  bat_tracking_text <- if (!is.null(result) && result$matched > 0) {
    paste("✓ Bat tracking merged:", result$matched, "pitches")
  } else if (!is.null(bat_tracking_parsed())) {
    "⚠ Bat tracking: No matches"
  } else {
    "○ Bat tracking: Not uploaded"
  }

  date_label <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"

  paste(
    "✓ File processed successfully!",
    paste("✓ Original columns:", ncol(original_df)),
    paste("✓ Final columns:", ncol(df)),
    paste("✓ Rows processed:", nrow(df)),
    removed_cols_text,
    bat_tracking_text,
    "✓ Duplicates removed",
    paste("✓ Date format:", date_label),
    sep = "\n"
  )
})
# Filterable, horizontally scrollable preview of the processed table
output$preview <- DT::renderDataTable({
  req(processed_data())
  table_options <- list(scrollX = TRUE, pageLength = 10)
  DT::datatable(
    processed_data(),
    options = table_options,
    filter = "top"
  )
})
# Movement plot
# Base-graphics scatter of horizontal vs induced vertical break for the
# selected pitcher, coloured by tagged pitch type. Pitches tagged "Other" or
# lacking type, break or speed values are not drawn. In drag mode the
# currently brushed points get a red ring.
output$movement_plot <- renderPlot({
  req(plot_data(), input$pitcher_select)
  pitcher_data <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(TaggedPitchType), TaggedPitchType != "Other",
           !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed))
  if (nrow(pitcher_data) == 0) {
    plot.new()
    text(0.5, 0.5, "No data available for selected pitcher", cex = 1.5)
    return()
  }

  # Pitch types without an entry in pitch_colors fall back to light grey
  pitcher_data$color <- unname(pitch_colors[pitcher_data$TaggedPitchType])
  pitcher_data$color[is.na(pitcher_data$color)] <- "#D3D3D3"

  # Wide right margin and xpd so the legend can sit outside the plot region
  par(mar = c(5, 5, 4, 8), xpd = TRUE)
  plot(pitcher_data$HorzBreak, pitcher_data$InducedVertBreak,
       col = pitcher_data$color,
       pch = 19, cex = 1.5,
       xlim = c(-25, 25), ylim = c(-25, 25),
       xlab = "Horizontal Break (inches)",
       ylab = "Induced Vertical Break (inches)",
       main = paste("Pitch Movement Chart -", input$pitcher_select))
  grid(nx = NULL, ny = NULL, col = "lightgray", lty = 1, lwd = 0.5)
  abline(h = 0, col = "gray", lty = 2, lwd = 1)
  abline(v = 0, col = "gray", lty = 2, lwd = 1)

  # Reference circles at 6, 12, 18 and 24 inches of total break
  circle_angles <- seq(0, 2 * pi, length.out = 100)
  for (r in c(6, 12, 18, 24)) {
    lines(r * cos(circle_angles), r * sin(circle_angles), col = "lightgray", lty = 3)
  }

  if (input$selection_mode == "drag" && !is.null(selected_points())) {
    sel_points <- selected_points()
    points(sel_points$HorzBreak, sel_points$InducedVertBreak,
           pch = 21, cex = 2, col = "red", lwd = 3)
  }

  # Take the legend colours from the plotted points so that types missing
  # from pitch_colors show the same grey as their points instead of NA.
  unique_pitches <- unique(pitcher_data$TaggedPitchType)
  unique_colors <- pitcher_data$color[match(unique_pitches, pitcher_data$TaggedPitchType)]
  legend("topright", inset = c(-0.15, 0),
         legend = unique_pitches,
         col = unique_colors,
         pch = 19,
         cex = 0.8,
         title = "Pitch Type")
})
# Single-click editing: find the plotted pitch nearest to the click and, when
# it lies within 2 units (plot coordinates), remember it in selected_pitch()
# and open the "Edit Pitch Type" dialog. Ignored outside single-click mode.
observeEvent(input$plot_click, {
  req(plot_data(), input$pitcher_select, input$plot_click)
  if (input$selection_mode != "single") return()

  # Same row filter as the plot, so only visible points can be picked
  pitcher_data <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(TaggedPitchType), TaggedPitchType != "Other",
           !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed)) %>%
    mutate(pitch_id = row_number())
  if (nrow(pitcher_data) == 0) return()

  dx <- pitcher_data$HorzBreak - input$plot_click$x
  dy <- pitcher_data$InducedVertBreak - input$plot_click$y
  distances <- sqrt(dx^2 + dy^2)
  closest_idx <- which.min(distances)
  if (!(min(distances) <= 2)) return()

  clicked_pitch <- pitcher_data[closest_idx, ]

  # Position of the clicked pitch among all rows of this pitcher: the first
  # row with the same break and speed values
  full_data <- plot_data() %>% filter(Pitcher == input$pitcher_select)
  same_values <- full_data$HorzBreak == clicked_pitch$HorzBreak &
    full_data$InducedVertBreak == clicked_pitch$InducedVertBreak &
    full_data$RelSpeed == clicked_pitch$RelSpeed
  original_row <- which(same_values)[1]

  selected_pitch(list(
    pitcher = input$pitcher_select,
    row_in_pitcher_data = original_row,
    data = clicked_pitch,
    original_type = clicked_pitch$TaggedPitchType
  ))

  current_type <- clicked_pitch$TaggedPitchType
  pitch_type_choices <- c("Fastball", "Sinker", "Cutter", "Slider",
                          "Curveball", "ChangeUp", "Splitter", "Knuckleball", "Other")

  updateSelectInput(session, "modal_new_pitch_type", selected = current_type)

  dialog_body <- div(
    style = "padding: 20px;",
    h4("Selected Pitch Details:", style = "color: darkcyan;"),
    verbatimTextOutput("selected_pitch_info"),
    br(),
    selectInput("modal_new_pitch_type", "Change Pitch Type To:",
                choices = pitch_type_choices,
                selected = current_type),
    br(),
    actionButton("update_pitch", "Update Pitch Type", class = "btn-primary btn-lg"),
    actionButton("cancel_edit", "Cancel", class = "btn-default")
  )
  showModal(modalDialog(
    title = "Edit Pitch Type",
    dialog_body,
    footer = NULL,
    size = "m",
    easyClose = TRUE
  ))
})
# Drag selection: store the plotted pitches that fall inside the brush
# rectangle in selected_points(), or NULL when the rectangle is empty.
# Ignored outside drag mode.
observeEvent(input$plot_brush, {
  req(plot_data(), input$pitcher_select, input$plot_brush)
  if (input$selection_mode != "drag") return()

  # Same row filter as the plot
  visible_pitches <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(TaggedPitchType), TaggedPitchType != "Other",
           !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed))
  if (nrow(visible_pitches) == 0) return()

  box <- input$plot_brush
  inside_box <- visible_pitches %>%
    filter(
      HorzBreak >= box$xmin, HorzBreak <= box$xmax,
      InducedVertBreak >= box$ymin, InducedVertBreak <= box$ymax
    )

  selected_points(if (nrow(inside_box) > 0) inside_box else NULL)
})
# Apply bulk change
# Sets TaggedPitchType to the chosen bulk type for every row of the selected
# pitcher whose HorzBreak, InducedVertBreak and RelSpeed all lie within 0.01
# of one of the brushed points, then publishes the table and clears the
# selection.
observeEvent(input$apply_bulk_change, {
  req(selected_points(), input$bulk_pitch_type)
  sel_points <- selected_points()
  if (nrow(sel_points) == 0) {
    showNotification("No points selected", type = "warning")
    return()
  }

  current_data <- plot_data()
  same_pitcher <- current_data$Pitcher == input$pitcher_select

  # Collect all matching rows first and assign once.
  to_update <- rep(FALSE, nrow(current_data))
  for (i in seq_len(nrow(sel_points))) {
    point <- sel_points[i, ]
    is_match <- same_pitcher &
      abs(current_data$HorzBreak - point$HorzBreak) < 0.01 &
      abs(current_data$InducedVertBreak - point$InducedVertBreak) < 0.01 &
      abs(current_data$RelSpeed - point$RelSpeed) < 0.01
    # `%in% TRUE` turns NA comparisons into FALSE. With ifelse() an NA
    # condition overwrote the pitch type with NA for rows of this pitcher
    # that have no break/speed values.
    to_update <- to_update | (is_match %in% TRUE)
  }
  current_data$TaggedPitchType[to_update] <- input$bulk_pitch_type

  plot_data(current_data)
  processed_data(current_data)
  selected_points(NULL)
  showNotification(
    paste("Updated", nrow(sel_points), "pitches to", input$bulk_pitch_type),
    type = "message", duration = 3
  )
})
# Selection info for drag mode
output$selection_info <- renderText({
  # Summarise the current drag selection: total count plus a per-type tally.
  has_selection <- input$selection_mode == "drag" && !is.null(selected_points())
  if (!has_selection) {
    return("No points selected. Click and drag to select multiple pitches.")
  }

  picked <- selected_points()
  tally <- table(picked$TaggedPitchType)
  per_type <- paste(names(tally), "(", tally, ")", collapse = ", ")
  paste(nrow(picked), "points selected:", per_type)
})
# Hover info
output$hover_info <- renderText({
  # Describe the pitch nearest to the mouse pointer, or return "" when no
  # pitch is close enough.
  req(input$plot_hover, plot_data(), input$pitcher_select)
  pitcher_data <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(TaggedPitchType), TaggedPitchType != "Other",
           !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed))
  if (nrow(pitcher_data) == 0) return("")

  # Straight-line distance from the pointer to every pitch, with HorzBreak
  # compared against hover x and InducedVertBreak against hover y.
  hover_x <- input$plot_hover$x
  hover_y <- input$plot_hover$y
  distances <- sqrt((pitcher_data$HorzBreak - hover_x)^2 +
                      (pitcher_data$InducedVertBreak - hover_y)^2)
  closest_idx <- which.min(distances)

  # Pointer is more than 2 units from every pitch: show nothing.
  if (distances[closest_idx] > 2) return("")

  hover_pitch <- pitcher_data[closest_idx, ]
  parts <- c(
    "Hovering over:",
    paste("Type:", hover_pitch$TaggedPitchType),
    paste("Velocity:", round(hover_pitch$RelSpeed, 1), "mph"),
    paste("HB:", round(hover_pitch$HorzBreak, 1), "in"),
    paste("IVB:", round(hover_pitch$InducedVertBreak, 1), "in")
  )

  # Bat speed is appended as its own field only when it exists, so the
  # text never ends in a dangling " | " or contains a doubled separator.
  if ("BatSpeed_Sensor" %in% names(hover_pitch) && !is.na(hover_pitch$BatSpeed_Sensor)) {
    parts <- c(parts, paste("Bat Speed:", round(hover_pitch$BatSpeed_Sensor, 1), "mph"))
  }

  paste(parts, collapse = " | ")
})
# Movement stats table
output$movement_stats <- DT::renderDataTable({
  # Per-pitch-type summary (count, usage, velocity, movement, spin, bat
  # speed, zone and whiff rates) for the selected pitcher.
  req(plot_data(), input$pitcher_select)
  pitcher_rows <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(TaggedPitchType))

  # Optional columns: checked once, then used to choose a code path.
  has_zone_flag <- "StrikeZoneIndicator" %in% names(pitcher_rows)
  has_whiff_flag <- "WhiffIndicator" %in% names(pitcher_rows)
  has_spin <- "SpinRate" %in% names(pitcher_rows)
  has_bat_speed <- "BatSpeed_Sensor" %in% names(pitcher_rows)

  movement_stats <- pitcher_rows %>%
    mutate(
      # Collapse tag spelling variants into one display name per pitch.
      pitch_group = case_when(
        TaggedPitchType %in% c("Fastball", "FourSeamFastBall", "FourSeamFastB", "Four-Seam", "4-Seam") ~ "Fastball",
        TaggedPitchType %in% c("OneSeamFastBall", "TwoSeamFastBall", "Sinker", "Two-Seam", "One-Seam") ~ "Sinker",
        TaggedPitchType %in% c("ChangeUp", "Changeup") ~ "Changeup",
        TRUE ~ TaggedPitchType
      ),
      # The column-exists test is a single TRUE/FALSE, so it must be a plain
      # `if`: ifelse() would return a length-1 result and stamp the first
      # row's value onto every pitch.
      in_zone = if (has_zone_flag) {
        StrikeZoneIndicator
      } else {
        ifelse(!is.na(PlateLocSide) & !is.na(PlateLocHeight) &
                 PlateLocSide >= -0.95 & PlateLocSide <= 0.95 &
                 PlateLocHeight >= 1.6 & PlateLocHeight <= 3.5, 1, 0)
      },
      is_whiff = if (has_whiff_flag) {
        WhiffIndicator
      } else {
        ifelse(!is.na(PitchCall) & PitchCall == "StrikeSwinging", 1, 0)
      }
    )

  # Denominator for Usage%: all of this pitcher's rows that passed the filter.
  total_pitches <- nrow(movement_stats)

  summary_stats <- movement_stats %>%
    group_by(`Pitch Type` = pitch_group) %>%
    summarise(
      Count = n(),
      `Usage%` = sprintf("%.1f%%", (n() / total_pitches) * 100),
      `Avg Velo` = sprintf("%.1f", mean(RelSpeed, na.rm = TRUE)),
      `Max Velo` = sprintf("%.1f", max(RelSpeed, na.rm = TRUE)),
      `Avg IVB` = sprintf("%.1f", mean(InducedVertBreak, na.rm = TRUE)),
      `Avg HB` = sprintf("%.1f", mean(HorzBreak, na.rm = TRUE)),
      `Avg Spin` = if (has_spin) {
        sprintf("%.0f", mean(SpinRate, na.rm = TRUE))
      } else {
        "—"
      },
      # Dash when the column is absent or the group has no bat-speed values.
      `Avg Bat Speed` = if (has_bat_speed && any(!is.na(BatSpeed_Sensor))) {
        sprintf("%.1f", mean(BatSpeed_Sensor, na.rm = TRUE))
      } else {
        "—"
      },
      `Zone%` = sprintf("%.1f%%", round(mean(in_zone, na.rm = TRUE) * 100, 1)),
      `Whiff%` = sprintf("%.1f%%", round(mean(is_whiff, na.rm = TRUE) * 100, 1)),
      .groups = "drop"
    ) %>%
    arrange(desc(Count))

  DT::datatable(summary_stats,
                options = list(pageLength = 15, dom = 't', scrollX = TRUE),
                rownames = FALSE) %>%
    DT::formatStyle(columns = names(summary_stats), fontSize = '12px')
})
# Selected pitch info in modal
output$selected_pitch_info <- renderText({
  # Multi-line description of the pitch stored in `selected_pitch()`.
  pitch_info <- selected_pitch()
  if (is.null(pitch_info)) {
    return("No pitch selected")
  }

  pitch_row <- pitch_info$data

  # TRUE when the column is present and its value is not missing.
  has_value <- function(col) {
    col %in% names(pitch_row) && !is.na(pitch_row[[col]])
  }

  # Fields that are always shown.
  details <- c(
    paste("Pitcher:", pitch_info$pitcher),
    paste("Current Type:", pitch_row$TaggedPitchType),
    paste("Velocity:", round(pitch_row$RelSpeed, 1), "mph"),
    paste("Horizontal Break:", round(pitch_row$HorzBreak, 1), "inches"),
    paste("Induced Vertical Break:", round(pitch_row$InducedVertBreak, 1), "inches")
  )

  if (has_value("SpinRate")) {
    details <- c(details, paste("Spin Rate:", round(pitch_row$SpinRate, 0), "rpm"))
  }

  # Bat speed gates all three swing fields.
  if (has_value("BatSpeed_Sensor")) {
    details <- c(
      details,
      paste("Bat Speed:", round(pitch_row$BatSpeed_Sensor, 1), "mph"),
      paste("Vertical Attack Angle:", round(pitch_row$VerticalAttackAngle_Sensor, 1), "°"),
      paste("Horizontal Attack Angle:", round(pitch_row$HorizontalAttackAngle_Sensor, 1), "°")
    )
  }

  if (has_value("Date")) {
    details <- c(details, paste("Date:", pitch_row$Date))
  }

  paste(details, collapse = "\n")
})
# Update pitch type
observeEvent(input$update_pitch, {
  # Apply the pitch type chosen in the modal to the pitch held in
  # `selected_pitch()`, then close the modal and clear that selection.
  pitch_info <- selected_pitch()
  if (is.null(pitch_info)) return()

  current_data <- plot_data()
  new_type <- input$modal_new_pitch_type

  # The pitch is located by pitcher plus HB / IVB / velocity agreeing to
  # within 0.01 (no row id is used).
  is_target <- current_data$Pitcher == pitch_info$pitcher &
    abs(current_data$HorzBreak - pitch_info$data$HorzBreak) < 0.01 &
    abs(current_data$InducedVertBreak - pitch_info$data$InducedVertBreak) < 0.01 &
    abs(current_data$RelSpeed - pitch_info$data$RelSpeed) < 0.01

  # Rows with a missing measurement compare as NA. Passing NA to ifelse()
  # would replace their existing tag with NA, so NA is treated as no match.
  is_target <- is_target %in% TRUE

  current_data <- current_data %>%
    mutate(TaggedPitchType = ifelse(is_target, new_type, TaggedPitchType))

  plot_data(current_data)
  processed_data(current_data)
  removeModal()
  showNotification(
    paste("Updated pitch from", pitch_info$original_type, "to", new_type),
    type = "message", duration = 3
  )
  selected_pitch(NULL)
})
# Cancel edit
observeEvent(
  eventExpr = input$cancel_edit,
  handlerExpr = {
    # Close the edit dialog, then drop the stored pitch.
    removeModal()
    selected_pitch(NULL)
  }
)
# Click info output
output$click_info <- renderText({
  # One-line recap of the stored pitch, or a usage hint when there is none.
  pitch_info <- selected_pitch()
  if (is.null(pitch_info)) {
    return("No point selected yet. Click on a point in the chart above to edit its pitch type.")
  }

  hb <- round(pitch_info$data$HorzBreak, 1)
  ivb <- round(pitch_info$data$InducedVertBreak, 1)
  paste("Last selected pitch:", pitch_info$original_type,
        "| Position: (", hb, ",", ivb, ")")
})
# Data summary for download page
output$data_summary <- renderText({
  # Plain-text overview of the processed data set, one fact per line.
  req(processed_data())
  df <- processed_data()
  result <- merge_result()

  bat_line <- "Bat tracking data: None"
  if (!is.null(result) && result$matched > 0) {
    bat_line <- paste("Bat tracking data:", result$matched, "pitches with swing metrics")
  }

  # NOTE(review): as.Date() is called without a format; if `Date` can hold
  # M/D/YY strings this range may be misparsed. Confirm against the code
  # that fills processed_data().
  date_range <- "Date column not available"
  if ("Date" %in% names(df) && !all(is.na(df$Date))) {
    date_range <- paste(min(as.Date(df$Date), na.rm = TRUE), "to",
                        max(as.Date(df$Date), na.rm = TRUE))
  }

  pitcher_count <- "Pitcher column not available"
  if ("Pitcher" %in% names(df)) {
    pitcher_count <- length(unique(df$Pitcher[!is.na(df$Pitcher)]))
  }

  pitch_types <- "TaggedPitchType column not available"
  if ("TaggedPitchType" %in% names(df)) {
    pitch_types <- paste(sort(unique(df$TaggedPitchType[!is.na(df$TaggedPitchType)])),
                         collapse = ", ")
  }

  format_label <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"

  summary_lines <- c(
    paste("Total rows:", nrow(df)),
    paste("Total columns:", ncol(df)),
    paste("Date range:", date_range),
    paste("Unique pitchers:", pitcher_count),
    paste("Pitch types:", pitch_types),
    bat_line,
    paste("Date format:", format_label)
  )
  paste(summary_lines, collapse = "\n")
})
# Download handler
output$downloadData <- downloadHandler(
  # File name carries today's date, e.g. app_ready_COA_<YYYY-MM-DD>.csv
  filename = function() {
    paste0("app_ready_COA_", Sys.Date(), ".csv")
  },
  # Write the current processed data as CSV, without row names.
  content = function(file) {
    export_df <- processed_data()
    write.csv(export_df, file, row.names = FALSE)
  }
)
# Scraper ----
# Renders the middle column, whose contents depend on the source chosen in the left column
output$scrape_options <- renderUI({
  # Build the option panel for each source, then return the one that
  # matches the selected `scrape_source`.
  pbp_panel <- tagList(
    p("Scrapes TrackMan play-by-play data from FTP.")
  )
  pos_panel <- tagList(
    p("Scrapes TrackMan player positioning data from FTP.")
  )
  ncaa_panel <- tagList(
    selectInput("ncaa_division", "Division:", choices = c("D1", "D2", "D3")),
    p("Scrapes NCAA scoreboard data via API.")
  )

  switch(input$scrape_source,
    "pbp" = pbp_panel,
    "pos" = pos_panel,
    "ncaa" = ncaa_panel
  )
})
# Scrape button
observeEvent(input$scrape_btn, {
  # Try to list one fixed FTP directory and report the outcome through
  # `scrape_status_msg`.
  scrape_status_msg("Testing FTP connection...")

  ftp_user <- Sys.getenv("FTP_USERNAME")
  ftp_pass <- Sys.getenv("FTP_PASSWORD")

  # Sys.getenv() returns "" for unset variables; fail with a clear message
  # instead of attempting a login with blank credentials.
  if (!nzchar(ftp_user) || !nzchar(ftp_pass)) {
    scrape_status_msg("FTP failed: FTP_USERNAME and/or FTP_PASSWORD is not set")
    return()
  }

  result <- tryCatch({
    # Credentials go through the `userpwd` curl option rather than being
    # pasted into the URL, so characters such as "@", "/" or "#" in the
    # password cannot corrupt the URL.
    file_list <- RCurl::getURL(
      "ftp://ftp.trackmanbaseball.com/v3/2025/04/01/CSV/",
      userpwd = paste0(ftp_user, ":", ftp_pass),
      ftp.use.epsv = FALSE,
      dirlistonly = TRUE,
      connecttimeout = 10
    )
    paste("Connected! Files found:", nchar(file_list), "chars")
  }, error = function(e) {
    paste("FTP failed:", conditionMessage(e))
  })

  scrape_status_msg(result)
})
# Status text
output$scrape_status <- renderText({
  # Show whatever text `scrape_status_msg` currently holds.
  scrape_status_msg()
})
# Preview table
output$scrape_preview <- DT::renderDataTable({
  # Nothing is rendered until scraped data is available.
  req(scraped_data())
  table_options <- list(scrollX = TRUE, pageLength = 10)
  DT::datatable(scraped_data(), options = table_options)
})
# Download
output$download_scrape <- downloadHandler(
  # Name pattern: trackman_<source label>_<start>_to_<end>.csv, with both
  # dates formatted as YYYYMMDD.
  filename = function() {
    source_label <- switch(input$scrape_source,
      "pbp" = "pbp",
      "pos" = "positional",
      "ncaa" = "ncaa"
    )
    compact_date <- function(d) format(d, "%Y%m%d")
    paste0(
      "trackman_", source_label, "_",
      compact_date(input$start_date), "_to_",
      compact_date(input$end_date), ".csv"
    )
  },
  # Write the scraped data as CSV, without row names.
  content = function(file) {
    req(scraped_data())
    export_df <- scraped_data()
    write.csv(export_df, file, row.names = FALSE)
  }
)
}
# Run the app
shinyApp(ui, server)