igroffman committed on
Commit
3a3252b
·
verified ·
1 Parent(s): dc5a003

Update app.R

Browse files
Files changed (1) hide show
  1. app.R +175 -163
app.R CHANGED
@@ -17,7 +17,8 @@ library(recipes)
17
  library(arrow)
18
  library(base64enc)
19
 
20
-
 
21
 
22
  PASSWORD <- Sys.getenv("password")
23
 
@@ -59,59 +60,47 @@ pitch_colors <- c(
59
  )
60
 
61
  # Function to convert date formats
62
- # input_string: the date string to convert
63
- # output_format: "yyyy" for YYYY-MM-DD or "mdyy" for M/D/YY
64
  convert_date_format <- function(date_string, output_format = "yyyy") {
65
  if (is.na(date_string) || date_string == "") {
66
  return(NA)
67
  }
68
 
69
- # Convert to character if not already
70
  date_string <- as.character(date_string)
71
 
72
  parsed_date <- NULL
73
 
74
- # Try to parse YYYY-MM-DD format
75
  if (grepl("^\\d{4}-\\d{2}-\\d{2}$", date_string)) {
76
  parsed_date <- tryCatch({
77
  as.Date(date_string, format = "%Y-%m-%d")
78
  }, error = function(e) NULL)
79
  }
80
 
81
- # Try to parse MM/DD/YYYY or M/D/YYYY format
82
  if (is.null(parsed_date) && grepl("^\\d{1,2}/\\d{1,2}/\\d{4}$", date_string)) {
83
  parsed_date <- tryCatch({
84
  as.Date(date_string, format = "%m/%d/%Y")
85
  }, error = function(e) NULL)
86
  }
87
 
88
- # Try to parse MM/DD/YY or M/D/YY format
89
  if (is.null(parsed_date) && grepl("^\\d{1,2}/\\d{1,2}/\\d{2}$", date_string)) {
90
  parsed_date <- tryCatch({
91
  as.Date(date_string, format = "%m/%d/%y")
92
  }, error = function(e) NULL)
93
  }
94
 
95
- # If we successfully parsed a date, format it according to output_format
96
  if (!is.null(parsed_date) && !is.na(parsed_date)) {
97
  if (output_format == "mdyy") {
98
- # M/D/YY format (no leading zeros, 2-digit year)
99
  return(format(parsed_date, "%m/%d/%y") %>%
100
- gsub("^0", "", .) %>% # Remove leading zero from month
101
- gsub("/0", "/", .)) # Remove leading zero from day
102
  } else {
103
- # YYYY-MM-DD format
104
  return(format(parsed_date, "%Y-%m-%d"))
105
  }
106
  }
107
 
108
- # Return original if no conversion possible
109
  return(date_string)
110
  }
111
 
112
- # Function to convert date columns in a dataframe
113
  convert_date_columns <- function(df, output_format = "yyyy") {
114
- # Common date column names in TrackMan data
115
  date_columns <- c("Date", "GameDate", "UTCDate", "LocalDateTime")
116
 
117
  for (col in date_columns) {
@@ -123,16 +112,36 @@ convert_date_columns <- function(df, output_format = "yyyy") {
123
  return(df)
124
  }
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  # Function to parse bat tracking JSON
127
  parse_bat_tracking_json <- function(json_path) {
128
  tryCatch({
129
  json_data <- fromJSON(json_path, simplifyVector = FALSE)
130
 
131
- # Extract metadata
132
  game_reference <- json_data$GameReference
133
  session_id <- json_data$SessionId
134
 
135
- # Extract plays
136
  plays <- json_data$Plays
137
 
138
  if (length(plays) == 0) {
@@ -144,7 +153,6 @@ parse_bat_tracking_json <- function(json_path) {
144
  ))
145
  }
146
 
147
- # Build data frame from plays
148
  bat_tracking_df <- data.frame(
149
  PitchUID = sapply(plays, function(p) p$PitchUID),
150
  BatSpeed_Sensor = sapply(plays, function(p) p$BatSpeed),
@@ -173,7 +181,6 @@ parse_bat_tracking_json <- function(json_path) {
173
  })
174
  }
175
 
176
- # Function to merge CSV with bat tracking
177
  merge_with_bat_tracking <- function(csv_data, bat_tracking_data) {
178
  if (is.null(bat_tracking_data) || nrow(bat_tracking_data) == 0) {
179
  return(list(
@@ -184,7 +191,6 @@ merge_with_bat_tracking <- function(csv_data, bat_tracking_data) {
184
  ))
185
  }
186
 
187
- # Check if PitchUID exists in CSV
188
  if (!"PitchUID" %in% names(csv_data)) {
189
  return(list(
190
  data = csv_data,
@@ -194,14 +200,11 @@ merge_with_bat_tracking <- function(csv_data, bat_tracking_data) {
194
  ))
195
  }
196
 
197
- # Perform left join
198
  merged_data <- csv_data %>%
199
  left_join(bat_tracking_data, by = "PitchUID")
200
 
201
- # Count matches
202
  matched_count <- sum(!is.na(merged_data$BatSpeed_Sensor))
203
 
204
- # If original BatSpeed column exists and is empty, fill with sensor data
205
  if ("BatSpeed" %in% names(merged_data)) {
206
  merged_data <- merged_data %>%
207
  mutate(BatSpeed = ifelse(is.na(BatSpeed) & !is.na(BatSpeed_Sensor),
@@ -411,11 +414,7 @@ clean_college_data <- function(data, teams = NA){
411
  -PositionAt110X, -PositionAt110Y, -PositionAt110Z
412
  )
413
 
414
-
415
-
416
  return(data)
417
-
418
-
419
  }
420
 
421
 
@@ -759,6 +758,15 @@ app_ui <- fluidPage(
759
  border-left-color: #dc3545;
760
  background: #f8d7da;
761
  }
 
 
 
 
 
 
 
 
 
762
  "))
763
  ),
764
 
@@ -776,8 +784,9 @@ app_ui <- fluidPage(
776
  "Upload & Process",
777
  fluidRow(
778
  column(6,
779
- h3("1. Upload TrackMan CSV"),
780
- fileInput("file", "Choose CSV File", accept = c(".csv")),
 
781
  fluidRow(
782
  column(3,
783
  checkboxInput("header", "Header", TRUE)
@@ -798,6 +807,8 @@ app_ui <- fluidPage(
798
  selected = "yyyy")
799
  )
800
  ),
 
 
801
  verbatimTextOutput("csv_status")
802
  ),
803
  column(6,
@@ -931,10 +942,26 @@ app_ui <- fluidPage(
931
  fluidRow(
932
  column(12,
933
  h3("Download Processed Data"),
934
- h4("Your processed data is ready for download!"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
935
  br(),
936
- downloadButton("downloadData", "Download CSV", class = "btn-success btn-lg"),
937
- br(), br(),
938
  h4("Data Summary:"),
939
  verbatimTextOutput("data_summary")
940
  )
@@ -999,11 +1026,12 @@ app_ui <- fluidPage(
999
  ui <- fluidPage(
1000
  uiOutput("page")
1001
  )
 
1002
  # Server
1003
  server <- function(input, output, session) {
1004
 
1005
-
1006
  logged_in <- reactiveVal(FALSE)
 
1007
 
1008
  output$page <- renderUI({
1009
  if (logged_in()) {
@@ -1051,45 +1079,44 @@ server <- function(input, output, session) {
1051
  updateCheckboxGroupInput(session, "columns_to_remove", selected = spinaxis_cols)
1052
  })
1053
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1054
  # Re-process data when date format changes
1055
  observeEvent(input$date_format, {
1056
- req(input$file) # Only run if a file has been uploaded
1057
 
1058
- # Re-read and process the CSV with new date format
1059
  tryCatch({
1060
- df <- read.csv(input$file$datapath,
1061
- header = input$header,
1062
- sep = input$sep,
1063
- quote = input$quote,
1064
- stringsAsFactors = FALSE)
1065
 
1066
- # Auto-convert date formats based on user selection
1067
  df <- convert_date_columns(df, input$date_format)
1068
-
1069
  csv_data_raw(df)
1070
-
1071
- # If we already have bat tracking data, try to merge
1072
- if (!is.null(bat_tracking_parsed()) && !is.null(bat_tracking_parsed()$data)) {
1073
- result <- merge_with_bat_tracking(df, bat_tracking_parsed()$data)
1074
- merge_result(result)
1075
- df <- result$data
1076
- }
1077
-
1078
- # Process the data (remove columns)
1079
- selected_cols_to_remove <- input$columns_to_remove %||% character(0)
1080
- processed_df <- df
1081
-
1082
- if (length(selected_cols_to_remove) > 0) {
1083
- columns_to_drop <- intersect(names(df), selected_cols_to_remove)
1084
- if (length(columns_to_drop) > 0) {
1085
- processed_df <- processed_df %>% select(-all_of(columns_to_drop))
1086
- }
1087
- }
1088
-
1089
- processed_df <- processed_df %>% distinct()
1090
-
1091
- processed_data(processed_df)
1092
- plot_data(processed_df)
1093
 
1094
  showNotification(
1095
  paste("Date format updated to:", if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"),
@@ -1101,44 +1128,21 @@ server <- function(input, output, session) {
1101
  })
1102
  }, ignoreInit = TRUE)
1103
 
1104
- # Process uploaded CSV file
1105
  observeEvent(input$file, {
1106
  req(input$file)
1107
 
1108
  tryCatch({
1109
- df <- read.csv(input$file$datapath,
1110
- header = input$header,
1111
- sep = input$sep,
1112
- quote = input$quote,
1113
- stringsAsFactors = FALSE)
1114
 
1115
- # Auto-convert date formats based on user selection
1116
- df <- convert_date_columns(df, input$date_format)
1117
 
 
1118
  csv_data_raw(df)
1119
 
1120
- # If we already have bat tracking data, try to merge
1121
- if (!is.null(bat_tracking_parsed()) && !is.null(bat_tracking_parsed()$data)) {
1122
- result <- merge_with_bat_tracking(df, bat_tracking_parsed()$data)
1123
- merge_result(result)
1124
- df <- result$data
1125
- }
1126
-
1127
- # Process the data (remove columns)
1128
- selected_cols_to_remove <- input$columns_to_remove %||% character(0)
1129
- processed_df <- df
1130
-
1131
- if (length(selected_cols_to_remove) > 0) {
1132
- columns_to_drop <- intersect(names(df), selected_cols_to_remove)
1133
- if (length(columns_to_drop) > 0) {
1134
- processed_df <- processed_df %>% select(-all_of(columns_to_drop))
1135
- }
1136
- }
1137
-
1138
- processed_df <- processed_df %>% distinct()
1139
-
1140
- processed_data(processed_df)
1141
- plot_data(processed_df)
1142
 
1143
  # Update pitcher choices
1144
  if ("Pitcher" %in% names(processed_df)) {
@@ -1146,8 +1150,18 @@ server <- function(input, output, session) {
1146
  updateSelectInput(session, "pitcher_select", choices = pitcher_choices, selected = pitcher_choices[1])
1147
  }
1148
 
 
 
 
 
 
 
 
 
 
 
1149
  }, error = function(e) {
1150
- showNotification(paste("Error processing CSV:", e$message), type = "error")
1151
  })
1152
  })
1153
 
@@ -1159,12 +1173,10 @@ server <- function(input, output, session) {
1159
  parsed <- parse_bat_tracking_json(input$json_file$datapath)
1160
  bat_tracking_parsed(parsed)
1161
 
1162
- # If we already have CSV data, merge
1163
  if (!is.null(csv_data_raw()) && parsed$success && !is.null(parsed$data)) {
1164
  result <- merge_with_bat_tracking(csv_data_raw(), parsed$data)
1165
  merge_result(result)
1166
 
1167
- # Re-process with merged data
1168
  df <- result$data
1169
  selected_cols_to_remove <- input$columns_to_remove %||% character(0)
1170
 
@@ -1188,26 +1200,28 @@ server <- function(input, output, session) {
1188
  })
1189
  })
1190
 
1191
- # CSV status output
1192
  output$csv_status <- renderText({
1193
  if (is.null(input$file)) {
1194
- return("No CSV file uploaded yet.")
1195
  }
1196
 
1197
  if (is.null(csv_data_raw())) {
1198
- return("Processing CSV...")
1199
  }
1200
 
1201
  df <- csv_data_raw()
 
 
1202
  game_id <- if ("GameID" %in% names(df)) unique(df$GameID)[1] else "Unknown"
1203
  date_fmt <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"
1204
 
1205
  paste(
1206
- " CSV loaded successfully!",
1207
  paste(" Game ID:", game_id),
1208
  paste(" Rows:", nrow(df)),
1209
  paste(" Columns:", ncol(df)),
1210
- paste(" Date format:", date_fmt),
1211
  sep = "\n"
1212
  )
1213
  })
@@ -1224,11 +1238,11 @@ server <- function(input, output, session) {
1224
  }
1225
 
1226
  if (!parsed$success) {
1227
- return(paste("", parsed$message))
1228
  }
1229
 
1230
  paste(
1231
- " JSON parsed successfully!",
1232
  paste(" Game Reference:", parsed$game_reference),
1233
  paste(" Plays found:", parsed$plays_count %||% 0),
1234
  sep = "\n"
@@ -1253,13 +1267,12 @@ server <- function(input, output, session) {
1253
  }
1254
 
1255
  if (is.null(parsed$data) || is.null(result)) {
1256
- # Check game ID match
1257
  csv_game <- if ("GameID" %in% names(csv)) unique(csv$GameID)[1] else NULL
1258
  json_game <- parsed$game_reference
1259
 
1260
  if (!is.null(csv_game) && !is.null(json_game) && csv_game != json_game) {
1261
  return(div(class = "merge-status-box merge-warning",
1262
- h4(" Game ID Mismatch", style = "margin-top: 0; color: #856404;"),
1263
  p(paste("CSV Game:", csv_game)),
1264
  p(paste("JSON Game:", json_game)),
1265
  p("Files may be from different games!")
@@ -1272,20 +1285,19 @@ server <- function(input, output, session) {
1272
  ))
1273
  }
1274
 
1275
- # Check game ID match
1276
  csv_game <- if ("GameID" %in% names(csv)) unique(csv$GameID)[1] else NULL
1277
  json_game <- parsed$game_reference
1278
  game_match <- is.null(csv_game) || is.null(json_game) || csv_game == json_game
1279
 
1280
  if (result$matched > 0) {
1281
  div(class = "merge-status-box merge-success",
1282
- h4(" Merge Successful!", style = "margin-top: 0; color: #155724;"),
1283
  p(paste("Matched:", result$matched, "of", result$total_bat, "bat tracking records")),
1284
- if (!game_match) p(style = "color: #856404;", " Note: Game IDs differ but PitchUIDs matched")
1285
  )
1286
  } else {
1287
  div(class = "merge-status-box merge-warning",
1288
- h4(" No Matches Found", style = "margin-top: 0; color: #856404;"),
1289
  p(paste("0 of", result$total_bat, "bat tracking records matched")),
1290
  if (!game_match) p(paste("Game ID mismatch: CSV =", csv_game, ", JSON =", json_game))
1291
  )
@@ -1316,7 +1328,6 @@ server <- function(input, output, session) {
1316
  ))
1317
  }
1318
 
1319
- # Show summary
1320
  div(
1321
  div(class = "row",
1322
  div(class = "col-md-4",
@@ -1350,7 +1361,6 @@ server <- function(input, output, session) {
1350
  return(NULL)
1351
  }
1352
 
1353
- # Filter to rows with bat tracking data
1354
  if ("BatSpeed_Sensor" %in% names(df)) {
1355
  bat_rows <- df %>%
1356
  filter(!is.na(BatSpeed_Sensor)) %>%
@@ -1394,35 +1404,32 @@ server <- function(input, output, session) {
1394
  selected_cols_to_remove <- input$columns_to_remove %||% character(0)
1395
  removed_cols <- intersect(selected_cols_to_remove, names(original_df))
1396
  result <- merge_result()
 
 
1397
 
1398
  removed_cols_text <- if (length(removed_cols) > 0) {
1399
- cols_display <- if (length(removed_cols) > 5) {
1400
- paste(paste(head(removed_cols, 5), collapse = ", "), "...")
1401
- } else {
1402
- paste(removed_cols, collapse = ", ")
1403
- }
1404
- paste("✓ Removed columns:", length(removed_cols))
1405
  } else {
1406
- " Removed columns: 0"
1407
  }
1408
 
1409
  bat_tracking_text <- if (!is.null(result) && result$matched > 0) {
1410
- paste(" Bat tracking merged:", result$matched, "pitches")
1411
  } else if (!is.null(bat_tracking_parsed())) {
1412
- " Bat tracking: No matches"
1413
  } else {
1414
- " Bat tracking: Not uploaded"
1415
  }
1416
 
1417
  summary_text <- paste(
1418
- " File processed successfully!",
1419
- paste(" Original columns:", ncol(original_df)),
1420
- paste(" Final columns:", ncol(df)),
1421
- paste(" Rows processed:", nrow(df)),
1422
  removed_cols_text,
1423
  bat_tracking_text,
1424
- " Duplicates removed",
1425
- paste(" Date format:", if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"),
1426
  sep = "\n"
1427
  )
1428
 
@@ -1533,23 +1540,23 @@ server <- function(input, output, session) {
1533
  selected = clicked_pitch$TaggedPitchType)
1534
 
1535
  showModal(modalDialog(
1536
- title = "Edit Pitch Type",
1537
- div(style = "padding: 20px;",
1538
- h4("Selected Pitch Details:", style = "color: darkcyan;"),
1539
- verbatimTextOutput("selected_pitch_info"),
1540
- br(),
1541
- selectInput("modal_new_pitch_type", "Change Pitch Type To:",
1542
- choices = c("Fastball", "Sinker", "Cutter", "Slider",
1543
- "Curveball", "ChangeUp", "Splitter", "Knuckleball", "Sweeper","Other"),
1544
- selected = clicked_pitch$TaggedPitchType),
1545
- br(),
1546
- actionButton("update_pitch", "Update Pitch Type", class = "btn-primary btn-lg"),
1547
- actionButton("cancel_edit", "Cancel", class = "btn-default")
1548
- ),
1549
- footer = NULL,
1550
- size = "m",
1551
- easyClose = TRUE
1552
- ))
1553
  }
1554
  })
1555
 
@@ -1649,7 +1656,6 @@ server <- function(input, output, session) {
1649
  closest_idx <- which.min(distances)
1650
  hover_pitch <- pitcher_data[closest_idx, ]
1651
 
1652
- # Include bat tracking info if available
1653
  bat_info <- ""
1654
  if ("BatSpeed_Sensor" %in% names(hover_pitch) && !is.na(hover_pitch$BatSpeed_Sensor)) {
1655
  bat_info <- paste(" | Bat Speed:", round(hover_pitch$BatSpeed_Sensor, 1), "mph")
@@ -1697,7 +1703,6 @@ server <- function(input, output, session) {
1697
 
1698
  total_pitches <- nrow(movement_stats)
1699
 
1700
- # Check if bat tracking columns exist
1701
  has_bat_speed <- "BatSpeed_Sensor" %in% names(movement_stats)
1702
 
1703
  summary_stats <- movement_stats %>%
@@ -1711,11 +1716,11 @@ server <- function(input, output, session) {
1711
  `Avg HB` = sprintf("%.1f", mean(HorzBreak, na.rm = TRUE)),
1712
  `Avg Spin` = ifelse("SpinRate" %in% names(movement_stats),
1713
  sprintf("%.0f", mean(SpinRate, na.rm = TRUE)),
1714
- ""),
1715
  `Avg Bat Speed` = if (has_bat_speed) {
1716
  bat_vals <- BatSpeed_Sensor[!is.na(BatSpeed_Sensor)]
1717
- if (length(bat_vals) > 0) sprintf("%.1f", mean(bat_vals)) else ""
1718
- } else "",
1719
  `Zone%` = sprintf("%.1f%%", round(mean(in_zone, na.rm = TRUE) * 100, 1)),
1720
  `Whiff%` = sprintf("%.1f%%", round(mean(is_whiff, na.rm = TRUE) * 100, 1)),
1721
  .groups = "drop"
@@ -1748,12 +1753,11 @@ server <- function(input, output, session) {
1748
  info_lines <- c(info_lines, paste("Spin Rate:", round(pitch_data$SpinRate, 0), "rpm"))
1749
  }
1750
 
1751
- # Add bat tracking info if available
1752
  if ("BatSpeed_Sensor" %in% names(pitch_data) && !is.na(pitch_data$BatSpeed_Sensor)) {
1753
  info_lines <- c(info_lines,
1754
  paste("Bat Speed:", round(pitch_data$BatSpeed_Sensor, 1), "mph"),
1755
- paste("Vertical Attack Angle:", round(pitch_data$VerticalAttackAngle_Sensor, 1), "°"),
1756
- paste("Horizontal Attack Angle:", round(pitch_data$HorizontalAttackAngle_Sensor, 1), "°"))
1757
  }
1758
 
1759
  if ("Date" %in% names(pitch_data) && !is.na(pitch_data$Date)) {
@@ -1791,7 +1795,7 @@ server <- function(input, output, session) {
1791
  plot_data(current_data)
1792
  processed_data(current_data)
1793
 
1794
- removeModal()
1795
 
1796
  showNotification(
1797
  paste("Updated pitch from", pitch_info$original_type, "to", input$modal_new_pitch_type),
@@ -1804,7 +1808,7 @@ removeModal()
1804
 
1805
  # Cancel edit
1806
  observeEvent(input$cancel_edit, {
1807
- removeModal()
1808
  selected_pitch(NULL)
1809
  })
1810
 
@@ -1854,6 +1858,7 @@ removeModal()
1854
  "TaggedPitchType column not available"
1855
  }),
1856
  bat_tracking_summary,
 
1857
  paste("Date format:", if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"),
1858
  sep = "\n"
1859
  )
@@ -1861,13 +1866,21 @@ removeModal()
1861
  return(summary_text)
1862
  })
1863
 
1864
- # Download handler
1865
  output$downloadData <- downloadHandler(
1866
  filename = function() {
1867
- paste("app_ready_COA_", Sys.Date(), ".csv", sep = "")
 
 
 
 
1868
  },
1869
  content = function(file) {
1870
- write.csv(processed_data(), file, row.names = FALSE)
 
 
 
 
1871
  }
1872
  )
1873
 
@@ -1875,7 +1888,6 @@ removeModal()
1875
 
1876
  #SCRAPER STUFF
1877
 
1878
- #Handles the middle column where it is dynamically based off the left column
1879
  output$scrape_options <- renderUI({
1880
  switch(input$scrape_source,
1881
  "pbp" = tagList(
@@ -1965,7 +1977,6 @@ observe({
1965
  if (conclusion == "success") {
1966
  scrape_status_msg("GitHub finished! Fetching data...")
1967
 
1968
- # Auto-fetch the CSV
1969
  filename <- paste0(input$scrape_source, "_", input$start_date, "_to_", input$end_date, ".csv.gz")
1970
  url <- paste0("https://api.github.com/repos/", gh_repo, "/contents/data/", filename)
1971
 
@@ -1986,7 +1997,8 @@ observe({
1986
  NULL
1987
  }
1988
  }, error = function(e) { NULL })
1989
- if (!is.null(data) && nrow(data) > 0) {
 
1990
 
1991
  if (input$scrape_source == "pbp") {
1992
  scrape_status_msg("Processing data...")
@@ -2002,7 +2014,7 @@ observe({
2002
  }
2003
 
2004
  scraped_data(data)
2005
- scrape_status_msg(paste0("Done! ", nrow(data), " rows × ", ncol(data), " columns."))
2006
  } else {
2007
  scrape_status_msg("Scrape finished but couldn't fetch the file. Try 'Fetch Results' manually.")
2008
  }
 
17
  library(arrow)
18
  library(base64enc)
19
 
20
+ # Maximum rows allowed for upload
21
+ MAX_UPLOAD_ROWS <- 5000
22
 
23
  PASSWORD <- Sys.getenv("password")
24
 
 
60
  )
61
 
62
  # Function to convert date formats
 
 
63
  convert_date_format <- function(date_string, output_format = "yyyy") {
64
  if (is.na(date_string) || date_string == "") {
65
  return(NA)
66
  }
67
 
 
68
  date_string <- as.character(date_string)
69
 
70
  parsed_date <- NULL
71
 
 
72
  if (grepl("^\\d{4}-\\d{2}-\\d{2}$", date_string)) {
73
  parsed_date <- tryCatch({
74
  as.Date(date_string, format = "%Y-%m-%d")
75
  }, error = function(e) NULL)
76
  }
77
 
 
78
  if (is.null(parsed_date) && grepl("^\\d{1,2}/\\d{1,2}/\\d{4}$", date_string)) {
79
  parsed_date <- tryCatch({
80
  as.Date(date_string, format = "%m/%d/%Y")
81
  }, error = function(e) NULL)
82
  }
83
 
 
84
  if (is.null(parsed_date) && grepl("^\\d{1,2}/\\d{1,2}/\\d{2}$", date_string)) {
85
  parsed_date <- tryCatch({
86
  as.Date(date_string, format = "%m/%d/%y")
87
  }, error = function(e) NULL)
88
  }
89
 
 
90
  if (!is.null(parsed_date) && !is.na(parsed_date)) {
91
  if (output_format == "mdyy") {
 
92
  return(format(parsed_date, "%m/%d/%y") %>%
93
+ gsub("^0", "", .) %>%
94
+ gsub("/0", "/", .))
95
  } else {
 
96
  return(format(parsed_date, "%Y-%m-%d"))
97
  }
98
  }
99
 
 
100
  return(date_string)
101
  }
102
 
 
103
  convert_date_columns <- function(df, output_format = "yyyy") {
 
104
  date_columns <- c("Date", "GameDate", "UTCDate", "LocalDateTime")
105
 
106
  for (col in date_columns) {
 
112
  return(df)
113
  }
114
 
115
# Read an uploaded data file (CSV or Parquet) and enforce a row limit.
#
# filepath: path of the uploaded file on disk
# filename: original file name; only its extension is used to pick the reader
# header, sep, quote: forwarded to read.csv(); not used for Parquet files
# max_rows: largest number of rows accepted; defaults to the app-wide
#           MAX_UPLOAD_ROWS constant (evaluated lazily, only when needed)
#
# Returns a data.frame. Signals an error when the file holds more than
# max_rows rows.
read_uploaded_file <- function(filepath, filename, header = TRUE, sep = ",",
                               quote = '"', max_rows = MAX_UPLOAD_ROWS) {
  ext <- tolower(tools::file_ext(filename))

  df <- if (identical(ext, "parquet")) {
    as.data.frame(arrow::read_parquet(filepath))
  } else {
    # Every other extension is parsed as delimited text.
    utils::read.csv(
      filepath,
      header = header,
      sep = sep,
      quote = quote,
      stringsAsFactors = FALSE
    )
  }

  if (nrow(df) > max_rows) {
    # call. = FALSE keeps the condition free of the internal call.
    stop(
      "File contains ", format(nrow(df), big.mark = ","),
      " rows which exceeds the ", format(max_rows, big.mark = ","),
      " row limit. Please upload a smaller file.",
      call. = FALSE
    )
  }

  df
}
136
+
137
  # Function to parse bat tracking JSON
138
  parse_bat_tracking_json <- function(json_path) {
139
  tryCatch({
140
  json_data <- fromJSON(json_path, simplifyVector = FALSE)
141
 
 
142
  game_reference <- json_data$GameReference
143
  session_id <- json_data$SessionId
144
 
 
145
  plays <- json_data$Plays
146
 
147
  if (length(plays) == 0) {
 
153
  ))
154
  }
155
 
 
156
  bat_tracking_df <- data.frame(
157
  PitchUID = sapply(plays, function(p) p$PitchUID),
158
  BatSpeed_Sensor = sapply(plays, function(p) p$BatSpeed),
 
181
  })
182
  }
183
 
 
184
  merge_with_bat_tracking <- function(csv_data, bat_tracking_data) {
185
  if (is.null(bat_tracking_data) || nrow(bat_tracking_data) == 0) {
186
  return(list(
 
191
  ))
192
  }
193
 
 
194
  if (!"PitchUID" %in% names(csv_data)) {
195
  return(list(
196
  data = csv_data,
 
200
  ))
201
  }
202
 
 
203
  merged_data <- csv_data %>%
204
  left_join(bat_tracking_data, by = "PitchUID")
205
 
 
206
  matched_count <- sum(!is.na(merged_data$BatSpeed_Sensor))
207
 
 
208
  if ("BatSpeed" %in% names(merged_data)) {
209
  merged_data <- merged_data %>%
210
  mutate(BatSpeed = ifelse(is.na(BatSpeed) & !is.na(BatSpeed_Sensor),
 
414
  -PositionAt110X, -PositionAt110Y, -PositionAt110Z
415
  )
416
 
 
 
417
  return(data)
 
 
418
  }
419
 
420
 
 
758
  border-left-color: #dc3545;
759
  background: #f8d7da;
760
  }
761
+
762
+ /* Download section styling */
763
+ .download-option-box {
764
+ background: linear-gradient(135deg, #e8f4f8 0%, #f0e6d3 100%);
765
+ border: 1px solid rgba(0,139,139,.2);
766
+ border-radius: 12px;
767
+ padding: 20px;
768
+ margin-bottom: 15px;
769
+ }
770
  "))
771
  ),
772
 
 
784
  "Upload & Process",
785
  fluidRow(
786
  column(6,
787
+ h3("1. Upload TrackMan CSV or Parquet"),
788
+ fileInput("file", "Choose CSV or Parquet File (max 5,000 rows)",
789
+ accept = c(".csv", ".parquet")),
790
  fluidRow(
791
  column(3,
792
  checkboxInput("header", "Header", TRUE)
 
807
  selected = "yyyy")
808
  )
809
  ),
810
+ p(style = "color: #666; font-size: 12px;",
811
+ "CSV options (Header, Separator, Quote) are ignored for Parquet files."),
812
  verbatimTextOutput("csv_status")
813
  ),
814
  column(6,
 
942
  fluidRow(
943
  column(12,
944
  h3("Download Processed Data"),
945
+
946
+ div(class = "download-option-box",
947
+ fluidRow(
948
+ column(5,
949
+ textInput("download_filename", "File Name (without extension):",
950
+ value = paste0("app_ready_COA_", Sys.Date()))
951
+ ),
952
+ column(3,
953
+ radioButtons("download_format", "Export Format:",
954
+ choices = c("CSV" = "csv", "Parquet" = "parquet"),
955
+ selected = "csv", inline = TRUE)
956
+ ),
957
+ column(4,
958
+ br(),
959
+ downloadButton("downloadData", "Download", class = "btn-success btn-lg")
960
+ )
961
+ )
962
+ ),
963
+
964
  br(),
 
 
965
  h4("Data Summary:"),
966
  verbatimTextOutput("data_summary")
967
  )
 
1026
  ui <- fluidPage(
1027
  uiOutput("page")
1028
  )
1029
+
1030
  # Server
1031
  server <- function(input, output, session) {
1032
 
 
1033
  logged_in <- reactiveVal(FALSE)
1034
+ uploaded_file_type <- reactiveVal("csv")
1035
 
1036
  output$page <- renderUI({
1037
  if (logged_in()) {
 
1079
  updateCheckboxGroupInput(session, "columns_to_remove", selected = spinaxis_cols)
1080
  })
1081
 
1082
# Shared helper: turn a raw data frame into the processed data set.
#
# Steps: merge bat tracking data when a parsed JSON with data is available,
# drop the columns selected in `columns_to_remove`, remove duplicate rows,
# then publish the result to processed_data() and plot_data().
#
# Returns the processed data frame.
run_processing <- function(df) {
  bat <- bat_tracking_parsed()
  if (!(is.null(bat) || is.null(bat$data))) {
    merged <- merge_with_bat_tracking(df, bat$data)
    merge_result(merged)
    df <- merged$data
  }

  # Only columns that actually exist in the data can be dropped.
  drop_cols <- intersect(names(df), input$columns_to_remove %||% character(0))
  cleaned <- if (length(drop_cols) > 0) {
    select(df, -all_of(drop_cols))
  } else {
    df
  }
  cleaned <- distinct(cleaned)

  processed_data(cleaned)
  plot_data(cleaned)

  cleaned
}
1108
+
1109
  # Re-process data when date format changes
1110
  observeEvent(input$date_format, {
1111
+ req(input$file)
1112
 
 
1113
  tryCatch({
1114
+ df <- read_uploaded_file(input$file$datapath, input$file$name,
1115
+ input$header, input$sep, input$quote)
 
 
 
1116
 
 
1117
  df <- convert_date_columns(df, input$date_format)
 
1118
  csv_data_raw(df)
1119
+ run_processing(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1120
 
1121
  showNotification(
1122
  paste("Date format updated to:", if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"),
 
1128
  })
1129
  }, ignoreInit = TRUE)
1130
 
1131
+ # ── Process uploaded file (CSV or Parquet) ──
1132
  observeEvent(input$file, {
1133
  req(input$file)
1134
 
1135
  tryCatch({
1136
+ ext <- tolower(tools::file_ext(input$file$name))
1137
+ uploaded_file_type(ext)
 
 
 
1138
 
1139
+ df <- read_uploaded_file(input$file$datapath, input$file$name,
1140
+ input$header, input$sep, input$quote)
1141
 
1142
+ df <- convert_date_columns(df, input$date_format)
1143
  csv_data_raw(df)
1144
 
1145
+ processed_df <- run_processing(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1146
 
1147
  # Update pitcher choices
1148
  if ("Pitcher" %in% names(processed_df)) {
 
1150
  updateSelectInput(session, "pitcher_select", choices = pitcher_choices, selected = pitcher_choices[1])
1151
  }
1152
 
1153
+ # Auto-populate download filename from uploaded file
1154
+ base_name <- tools::file_path_sans_ext(input$file$name)
1155
+ updateTextInput(session, "download_filename", value = paste0(base_name, "_processed"))
1156
+
1157
+ format_label <- if (ext == "parquet") "Parquet" else "CSV"
1158
+ showNotification(
1159
+ paste0(format_label, " loaded: ", nrow(df), " rows x ", ncol(df), " columns"),
1160
+ type = "message", duration = 3
1161
+ )
1162
+
1163
  }, error = function(e) {
1164
+ showNotification(paste("Error processing file:", e$message), type = "error")
1165
  })
1166
  })
1167
 
 
1173
  parsed <- parse_bat_tracking_json(input$json_file$datapath)
1174
  bat_tracking_parsed(parsed)
1175
 
 
1176
  if (!is.null(csv_data_raw()) && parsed$success && !is.null(parsed$data)) {
1177
  result <- merge_with_bat_tracking(csv_data_raw(), parsed$data)
1178
  merge_result(result)
1179
 
 
1180
  df <- result$data
1181
  selected_cols_to_remove <- input$columns_to_remove %||% character(0)
1182
 
 
1200
  })
1201
  })
1202
 
1203
# Status text shown under the upload control for the CSV/Parquet file.
output$csv_status <- renderText({
  if (is.null(input$file)) {
    return("No file uploaded yet. Accepts CSV or Parquet (max 5,000 rows).")
  }

  raw <- csv_data_raw()
  if (is.null(raw)) {
    return("Processing file...")
  }

  file_kind <- if (uploaded_file_type() == "parquet") "Parquet" else "CSV"

  # First GameID value in the data, or "Unknown" when the column is absent.
  game_id <- "Unknown"
  if ("GameID" %in% names(raw)) {
    game_id <- unique(raw$GameID)[1]
  }

  date_label <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"

  status_lines <- c(
    paste0("\u2713 ", file_kind, " loaded successfully!"),
    paste(" Game ID:", game_id),
    paste(" Rows:", nrow(raw)),
    paste(" Columns:", ncol(raw)),
    paste("\u2713 Date format:", date_label)
  )
  paste(status_lines, collapse = "\n")
})
 
1238
  }
1239
 
1240
  if (!parsed$success) {
1241
+ return(paste("\u2717", parsed$message))
1242
  }
1243
 
1244
  paste(
1245
+ "\u2713 JSON parsed successfully!",
1246
  paste(" Game Reference:", parsed$game_reference),
1247
  paste(" Plays found:", parsed$plays_count %||% 0),
1248
  sep = "\n"
 
1267
  }
1268
 
1269
  if (is.null(parsed$data) || is.null(result)) {
 
1270
  csv_game <- if ("GameID" %in% names(csv)) unique(csv$GameID)[1] else NULL
1271
  json_game <- parsed$game_reference
1272
 
1273
  if (!is.null(csv_game) && !is.null(json_game) && csv_game != json_game) {
1274
  return(div(class = "merge-status-box merge-warning",
1275
+ h4("\u26A0 Game ID Mismatch", style = "margin-top: 0; color: #856404;"),
1276
  p(paste("CSV Game:", csv_game)),
1277
  p(paste("JSON Game:", json_game)),
1278
  p("Files may be from different games!")
 
1285
  ))
1286
  }
1287
 
 
1288
  csv_game <- if ("GameID" %in% names(csv)) unique(csv$GameID)[1] else NULL
1289
  json_game <- parsed$game_reference
1290
  game_match <- is.null(csv_game) || is.null(json_game) || csv_game == json_game
1291
 
1292
  if (result$matched > 0) {
1293
  div(class = "merge-status-box merge-success",
1294
+ h4("\u2713 Merge Successful!", style = "margin-top: 0; color: #155724;"),
1295
  p(paste("Matched:", result$matched, "of", result$total_bat, "bat tracking records")),
1296
+ if (!game_match) p(style = "color: #856404;", "\u26A0 Note: Game IDs differ but PitchUIDs matched")
1297
  )
1298
  } else {
1299
  div(class = "merge-status-box merge-warning",
1300
+ h4("\u26A0 No Matches Found", style = "margin-top: 0; color: #856404;"),
1301
  p(paste("0 of", result$total_bat, "bat tracking records matched")),
1302
  if (!game_match) p(paste("Game ID mismatch: CSV =", csv_game, ", JSON =", json_game))
1303
  )
 
1328
  ))
1329
  }
1330
 
 
1331
  div(
1332
  div(class = "row",
1333
  div(class = "col-md-4",
 
1361
  return(NULL)
1362
  }
1363
 
 
1364
  if ("BatSpeed_Sensor" %in% names(df)) {
1365
  bat_rows <- df %>%
1366
  filter(!is.na(BatSpeed_Sensor)) %>%
 
1404
  selected_cols_to_remove <- input$columns_to_remove %||% character(0)
1405
  removed_cols <- intersect(selected_cols_to_remove, names(original_df))
1406
  result <- merge_result()
1407
+ ext <- uploaded_file_type()
1408
+ format_label <- if (ext == "parquet") "Parquet" else "CSV"
1409
 
1410
  removed_cols_text <- if (length(removed_cols) > 0) {
1411
+ paste("\u2713 Removed columns:", length(removed_cols))
 
 
 
 
 
1412
  } else {
1413
+ "\u2713 Removed columns: 0"
1414
  }
1415
 
1416
  bat_tracking_text <- if (!is.null(result) && result$matched > 0) {
1417
+ paste("\u2713 Bat tracking merged:", result$matched, "pitches")
1418
  } else if (!is.null(bat_tracking_parsed())) {
1419
+ "\u26A0 Bat tracking: No matches"
1420
  } else {
1421
+ "\u25CB Bat tracking: Not uploaded"
1422
  }
1423
 
1424
  summary_text <- paste(
1425
+ paste0("\u2713 ", format_label, " file processed successfully!"),
1426
+ paste("\u2713 Original columns:", ncol(original_df)),
1427
+ paste("\u2713 Final columns:", ncol(df)),
1428
+ paste("\u2713 Rows processed:", nrow(df)),
1429
  removed_cols_text,
1430
  bat_tracking_text,
1431
+ "\u2713 Duplicates removed",
1432
+ paste("\u2713 Date format:", if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"),
1433
  sep = "\n"
1434
  )
1435
 
 
1540
  selected = clicked_pitch$TaggedPitchType)
1541
 
1542
  showModal(modalDialog(
1543
+ title = "Edit Pitch Type",
1544
+ div(style = "padding: 20px;",
1545
+ h4("Selected Pitch Details:", style = "color: darkcyan;"),
1546
+ verbatimTextOutput("selected_pitch_info"),
1547
+ br(),
1548
+ selectInput("modal_new_pitch_type", "Change Pitch Type To:",
1549
+ choices = c("Fastball", "Sinker", "Cutter", "Slider",
1550
+ "Curveball", "ChangeUp", "Splitter", "Knuckleball", "Sweeper","Other"),
1551
+ selected = clicked_pitch$TaggedPitchType),
1552
+ br(),
1553
+ actionButton("update_pitch", "Update Pitch Type", class = "btn-primary btn-lg"),
1554
+ actionButton("cancel_edit", "Cancel", class = "btn-default")
1555
+ ),
1556
+ footer = NULL,
1557
+ size = "m",
1558
+ easyClose = TRUE
1559
+ ))
1560
  }
1561
  })
1562
 
 
1656
  closest_idx <- which.min(distances)
1657
  hover_pitch <- pitcher_data[closest_idx, ]
1658
 
 
1659
  bat_info <- ""
1660
  if ("BatSpeed_Sensor" %in% names(hover_pitch) && !is.na(hover_pitch$BatSpeed_Sensor)) {
1661
  bat_info <- paste(" | Bat Speed:", round(hover_pitch$BatSpeed_Sensor, 1), "mph")
 
1703
 
1704
  total_pitches <- nrow(movement_stats)
1705
 
 
1706
  has_bat_speed <- "BatSpeed_Sensor" %in% names(movement_stats)
1707
 
1708
  summary_stats <- movement_stats %>%
 
1716
  `Avg HB` = sprintf("%.1f", mean(HorzBreak, na.rm = TRUE)),
1717
  `Avg Spin` = ifelse("SpinRate" %in% names(movement_stats),
1718
  sprintf("%.0f", mean(SpinRate, na.rm = TRUE)),
1719
+ "\u2014"),
1720
  `Avg Bat Speed` = if (has_bat_speed) {
1721
  bat_vals <- BatSpeed_Sensor[!is.na(BatSpeed_Sensor)]
1722
+ if (length(bat_vals) > 0) sprintf("%.1f", mean(bat_vals)) else "\u2014"
1723
+ } else "\u2014",
1724
  `Zone%` = sprintf("%.1f%%", round(mean(in_zone, na.rm = TRUE) * 100, 1)),
1725
  `Whiff%` = sprintf("%.1f%%", round(mean(is_whiff, na.rm = TRUE) * 100, 1)),
1726
  .groups = "drop"
 
1753
  info_lines <- c(info_lines, paste("Spin Rate:", round(pitch_data$SpinRate, 0), "rpm"))
1754
  }
1755
 
 
1756
  if ("BatSpeed_Sensor" %in% names(pitch_data) && !is.na(pitch_data$BatSpeed_Sensor)) {
1757
  info_lines <- c(info_lines,
1758
  paste("Bat Speed:", round(pitch_data$BatSpeed_Sensor, 1), "mph"),
1759
+ paste("Vertical Attack Angle:", round(pitch_data$VerticalAttackAngle_Sensor, 1), "\u00B0"),
1760
+ paste("Horizontal Attack Angle:", round(pitch_data$HorizontalAttackAngle_Sensor, 1), "\u00B0"))
1761
  }
1762
 
1763
  if ("Date" %in% names(pitch_data) && !is.na(pitch_data$Date)) {
 
1795
  plot_data(current_data)
1796
  processed_data(current_data)
1797
 
1798
+ removeModal()
1799
 
1800
  showNotification(
1801
  paste("Updated pitch from", pitch_info$original_type, "to", input$modal_new_pitch_type),
 
1808
 
1809
  # Cancel edit
1810
  observeEvent(input$cancel_edit, {
1811
+ removeModal()
1812
  selected_pitch(NULL)
1813
  })
1814
 
 
1858
  "TaggedPitchType column not available"
1859
  }),
1860
  bat_tracking_summary,
1861
+ paste("Source format:", toupper(uploaded_file_type())),
1862
  paste("Date format:", if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"),
1863
  sep = "\n"
1864
  )
 
1866
  return(summary_text)
1867
  })
1868
 
1869
+ # ── Download handler: CSV or Parquet with custom filename ──
1870
  output$downloadData <- downloadHandler(
1871
  filename = function() {
1872
+ base_name <- gsub("[^A-Za-z0-9_\\-]", "_", input$download_filename)
1873
+ if (nchar(trimws(base_name)) == 0) base_name <- paste0("app_ready_COA_", Sys.Date())
1874
+
1875
+ ext <- input$download_format
1876
+ paste0(base_name, ".", ext)
1877
  },
1878
  content = function(file) {
1879
+ if (input$download_format == "parquet") {
1880
+ arrow::write_parquet(processed_data(), file)
1881
+ } else {
1882
+ write.csv(processed_data(), file, row.names = FALSE)
1883
+ }
1884
  }
1885
  )
1886
 
 
1888
 
1889
  #SCRAPER STUFF
1890
 
 
1891
  output$scrape_options <- renderUI({
1892
  switch(input$scrape_source,
1893
  "pbp" = tagList(
 
1977
  if (conclusion == "success") {
1978
  scrape_status_msg("GitHub finished! Fetching data...")
1979
 
 
1980
  filename <- paste0(input$scrape_source, "_", input$start_date, "_to_", input$end_date, ".csv.gz")
1981
  url <- paste0("https://api.github.com/repos/", gh_repo, "/contents/data/", filename)
1982
 
 
1997
  NULL
1998
  }
1999
  }, error = function(e) { NULL })
2000
+
2001
+ if (!is.null(data) && nrow(data) > 0) {
2002
 
2003
  if (input$scrape_source == "pbp") {
2004
  scrape_status_msg("Processing data...")
 
2014
  }
2015
 
2016
  scraped_data(data)
2017
+ scrape_status_msg(paste0("Done! ", nrow(data), " rows \u00D7 ", ncol(data), " columns."))
2018
  } else {
2019
  scrape_status_msg("Scrape finished but couldn't fetch the file. Try 'Fetch Results' manually.")
2020
  }