igroffman committed on
Commit
3d1fade
·
verified ·
1 Parent(s): 9cfd949

Delete app.R

Browse files
Files changed (1) hide show
  1. app.R +0 -2193
app.R DELETED
@@ -1,2193 +0,0 @@
1
- Sys.setenv(RETICULATE_PYTHON = "/usr/bin/python3")
2
- library(reticulate)
3
- library(shiny)
4
- library(shinydashboard)
5
- library(shinyBS)
6
- library(DT)
7
- library(dplyr)
8
- library(readr)
9
- library(stringr)
10
- library(jsonlite)
11
- library(httr)
12
- library(progressr)
13
- library(RCurl)
14
- library(curl)
15
- library(xgboost)
16
- library(recipes)
17
- library(arrow)
18
- library(base64enc)
19
-
20
# Maximum rows allowed for upload (checked in read_uploaded_file()).
MAX_UPLOAD_ROWS <- 5000

# Application password, read from the `password` environment variable.
# NOTE(review): Sys.getenv() returns "" when the variable is unset; confirm
# the login check does not accept an empty password in that case.
PASSWORD <- Sys.getenv("password")

# Lookup table that clean_college_data() joins onto pitches by `event_type`.
rv <- read_csv("non_context_run_values.csv")
# Model and preprocessing recipe used by predict_stuffplus(); both files are
# read from the working directory when the script is loaded.
stuffplus_model <- xgb.load("stuffplus_xgb.json")
stuffplus_recipe <- readRDS("stuffplus_recipe.rds")
28
-
29
# Columns offered for removal in the upload tab: every SpinAxis3d* field plus
# GameDate. The vector is used as the checkbox choices, so the order below is
# the order shown to the user.
columns_to_remove <- local({
  spin_axis_fields <- c(
    "TransverseAngle", "LongitudinalAngle", "ActiveSpinRate",
    "SpinEfficiency", "Tilt", "VectorX", "VectorY", "VectorZ"
  )

  seam_orientation_fields <- c(
    "RotationX", "RotationY", "RotationZ",
    "BallYAmb1", "BallAngleHorizontalAmb1", "BallZAmb1",
    "BallAngleVerticalAmb2", "BallZAmb2",
    "BallXAmb4", "BallYAmb4",
    "BallAngleHorizontalAmb2", "BallAngleVerticalAmb1",
    "BallXAmb1", "BallYAmb2",
    "BallAngleHorizontalAmb4", "BallAngleVerticalAmb4",
    "BallXAmb2", "BallAngleVerticalAmb3",
    "BallAngleHorizontalAmb3", "BallXAmb3",
    "BallYAmb3", "BallZAmb3",
    "BallZAmb4"
  )

  c(
    paste0("SpinAxis3d", spin_axis_fields),
    paste0("SpinAxis3dSeamOrientation", seam_orientation_fields),
    "GameDate"
  )
})
46
-
47
# Pitch colors for visualization (Coastal Carolina theme).
# Named character vector: pitch type label -> colour.
# Both "ChangeUp" and "Changeup" are listed because clean_college_data()
# rewrites the former to the latter, while the bulk-edit dropdown in the UI
# still offers "ChangeUp"; without the alias, cleaned changeups have no colour.
pitch_colors <- c(
  "Fastball" = "#FA8072",
  "Four-Seam" = "#FA8072",
  "Sinker" = "#fdae61",
  "Slider" = "#A020F0",
  "Sweeper" = "magenta",
  "Curveball" = "#2c7bb6",
  "ChangeUp" = "#90EE90",
  "Changeup" = "#90EE90",
  "Splitter" = "#90EE32",
  "Cutter" = "red",
  "Knuckleball" = "#FFB4B4",
  "Other" = "#D3D3D3"
)
61
-
62
# Convert a single date value to a formatted string.
#
# Arguments:
#   date_string   A length-one Date, POSIXct, or character value. Character
#                 input is recognised as YYYY-MM-DD, M/D/YYYY, or M/D/YY.
#   output_format "mdyy" gives M/D/YY with no leading zero on month or day;
#                 any other value (default "yyyy") gives YYYY-MM-DD.
#
# Returns a length-one character value: the formatted date; NA_character_ for
# NULL, zero-length, NA, or empty-string input; or the input coerced to
# character when it matches none of the recognised layouts (or matches a
# layout but is not a valid calendar date).
convert_date_format <- function(date_string, output_format = "yyyy") {
  # Render a parsed Date in the requested layout. Month and day lose their
  # leading zero through integer conversion, so the two-digit year keeps its
  # own zero (2005-01-05 -> "1/5/05"). Stripping "/0" with gsub() would also
  # hit the year and produce "1/5/5".
  render <- function(parsed_date) {
    if (output_format == "mdyy") {
      paste(
        as.integer(format(parsed_date, "%m")),
        as.integer(format(parsed_date, "%d")),
        format(parsed_date, "%y"),
        sep = "/"
      )
    } else {
      format(parsed_date, "%Y-%m-%d")
    }
  }

  # Handle NULL or zero-length input safely.
  if (is.null(date_string) || length(date_string) == 0) {
    return(NA_character_)
  }

  # Already a proper date object (common from parquet): format directly.
  if (inherits(date_string, c("Date", "POSIXct"))) {
    if (is.na(date_string)) {
      return(NA_character_)
    }
    return(render(as.Date(date_string)))
  }

  if (is.na(date_string) || identical(as.character(date_string), "")) {
    return(NA_character_)
  }

  date_string <- as.character(date_string)

  # Recognised text layouts: regex -> strptime format. The patterns are
  # mutually exclusive, so the first match is the only candidate.
  layouts <- c(
    "^\\d{4}-\\d{2}-\\d{2}$" = "%Y-%m-%d",
    "^\\d{1,2}/\\d{1,2}/\\d{4}$" = "%m/%d/%Y",
    "^\\d{1,2}/\\d{1,2}/\\d{2}$" = "%m/%d/%y"
  )

  parsed_date <- as.Date(NA)
  for (pattern in names(layouts)) {
    if (grepl(pattern, date_string)) {
      parsed_date <- tryCatch(
        as.Date(date_string, format = layouts[[pattern]]),
        error = function(e) as.Date(NA)
      )
      break
    }
  }

  if (!is.na(parsed_date)) {
    return(render(parsed_date))
  }

  # Unrecognised or invalid: hand the text back unchanged.
  date_string
}
114
-
115
# Reformat the known date columns of a data frame as character strings.
#
# Arguments:
#   df            Data frame; only the columns named Date, GameDate, UTCDate
#                 and LocalDateTime are touched, and only when present.
#   output_format "mdyy" gives M/D/YY with no leading zero on month or day;
#                 any other value (default "yyyy") gives YYYY-MM-DD.
#
# Returns `df` with each present date column replaced by a character vector.
convert_date_columns <- function(df, output_format = "yyyy") {
  date_columns <- c("Date", "GameDate", "UTCDate", "LocalDateTime")

  for (col in intersect(date_columns, names(df))) {
    col_data <- df[[col]]

    if (inherits(col_data, c("Date", "POSIXct"))) {
      # Already a Date or POSIXct (common from parquet): format the whole
      # column at once instead of running the regex-based parser per value.
      as_date <- as.Date(col_data)
      if (output_format == "mdyy") {
        # Integer conversion drops the leading zero from month and day only;
        # the two-digit year keeps its zero ("1/5/05", not "1/5/5").
        formatted <- paste(
          as.integer(format(as_date, "%m")),
          as.integer(format(as_date, "%d")),
          format(as_date, "%y"),
          sep = "/"
        )
      } else {
        formatted <- format(as_date, "%Y-%m-%d")
      }
      # paste() turns missing dates into "NA/NA/NA"; restore real NAs.
      formatted[is.na(as_date)] <- NA_character_
      df[[col]] <- formatted
    } else {
      # Character column: use the string-parsing path. vapply() guarantees a
      # character vector even for a zero-row column, where sapply() would
      # return an empty list.
      df[[col]] <- vapply(
        col_data,
        function(x) convert_date_format(x, output_format),
        character(1),
        USE.NAMES = FALSE
      )
    }
  }

  df
}
141
-
142
# ── Load an uploaded CSV or Parquet file and enforce the row limit ──
#
# Arguments:
#   filepath  Path of the file to read.
#   filename  Name whose extension picks the reader: "parquet" (any case)
#             uses arrow::read_parquet(), anything else uses read.csv().
#   header, sep, quote  Passed to read.csv(); not used for Parquet.
#
# Returns a data.frame. Stops with an error when the file has more than
# MAX_UPLOAD_ROWS rows.
read_uploaded_file <- function(filepath, filename, header = TRUE, sep = ",", quote = '"') {
  uploaded <- if (tolower(tools::file_ext(filename)) != "parquet") {
    read.csv(
      filepath,
      header = header,
      sep = sep,
      quote = quote,
      stringsAsFactors = FALSE
    )
  } else {
    as.data.frame(arrow::read_parquet(filepath))
  }

  row_total <- nrow(uploaded)
  if (row_total <= MAX_UPLOAD_ROWS) {
    return(uploaded)
  }

  stop(paste0("File contains ", format(row_total, big.mark = ","), " rows which exceeds the ",
              format(MAX_UPLOAD_ROWS, big.mark = ","), " row limit. Please upload a smaller file."))
}
163
-
164
# Parse a bat tracking JSON file into one row per play.
#
# Arguments:
#   json_path  Path (or other input accepted by fromJSON()) of the JSON file.
#              The top-level object is read for GameReference, SessionId and
#              a Plays array.
#
# Returns a list that always has `success`, `data` and `message`:
#   - parsed with plays: success = TRUE, data = data.frame with PitchUID,
#     BatSpeed_Sensor, VerticalAttackAngle_Sensor,
#     HorizontalAttackAngle_Sensor, BatTracking_PlayId, BatTracking_Time,
#     plus game_reference, session_id, plays_count;
#   - parsed, empty Plays: success = TRUE, data = NULL, plays_count = 0;
#   - any error: success = FALSE, data = NULL, message with the error text.
parse_bat_tracking_json <- function(json_path) {
  tryCatch({
    json_data <- fromJSON(json_path, simplifyVector = FALSE)

    game_reference <- json_data$GameReference
    session_id <- json_data$SessionId
    plays <- json_data$Plays

    if (length(plays) == 0) {
      return(list(
        success = TRUE,
        data = NULL,
        game_reference = game_reference,
        session_id = session_id,
        plays_count = 0L,
        message = "JSON parsed but contains no bat tracking plays (empty Plays array)"
      ))
    }

    # Pull one field from every play as a plain vector. A missing, null or
    # non-scalar value becomes NA so that one incomplete play cannot turn the
    # column into a ragged list and make data.frame() fail for the whole file.
    play_field <- function(field, as_numeric = FALSE) {
      values <- lapply(plays, function(play) {
        value <- play[[field]]
        if (is.null(value) || !is.atomic(value) || length(value) != 1L) {
          NA
        } else {
          value
        }
      })
      values <- unlist(values, use.names = FALSE)
      if (as_numeric) as.numeric(values) else values
    }

    bat_tracking_df <- data.frame(
      PitchUID = play_field("PitchUID"),
      BatSpeed_Sensor = play_field("BatSpeed", as_numeric = TRUE),
      VerticalAttackAngle_Sensor = play_field("VerticalAttackAngle", as_numeric = TRUE),
      HorizontalAttackAngle_Sensor = play_field("HorizontalAttackAngle", as_numeric = TRUE),
      BatTracking_PlayId = play_field("PlayId"),
      BatTracking_Time = play_field("Time"),
      stringsAsFactors = FALSE
    )

    list(
      success = TRUE,
      data = bat_tracking_df,
      game_reference = game_reference,
      session_id = session_id,
      plays_count = length(plays),
      message = paste("Successfully parsed", length(plays), "bat tracking play(s)")
    )
  }, error = function(e) {
    list(
      success = FALSE,
      data = NULL,
      message = paste("Error parsing JSON:", conditionMessage(e))
    )
  })
}
210
-
211
# Attach bat tracking measurements to pitch-level data by PitchUID.
#
# Arguments:
#   csv_data           Pitch-level data frame; needs a PitchUID column for
#                      anything to be merged.
#   bat_tracking_data  Data frame with a PitchUID column plus the columns to
#                      attach (as produced by parse_bat_tracking_json()), or
#                      NULL.
#
# Returns a list: `data` (csv_data with the bat tracking columns added, one
# row per input row), `matched` (number of csv rows that found a bat tracking
# record), `total_bat` (rows in bat_tracking_data) and `message`.
#
# Existing BatSpeed / VerticalAttackAngle / HorizontalAttackAngle columns are
# only filled where they are NA and the matching *_Sensor value is not.
merge_with_bat_tracking <- function(csv_data, bat_tracking_data) {
  if (is.null(bat_tracking_data) || nrow(bat_tracking_data) == 0) {
    return(list(
      data = csv_data,
      matched = 0,
      total_bat = 0,
      message = "No bat tracking data to merge"
    ))
  }

  if (!"PitchUID" %in% names(csv_data)) {
    return(list(
      data = csv_data,
      matched = 0,
      total_bat = nrow(bat_tracking_data),
      message = "CSV does not contain PitchUID column - cannot merge"
    ))
  }

  # Index of the bat tracking row for each pitch (NA when there is none).
  # match() keeps only the first hit, so a PitchUID repeated in the bat
  # tracking data cannot duplicate pitch rows the way a join would.
  pitch_ids <- csv_data[["PitchUID"]]
  hit <- match(pitch_ids, bat_tracking_data[["PitchUID"]])
  # A missing PitchUID must never pair with a missing PitchUID.
  hit[is.na(pitch_ids)] <- NA_integer_

  # Copy every non-key column across. A column that already exists in
  # csv_data (e.g. *_Sensor columns from a previously processed export) is
  # overwritten in place rather than producing suffixed duplicates.
  merged_data <- csv_data
  for (col in setdiff(names(bat_tracking_data), "PitchUID")) {
    merged_data[[col]] <- bat_tracking_data[[col]][hit]
  }

  matched_count <- sum(!is.na(hit))

  # Backfill the TrackMan columns from the sensor columns where possible.
  for (target in c("BatSpeed", "VerticalAttackAngle", "HorizontalAttackAngle")) {
    sensor <- paste0(target, "_Sensor")
    if (all(c(target, sensor) %in% names(merged_data))) {
      fill <- is.na(merged_data[[target]]) & !is.na(merged_data[[sensor]])
      merged_data[[target]][fill] <- merged_data[[sensor]][fill]
    }
  }

  list(
    data = merged_data,
    matched = matched_count,
    total_bat = nrow(bat_tracking_data),
    message = paste("Merged successfully:", matched_count, "of", nrow(bat_tracking_data), "bat tracking records matched")
  )
}
260
-
261
-
262
# Clean a TrackMan-style pitch-level data frame and derive per-pitch flags.
#
# Steps, in order:
#   1. Normalise home-run spellings in PlayResult and turn "Last, First"
#      names into "First Last" for Batter, Pitcher and Catcher.
#   2. Rename `Top/Bottom` to `Top.Bottom` when present.
#   3. Coerce the known numeric columns (when present) and multiply
#      PlateLocHeight / PlateLocSide by 12.
#   4. Collapse TaggedPitchType variants (e.g. FourSeamFastBall -> Fastball).
#   5. Add outcome flags and values: is_csw, is_swing, is_whiff, in_zone,
#      chase, in_zone_whiff, is_hit, slg, on_base, is_hard_hit, woba,
#      wobacon, is_plate_appearance, is_at_bat, is_walk, is_k, is_put_away;
#      add one out to OutsOnPlay on strikeouts.
#   6. Label each pitch with `event_type` and left-join the global `rv`
#      table on it.
#   7. Drop trajectory, confidence and position columns when present.
#
# Arguments:
#   data   Data frame that has at least PlayResult, Batter, Pitcher, Catcher,
#          TaggedPitchType, PitchCall, KorBB, PlateLocSide, PlateLocHeight,
#          ExitSpeed, Strikes and OutsOnPlay.
#   teams  Currently unused.
#
# Returns the cleaned data frame.
clean_college_data <- function(data, teams = NA) {
  data <- data %>%
    mutate(
      PlayResult = ifelse(PlayResult %in% c("HomeRun", "homerun"), "Homerun", PlayResult),
      Batter = sub("(.*),\\s*(.*)", "\\2 \\1", Batter),
      Pitcher = sub("(.*),\\s*(.*)", "\\2 \\1", Pitcher),
      Catcher = sub("(.*),\\s*(.*)", "\\2 \\1", Catcher)
    )

  if ("Top/Bottom" %in% colnames(data)) {
    data <- data %>%
      rename(`Top.Bottom` = `Top/Bottom`)
  }

  numeric_columns <- c(
    "PitchNo", "PAofInning", "PitchofPA", "PitcherId", "BatterId", "Inning", "Outs", "Balls",
    "Strikes", "OutsOnPlay", "RunsScored", "RelSpeed", "VertRelAngle", "HorzRelAngle", "SpinRate",
    "SpinAxis", "RelHeight", "RelSide", "Extension", "VertBreak", "InducedVertBreak", "HorzBreak",
    "PlateLocHeight", "PlateLocSide", "ZoneSpeed", "VertApprAngle", "HorzApprAngle", "ZoneTime",
    "ExitSpeed", "Angle", "Direction", "HitSpinRate", "Distance", "Bearing", "HangTime",
    "LastTrackedDistance", "pfxx", "pfxz", "x0", "y0", "z0", "vx0", "vz0", "vy0", "ax0", "ay0",
    "az0", "EffectiveVelo", "MaxHeight", "SpeedDrop", "ContactPositionX", "ContactPositionY",
    "ContactPositionZ", "HomeTeamForeignID", "AwayTeamForeignID", "CatcherId", "ThrowSpeed",
    "PopTime", "ExchangeTime", "TimeToBase"
  )

  # any_of() makes the "only if present" intent real for the x12 scaling; the
  # zone flags further down still need both plate-location columns.
  data <- data %>%
    mutate(across(any_of(numeric_columns), as.numeric)) %>%
    mutate(across(any_of(c("PlateLocHeight", "PlateLocSide")), ~ 12 * .x))

  data <- data %>%
    mutate(TaggedPitchType = case_when(
      TaggedPitchType == "FourSeamFastBall" ~ "Fastball",
      TaggedPitchType %in% c("TwoSeamFastBall", "OneSeamFastBall") ~ "Sinker",
      TaggedPitchType == "ChangeUp" ~ "Changeup",
      TaggedPitchType == "Undefined" ~ "Other",
      TRUE ~ TaggedPitchType
    ))

  data <- data %>%
    mutate(
      is_csw = case_when(
        PitchCall %in% c("StrikeSwinging", "StrikeCalled") ~ 1,
        TRUE ~ 0
      ),
      is_swing = case_when(
        PitchCall %in% c("StrikeSwinging", "FoulBallNotFieldable", "InPlay",
                         "FoulBallFieldable", "FoulBall") ~ 1,
        TRUE ~ 0
      ),
      is_whiff = case_when(
        PitchCall == "StrikeSwinging" & is_swing == 1 ~ 1,
        PitchCall != "StrikeSwinging" & is_swing == 1 ~ 0,
        TRUE ~ NA_real_
      ),
      # NOTE(review): a pitch with a missing plate location satisfies neither
      # side of the first condition and falls through to in_zone = 1.
      in_zone = case_when(
        PlateLocSide > 9.975 | PlateLocSide < -9.975 |
          PlateLocHeight > 40 | PlateLocHeight < 20 ~ 0,
        TRUE ~ 1
      ),
      chase = case_when(
        is_swing == 1 & in_zone == 0 ~ 1,
        is_swing == 0 & in_zone == 0 ~ 0,
        TRUE ~ NA_real_
      ),
      in_zone_whiff = case_when(
        is_swing == 1 & in_zone == 1 & is_whiff == 1 ~ 1,
        is_swing == 1 & in_zone == 1 & is_whiff == 0 ~ 0,
        TRUE ~ NA_real_
      ),
      is_hit = case_when(
        PlayResult %in% c("Single", "Double", "Triple", "Homerun", "HomeRun") & PitchCall == "InPlay" ~ 1,
        !PlayResult %in% c("Single", "Double", "Triple", "Homerun", "HomeRun") & PitchCall == "InPlay" ~ 0,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% c("StrikeoutSwinging", "StrikeoutLooking") ~ 0,
        TRUE ~ NA_real_
      ),
      slg = case_when(
        PitchCall == "InPlay" & PlayResult == "Single" ~ 1,
        PitchCall == "InPlay" & PlayResult == "Double" ~ 2,
        PitchCall == "InPlay" & PlayResult == "Triple" ~ 3,
        PitchCall == "InPlay" & PlayResult %in% c("Homerun", "HomeRun") ~ 4,
        !PlayResult %in% c("Single", "Double", "Triple", "Homerun", "HomeRun") & PitchCall == "InPlay" ~ 0,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% c("StrikeoutSwinging", "StrikeoutLooking") ~ 0,
        TRUE ~ NA_real_
      ),
      on_base = case_when(
        PitchCall == "InPlay" & PlayResult %in% c("Single", "Double", "Triple", "Homerun", "HomeRun") ~ 1,
        PitchCall %in% c("HitByPitch") | KorBB == "Walk" ~ 1,
        PitchCall == "InPlay" & PlayResult %in% c("Out", "Error", "FieldersChoice") & PlayResult != "Sacrifice" ~ 0,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% c("StrikeoutSwinging", "StrikeoutLooking") ~ 0,
        TRUE ~ NA_real_
      ),
      is_hard_hit = case_when(
        ExitSpeed >= 95 & PitchCall == "InPlay" ~ 1,
        ExitSpeed < 95 & PitchCall == "InPlay" ~ 0,
        TRUE ~ NA_real_
      ),
      woba = case_when(
        PitchCall == "InPlay" & PlayResult == "Single" ~ 0.95,
        PitchCall == "InPlay" & PlayResult == "Double" ~ 1.24,
        PitchCall == "InPlay" & PlayResult == "Triple" ~ 1.47,
        PitchCall == "InPlay" & PlayResult %in% c("Homerun", "HomeRun") ~ 1.71,
        KorBB == "Walk" ~ 0.82,
        PitchCall %in% c("HitByPitch") ~ 0.85,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% c("StrikeoutSwinging", "StrikeoutLooking") ~ 0,
        PitchCall == "InPlay" & !PlayResult %in% c("Single", "Double", "Triple", "Homerun", "HomeRun") ~ 0,
        TRUE ~ NA_real_
      ),
      wobacon = case_when(
        PitchCall == "InPlay" & PlayResult == "Single" ~ 0.95,
        PitchCall == "InPlay" & PlayResult == "Double" ~ 1.24,
        PitchCall == "InPlay" & PlayResult == "Triple" ~ 1.47,
        PitchCall == "InPlay" & PlayResult %in% c("Homerun", "HomeRun") ~ 1.71,
        PitchCall == "InPlay" & !PlayResult %in% c("Single", "Double", "Triple", "Homerun", "HomeRun") ~ 0,
        TRUE ~ NA_real_
      ),
      is_plate_appearance = ifelse(
        PitchCall %in% c("InPlay", "HitByPitch") | KorBB %in% c("Strikeout", "Walk") | PlayResult %in% c("StrikeoutSwinging", "StrikeoutLooking"), 1, 0
      ),
      is_at_bat = case_when(
        PitchCall == "InPlay" & !PlayResult %in% c("StolenBase", "Sacrifice", "CaughtStealing", "Undefined") ~ 1,
        KorBB == "Strikeout" ~ 1,
        PlayResult %in% c("StrikeoutSwinging", "StrikeoutLooking") ~ 1,
        TRUE ~ 0
      ),
      is_walk = case_when(
        is_plate_appearance == 1 & KorBB == "Walk" ~ 1,
        is_plate_appearance == 1 & KorBB != "Walk" ~ 0,
        TRUE ~ NA_real_
      ),
      # The PlayResult strikeout test must come before the KorBB != "Strikeout"
      # test: such rows are at-bats, so the negative branch would otherwise
      # claim them first and a strikeout recorded only in PlayResult would be
      # scored 0.
      is_k = case_when(
        is_at_bat == 1 & KorBB == "Strikeout" ~ 1,
        PlayResult %in% c("StrikeoutSwinging", "StrikeoutLooking") ~ 1,
        is_at_bat == 1 & KorBB != "Strikeout" ~ 0,
        TRUE ~ NA_real_
      ),
      # Same ordering concern as is_k.
      is_put_away = case_when(
        Strikes == 2 & KorBB == "Strikeout" ~ 1,
        Strikes == 2 & PlayResult %in% c("StrikeoutSwinging", "StrikeoutLooking") ~ 1,
        Strikes == 2 & KorBB != "Strikeout" ~ 0,
        TRUE ~ NA_real_
      ),
      # %in% keeps the test FALSE (not NA) when KorBB is missing, so a missing
      # KorBB no longer wipes OutsOnPlay to NA.
      OutsOnPlay = ifelse(
        KorBB %in% "Strikeout" | PlayResult %in% c("StrikeoutSwinging", "StrikeoutLooking"),
        OutsOnPlay + 1,
        OutsOnPlay
      )
    )

  # The misspelled PitchCall values below are matched on purpose, exactly as
  # they were listed originally.
  data <- data %>%
    mutate(event_type = case_when(
      PitchCall %in% c("StrikeSwinging", "StrkeSwinging") ~ "Whiff",
      PitchCall %in% c("StriekC", "StrikeCalled") ~ "Called Strike",
      PitchCall %in% c("FoulBallFieldable", "FoulBall", "FoulBallNotFieldable",
                       "FouldBallNotFieldable") ~ "Foul Ball",
      PitchCall %in% c("BallCalled", "BallinDirt", "BallIntentional", "BalIntentional") ~ "Ball",
      PitchCall == "HitByPitch" ~ "HBP",
      PitchCall == "InPlay" & PlayResult %in% c("Out", "FieldersChoice",
                                                "Error", "error",
                                                "Sacrifice") ~ "Field Out",
      PitchCall == "InPlay" & PlayResult == "Single" ~ "Single",
      PitchCall == "InPlay" & PlayResult == "Double" ~ "Double",
      PitchCall == "InPlay" & PlayResult == "Triple" ~ "Triple",
      PitchCall == "InPlay" & PlayResult == "Homerun" ~ "Home Run",
      TRUE ~ NA_character_
    )) %>%
    left_join(rv, by = "event_type")

  # Not every export carries all of these columns; any_of() drops the ones
  # that exist instead of erroring on the ones that do not.
  dropped_columns <- c(
    "PitchLastMeasuredX", "PitchLastMeasuredY", "PitchLastMeasuredZ",
    "HitSpinAxis",
    "PitchReleaseConfidence", "PitchLocationConfidence", "PitchMovementConfidence",
    "HitLaunchConfidence", "HitLandingConfidence",
    "CatcherThrowCatchConfidence", "CatcherThrowReleaseConfidence", "CatcherThrowLocationConfidence",
    "PositionAt110X", "PositionAt110Y", "PositionAt110Z"
  )

  data %>%
    dplyr::select(
      -any_of(dropped_columns),
      -starts_with("PitchTrajectory"),
      -starts_with("HitTrajectory")
    )
}
446
-
447
-
448
-
449
# Score every pitch with the global Stuff+ model.
#
# The input is copied and reshaped for the model: RelSide and ax0 are sign-
# flipped for left-handed pitchers (and forced positive for "Both" /
# "Undefined"), ax0 is then negated, plate location is multiplied by 12, and
# each pitch gets az0_diff / velo_diff relative to the pitcher's primary pitch
# in that game (Fastball if thrown, else Sinker, else the most frequent type).
# The reshaped copy is baked with `stuffplus_recipe` and scored with
# `stuffplus_model`.
#
# Arguments:
#   data  Pitch-level data frame with PitcherThrows, RelSide, ax0, az0,
#         RelSpeed, PlateLocHeight, PlateLocSide, Pitcher, GameID and
#         TaggedPitchType, plus whatever else the recipe expects.
#
# Returns `data` (original columns untouched) with `raw_stuff` and
# `stuff_plus` added.
predict_stuffplus <- function(data) {
  either_hand <- c("Both", "Undefined")

  # Handedness normalisation and unit scaling. The trailing `ax0 = -ax0` runs
  # after the case_when() on ax0 because mutate() evaluates in order.
  handed <- mutate(
    data,
    RelSide = case_when(
      PitcherThrows == "Right" ~ RelSide,
      PitcherThrows == "Left" ~ -RelSide,
      PitcherThrows %in% either_hand & RelSide > 0 ~ RelSide,
      PitcherThrows %in% either_hand & RelSide < 0 ~ -RelSide
    ),
    ax0 = case_when(
      PitcherThrows == "Right" ~ ax0,
      PitcherThrows == "Left" ~ -ax0,
      PitcherThrows %in% either_hand & ax0 > 0 ~ ax0,
      PitcherThrows %in% either_hand & ax0 < 0 ~ -ax0
    ),
    PlateLocHeight = PlateLocHeight * 12,
    PlateLocSide = PlateLocSide * 12,
    ax0 = -ax0
  )

  # Primary pitch per pitcher and game.
  with_primary <- handed %>%
    group_by(Pitcher, GameID) %>%
    mutate(
      primary_pitch = case_when(
        any(TaggedPitchType == "Fastball") ~ "Fastball",
        any(TaggedPitchType == "Sinker") ~ "Sinker",
        TRUE ~ names(sort(table(TaggedPitchType), decreasing = TRUE))[1]
      )
    )

  # Mean az0 and velocity of that primary pitch, broadcast to every row.
  with_baseline <- with_primary %>%
    group_by(Pitcher, GameID, primary_pitch) %>%
    mutate(
      primary_az0 = mean(az0[TaggedPitchType == primary_pitch], na.rm = TRUE),
      primary_velo = mean(RelSpeed[TaggedPitchType == primary_pitch], na.rm = TRUE)
    ) %>%
    ungroup()

  predict_data <- mutate(
    with_baseline,
    az0_diff = az0 - primary_az0,
    velo_diff = RelSpeed - primary_velo
  )

  model_matrix <- as.matrix(bake(stuffplus_recipe, new_data = predict_data))

  # Assumes the baked matrix has one row per input row, in the same order.
  data$raw_stuff <- predict(stuffplus_model, model_matrix)

  # NOTE(review): 0.004424894 and 0.01010482 look like the mean and standard
  # deviation of the raw model output used to scale to a 100-centred index;
  # confirm against the training code.
  mutate(data, stuff_plus = ((raw_stuff - 0.004424894) / 0.01010482) * 10 + 100)
}
498
-
499
-
500
# Login page UI.
#
# A single fluidPage with an inline stylesheet and one centred
# `.login-container` card holding:
#   - the logo image (loaded from an external URL),
#   - a password field with input id "password",
#   - a button with input id "login",
#   - a text output with id "wrong_pass".
# The stylesheet also restyles `.btn-primary` and `.form-control` globally
# for this page.
login_ui <- fluidPage(
  tags$style(HTML("
body {
background-color: #f0f4f8;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
color: #006F71;
}
.login-container {
max-width: 360px;
margin: 120px auto;
background: #A27752;
padding: 30px 25px;
border-radius: 8px;
box-shadow: 0 4px 15px #A1A1A4;
text-align: center;
color: white;
}
.login-message {
margin-bottom: 20px;
font-size: 14px;
color: #ffffff;
font-weight: 600;
}
.btn-primary {
background-color: #006F71 !important;
border-color: #006F71 !important;
color: white !important;
font-weight: bold;
width: 100%;
margin-top: 10px;
box-shadow: 0 2px 5px #006F71;
transition: background-color 0.3s ease;
}
.btn-primary:hover {
background-color: #006F71 !important;
border-color: #A27752 !important;
}
.form-control {
border-radius: 4px;
border: 1.5px solid #006F71 !important;
color: #006F71;
font-weight: 600;
}
")),

  # Login card: logo, password field, submit button, and the output slot
  # with id "wrong_pass".
  div(class = "login-container",
      tags$img(src = "https://upload.wikimedia.org/wikipedia/en/thumb/e/ef/Coastal_Carolina_Chanticleers_logo.svg/1200px-Coastal_Carolina_Chanticleers_logo.svg.png", height = "150px"),
      passwordInput("password", "Password:"),
      actionButton("login", "Login"),
      textOutput("wrong_pass")
  )
)
552
-
553
- # UI
554
- app_ui <- fluidPage(
555
- tags$head(
556
- tags$style(HTML("
557
- body, table, .gt_table {
558
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto,
559
- Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji',
560
- 'Segoe UI Symbol';
561
- }
562
-
563
- /* Header styling */
564
- .app-header {
565
- display: flex;
566
- justify-content: space-between;
567
- align-items: center;
568
- padding: 20px 40px;
569
- background: #ffffff;
570
- border-bottom: 3px solid darkcyan;
571
- margin-bottom: 20px;
572
- }
573
-
574
- .header-logo-left, .header-logo-right {
575
- width: 120px;
576
- height: auto;
577
- }
578
-
579
- .header-logo-center {
580
- max-width: 400px;
581
- height: auto;
582
- }
583
-
584
- @media (max-width: 768px) {
585
- .app-header {
586
- flex-direction: column;
587
- padding: 15px 20px;
588
- }
589
- .header-logo-left, .header-logo-right {
590
- width: 80px;
591
- }
592
- .header-logo-center {
593
- max-width: 250px;
594
- margin: 10px 0;
595
- }
596
- }
597
-
598
- /* Gradient pill tabs styling */
599
- .nav-tabs {
600
- border: none !important;
601
- border-radius: 50px;
602
- padding: 6px 12px;
603
- margin: 20px auto 0;
604
- max-width: 100%;
605
- background: linear-gradient(135deg, #d4edeb 0%, #e8ddd0 50%, #d4edeb 100%);
606
- box-shadow: 0 4px 16px rgba(0,139,139,.12), inset 0 2px 4px rgba(255,255,255,.6);
607
- border: 1px solid rgba(0,139,139,.2);
608
- position: relative;
609
- overflow-x: auto;
610
- -webkit-overflow-scrolling: touch;
611
- display: flex;
612
- justify-content: center;
613
- align-items: center;
614
- flex-wrap: wrap;
615
- gap: 6px;
616
- }
617
-
618
- .nav-tabs::-webkit-scrollbar {
619
- height: 0;
620
- }
621
-
622
- .nav-tabs::before {
623
- content: '';
624
- position: absolute;
625
- inset: 0;
626
- pointer-events: none;
627
- border-radius: 50px;
628
- background: linear-gradient(135deg, rgba(255,255,255,.4), transparent);
629
- }
630
-
631
- .nav-tabs > li > a {
632
- color: darkcyan !important;
633
- border: none !important;
634
- border-radius: 50px !important;
635
- background: transparent !important;
636
- font-weight: 700;
637
- font-size: 14.5px;
638
- padding: 10px 22px;
639
- white-space: nowrap;
640
- letter-spacing: 0.2px;
641
- transition: all 0.2s ease;
642
- }
643
-
644
- .nav-tabs > li > a:hover {
645
- color: #006666 !important;
646
- background: rgba(255,255,255,.5) !important;
647
- transform: translateY(-1px);
648
- }
649
-
650
- .nav-tabs > li.active > a,
651
- .nav-tabs > li.active > a:focus,
652
- .nav-tabs > li.active > a:hover {
653
- background: linear-gradient(135deg, #008b8b 0%, #20b2aa 30%, #00ced1 50%, #20b2aa 70%, #008b8b 100%) !important;
654
- color: #fff !important;
655
- text-shadow: 0 1px 2px rgba(0,0,0,.2);
656
- box-shadow: 0 4px 16px rgba(0,139,139,.4), inset 0 2px 8px rgba(255,255,255,.4), inset 0 -2px 6px rgba(0,0,0,.2);
657
- border: 1px solid rgba(255,255,255,.3) !important;
658
- }
659
-
660
- .nav-tabs > li > a:focus {
661
- outline: 3px solid rgba(205,133,63,.6);
662
- outline-offset: 2px;
663
- }
664
-
665
- .tab-content {
666
- background: linear-gradient(135deg, rgba(255,255,255,.95), rgba(248,249,250,.95));
667
- border-radius: 20px;
668
- padding: 25px;
669
- margin-top: 14px;
670
- box-shadow: 0 15px 40px rgba(0,139,139,.1);
671
- backdrop-filter: blur(15px);
672
- border: 1px solid rgba(0,139,139,.1);
673
- position: relative;
674
- overflow: hidden;
675
- }
676
-
677
- .tab-content::before {
678
- content: '';
679
- position: absolute;
680
- left: 0;
681
- right: 0;
682
- top: 0;
683
- height: 4px;
684
- background: linear-gradient(90deg, darkcyan, peru, darkcyan);
685
- background-size: 200% 100%;
686
- animation: shimmer 3s linear infinite;
687
- }
688
-
689
- @keyframes shimmer {
690
- 0% { background-position: -200% 0; }
691
- 100% { background-position: 200% 0; }
692
- }
693
-
694
- #name {
695
- font-size: 10px;
696
- font-weight: 500;
697
- text-align: right;
698
- margin-bottom: 8px;
699
- color: #6C757D;
700
- letter-spacing: 0.5px;
701
- }
702
-
703
- h3 {
704
- color: black;
705
- font-weight: 600;
706
- margin-top: 25px;
707
- margin-bottom: 15px;
708
- padding-bottom: 8px;
709
- border-bottom: 2px solid #007BA7;
710
- }
711
-
712
- h4 {
713
- color: darkcyan;
714
- font-weight: 500;
715
- margin-top: 20px;
716
- margin-bottom: 12px;
717
- }
718
-
719
- h1 {
720
- color: #007BA7;
721
- font-weight: 700;
722
- margin-bottom: 20px;
723
- text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
724
- }
725
-
726
- label {
727
- font-weight: 500;
728
- color: peru;
729
- margin-bottom: 5px;
730
- }
731
-
732
- .plot-title {
733
- text-align: center;
734
- font-weight: 600;
735
- color: #2C3E50;
736
- margin-bottom: 10px;
737
- }
738
-
739
- .dataTables_wrapper .dataTables_length,
740
- .dataTables_wrapper .dataTables_filter,
741
- .dataTables_wrapper .dataTables_info,
742
- .dataTables_wrapper .dataTables_paginate {
743
- color: #2C3E50;
744
- }
745
-
746
- thead th {
747
- background-color: #F8F9FA;
748
- color: #2C3E50;
749
- font-weight: 600;
750
- text-align: center !important;
751
- padding: 10px !important;
752
- }
753
-
754
- .brand-teal { color: darkcyan; }
755
- .brand-bronze { color: peru; }
756
-
757
- /* Bat tracking upload box styling */
758
- .bat-tracking-box {
759
- background: linear-gradient(135deg, #e8f4f8 0%, #f0e6d3 100%);
760
- border: 2px dashed darkcyan;
761
- border-radius: 15px;
762
- padding: 20px;
763
- margin-top: 15px;
764
- }
765
-
766
- .merge-status-box {
767
- background: #f8f9fa;
768
- border-left: 4px solid darkcyan;
769
- padding: 15px;
770
- border-radius: 0 10px 10px 0;
771
- margin-top: 15px;
772
- }
773
-
774
- .merge-success {
775
- border-left-color: #28a745;
776
- background: #d4edda;
777
- }
778
-
779
- .merge-warning {
780
- border-left-color: #ffc107;
781
- background: #fff3cd;
782
- }
783
-
784
- .merge-error {
785
- border-left-color: #dc3545;
786
- background: #f8d7da;
787
- }
788
-
789
- /* Download section styling */
790
- .download-option-box {
791
- background: linear-gradient(135deg, #e8f4f8 0%, #f0e6d3 100%);
792
- border: 1px solid rgba(0,139,139,.2);
793
- border-radius: 12px;
794
- padding: 20px;
795
- margin-bottom: 15px;
796
- }
797
- "))
798
- ),
799
-
800
- # Header with three logos
801
- div(class = "app-header",
802
- tags$img(src = "https://i.imgur.com/7vx5Ci8.png", class = "header-logo-left", alt = "Logo Left"),
803
- tags$img(src = "https://i.imgur.com/c3zCSg6.png", class = "header-logo-center", alt = "Main Logo"),
804
- tags$img(src = "https://i.imgur.com/VbrN5WV.png", class = "header-logo-right", alt = "Logo Right")
805
- ),
806
-
807
- tabsetPanel(id = "main_tabs",
808
-
809
- # Upload & Process Tab
810
- tabPanel(
811
- "Upload & Process",
812
- fluidRow(
813
- column(6,
814
- h3("1. Upload TrackMan CSV or Parquet"),
815
- fileInput("file", "Choose CSV or Parquet File (max 5,000 rows)",
816
- accept = c(".csv", ".parquet")),
817
- fluidRow(
818
- column(3,
819
- checkboxInput("header", "Header", TRUE)
820
- ),
821
- column(3,
822
- radioButtons("sep", "Separator",
823
- choices = c(Comma = ",", Semicolon = ";", Tab = "\t"),
824
- selected = ",", inline = TRUE)
825
- ),
826
- column(3,
827
- radioButtons("quote", "Quote",
828
- choices = c(None = "", "Double Quote" = '"', "Single Quote" = "'"),
829
- selected = '"', inline = TRUE)
830
- ),
831
- column(3,
832
- radioButtons("date_format", "Date Output Format",
833
- choices = c("YYYY-MM-DD" = "yyyy", "M/D/YY" = "mdyy"),
834
- selected = "yyyy")
835
- )
836
- ),
837
- p(style = "color: #666; font-size: 12px;",
838
- "CSV options (Header, Separator, Quote) are ignored for Parquet files."),
839
- verbatimTextOutput("csv_status")
840
- ),
841
- column(6,
842
- div(class = "bat-tracking-box",
843
- h3("2. Upload Bat Tracking JSON (Optional)", style = "margin-top: 0;"),
844
- fileInput("json_file", "Choose Bat Tracking JSON File", accept = c(".json")),
845
- p(style = "color: #666; font-size: 12px;",
846
- "Upload the corresponding _battracking.json file to merge bat speed and attack angle data."),
847
- verbatimTextOutput("json_status"),
848
- uiOutput("merge_status_ui")
849
- )
850
- )
851
- ),
852
-
853
- hr(),
854
-
855
- fluidRow(
856
- column(8,
857
- h3("3. Columns to Remove"),
858
- p("Select which columns to remove from your dataset:"),
859
- checkboxGroupInput("columns_to_remove", "Remove These Columns:",
860
- choices = columns_to_remove,
861
- selected = columns_to_remove)
862
- ),
863
- column(4,
864
- h3("Quick Actions"),
865
- br(),
866
- actionButton("select_all_cols", "Select All", class = "btn-primary"),
867
- br(), br(),
868
- actionButton("deselect_all_cols", "Deselect All", class = "btn-default"),
869
- br(), br(),
870
- actionButton("select_spinaxis", "Select SpinAxis3d Columns", class = "btn-info"),
871
- br(), br(),
872
- h4("Processing Summary"),
873
- verbatimTextOutput("process_summary")
874
- )
875
- )
876
- ),
877
-
878
- # Bat Tracking Details Tab
879
- tabPanel(
880
- "Bat Tracking Data",
881
- fluidRow(
882
- column(12,
883
- h3("Bat Tracking Merge Details"),
884
- uiOutput("bat_tracking_details"),
885
- hr(),
886
- h4("Pitches with Bat Tracking Data"),
887
- DT::dataTableOutput("bat_tracking_table")
888
- )
889
- )
890
- ),
891
-
892
- # Preview Data Tab
893
- tabPanel(
894
- "Preview Data",
895
- fluidRow(
896
- column(12,
897
- h3("Data Preview"),
898
- DT::dataTableOutput("preview")
899
- )
900
- )
901
- ),
902
-
903
- # Pitch Movement Chart Tab
904
- tabPanel(
905
- "Pitch Movement Chart",
906
- fluidRow(
907
- column(3,
908
- selectInput("pitcher_select", "Select Pitcher:",
909
- choices = NULL, selected = NULL)
910
- ),
911
- column(3,
912
- h4("Selection Mode:"),
913
- radioButtons("selection_mode", "",
914
- choices = list("Single Click" = "single", "Drag Select" = "drag"),
915
- selected = "single", inline = TRUE)
916
- ),
917
- column(6,
918
- conditionalPanel(
919
- condition = "input.selection_mode == 'drag'",
920
- h4("Bulk Edit:"),
921
- fluidRow(
922
- column(8,
923
- selectInput("bulk_pitch_type", "Change all selected to:",
924
- choices = c("Fastball", "Sinker", "Cutter", "Slider",
925
- "Curveball", "ChangeUp", "Splitter", "Knuckleball", "Sweeper", "Other"),
926
- selected = "Fastball")
927
- ),
928
- column(4,
929
- br(),
930
- actionButton("apply_bulk_change", "Apply to Selected", class = "btn-success")
931
- )
932
- )
933
- )
934
- )
935
- ),
936
-
937
- fluidRow(
938
- column(8,
939
- h3("Interactive Pitch Movement Analysis"),
940
- plotOutput("movement_plot", height = "600px",
941
- click = "plot_click",
942
- brush = brushOpts(id = "plot_brush"),
943
- hover = hoverOpts(id = "plot_hover", delay = 100)),
944
-
945
- h4("Instructions:"),
946
- p(strong("Single Click Mode:"), "Click on any point to edit one pitch type at a time via popup modal."),
947
- p(strong("Drag Select Mode:"), "Click and drag to select multiple points, then use the dropdown to change them all at once."),
948
- conditionalPanel(
949
- condition = "input.selection_mode == 'drag'",
950
- div(style = "background-color: #f0f8ff; padding: 10px; border-radius: 5px; margin: 10px 0; border-left: 4px solid darkcyan;",
951
- h4("Selected Points:", style = "margin-top: 0; color: darkcyan;"),
952
- textOutput("selection_info")
953
- )
954
- ),
955
- verbatimTextOutput("hover_info"),
956
- verbatimTextOutput("click_info")
957
- ),
958
-
959
- column(4,
960
- h3("Pitch Metrics Summary"),
961
- DT::dataTableOutput("movement_stats")
962
- )
963
- )
964
- ),
965
-
966
- # Download Tab
967
- tabPanel(
968
- "Download",
969
- fluidRow(
970
- column(12,
971
- h3("Download Processed Data"),
972
-
973
- div(class = "download-option-box",
974
- fluidRow(
975
- column(5,
976
- textInput("download_filename", "File Name (without extension):",
977
- value = paste0("app_ready_COA_", Sys.Date()))
978
- ),
979
- column(3,
980
- radioButtons("download_format", "Export Format:",
981
- choices = c("CSV" = "csv", "Parquet" = "parquet"),
982
- selected = "csv", inline = TRUE)
983
- ),
984
- column(4,
985
- br(),
986
- downloadButton("downloadData", "Download", class = "btn-success btn-lg")
987
- )
988
- )
989
- ),
990
-
991
- br(),
992
- h4("Data Summary:"),
993
- verbatimTextOutput("data_summary")
994
- )
995
- )
996
- ),
997
- #Scrape Tab
998
- tabPanel(
999
- "Scraping",
1000
- fluidRow(
1001
- column(2,
1002
- h4("Data Source", style = "color: darkcyan; border-bottom: 2px solid darkcyan; padding-bottom: 6px;"),
1003
- radioButtons("scrape_source", NULL,
1004
- choices = c("TrackMan PBP" = "pbp",
1005
- "TrackMan Positional" = "pos",
1006
- "NCAA Scoreboard" = "ncaa"),
1007
- selected = "pbp")
1008
- ),
1009
-
1010
- column(4,
1011
- h3("Controls"),
1012
-
1013
- dateInput("start_date", "Start Date:", value = Sys.Date() - 1),
1014
- dateInput("end_date", "End Date:", value = Sys.Date() - 1),
1015
-
1016
- uiOutput("scrape_options"),
1017
-
1018
- br(),
1019
- actionButton("scrape_btn", "Scrape Data", class = "btn-primary"),
1020
- br(), br(),
1021
- downloadButton("download_scrape", "Download CSV"),
1022
- actionButton("upload_hf_btn", "Upload to HF Dataset", class = "btn-download")
1023
- ),
1024
-
1025
- column(6,
1026
- h3("Progress"),
1027
- verbatimTextOutput("scrape_status"),
1028
- hr(),
1029
- h3("Data Preview"),
1030
- DT::dataTableOutput("scrape_preview")
1031
- )
1032
- )
1033
- )
1034
- ),
1035
-
1036
- # Modal for editing pitch type
1037
- bsModal("pitchEditModal", "Edit Pitch Type", "triggerModal", size = "medium",
1038
- div(style = "padding: 20px;",
1039
- h4("Selected Pitch Details:", style = "color: darkcyan;"),
1040
- verbatimTextOutput("selected_pitch_info"),
1041
- br(),
1042
- selectInput("modal_new_pitch_type", "Change Pitch Type To:",
1043
- choices = c("Fastball", "Sinker", "Cutter", "Slider",
1044
- "Curveball", "ChangeUp", "Splitter", "Knuckleball", "Sweeper", "Other"),
1045
- selected = "Fastball"),
1046
- br(),
1047
- actionButton("update_pitch", "Update Pitch Type", class = "btn-primary btn-lg"),
1048
- actionButton("cancel_edit", "Cancel", class = "btn-default")
1049
- )
1050
- )
1051
- )
1052
-
1053
# Top-level UI: a single dynamic placeholder ("page") that the server renders.
ui <- fluidPage(uiOutput("page"))
1056
-
1057
- # Server
1058
- server <- function(input, output, session) {
1059
-
1060
# Session flags: the upload's file type (defaults to "csv") and whether the
# user has logged in (starts FALSE).
uploaded_file_type <- reactiveVal("csv")
logged_in <- reactiveVal(FALSE)
1062
-
1063
# Show the login screen until the session is logged in, then the app UI.
output$page <- renderUI({
  if (!logged_in()) {
    return(login_ui)
  }
  app_ui
})
1070
-
1071
# Login button: compare the submitted password with the configured PASSWORD.
#
# Fails closed: a missing/NULL/NA submission never matches, and an unset
# (empty) PASSWORD never authenticates. Previously an empty PASSWORD let an
# empty submission log in, and a NULL input made `if` error.
observeEvent(input$login, {
  submitted <- input$password

  password_ok <- is.character(submitted) &&
    length(submitted) == 1L &&
    !is.na(submitted) &&
    isTRUE(nzchar(PASSWORD)) &&
    identical(submitted, PASSWORD)

  if (password_ok) {
    logged_in(TRUE)
    output$wrong_pass <- renderText("")
  } else {
    output$wrong_pass <- renderText("Incorrect password, please try again.")
  }
})
1079
-
1080
-
1081
# Reactive state shared by the observers and outputs of this server function.
# Every value starts empty (NULL) unless noted.

# Uploaded data and its processed form
csv_data_raw <- reactiveVal(NULL)
processed_data <- reactiveVal(NULL)
plot_data <- reactiveVal(NULL)

# Bat tracking JSON and the outcome of merging it into the uploaded data
bat_tracking_parsed <- reactiveVal(NULL)
merge_result <- reactiveVal(NULL)

# Current chart selection: one clicked pitch / a set of brushed points
selected_pitch <- reactiveVal(NULL)
selected_points <- reactiveVal(NULL)

# Scraper state; the status message starts as "Ready."
scraped_data <- reactiveVal(NULL)
scrape_polling <- reactiveVal(FALSE)
scrape_status_msg <- reactiveVal("Ready.")
1093
-
1094
# Shortcut buttons for the "columns_to_remove" checkbox group.

# Select all: tick every entry of the global columns_to_remove vector.
observeEvent(input$select_all_cols, {
  updateCheckboxGroupInput(
    session,
    inputId = "columns_to_remove",
    selected = columns_to_remove
  )
})

# Deselect all: clear the selection.
observeEvent(input$deselect_all_cols, {
  updateCheckboxGroupInput(
    session,
    inputId = "columns_to_remove",
    selected = character(0)
  )
})

# SpinAxis only: tick just the entries whose name contains "SpinAxis3d".
observeEvent(input$select_spinaxis, {
  is_spinaxis <- grepl("SpinAxis3d", columns_to_remove)
  updateCheckboxGroupInput(
    session,
    inputId = "columns_to_remove",
    selected = columns_to_remove[is_spinaxis]
  )
})
1108
-
1109
# Shared helper: turn a raw data frame into the processed data set.
#
# Steps, in order:
#   1. if parsed bat tracking data exists, merge it in and record the merge
#      result in merge_result;
#   2. drop the user-selected columns that are present;
#   3. remove duplicate rows;
#   4. store the result in both processed_data and plot_data.
#
# df: the raw data frame to process.
# Returns the processed data frame.
run_processing <- function(df) {
  bat <- bat_tracking_parsed()
  if (!is.null(bat) && !is.null(bat$data)) {
    merged <- merge_with_bat_tracking(df, bat$data)
    merge_result(merged)
    df <- merged$data
  }

  requested_drops <- input$columns_to_remove %||% character(0)
  cleaned <- df
  if (length(requested_drops) > 0) {
    present_drops <- intersect(names(df), requested_drops)
    if (length(present_drops) > 0) {
      cleaned <- select(cleaned, -all_of(present_drops))
    }
  }
  cleaned <- distinct(cleaned)

  processed_data(cleaned)
  plot_data(cleaned)

  cleaned
}
1135
-
1136
# When the date format option changes, re-read the uploaded file, convert its
# date columns with the new format and re-run processing. Does nothing until a
# file has been uploaded; skipped on initialisation (ignoreInit).
observeEvent(input$date_format, {
  req(input$file)

  tryCatch(
    {
      upload <- input$file
      raw <- read_uploaded_file(
        upload$datapath, upload$name,
        input$header, input$sep, input$quote
      )
      raw <- convert_date_columns(raw, input$date_format)

      csv_data_raw(raw)
      run_processing(raw)

      format_label <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"
      showNotification(
        paste("Date format updated to:", format_label),
        type = "message", duration = 3
      )
    },
    error = function(e) {
      showNotification(
        paste("Error updating date format:", e$message),
        type = "error"
      )
    }
  )
}, ignoreInit = TRUE)
1157
-
1158
# Process an uploaded data file (CSV or Parquet): record its extension, read
# it, convert date columns, run processing, then refresh the pitcher dropdown
# and the suggested download file name. Any error is shown as a notification.
observeEvent(input$file, {
  req(input$file)

  tryCatch(
    {
      upload <- input$file
      ext <- tolower(tools::file_ext(upload$name))
      uploaded_file_type(ext)

      raw <- read_uploaded_file(
        upload$datapath, upload$name,
        input$header, input$sep, input$quote
      )
      raw <- convert_date_columns(raw, input$date_format)
      csv_data_raw(raw)

      cleaned <- run_processing(raw)

      # Offer the non-missing pitchers, sorted, and preselect the first one
      if ("Pitcher" %in% names(cleaned)) {
        pitchers <- cleaned$Pitcher
        pitcher_choices <- sort(unique(pitchers[!is.na(pitchers)]))
        updateSelectInput(
          session, "pitcher_select",
          choices = pitcher_choices,
          selected = pitcher_choices[1]
        )
      }

      # Suggest "<uploaded name>_processed" as the download file name
      suggested_name <- paste0(tools::file_path_sans_ext(upload$name), "_processed")
      updateTextInput(session, "download_filename", value = suggested_name)

      format_label <- if (ext == "parquet") "Parquet" else "CSV"
      showNotification(
        paste0(format_label, " loaded: ", nrow(raw), " rows x ", ncol(raw), " columns"),
        type = "message", duration = 3
      )
    },
    error = function(e) {
      showNotification(paste("Error processing file:", e$message), type = "error")
    }
  )
})
1194
-
1195
# Process an uploaded bat tracking JSON file. The parsed result is always
# stored; if a data file is already loaded and the JSON yielded data, the raw
# data is re-processed (merge, column removal, de-duplication) and the merge
# message is shown.
observeEvent(input$json_file, {
  req(input$json_file)

  tryCatch(
    {
      parsed <- parse_bat_tracking_json(input$json_file$datapath)
      bat_tracking_parsed(parsed)

      raw <- csv_data_raw()
      can_merge <- !is.null(raw) && parsed$success && !is.null(parsed$data)

      if (can_merge) {
        # run_processing() picks up the parsed data stored just above, merges
        # it, records merge_result and updates processed_data / plot_data.
        run_processing(raw)
        showNotification(merge_result()$message, type = "message", duration = 5)
      }
    },
    error = function(e) {
      showNotification(paste("Error processing JSON:", e$message), type = "error")
    }
  )
})
1229
-
1230
# Status text for the uploaded data file: a prompt before upload, a
# placeholder while no raw data is stored, then a short summary.
output$csv_status <- renderText({
  if (is.null(input$file)) {
    return("No file uploaded yet. Accepts CSV or Parquet (max 5,000 rows).")
  }

  raw <- csv_data_raw()
  if (is.null(raw)) {
    return("Processing file...")
  }

  format_label <- if (uploaded_file_type() == "parquet") "Parquet" else "CSV"
  # First distinct GameID, when the column exists
  game_id <- if ("GameID" %in% names(raw)) unique(raw$GameID)[1] else "Unknown"
  date_fmt <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"

  headline <- paste0("\u2713 ", format_label, " loaded successfully!")
  game_line <- paste(" Game ID:", game_id)
  rows_line <- paste(" Rows:", nrow(raw))
  cols_line <- paste(" Columns:", ncol(raw))
  date_line <- paste("\u2713 Date format:", date_fmt)

  paste(headline, game_line, rows_line, cols_line, date_line, sep = "\n")
})
1255
-
1256
# Status text for the bat tracking JSON upload: a prompt before upload, a
# placeholder until a parse result is stored, the parser's message on
# failure, otherwise a short summary.
output$json_status <- renderText({
  if (is.null(input$json_file)) {
    return("No JSON file uploaded yet.")
  }

  parsed <- bat_tracking_parsed()
  if (is.null(parsed)) {
    return("Processing JSON...")
  }
  if (!parsed$success) {
    return(paste("\u2717", parsed$message))
  }

  game_line <- paste(" Game Reference:", parsed$game_reference)
  plays_line <- paste(" Plays found:", parsed$plays_count %||% 0)

  paste("\u2713 JSON parsed successfully!", game_line, plays_line, sep = "\n")
})
1278
-
1279
# Merge status panel. Renders nothing until both a data file and a bat
# tracking JSON are loaded, then one of: parse error, game-id mismatch,
# "no data to merge", merge success, or "no matches found".
output$merge_status_ui <- renderUI({
  result <- merge_result()
  parsed <- bat_tracking_parsed()
  csv <- csv_data_raw()

  if (is.null(parsed) || is.null(csv)) {
    return(NULL)
  }

  if (!parsed$success) {
    return(div(
      class = "merge-status-box merge-error",
      h4("Merge Status", style = "margin-top: 0; color: #721c24;"),
      p(parsed$message)
    ))
  }

  # First distinct GameID of the data file vs. the JSON's game reference.
  csv_game <- if ("GameID" %in% names(csv)) unique(csv$GameID)[1] else NULL
  json_game <- parsed$game_reference

  # A mismatch is reported only when both ids are known and differ. isTRUE()
  # makes NULL / NA / zero-length ids count as "cannot tell" rather than
  # crashing `if` with an NA condition.
  game_mismatch <- isTRUE(csv_game != json_game)

  if (is.null(parsed$data) || is.null(result)) {
    if (game_mismatch) {
      return(div(
        class = "merge-status-box merge-warning",
        h4("\u26A0 Game ID Mismatch", style = "margin-top: 0; color: #856404;"),
        p(paste("CSV Game:", csv_game)),
        p(paste("JSON Game:", json_game)),
        p("Files may be from different games!")
      ))
    }

    return(div(
      class = "merge-status-box merge-warning",
      h4("No Data to Merge", style = "margin-top: 0; color: #856404;"),
      p(parsed$message)
    ))
  }

  if (result$matched > 0) {
    div(
      class = "merge-status-box merge-success",
      h4("\u2713 Merge Successful!", style = "margin-top: 0; color: #155724;"),
      p(paste("Matched:", result$matched, "of", result$total_bat, "bat tracking records")),
      if (game_mismatch) {
        p(style = "color: #856404;", "\u26A0 Note: Game IDs differ but PitchUIDs matched")
      }
    )
  } else {
    div(
      class = "merge-status-box merge-warning",
      h4("\u26A0 No Matches Found", style = "margin-top: 0; color: #856404;"),
      p(paste("0 of", result$total_bat, "bat tracking records matched")),
      if (game_mismatch) {
        p(paste("Game ID mismatch: CSV =", csv_game, ", JSON =", json_game))
      }
    )
  }
})
1333
-
1334
# Bat tracking overview: a hint when no JSON is loaded, an alert for a failed
# or empty parse, otherwise three summary tiles (game reference, swings
# tracked, swings matched to the data file).
output$bat_tracking_details <- renderUI({
  parsed <- bat_tracking_parsed()
  result <- merge_result()

  if (is.null(parsed)) {
    return(div(
      p("No bat tracking JSON file uploaded."),
      p("Upload a _battracking.json file in the 'Upload & Process' tab to see bat tracking data here.")
    ))
  }

  if (!parsed$success) {
    return(div(class = "alert alert-danger", parsed$message))
  }

  if (is.null(parsed$data)) {
    return(div(
      class = "alert alert-warning",
      h4("Empty Bat Tracking File"),
      p(parsed$message),
      p("The JSON file was valid but contained no swing data in the Plays array.")
    ))
  }

  # One "well" tile occupying a third of the row
  summary_tile <- function(title, body) {
    div(class = "col-md-4", div(class = "well", h4(title), body))
  }

  matched_label <- if (!is.null(result)) result$matched else "N/A"

  div(
    div(
      class = "row",
      summary_tile("Game Reference", p(parsed$game_reference)),
      summary_tile(
        "Total Swings Tracked",
        p(style = "font-size: 24px; font-weight: bold; color: darkcyan;", parsed$plays_count)
      ),
      summary_tile(
        "Matched to CSV",
        p(style = "font-size: 24px; font-weight: bold; color: #28a745;", matched_label)
      )
    )
  )
})
1382
-
1383
# Table of the pitches that carry sensor bat speed. Renders nothing when no
# data is processed, the BatSpeed_Sensor column is absent, or no row has a
# value for it.
output$bat_tracking_table <- DT::renderDataTable({
  df <- processed_data()

  if (is.null(df) || !("BatSpeed_Sensor" %in% names(df))) {
    return(NULL)
  }

  # Columns to show, in this order, keeping only those that exist
  display_cols <- c(
    "PitchNo", "Time", "Pitcher", "Batter", "TaggedPitchType", "PitchCall",
    "RelSpeed", "ExitSpeed", "Angle",
    "BatSpeed", "BatSpeed_Sensor",
    "VerticalAttackAngle", "VerticalAttackAngle_Sensor",
    "HorizontalAttackAngle", "HorizontalAttackAngle_Sensor"
  )
  bat_rows <- df %>%
    filter(!is.na(BatSpeed_Sensor)) %>%
    select(any_of(display_cols))

  if (nrow(bat_rows) == 0) {
    return(NULL)
  }

  # Columns displayed with one decimal place, when present
  one_decimal_cols <- intersect(
    names(bat_rows),
    c(
      "BatSpeed_Sensor", "VerticalAttackAngle_Sensor",
      "HorizontalAttackAngle_Sensor", "RelSpeed",
      "ExitSpeed", "Angle"
    )
  )

  bat_table <- DT::datatable(
    bat_rows,
    options = list(scrollX = TRUE, pageLength = 10),
    rownames = FALSE
  )
  DT::formatRound(bat_table, columns = one_decimal_cols, digits = 1)
})
1418
-
1419
# Processing summary text: column/row counts before and after processing,
# how many selected columns were removed, bat tracking merge status and the
# active date format.
output$process_summary <- renderText({
  if (is.null(input$file)) {
    return("No file uploaded yet.")
  }

  processed <- processed_data()
  if (is.null(processed)) {
    return("Processing...")
  }

  original <- csv_data_raw()
  merge_info <- merge_result()

  format_label <- if (uploaded_file_type() == "parquet") "Parquet" else "CSV"

  # Selected columns that actually existed in the uploaded data
  requested_drops <- input$columns_to_remove %||% character(0)
  removed_cols <- intersect(requested_drops, names(original))
  removed_line <- paste("\u2713 Removed columns:", length(removed_cols))

  bat_line <- if (!is.null(merge_info) && merge_info$matched > 0) {
    paste("\u2713 Bat tracking merged:", merge_info$matched, "pitches")
  } else if (!is.null(bat_tracking_parsed())) {
    "\u26A0 Bat tracking: No matches"
  } else {
    "\u25CB Bat tracking: Not uploaded"
  }

  date_fmt <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"

  paste(
    paste0("\u2713 ", format_label, " file processed successfully!"),
    paste("\u2713 Original columns:", ncol(original)),
    paste("\u2713 Final columns:", ncol(processed)),
    paste("\u2713 Rows processed:", nrow(processed)),
    removed_line,
    bat_line,
    "\u2713 Duplicates removed",
    paste("\u2713 Date format:", date_fmt),
    sep = "\n"
  )
})
1465
-
1466
# Preview of the processed data: horizontally scrollable, 10 rows per page,
# with per-column filters on top. Waits until processed data exists.
output$preview <- DT::renderDataTable({
  processed <- processed_data()
  req(processed)

  table_options <- list(scrollX = TRUE, pageLength = 10)
  DT::datatable(processed, options = table_options, filter = "top")
})
1474
-
1475
# Movement plot: horizontal vs. induced vertical break for the selected
# pitcher, coloured by tagged pitch type. Rows with a missing pitch type,
# break or release speed, and rows tagged "Other", are excluded.
output$movement_plot <- renderPlot({
  req(plot_data(), input$pitcher_select)

  pitcher_data <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(
      !is.na(TaggedPitchType), TaggedPitchType != "Other",
      !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed)
    ) %>%
    mutate(pitch_id = row_number())

  if (nrow(pitcher_data) == 0) {
    plot.new()
    text(0.5, 0.5, "No data available for selected pitcher", cex = 1.5)
    return()
  }

  # Colour lookup with a light-gray fallback for pitch types that have no
  # entry in pitch_colors. The same lookup feeds the points and the legend so
  # the two always agree (the legend previously received NA colours).
  fallback_color <- "#D3D3D3"
  colors_for <- function(pitch_types) {
    cols <- pitch_colors[pitch_types]
    cols[is.na(cols)] <- fallback_color
    cols
  }

  pitcher_data$color <- colors_for(pitcher_data$TaggedPitchType)

  # Wide right margin and xpd = TRUE leave room for the legend drawn outside
  # the plot region.
  par(mar = c(5, 5, 4, 8), xpd = TRUE)
  plot(
    pitcher_data$HorzBreak, pitcher_data$InducedVertBreak,
    col = pitcher_data$color,
    pch = 19, cex = 1.5,
    xlim = c(-25, 25), ylim = c(-25, 25),
    xlab = "Horizontal Break (inches)",
    ylab = "Induced Vertical Break (inches)",
    main = paste("Pitch Movement Chart -", input$pitcher_select)
  )

  grid(nx = NULL, ny = NULL, col = "lightgray", lty = 1, lwd = 0.5)
  abline(h = 0, col = "gray", lty = 2, lwd = 1)
  abline(v = 0, col = "gray", lty = 2, lwd = 1)

  # Reference circles at 6, 12, 18 and 24 inches of total break
  theta <- seq(0, 2 * pi, length.out = 100)
  for (r in c(6, 12, 18, 24)) {
    lines(r * cos(theta), r * sin(theta), col = "lightgray", lty = 3)
  }

  # Ring the currently brushed points in red (drag mode only)
  if (input$selection_mode == "drag" && !is.null(selected_points())) {
    sel_points <- selected_points()
    points(
      sel_points$HorzBreak, sel_points$InducedVertBreak,
      pch = 21, cex = 2, col = "red", lwd = 3
    )
  }

  unique_pitches <- unique(pitcher_data$TaggedPitchType)
  legend(
    "topright",
    inset = c(-0.15, 0),
    legend = unique_pitches,
    col = colors_for(unique_pitches),
    pch = 19,
    cex = 0.8,
    title = "Pitch Type"
  )
})
1528
-
1529
# Plot clicks (single-click mode only): find the plotted pitch nearest to the
# click and, if it lies within 2 plot units, remember it in selected_pitch and
# open the "Edit Pitch Type" dialog.
observeEvent(input$plot_click, {
  req(plot_data(), input$pitcher_select, input$plot_click)

  if (input$selection_mode != "single") return()

  # Same row filter as the movement plot
  candidates <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(
      !is.na(TaggedPitchType), TaggedPitchType != "Other",
      !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed)
    ) %>%
    mutate(pitch_id = row_number())

  if (nrow(candidates) == 0) return()

  click <- input$plot_click
  dx <- candidates$HorzBreak - click$x
  dy <- candidates$InducedVertBreak - click$y
  distances <- sqrt(dx^2 + dy^2)
  nearest <- which.min(distances)

  # Ignore clicks that are not close to any point
  if (!(min(distances) <= 2)) return()

  clicked_pitch <- candidates[nearest, ]

  # Position of the first row, among all of this pitcher's rows, whose
  # movement and velocity equal the clicked pitch's
  pitcher_rows <- plot_data() %>% filter(Pitcher == input$pitcher_select)
  same_pitch <- pitcher_rows$HorzBreak == clicked_pitch$HorzBreak &
    pitcher_rows$InducedVertBreak == clicked_pitch$InducedVertBreak &
    pitcher_rows$RelSpeed == clicked_pitch$RelSpeed
  original_row <- which(same_pitch)[1]

  selected_pitch(list(
    pitcher = input$pitcher_select,
    row_in_pitcher_data = original_row,
    data = clicked_pitch,
    original_type = clicked_pitch$TaggedPitchType
  ))

  updateSelectInput(
    session, "modal_new_pitch_type",
    selected = clicked_pitch$TaggedPitchType
  )

  pitch_type_choices <- c(
    "Fastball", "Sinker", "Cutter", "Slider",
    "Curveball", "ChangeUp", "Splitter", "Knuckleball", "Sweeper", "Other"
  )

  dialog_body <- div(
    style = "padding: 20px;",
    h4("Selected Pitch Details:", style = "color: darkcyan;"),
    verbatimTextOutput("selected_pitch_info"),
    br(),
    selectInput(
      "modal_new_pitch_type", "Change Pitch Type To:",
      choices = pitch_type_choices,
      selected = clicked_pitch$TaggedPitchType
    ),
    br(),
    actionButton("update_pitch", "Update Pitch Type", class = "btn-primary btn-lg"),
    actionButton("cancel_edit", "Cancel", class = "btn-default")
  )

  showModal(modalDialog(
    title = "Edit Pitch Type",
    dialog_body,
    footer = NULL,
    size = "m",
    easyClose = TRUE
  ))
})
1589
-
1590
# Brush selection (drag mode only): store the plotted pitches that fall
# inside the brushed rectangle, or clear the selection when none do.
observeEvent(input$plot_brush, {
  req(plot_data(), input$pitcher_select, input$plot_brush)

  if (input$selection_mode != "drag") return()

  # Same row filter as the movement plot
  candidates <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(
      !is.na(TaggedPitchType), TaggedPitchType != "Other",
      !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed)
    )

  if (nrow(candidates) == 0) return()

  box <- input$plot_brush
  inside_box <- candidates %>%
    filter(
      HorzBreak >= box$xmin, HorzBreak <= box$xmax,
      InducedVertBreak >= box$ymin, InducedVertBreak <= box$ymax
    )

  selected_points(if (nrow(inside_box) > 0) inside_box else NULL)
})
1616
-
1617
# Apply bulk change: retag every brushed pitch of the selected pitcher with
# the pitch type chosen in the bulk-edit dropdown, then clear the selection.
#
# A row is retagged when it belongs to the selected pitcher and its
# HorzBreak, InducedVertBreak and RelSpeed are each within 0.01 of a brushed
# point. Rows with missing values are never treated as a match: the previous
# ifelse()-based update returned NA for an NA condition and so overwrote the
# pitch type of the pitcher's rows whose movement fields were all missing.
observeEvent(input$apply_bulk_change, {
  req(selected_points(), input$bulk_pitch_type)

  sel_points <- selected_points()

  if (nrow(sel_points) == 0) {
    showNotification("No points selected", type = "warning")
    return()
  }

  current_data <- plot_data()
  same_pitcher <- current_data$Pitcher == input$pitcher_select

  # Accumulate one logical mask over all brushed points
  to_update <- rep(FALSE, nrow(current_data))
  for (i in seq_len(nrow(sel_points))) {
    hit <- same_pitcher &
      abs(current_data$HorzBreak - sel_points$HorzBreak[i]) < 0.01 &
      abs(current_data$InducedVertBreak - sel_points$InducedVertBreak[i]) < 0.01 &
      abs(current_data$RelSpeed - sel_points$RelSpeed[i]) < 0.01
    # `%in% TRUE` maps NA comparisons to FALSE
    to_update <- to_update | (hit %in% TRUE)
  }

  current_data$TaggedPitchType[to_update] <- input$bulk_pitch_type

  plot_data(current_data)
  processed_data(current_data)
  selected_points(NULL)

  showNotification(
    paste("Updated", nrow(sel_points), "pitches to", input$bulk_pitch_type),
    type = "message", duration = 3
  )
})
1652
-
1653
# Drag-mode selection summary: number of brushed points and a per-type count,
# or a hint when nothing is selected.
output$selection_info <- renderText({
  has_selection <- input$selection_mode == "drag" && !is.null(selected_points())
  if (!has_selection) {
    return("No points selected. Click and drag to select multiple pitches.")
  }

  sel_points <- selected_points()
  pitch_counts <- table(sel_points$TaggedPitchType)
  breakdown <- paste(names(pitch_counts), "(", pitch_counts, ")", collapse = ", ")

  paste(nrow(sel_points), "points selected:", breakdown)
})
1664
-
1665
# Hover info: describes the plotted pitch nearest to the cursor when it lies
# within 2 plot units; otherwise an empty string.
#
# The details are joined with " | " exactly once between fields. The earlier
# version left a dangling separator after "Hovering over:", a trailing one
# when no bat speed was available, and a doubled one when it was.
output$hover_info <- renderText({
  req(input$plot_hover, plot_data(), input$pitcher_select)

  # Same row filter as the movement plot
  pitcher_data <- plot_data() %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(
      !is.na(TaggedPitchType), TaggedPitchType != "Other",
      !is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(RelSpeed)
    )

  if (nrow(pitcher_data) == 0) return("")

  hover_x <- input$plot_hover$x
  hover_y <- input$plot_hover$y

  distances <- sqrt(
    (pitcher_data$HorzBreak - hover_x)^2 +
      (pitcher_data$InducedVertBreak - hover_y)^2
  )

  if (!(min(distances) <= 2)) return("")

  hover_pitch <- pitcher_data[which.min(distances), ]

  details <- c(
    paste("Type:", hover_pitch$TaggedPitchType),
    paste("Velocity:", round(hover_pitch$RelSpeed, 1), "mph"),
    paste("HB:", round(hover_pitch$HorzBreak, 1), "in"),
    paste("IVB:", round(hover_pitch$InducedVertBreak, 1), "in")
  )

  # Bat speed is appended only when the sensor column exists and has a value
  has_bat_speed <- "BatSpeed_Sensor" %in% names(hover_pitch) &&
    !is.na(hover_pitch$BatSpeed_Sensor)
  if (has_bat_speed) {
    details <- c(
      details,
      paste("Bat Speed:", round(hover_pitch$BatSpeed_Sensor, 1), "mph")
    )
  }

  paste("Hovering over:", paste(details, collapse = " | "))
})
1702
-
1703
# Movement stats table: per-pitch-type summary (count, usage, velocity,
# break, spin, bat speed, zone% and whiff%) for the selected pitcher.
#
# Fix: the per-row indicators were built with
# ifelse("<col>" %in% names(.), <col>, <fallback>). ifelse() returns a value
# shaped like its test, here a single logical, so only the first element
# survived and mutate() recycled it to every row, making Zone% and Whiff%
# wrong for every group. A plain if/else on the column's presence keeps the
# full per-row vector.
output$movement_stats <- DT::renderDataTable({
  req(plot_data(), input$pitcher_select)

  data <- plot_data()

  # Optional columns: use them when present, otherwise derive or omit
  has_zone_col <- "StrikeZoneIndicator" %in% names(data)
  has_whiff_col <- "WhiffIndicator" %in% names(data)
  has_chase_col <- "Chaseindicator" %in% names(data)
  has_spin <- "SpinRate" %in% names(data)
  has_bat_speed <- "BatSpeed_Sensor" %in% names(data)

  swing_calls <- c("StrikeSwinging", "FoulBallNotFieldable", "FoulBall", "InPlay")

  movement_stats <- data %>%
    filter(Pitcher == input$pitcher_select) %>%
    filter(!is.na(HorzBreak), !is.na(InducedVertBreak), !is.na(TaggedPitchType)) %>%
    mutate(
      # Collapse tagging variants into one label per pitch family
      pitch_group = case_when(
        TaggedPitchType %in% c("Fastball", "FourSeamFastBall", "FourSeamFastB", "Four-Seam", "4-Seam") ~ "Fastball",
        TaggedPitchType %in% c("OneSeamFastBall", "TwoSeamFastBall", "Sinker", "Two-Seam", "One-Seam") ~ "Sinker",
        TaggedPitchType %in% c("ChangeUp", "Changeup") ~ "Changeup",
        TRUE ~ TaggedPitchType
      ),
      # Fallback zone: |PlateLocSide| <= 0.95 and PlateLocHeight in [1.6, 3.5]
      in_zone = if (has_zone_col) {
        StrikeZoneIndicator
      } else {
        ifelse(
          !is.na(PlateLocSide) & !is.na(PlateLocHeight) &
            PlateLocSide >= -0.95 & PlateLocSide <= 0.95 &
            PlateLocHeight >= 1.6 & PlateLocHeight <= 3.5,
          1, 0
        )
      },
      is_whiff = if (has_whiff_col) {
        WhiffIndicator
      } else {
        ifelse(!is.na(PitchCall) & PitchCall == "StrikeSwinging", 1, 0)
      },
      # Swing at a pitch outside the fallback zone; computed per row but not
      # part of the summary table below
      chase = if (has_chase_col) {
        Chaseindicator
      } else {
        ifelse(
          !is.na(PitchCall) & !is.na(PlateLocSide) & !is.na(PlateLocHeight) &
            PitchCall %in% swing_calls &
            (PlateLocSide < -0.95 | PlateLocSide > 0.95 |
              PlateLocHeight < 1.6 | PlateLocHeight > 3.5),
          1, 0
        )
      }
    )

  total_pitches <- nrow(movement_stats)

  summary_stats <- movement_stats %>%
    group_by(`Pitch Type` = pitch_group) %>%
    summarise(
      Count = n(),
      `Usage%` = sprintf("%.1f%%", (n() / total_pitches) * 100),
      `Avg Velo` = sprintf("%.1f", mean(RelSpeed, na.rm = TRUE)),
      `Max Velo` = sprintf("%.1f", max(RelSpeed, na.rm = TRUE)),
      `Avg IVB` = sprintf("%.1f", mean(InducedVertBreak, na.rm = TRUE)),
      `Avg HB` = sprintf("%.1f", mean(HorzBreak, na.rm = TRUE)),
      `Avg Spin` = if (has_spin) {
        sprintf("%.0f", mean(SpinRate, na.rm = TRUE))
      } else {
        "\u2014"
      },
      `Avg Bat Speed` = if (has_bat_speed) {
        bat_vals <- BatSpeed_Sensor[!is.na(BatSpeed_Sensor)]
        if (length(bat_vals) > 0) sprintf("%.1f", mean(bat_vals)) else "\u2014"
      } else {
        "\u2014"
      },
      `Zone%` = sprintf("%.1f%%", round(mean(in_zone, na.rm = TRUE) * 100, 1)),
      `Whiff%` = sprintf("%.1f%%", round(mean(is_whiff, na.rm = TRUE) * 100, 1)),
      .groups = "drop"
    ) %>%
    arrange(desc(Count))

  DT::datatable(
    summary_stats,
    options = list(pageLength = 15, dom = "t", scrollX = TRUE),
    rownames = FALSE
  ) %>%
    DT::formatStyle(columns = names(summary_stats), fontSize = "12px")
})
1762
-
1763
# Details of the clicked pitch shown in the edit dialog, one field per line.
# Optional fields appear only when their column exists and has a value.
output$selected_pitch_info <- renderText({
  pitch_info <- selected_pitch()
  if (is.null(pitch_info)) {
    return("No pitch selected")
  }

  pitch <- pitch_info$data

  # TRUE when the column is present and its value is not missing
  has_value <- function(col) {
    col %in% names(pitch) && !is.na(pitch[[col]])
  }

  info_lines <- paste("Pitcher:", pitch_info$pitcher)

  if (has_value("PitchNo")) {
    info_lines <- c(info_lines, paste("Pitch No:", pitch$PitchNo))
  }
  if (has_value("Batter")) {
    info_lines <- c(info_lines, paste("Batter:", pitch$Batter))
  }

  info_lines <- c(
    info_lines,
    paste("Current Type:", pitch$TaggedPitchType),
    paste("Velocity:", round(pitch$RelSpeed, 1), "mph"),
    paste("Horizontal Break:", round(pitch$HorzBreak, 1), "inches"),
    paste("Induced Vertical Break:", round(pitch$InducedVertBreak, 1), "inches")
  )

  if (has_value("SpinRate")) {
    info_lines <- c(info_lines, paste("Spin Rate:", round(pitch$SpinRate, 0), "rpm"))
  }

  if (has_value("BatSpeed_Sensor")) {
    info_lines <- c(
      info_lines,
      paste("Bat Speed:", round(pitch$BatSpeed_Sensor, 1), "mph"),
      paste("Vertical Attack Angle:", round(pitch$VerticalAttackAngle_Sensor, 1), "\u00B0"),
      paste("Horizontal Attack Angle:", round(pitch$HorizontalAttackAngle_Sensor, 1), "\u00B0")
    )
  }

  if (has_value("Date")) {
    info_lines <- c(info_lines, paste("Date:", pitch$Date))
  }

  paste(info_lines, collapse = "\n")
})
1799
-
1800
# Update pitch type: retag the clicked pitch with the type chosen in the edit
# dialog, close the dialog and clear the selection.
#
# The pitch is located by pitcher plus HorzBreak, InducedVertBreak and
# RelSpeed, each within 0.01. Rows with missing values are never treated as a
# match: the previous ifelse()-based update returned NA for an NA condition
# and so overwrote the pitch type of the pitcher's rows whose movement fields
# were all missing.
observeEvent(input$update_pitch, {
  pitch_info <- selected_pitch()
  if (is.null(pitch_info)) return()

  current_data <- plot_data()
  target <- pitch_info$data

  is_target <- current_data$Pitcher == pitch_info$pitcher &
    abs(current_data$HorzBreak - target$HorzBreak) < 0.01 &
    abs(current_data$InducedVertBreak - target$InducedVertBreak) < 0.01 &
    abs(current_data$RelSpeed - target$RelSpeed) < 0.01
  # `%in% TRUE` maps NA comparisons to FALSE
  is_target <- is_target %in% TRUE

  current_data$TaggedPitchType[is_target] <- input$modal_new_pitch_type

  plot_data(current_data)
  processed_data(current_data)

  removeModal()

  showNotification(
    paste("Updated pitch from", pitch_info$original_type, "to", input$modal_new_pitch_type),
    type = "message", duration = 3
  )

  selected_pitch(NULL)
})
1835
-
1836
# Cancel button of the edit dialog: close the dialog and forget the clicked
# pitch without changing any data.
observeEvent(input$cancel_edit, {
  removeModal()
  selected_pitch(NULL)
})
1841
-
1842
# Text under the chart: the type and position of the currently selected
# pitch, or a hint when no pitch is selected.
output$click_info <- renderText({
  pitch_info <- selected_pitch()
  if (is.null(pitch_info)) {
    return("No point selected yet. Click on a point in the chart above to edit its pitch type.")
  }

  hb <- round(pitch_info$data$HorzBreak, 1)
  ivb <- round(pitch_info$data$InducedVertBreak, 1)

  paste(
    "Last selected pitch:", pitch_info$original_type,
    "| Position: (", hb, ",", ivb, ")"
  )
})
1853
-
1854
# Summary shown on the Download tab: size of the processed data, date range,
# pitchers, pitch types, bat tracking status, source format and date format.
output$data_summary <- renderText({
  req(processed_data())
  df <- processed_data()
  merge_info <- merge_result()

  bat_line <- if (!is.null(merge_info) && merge_info$matched > 0) {
    paste("Bat tracking data:", merge_info$matched, "pitches with swing metrics")
  } else {
    "Bat tracking data: None"
  }

  # NOTE(review): as.Date() is applied to the Date column as stored; confirm
  # convert_date_columns() leaves it in a form as.Date() parses correctly
  # when the M/D/YY option is active.
  date_range <- if ("Date" %in% names(df) && !all(is.na(df$Date))) {
    dates <- as.Date(df$Date)
    paste(min(dates, na.rm = TRUE), "to", max(dates, na.rm = TRUE))
  } else {
    "Date column not available"
  }

  pitcher_count <- if ("Pitcher" %in% names(df)) {
    length(unique(df$Pitcher[!is.na(df$Pitcher)]))
  } else {
    "Pitcher column not available"
  }

  pitch_types <- if ("TaggedPitchType" %in% names(df)) {
    paste(sort(unique(df$TaggedPitchType[!is.na(df$TaggedPitchType)])), collapse = ", ")
  } else {
    "TaggedPitchType column not available"
  }

  date_fmt <- if (input$date_format == "mdyy") "M/D/YY" else "YYYY-MM-DD"

  paste(
    paste("Total rows:", nrow(df)),
    paste("Total columns:", ncol(df)),
    paste("Date range:", date_range),
    paste("Unique pitchers:", pitcher_count),
    paste("Pitch types:", pitch_types),
    bat_line,
    paste("Source format:", toupper(uploaded_file_type())),
    paste("Date format:", date_fmt),
    sep = "\n"
  )
})
1895
-
1896
# ── Download handler: CSV or Parquet with custom filename ──
#
# filename(): builds "<base>.<ext>" from the user-supplied name.
#   * The name is trimmed BEFORE sanitising. Sanitising first turns blanks
#     into underscores, so a whitespace-only name would never be seen as
#     empty and the default name would never be used.
#   * A missing/NULL/blank name falls back to "app_ready_COA_<today>".
#   * A missing/NULL format falls back to "csv", matching what content()
#     writes for anything that is not "parquet".
# content(file): writes processed_data() as Parquet when the selected format
#   is "parquet", otherwise as CSV without row names.
output$downloadData <- downloadHandler(
  filename = function() {
    requested <- input$download_filename
    requested <- if (length(requested) == 0 || is.na(requested[1])) {
      ""
    } else {
      trimws(requested[1])
    }

    base_name <- if (nzchar(requested)) {
      # Anything outside letters, digits, underscore and hyphen becomes "_".
      gsub("[^A-Za-z0-9_\\-]", "_", requested)
    } else {
      paste0("app_ready_COA_", Sys.Date())
    }

    ext <- input$download_format
    if (length(ext) == 0 || is.na(ext[1]) || !nzchar(ext[1])) {
      ext <- "csv"
    }

    paste0(base_name, ".", ext[1])
  },
  content = function(file) {
    export_data <- processed_data()

    # identical() is NULL-safe, so an unset format cannot break the `if`.
    if (identical(input$download_format, "parquet")) {
      arrow::write_parquet(export_data, file)
    } else {
      write.csv(export_data, file, row.names = FALSE)
    }
  }
)
1913
-
1914
-
1915
-
1916
# SCRAPER STUFF ----

# Source-specific controls rendered for the selected scrape source: a short
# description for every source, plus a division picker for NCAA.
output$scrape_options <- renderUI({
  switch(
    input$scrape_source,
    pbp = tagList(
      p("Scrapes TrackMan play-by-play data from FTP.")
    ),
    pos = tagList(
      p("Scrapes TrackMan player positioning data from FTP.")
    ),
    ncaa = {
      division_picker <- selectInput(
        "ncaa_division",
        "Division:",
        choices = c("D1", "D2", "D3")
      )
      tagList(
        division_picker,
        p("Scrapes NCAA scoreboard data via API.")
      )
    }
  )
})
1932
-
1933
# Scrape button: dispatch the `scrape.yml` GitHub Actions workflow on the
# `main` ref with the selected date range and data type, then switch polling
# on so the poller can pick up the result.
#
# Reads GITHUB_TOKEN / GITHUB_REPO from the environment. Progress and
# failures are reported through scrape_status_msg(); polling is only enabled
# when GitHub answers 204 (dispatch accepted).
observeEvent(input$scrape_btn, {
  gh_token <- Sys.getenv("GITHUB_TOKEN")
  gh_repo <- Sys.getenv("GITHUB_REPO")

  # Without credentials the request would target ".../repos//actions/..."
  # and come back as an opaque API error, so fail early with a clear message.
  if (!nzchar(gh_token) || !nzchar(gh_repo)) {
    scrape_status_msg(
      "Cannot start scrape: GITHUB_TOKEN and GITHUB_REPO must be set."
    )
    return()
  }

  # A reversed range can never produce data; do not spend a workflow run on
  # it. isTRUE() keeps this safe when either date is missing.
  if (isTRUE(input$start_date > input$end_date)) {
    scrape_status_msg("Cannot start scrape: start date is after end date.")
    return()
  }

  scrape_status_msg("Triggering scrape on GitHub...")

  dispatch_url <- paste0(
    "https://api.github.com/repos/", gh_repo,
    "/actions/workflows/scrape.yml/dispatches"
  )

  payload <- jsonlite::toJSON(
    list(
      ref = "main",
      inputs = list(
        start_date = as.character(input$start_date),
        end_date = as.character(input$end_date),
        data_type = input$scrape_source
      )
    ),
    auto_unbox = TRUE
  )

  result <- tryCatch(
    httr::POST(
      dispatch_url,
      httr::add_headers(
        Authorization = paste("Bearer", gh_token),
        Accept = "application/vnd.github.v3+json"
      ),
      # The body is pre-serialised JSON sent raw, so declare its type
      # explicitly instead of relying on the transport default.
      httr::content_type_json(),
      body = payload,
      encode = "raw"
    ),
    error = function(e) {
      scrape_status_msg(paste("Failed:", conditionMessage(e)))
      NULL
    }
  )

  if (is.null(result)) return()

  response_code <- httr::status_code(result)

  if (response_code == 204) {
    scrape_status_msg("Scrape triggered! Waiting for GitHub to finish...")
    scrape_polling(TRUE)
  } else {
    scrape_status_msg(paste("GitHub API error:", response_code))
  }
})
1971
-
1972
# Poll GitHub every 15 seconds to check if done.
#
# While scrape_polling() is TRUE this observer:
#   1. asks GitHub for the most recent dispatched run of `scrape.yml`;
#   2. keeps reporting the run status until it is "completed";
#   3. on success, downloads data/<source>_<start>_to_<end>.csv.gz from the
#      repository, runs the pbp post-processing when the source is "pbp",
#      and publishes the result through scraped_data().
# Every outcome is reported via scrape_status_msg(). Failures are never
# allowed to escape the observer.
#
# NOTE(review): the newest run may still be a previous, already completed
# run if GitHub has not registered the fresh dispatch by the first poll.
# Ruling that out needs the dispatch time, which is not available here --
# confirm whether this needs handling.
observe({
  req(scrape_polling())

  invalidateLater(15000, session)

  gh_token <- Sys.getenv("GITHUB_TOKEN")
  gh_repo <- Sys.getenv("GITHUB_REPO")
  auth_value <- paste("Bearer", gh_token)

  # Restrict the lookup to the workflow the scrape button dispatches, so an
  # unrelated workflow finishing in the same repository is not mistaken for
  # the scrape.
  runs_url <- paste0(
    "https://api.github.com/repos/", gh_repo,
    "/actions/workflows/scrape.yml/runs?per_page=1&event=workflow_dispatch"
  )

  resp <- tryCatch(
    httr::GET(
      runs_url,
      httr::add_headers(
        Authorization = auth_value,
        Accept = "application/vnd.github.v3+json"
      )
    ),
    error = function(e) NULL
  )

  # Network hiccup: stay quiet and try again on the next tick.
  if (is.null(resp)) return()

  if (httr::status_code(resp) != 200) {
    scrape_status_msg(
      paste("GitHub API error while polling:", httr::status_code(resp))
    )
    return()
  }

  # A malformed body must not raise inside the observer.
  runs <- tryCatch(
    jsonlite::fromJSON(
      httr::content(resp, as = "text", encoding = "UTF-8")
    )$workflow_runs,
    error = function(e) NULL
  )

  if (!is.data.frame(runs) || nrow(runs) == 0) return()

  latest <- runs[1, ]
  status <- latest$status
  conclusion <- latest$conclusion

  if (!identical(status, "completed")) {
    scrape_status_msg(paste0("GitHub is running... (status: ", status, ")"))
    return()
  }

  scrape_polling(FALSE)

  # identical() also covers a missing/NA conclusion without erroring.
  if (!identical(conclusion, "success")) {
    scrape_status_msg(paste("GitHub Action failed:", conclusion))
    return()
  }

  scrape_status_msg("GitHub finished! Fetching data...")

  # The timer drives this observer; reading the inputs through isolate()
  # keeps later edits to them from re-triggering it.
  source_type <- isolate(input$scrape_source)
  filename <- paste0(
    source_type, "_",
    isolate(input$start_date), "_to_", isolate(input$end_date),
    ".csv.gz"
  )
  url <- paste0(
    "https://api.github.com/repos/", gh_repo, "/contents/data/", filename
  )

  data <- tryCatch({
    file_resp <- httr::GET(
      url,
      httr::add_headers(
        Authorization = auth_value,
        Accept = "application/vnd.github.v3.raw"
      )
    )

    if (httr::status_code(file_resp) == 200) {
      tmp <- tempfile(fileext = ".csv.gz")
      writeBin(httr::content(file_resp, as = "raw"), tmp)
      # Remove the download once it has been parsed, even if parsing fails.
      tryCatch(read_csv(gzfile(tmp)), finally = unlink(tmp))
    } else {
      NULL
    }
  }, error = function(e) NULL)

  if (is.null(data) || nrow(data) == 0) {
    scrape_status_msg(
      "Scrape finished but couldn't fetch the file. Try 'Fetch Results' manually."
    )
    return()
  }

  processing_error <- NULL

  if (identical(source_type, "pbp")) {
    scrape_status_msg("Processing data...")

    # On failure keep the raw data, but remember why processing failed so
    # the final status can say so instead of silently reporting success.
    processed <- tryCatch(
      list(
        data = predict_stuffplus(clean_college_data(data)),
        error = NULL
      ),
      error = function(e) list(data = data, error = conditionMessage(e))
    )
    data <- processed$data
    processing_error <- processed$error
  }

  scraped_data(data)

  done_msg <- paste0(
    "Done! ", nrow(data), " rows \u00D7 ", ncol(data), " columns."
  )
  if (!is.null(processing_error)) {
    done_msg <- paste0(
      done_msg,
      "\nProcessing error (raw data kept): ", processing_error
    )
  }
  scrape_status_msg(done_msg)
})
2057
-
2058
# Status text: show the scraper's current status message.
output$scrape_status <- renderText({
  scrape_status_msg()
})
2060
-
2061
# Preview table: horizontally scrollable, 10 rows per page; renders nothing
# until scraped data is available.
output$scrape_preview <- DT::renderDataTable({
  req(scraped_data())

  table_options <- list(scrollX = TRUE, pageLength = 10)
  DT::datatable(scraped_data(), options = table_options)
})
2066
-
2067
# Download: export the scraped data as CSV, named
# "trackman_<label>_<start>_to_<end>.csv" with dates as YYYYMMDD.
output$download_scrape <- downloadHandler(
  filename = function() {
    label <- switch(
      input$scrape_source,
      pbp = "pbp",
      pos = "positional",
      ncaa = "ncaa"
    )
    start_stamp <- format(input$start_date, "%Y%m%d")
    end_stamp <- format(input$end_date, "%Y%m%d")

    paste0("trackman_", label, "_", start_stamp, "_to_", end_stamp, ".csv")
  },
  content = function(file) {
    req(scraped_data())
    write.csv(scraped_data(), file, row.names = FALSE)
  }
)
2080
-
2081
-
2082
# Upload the scraped data to the Hugging Face dataset repository.
#
# For each target dataset the handler downloads that dataset's PitchUID
# index, keeps only rows whose PitchUID is not already indexed, uploads them
# as "<folder>/<timestamp>.parquet", and then re-uploads the extended index.
# "pbp" scrapes are uploaded to the master dataset and, filtered on
# PitcherTeam / BatterTeam == "COA_CHA", to the two Coastal datasets.
#
# Failure handling:
#   * Only HTTP 404 on the index is treated as "no index yet". Any other
#     failure aborts that dataset, because continuing with an empty UID list
#     would re-upload every row and overwrite the index with only the new
#     UIDs.
#   * Each dataset is uploaded independently; a failure is reported in the
#     status text and does not escape the observer.
#
# NOTE(review): if the parquet upload succeeds but the index upload fails,
# the next run will upload those rows again -- confirm whether that needs
# handling.
observeEvent(input$upload_hf_btn, {
  req(scraped_data())

  hf_token <- Sys.getenv("HF_WRITE_TOKEN")
  repo_id <- "CoastalBaseball/2026MasterDataset"
  timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")

  if (!nzchar(hf_token)) {
    scrape_status_msg("Upload aborted: HF_WRITE_TOKEN is not set.")
    return()
  }

  # Download the PitchUID index for one dataset.
  # Returns character(0) only when the index does not exist yet (HTTP 404);
  # raises an error for any other failure.
  fetch_existing_uids <- function(index_file) {
    tmp_idx <- tempfile(fileext = ".csv.gz")
    on.exit(unlink(tmp_idx), add = TRUE)

    resp <- httr::GET(
      paste0(
        "https://huggingface.co/datasets/", repo_id,
        "/resolve/main/", index_file
      ),
      httr::add_headers(Authorization = paste("Bearer", hf_token)),
      httr::write_disk(tmp_idx, overwrite = TRUE)
    )

    code <- httr::status_code(resp)
    if (code == 404) {
      return(character(0))
    }
    if (code != 200) {
      stop(
        "could not download ", index_file, " (HTTP ", code, ")",
        call. = FALSE
      )
    }

    read.csv(gzfile(tmp_idx), stringsAsFactors = FALSE)$PitchUID
  }

  # Upload the not-yet-indexed rows of `new_data` to `folder` and extend
  # `index_file`. Returns a one-line summary; raises on any failure.
  upload_to_hf <- function(new_data, folder, index_file, label) {
    scrape_status_msg(paste0("Checking existing UIDs for ", label, "..."))

    existing_uids <- fetch_existing_uids(index_file)
    scraped_rows <- nrow(new_data)

    new_only <- if (length(existing_uids) > 0 &&
                    "PitchUID" %in% names(new_data)) {
      new_data %>% filter(!PitchUID %in% existing_uids)
    } else {
      new_data
    }

    new_rows <- nrow(new_only)
    total_after <- length(existing_uids) + new_rows

    if (new_rows == 0) {
      return(paste0(
        label, ": ", scraped_rows, " rows scraped, 0 new rows added (",
        length(existing_uids), " total)"
      ))
    }

    scrape_status_msg(
      paste0("Uploading ", new_rows, " new rows for ", label, "...")
    )

    hf <- reticulate::import("huggingface_hub")
    api <- hf$HfApi()

    # Both temp files are removed on exit, including when an upload throws.
    tmp_data <- tempfile(fileext = ".parquet")
    tmp_idx <- tempfile(fileext = ".csv.gz")
    on.exit(unlink(c(tmp_data, tmp_idx)), add = TRUE)

    arrow::write_parquet(new_only, tmp_data)
    api$upload_file(
      path_or_fileobj = tmp_data,
      path_in_repo = paste0(folder, "/", timestamp, ".parquet"),
      repo_id = repo_id,
      repo_type = "dataset",
      token = hf_token
    )

    scrape_status_msg(paste0("Updating ", label, " index..."))

    all_uids <- data.frame(PitchUID = c(existing_uids, new_only$PitchUID))
    gz <- gzfile(tmp_idx, "w")
    tryCatch(
      write.csv(all_uids, gz, row.names = FALSE),
      finally = close(gz)
    )

    api$upload_file(
      path_or_fileobj = tmp_idx,
      path_in_repo = index_file,
      repo_id = repo_id,
      repo_type = "dataset",
      token = hf_token
    )

    rm(new_only, all_uids)
    gc()

    paste0(
      label, ": ", scraped_rows, " rows scraped, ", new_rows,
      " new rows added (", total_after, " total)"
    )
  }

  # Run one dataset upload; turn any error into a status line so the other
  # datasets still run and the observer itself never fails.
  safe_upload <- function(new_data, folder, index_file, label) {
    tryCatch(
      upload_to_hf(new_data, folder, index_file, label),
      error = function(e) {
        paste0(label, ": upload failed - ", conditionMessage(e))
      }
    )
  }

  # Upload the rows of `all_rows` whose `team_col` equals "COA_CHA".
  upload_team_subset <- function(all_rows, team_col, folder, index_file,
                                 label) {
    tryCatch({
      team_rows <- all_rows %>% filter(.data[[team_col]] == "COA_CHA")
      if (nrow(team_rows) > 0) {
        safe_upload(team_rows, folder, index_file, label)
      } else {
        paste0(label, ": No matching rows")
      }
    }, error = function(e) {
      paste0(label, ": upload failed - ", conditionMessage(e))
    })
  }

  scraped <- scraped_data()
  source_type <- input$scrape_source

  messages <- if (identical(source_type, "pbp")) {
    msg1 <- safe_upload(
      scraped, "pbp", "pbp_uid_index.csv.gz", "Master Dataset"
    )
    gc()

    msg2 <- upload_team_subset(
      scraped, "PitcherTeam", "coastal_pitchers",
      "coastal_pitchers_uid_index.csv.gz", "Coastal Pitchers"
    )
    gc()

    msg3 <- upload_team_subset(
      scraped, "BatterTeam", "coastal_hitters",
      "coastal_hitters_uid_index.csv.gz", "Coastal Hitters"
    )
    gc()

    c(msg1, msg2, msg3)
  } else if (identical(source_type, "pos")) {
    safe_upload(
      scraped, "pos", "pos_uid_index.csv.gz", "Positional Dataset"
    )
  } else if (identical(source_type, "ncaa")) {
    safe_upload(
      scraped, "ncaa_pbp", "ncaa_pbp_uid_index.csv.gz", "NCAA PBP Dataset"
    )
  } else {
    NULL
  }

  if (!is.null(messages)) {
    scrape_status_msg(paste(messages, collapse = "\n"))
  }
})
2189
-
2190
- }
2191
-
2192
# Launch the Shiny application from the UI definition and server function.
shinyApp(ui, server)