OwenStOnge committed on
Commit
6f9f7ce
·
verified ·
1 Parent(s): e80ebc7

Update app.R

Browse files
Files changed (1) hide show
  1. app.R +67 -823
app.R CHANGED
@@ -215,820 +215,6 @@ merge_with_bat_tracking <- function(csv_data, bat_tracking_data) {
215
  ))
216
  }
217
 
218
-
219
- ##SCRAPER FUNCTIONS
220
-
221
# TrackMan FTP connection settings. The credentials are read from the
# FTP_USER and FTP_PASSWORD environment variables.
ftp_base_dir <- "/v3"
ftp_server   <- "ftp.trackmanbaseball.com"
password     <- Sys.getenv("FTP_PASSWORD")
username     <- Sys.getenv("FTP_USER")
225
-
226
-
227
- #Trackman pitch by pitch
228
# Download TrackMan pitch-by-pitch CSVs from the FTP server for every day in
# [start_date, end_date] and stack them into one data frame.
#
# Args:
#   start_date, end_date: values accepted by as.Date(); both ends inclusive.
# Returns:
#   A data frame with the rows of every downloaded file that contains a
#   PlayResult column; all columns are read as character. Days whose
#   directory listing fails are skipped silently; files that fail to download
#   or parse are reported with message() and skipped.
#
# Uses the file-level ftp_server, username, password and ftp_base_dir.
# Rows are NOT de-duplicated: the distinct(PitchUID) step was commented out in
# the previous version and is left off here. Previously the function returned
# NULL because nothing followed the loop.
scrape_trackman_pbp <- function(start_date, end_date) {
  dates <- as.character(seq.Date(as.Date(start_date), as.Date(end_date), by = "day"))

  # File names look like "<8 digits>-<name>-<n>.csv", optionally with an
  # "_unverified" suffix. The second capture group is what the code compares
  # to decide whether a verified file supersedes an unverified one.
  pattern_verified <- "(\\d{8})-(.+)-\\d+\\.csv$"
  pattern_unverified <- "(\\d{8})-(.+)-\\d+_unverified\\.csv$"

  # Collect per-file tables and bind once at the end instead of growing a
  # data frame inside the loop.
  chunks <- list()

  for (date in dates) {
    ftp_dir <- paste0(ftp_base_dir, "/", format(as.Date(date), "%Y/%m/%d"), "/CSV")
    ftp_url <- paste0("ftp://", username, ":", password, "@", ftp_server, ftp_dir, "/")

    # Get list of files for this day
    listing <- tryCatch(
      getURL(ftp_url, ftp.use.epsv = FALSE, dirlistonly = TRUE),
      error = function(e) NULL
    )
    if (is.null(listing)) next

    # Listings may use CRLF line endings; split on either form and drop blank
    # entries so the "$"-anchored patterns below can match.
    file_list <- trimws(unlist(strsplit(listing, "\r?\n")))
    file_list <- file_list[nzchar(file_list)]

    # Prefer verified files; keep an unverified file only when no verified
    # file shares its second capture group.
    verified <- grep(pattern_verified, file_list, value = TRUE)
    unverified <- grep(pattern_unverified, file_list, value = TRUE)

    verified_stadiums <- unique(str_match(verified, pattern_verified)[, 2])
    unverified_stadiums <- str_match(unverified, pattern_unverified)[, 2]

    files_to_read <- c(
      verified,
      unverified[!unverified_stadiums %in% verified_stadiums]
    )

    # Read each file directly into memory
    for (file in files_to_read) {
      file_url <- paste0(ftp_url, URLencode(file))

      tryCatch({
        csv_text <- getURL(file_url)
        rows <- read_csv(I(csv_text), col_types = cols(.default = "c"), show_col_types = FALSE)

        if ("PlayResult" %in% names(rows)) {
          chunks[[length(chunks) + 1L]] <- rows
        }
      }, error = function(e) {
        message("Failed: ", file, " - ", e$message)
      })
    }

    message("Done with ", date)
  }

  # Leading data.frame() keeps the result a plain data frame, as before.
  bind_rows(data.frame(), chunks)
}
288
-
289
-
290
-
291
- #trackman positional
292
# Download TrackMan player-positioning CSVs ("..._playerpositioning_FHC.csv")
# from the FTP server for every day in [start_date, end_date].
#
# Args:
#   start_date, end_date: values accepted by as.Date(); both ends inclusive.
# Returns:
#   A data frame of the downloaded rows (all columns character), reduced to
#   one row per PitchUID when that column is present. When nothing was
#   downloaded an empty data frame is returned instead of raising from
#   distinct() on a missing column.
#
# Uses the file-level ftp_server, username, password and ftp_base_dir.
scrape_trackman_positional <- function(start_date, end_date) {
  dates <- as.character(seq.Date(as.Date(start_date), as.Date(end_date), by = "day"))

  # Verified vs. unverified file-name patterns; the second capture group is
  # what the code compares to decide whether a verified file supersedes an
  # unverified one.
  pattern_verified <- "(\\d{8})-(.+)-\\d+_playerpositioning_FHC\\.csv$"
  pattern_unverified <- "(\\d{8})-(.+)-\\d+_unverified_playerpositioning_FHC\\.csv$"

  # Collect per-file tables and bind once at the end instead of growing a
  # data frame inside the loop.
  chunks <- list()

  for (date in dates) {
    ftp_dir <- paste0(ftp_base_dir, "/", format(as.Date(date), "%Y/%m/%d"), "/CSV")
    ftp_url <- paste0("ftp://", username, ":", password, "@", ftp_server, ftp_dir, "/")

    # Get list of files for this day
    listing <- tryCatch(
      getURL(ftp_url, ftp.use.epsv = FALSE, dirlistonly = TRUE),
      error = function(e) NULL
    )
    if (is.null(listing)) next

    # Listings may use CRLF line endings; split on either form and drop blank
    # entries so the "$"-anchored patterns below can match.
    file_list <- trimws(unlist(strsplit(listing, "\r?\n")))
    file_list <- file_list[nzchar(file_list)]

    verified <- grep(pattern_verified, file_list, value = TRUE)
    unverified <- grep(pattern_unverified, file_list, value = TRUE)

    verified_stadiums <- unique(str_match(verified, pattern_verified)[, 2])
    unverified_stadiums <- str_match(unverified, pattern_unverified)[, 2]

    files_to_read <- c(
      verified,
      unverified[!unverified_stadiums %in% verified_stadiums]
    )

    # Read each file directly into memory
    for (file in files_to_read) {
      file_url <- paste0(ftp_url, URLencode(file))

      tryCatch({
        csv_text <- getURL(file_url)
        rows <- read_csv(I(csv_text), col_types = cols(.default = "c"), show_col_types = FALSE)

        # NOTE(review): this filter is identical to the pitch-by-pitch
        # scraper's; confirm positioning files really carry a PlayResult
        # column, otherwise every file is dropped here.
        if ("PlayResult" %in% names(rows)) {
          chunks[[length(chunks) + 1L]] <- rows
        }
      }, error = function(e) {
        message("Failed: ", file, " - ", e$message)
      })
    }

    message("Done with ", date)
  }

  # Leading data.frame() keeps the result a plain data frame, as before.
  all_data <- bind_rows(data.frame(), chunks)

  # Deduplicate, but only when there is a PitchUID column to deduplicate on.
  if (!"PitchUID" %in% names(all_data)) {
    return(all_data)
  }
  distinct(all_data, PitchUID, .keep_all = TRUE)
}
352
-
353
-
354
-
355
-
356
- #Next section is a large section of functions from the pbp parser github to parse ncaa pbp data to get base states
357
# Remove leading and trailing whitespace from every element of x.
stripwhite <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
358
-
359
# Trim surrounding whitespace and drop trailing period(s) from each element.
#
# The previous version removed EVERY period in the string whenever the last
# character was ".", so "B. Smith." became "B Smith" and could no longer be
# compared against names that keep their interior periods. Only the trailing
# period(s) are removed now; NA stays NA.
strip_punc <- function(x) {
  # Same trimming as stripwhite(), inlined so this block has no dependencies.
  trimmed <- gsub("^\\s+|\\s+$", "", x)
  sub("\\.+$", "", trimmed)
}
363
-
364
-
365
-
366
- ##########################################################
367
- # Functions for parsing
368
-
369
# Flag the last row of each half inning.
#
# Args:
#   top_inn: vector of top-of-inning indicators, one per play.
# Returns:
#   Numeric vector the same length as top_inn: 1 where the next row has a
#   different top_inn value (and always 1 on the final row), 0 otherwise.
#   Empty input yields an empty result (the old 1:(m-1) loop indexed past the
#   ends of an empty vector).
inn_end <- function(top_inn) {
  m <- length(top_inn)
  if (m == 0L) {
    return(numeric(0))
  }
  # Compare each row with its successor in one vectorised step.
  side_changes <- top_inn[-m] != top_inn[-1L]
  c(as.numeric(side_changes), 1)
}
378
-
379
-
380
# Flag the last row of each game.
#
# Args:
#   game_id: vector of game identifiers, one per play, with each game's rows
#     contiguous.
# Returns:
#   Numeric vector the same length as game_id: 1 where the next row belongs
#   to a different game (and always 1 on the final row), 0 otherwise.
#   A single-row input now returns 1; the old 2:m loop raised on it.
game_end <- function(game_id) {
  m <- length(game_id)
  if (m == 0L) {
    return(numeric(0))
  }
  # Compare each row with its successor in one vectorised step.
  game_changes <- game_id[-m] != game_id[-1L]
  c(as.numeric(game_changes), 1)
}
391
-
392
-
393
-
394
-
395
# Runs scored on each play, derived from running score columns.
#
# Args:
#   a_txt: away-team play text; an empty string marks a home-team play.
#   h_txt: home-team play text (not used by the computation).
#   a_score, h_score: running away / home scores after each play.
# Returns:
#   Vector the same length as a_txt. Element 1 is a_score[1]; element i > 1
#   is the change in the home score when a_txt[i] is "", otherwise the change
#   in the away score (as integer). Single-row and empty inputs are handled;
#   the old 2:m loop wrote past the end of the result for them.
runs_on_play <- function(a_txt, h_txt, a_score, h_score) {
  m <- length(a_txt)
  if (m == 0L) {
    return(integer(0))
  }
  away_batting <- a_txt[-1L] != ""
  scored <- ifelse(away_batting, diff(a_score), diff(h_score))
  c(a_score[1L], as.integer(scored))
}
407
-
408
-
409
# Name of the runner on first base before each play.
#
# Walks the plays in order. Within a half inning (previous row has
# inn_end == 0 and game_end == 0) the occupant is derived from the previous
# row's texts and the previous occupant; otherwise the base is empty ("").
# When no rule matches, case_when() yields NA for that row.
#
# Args:
#   bat_text, r1_text: batter / first-runner text per play.
#   bat_name: batter name per play.
#   r1_name: ignored; the result is rebuilt from scratch.
#   inn_end, game_end: end-of-half-inning / end-of-game flags per play.
#   sub_in, sub_out: substitution names per play ("" when none).
# Returns:
#   Character vector of runner names with surrounding whitespace removed.
r1_name <- function(bat_text, bat_name, r1_text, r1_name, inn_end, game_end, sub_in, sub_out) {
  n_plays <- length(bat_text)
  on_first <- character(n_plays)

  for (i in 2:n_plays) {
    p <- i - 1
    if (!isTRUE(inn_end[p] == 0 & game_end[p] == 0)) next

    bat_p <- bat_text[p]
    r1_p <- r1_text[p]
    held <- on_first[p]

    on_first[i] <- case_when(
      # Runner replaced by a substitute
      sub_out[p] != '' & sub_out[p] == stripwhite(held) ~ sub_in[p],
      # Batter reached first and nothing else in the text moves him on
      str_detect(bat_p, '(singled|walked|hit by pitch|reached)') &
        !str_detect(bat_p, '(doubled|tripled|homered|advanced|scored|out|stole)') ~ bat_name[p],
      # Struck out but reached first
      str_detect(bat_p, '(reached first)') & str_detect(bat_p, '(struck out)') ~ bat_name[p],
      # Previous occupant neither moved nor was retired
      (r1_p == '' | !str_detect(r1_p, '(advanced to second|stole second|advanced to third|stole third|scored|out)')) &
        !str_detect(bat_p, '(double play|advanced to second|stole second|advanced to third|stole third|scored|caught stealing|picked off|homered)') ~ held,
      # The runner text is about a different runner than the one on first
      !str_detect(bat_p, '(singled|doubled|tripled|advanced to second|stole second|advanced to third|stole third|scored|homered|out at second c to)') &
        str_detect(r1_p, '(advanced to third|stole third|scored|out at third)') &
        stripwhite(gsub('((advanced to second|stole second|stole third|advanced to third|scored|out).*$)', '', r1_p)) !=
          stripwhite(gsub('((singled|reached).*$)', '', held)) ~ held,
      # The lone event text is about someone other than the runner on first
      r1_p == '' &
        stripwhite(gsub('((advanced to second|stole second|stole third|advanced to third|scored|out|failed|Failed|picked off).*$)', '', bat_p)) !=
          stripwhite(held) ~ held
    )
  }

  stripwhite(on_first)
}
424
-
425
-
426
# Name of the runner on second base before each play.
#
# Walks the plays in order. Within a half inning (previous row has
# inn_end == 0 and game_end == 0) the occupant is derived from the previous
# row's texts and the previous occupant; otherwise the base is empty ("").
# When no rule matches, case_when() yields NA for that row.
#
# Changes from the previous version: the final case_when() clause repeated
# the first (substitution) clause and could never fire, so it is removed; the
# loop uses seq_len(m)[-1] so inputs shorter than two rows skip it cleanly.
#
# Args:
#   bat_text, r1_text, r2_text: batter / runner texts per play.
#   bat_name, r1_name: accepted for signature compatibility; not used.
#   r2_name: ignored; the result is rebuilt from scratch.
#   inn_end, game_end: end-of-half-inning / end-of-game flags per play.
#   sub_in, sub_out: substitution names per play ("" when none).
# Returns:
#   Character vector of runner names with surrounding whitespace removed.
r2_name <- function(bat_text, bat_name, r1_text, r1_name, r2_text, r2_name, inn_end, game_end, sub_in, sub_out) {
  m <- length(bat_text)
  on_second <- character(m)

  for (i in seq_len(m)[-1L]) {
    p <- i - 1L
    if (!isTRUE(inn_end[p] == 0 & game_end[p] == 0)) next

    bat_p <- bat_text[p]
    r1_p <- r1_text[p]
    r2_p <- r2_text[p]
    held <- on_second[p]

    occupant <- case_when(
      # Runner replaced by a substitute
      sub_out[p] != '' & sub_out[p] == stripwhite(held) ~ sub_in[p],
      # Batter text ends with someone on second
      str_detect(bat_p, '(doubled|advanced to second|stole second)') &
        !str_detect(bat_p, '(advanced to third|scored|out|stole third)') ~
        stripwhite(gsub('((doubled|advanced to second|stole second).*$)', '', bat_p)),
      # First runner text ends with someone on second
      str_detect(r1_p, '(advanced to second|stole second)') &
        !str_detect(r1_p, '(advanced to third|scored|out|stole third)') ~
        stripwhite(gsub('((advanced to second|stole second).*$)', '', r1_p)),
      # Runner text concerns a different runner; occupant stays
      r2_p == '' &
        stripwhite(gsub('((stole third|advanced to third|scored|out).*$)', '', r1_p)) != stripwhite(held) &
        !str_detect(bat_p, '(advanced to third|stole third|scored|picked off|caught stealing)') ~ held,
      # Double play that did not retire the occupant
      r2_p == '' &
        stripwhite(gsub('((out on the play).*$)', '', r1_p)) != stripwhite(held) &
        str_detect(bat_p, '(double play)') ~ held,
      # No runner text and nothing in the batter text moves the occupant
      r1_p == '' &
        !str_detect(bat_p, '(stole third|advanced to third|scored|picked off|homered|caught stealing)') ~ held
    )

    # Drop any trailing event text that was captured along with the name.
    on_second[i] <- stripwhite(gsub('((singled|reached).*$)', '', occupant))
  }

  stripwhite(on_second)
}
445
-
446
-
447
# Name of the runner on third base before each play.
#
# Walks the plays in order. Within a half inning (previous row has
# inn_end == 0 and game_end == 0) the occupant is derived from the previous
# row's texts and the previous occupant; otherwise the base is empty ("").
# When no rule matches, case_when() yields NA for that row.
#
# Args:
#   bat_text, r1_text, r2_text, r3_text: batter / runner texts per play.
#   bat_name, r1_name, r2_name: accepted but not used.
#   r3_name: ignored; the result is rebuilt from scratch.
#   inn_end, game_end: end-of-half-inning / end-of-game flags per play.
#   sub_in, sub_out: substitution names per play ("" when none).
# Returns:
#   Character vector of runner names with surrounding whitespace removed.
r3_name <- function(bat_text, bat_name, r1_text, r1_name, r2_text, r2_name, r3_text, r3_name, inn_end, game_end, sub_in, sub_out) {
  n_plays <- length(bat_text)
  on_third <- character(n_plays)

  for (i in 2:n_plays) {
    p <- i - 1
    if (!isTRUE(inn_end[p] == 0 & game_end[p] == 0)) next

    bat_p <- bat_text[p]
    r1_p <- r1_text[p]
    r2_p <- r2_text[p]
    held <- on_third[p]

    occupant <- case_when(
      # Runner replaced by a substitute
      sub_out[p] != '' & sub_out[p] == stripwhite(held) ~ sub_in[p],
      # Batter text ends with someone on third
      str_detect(bat_p, '(tripled|advanced to third|stole third)') &
        !str_detect(bat_p, '(scored|out)') ~
        stripwhite(gsub('((tripled|advanced to third|stole third).*$)', '', bat_p)),
      # First runner text ends with someone on third
      str_detect(r1_p, '(advanced to third|stole third)') &
        !str_detect(r1_p, '(scored|out)') ~
        stripwhite(gsub('((advanced to third|stole third).*$)', '', r1_p)),
      # Second runner text ends with someone on third
      str_detect(r2_p, '(advanced to third|stole third)') &
        !str_detect(r2_p, '(scored|out)') ~
        stripwhite(gsub('((advanced to third|stole third).*$)', '', r2_p)),
      # No runner text and the batter text does not bring the occupant home
      r1_p == '' & !str_detect(bat_p, '(scored|stole home|homered)') ~ held,
      # Runner text concerns a different runner; occupant stays
      r2_p == '' &
        stripwhite(gsub('((scored|stole home|out).*$)', '', r1_p)) != stripwhite(held) &
        !str_detect(bat_p, '(scored|stole home)') ~ held,
      # Nobody in any text scored or was retired
      r3_text[p] == '' &
        !str_detect(r2_p, '(scored|stole home|out)') &
        !str_detect(r1_p, '(scored|stole home|out)') &
        !str_detect(bat_p, '(scored|stole home)') ~ held
    )

    # Drop any trailing event text that was captured along with the name.
    on_third[i] <- stripwhite(gsub('((singled|doubled|reached|advanced|stole|failed|Failed|picked off).*$)', '', occupant))
  }

  stripwhite(on_third)
}
465
-
466
-
467
-
468
-
469
-
470
# Flag the first row of each game.
#
# Args:
#   game_end: end-of-game flags, one per play.
# Returns:
#   Numeric vector the same length as game_end: 1 on the first row, and on
#   every later row the previous row's game_end value. Single-row input now
#   returns 1 (the old 2:m loop raised "replacement has length zero").
new_game <- function(game_end) {
  m <- length(game_end)
  if (m == 0L) {
    return(numeric(0))
  }
  # Shift the end flags down by one row.
  c(1, game_end[-m])
}
479
-
480
# Flag the first row of each half inning.
#
# Args:
#   inn_end: end-of-half-inning flags, one per play.
# Returns:
#   Numeric vector the same length as inn_end: 1 on the first row, and on
#   every later row the previous row's inn_end value. Single-row input now
#   returns 1 (the old 2:m loop raised "replacement has length zero").
new_inn <- function(inn_end) {
  m <- length(inn_end)
  if (m == 0L) {
    return(numeric(0))
  }
  # Shift the end flags down by one row.
  c(1, inn_end[-m])
}
489
-
490
-
491
# Number of outs in the half inning before each play.
#
# Args:
#   outs_on_play: outs recorded on each play.
#   new_game, new_inn: first-row-of-game / first-row-of-half-inning flags.
# Returns:
#   Vector the same length as outs_on_play. Rows that start a game or half
#   inning (or whose flags are NA) get 0; every other row gets the previous
#   row's count plus the previous row's outs, modulo 3.
#   Inputs shorter than two rows now return zeros; the old 2:m loop could
#   index element 0 and raise on a single row.
outs_before <- function(outs_on_play, new_game, new_inn) {
  m <- length(outs_on_play)
  inn_outs <- integer(m)
  for (i in seq_len(m)[-1L]) {
    if (isTRUE(new_game[i] == 0 & new_inn[i] == 0)) {
      inn_outs[i] <- (inn_outs[i - 1L] + outs_on_play[i - 1L]) %% 3
    }
  }
  inn_outs
}
501
-
502
-
503
-
504
-
505
-
506
-
507
-
508
# Running score of one team before each play.
#
# Args:
#   new_game: first-row-of-game flags, one per play.
#   runs_on_play: runs scored on each play.
#   top_inning: 1 when the away team bats, 0 when the home team bats.
#   home_team: 1 (default) returns the home score, any other value the away
#     score.
# Returns:
#   Numeric vector the same length as new_game. The score is 0 on the first
#   row and on every row where new_game is not 0; otherwise it is the previous
#   row's score, plus the previous row's runs when the requested team was
#   batting on that row. A row whose previous top_inning is neither 0 nor 1
#   (including NA) resets to 0, as before.
#
# The scalar case_when() calls are replaced by plain if/else and the loop is
# skipped for inputs shorter than two rows (the old 2:m loop grew the result
# or raised on a single row).
score_before <- function(new_game, runs_on_play, top_inning, home_team = 1) {
  m <- length(new_game)
  # top_inning value during which the requested team bats
  batting_half <- if (home_team == 1) 0 else 1
  score <- numeric(m)

  for (i in seq_len(m)[-1L]) {
    p <- i - 1L
    same_game <- isTRUE(new_game[i] == 0)
    if (same_game && isTRUE(top_inning[p] == batting_half)) {
      score[i] <- as.numeric(score[p] + runs_on_play[p])
    } else if (same_game && isTRUE(top_inning[p] == 1 - batting_half)) {
      score[i] <- score[p]
    }
    # otherwise the score stays at its initial 0
  }

  score
}
529
-
530
# Runs scored on each play, from before/after score columns.
#
# Args:
#   home_score, away_score: scores after each play.
#   home_score_before, away_score_before: scores before each play.
#   top_inn: 0 when the home team bats; any other value is treated as the
#     away team batting.
# Returns:
#   Vector the same length as home_score holding the batting team's score
#   change on each play. Element 1 is always 0, matching the previous
#   version, whose loop started at row 2.
#
# The previous body referenced undefined names (homescore, homescore_before,
# roadscore, roadscore_before) instead of its parameters, so every call
# failed with "object not found"; it also contained a no-op case_when().
runs_play <- function(home_score, away_score, home_score_before, away_score_before, top_inn) {
  n <- length(home_score)
  if (n == 0L) {
    return(integer(0))
  }
  scored <- ifelse(
    top_inn == 0,
    home_score - home_score_before,
    away_score - away_score_before
  )
  scored[1L] <- 0
  scored
}
543
-
544
-
545
# Total runs scored in the half inning that each play belongs to.
#
# Args:
#   end_inn: end-of-half-inning flags; segments are delimited by the
#     positions returned by grep(1, end_inn).
#   runs_on_play: runs scored on each play.
# Returns:
#   Vector the same length as end_inn where every row of a segment holds the
#   sum of runs_on_play over that segment. Rows after the last end marker
#   stay 0. With no end marker at all the result is all zeros (the old
#   2:numinns loop raised in that case).
runs_this_inn <- function(end_inn, runs_on_play) {
  m <- length(end_inn)
  runs <- integer(m)
  endinnloc <- c(0, grep(1, end_inn))

  for (j in seq_along(endinnloc)[-1L]) {
    idx <- (endinnloc[j - 1L] + 1):endinnloc[j]
    # One sum per segment instead of recomputing it for every row.
    runs[idx] <- sum(runs_on_play[idx])
  }

  runs
}
559
-
560
-
561
# Runs scored from each play (inclusive) to the end of its half inning.
#
# Args:
#   end_inn: end-of-half-inning flags; segments are delimited by the
#     positions returned by grep(1, end_inn).
#   runs_on_play: runs scored on each play.
#   runs_this_inn: total runs of the half inning, one value per play.
# Returns:
#   Vector the same length as end_inn: the half-inning total minus the runs
#   scored up to and including the play, plus the play's own runs. Rows after
#   the last end marker hold just runs_on_play. With no end marker at all the
#   loop is skipped (the old 2:numinns loop raised in that case).
runs_rest_of_inn <- function(end_inn, runs_on_play, runs_this_inn) {
  m <- length(end_inn)
  runs <- integer(m)
  endinnloc <- c(0, grep(1, end_inn))

  for (j in seq_along(endinnloc)[-1L]) {
    idx <- (endinnloc[j - 1L] + 1):endinnloc[j]
    # cumsum() gives the runs scored so far within the segment in one pass.
    runs[idx] <- runs_this_inn[idx] - cumsum(runs_on_play[idx])
  }

  runs + runs_on_play
}
576
-
577
-
578
-
579
-
580
-
581
# Assign a batting-order slot (1-9, as character) to each play with a batter.
#
# Each game (delimited by the positions of grep(1, new_game)) keeps two
# counters, one for top-of-inning rows (top_inn == 1) and one for bottom rows
# (top_inn == 0). Every row with a non-empty, non-NA batter name takes the
# next slot for its side, cycling through 1..9. All other rows inside a game
# get "". Rows before the first game start keep NA.
#
# Args:
#   new_game: first-row-of-game flags, one per play.
#   top_inn: top-of-inning indicator per play.
#   bat_name: batter name per play ("" when the row has no batter).
# Returns:
#   Character vector the same length as top_inn.
bat_order_id <- function(new_game, top_inn, bat_name) {
  m <- length(top_inn)
  batorder <- rep(NA_character_, m)

  # Start row of every game, plus a sentinel one past the last row.
  game_starts <- c(grep(1, new_game), (m + 1))

  for (g in 2:length(game_starts)) {
    away_seen <- 0
    home_seen <- 0

    for (row in game_starts[g - 1]:(game_starts[g] - 1)) {
      has_batter <- !is.na(top_inn[row]) && !is.na(bat_name[row]) && bat_name[row] != ''

      if (has_batter && top_inn[row] == 1) {
        batorder[row] <- (away_seen %% 9) + 1
        away_seen <- away_seen + 1
      } else if (has_batter && top_inn[row] == 0) {
        batorder[row] <- (home_seen %% 9) + 1
        home_seen <- home_seen + 1
      } else {
        # leave empty if NA or no name
        batorder[row] <- ''
      }
    }
  }

  batorder
}
613
-
614
-
615
-
616
# Fill NA batting-order slots from neighbouring rows.
#
# Pass 1 walks backwards: an NA row takes the value of the row after it,
# provided the NA row is not flagged as the end of a game. Pass 2 walks
# forwards: any NA that is left takes the value of the row before it.
#
# Args:
#   bat_order: batting-order values, one per play (NA = missing).
#   end_game: end-of-game flags, one per play.
# Returns:
#   bat_order with NAs filled where a neighbour was available.
#   Inputs shorter than two rows are returned unchanged; the old m:2 / 2:m
#   loops indexed element 0 and raised on a single row.
bat_order_fill <- function(bat_order, end_game) {
  m <- length(bat_order)
  later_rows <- seq_len(m)[-1L]

  # Backward fill, never across a game boundary.
  for (i in rev(later_rows)) {
    if (is.na(bat_order[i - 1L]) && end_game[i - 1L] == 0) {
      bat_order[i - 1L] <- bat_order[i]
    }
  }

  # Forward fill for whatever is still missing.
  for (i in later_rows) {
    if (is.na(bat_order[i])) {
      bat_order[i] <- bat_order[i - 1L]
    }
  }

  bat_order
}
631
- ##########################################################
632
-
633
-
634
-
635
-
636
# Parse NCAA play-by-play text into event codes, runners, outs and scores.
#
# Args:
#   pbp_data_frame: data frame with at least the columns game_id, away_text
#     and home_text, one row per play, rows of a game contiguous and in order.
# Returns:
#   The input with these columns added (in this order): tmp_text, sub_fl,
#   bat_text, r1_text, r2_text, r3_text, event_cd, bat_name, sub_in, sub_out,
#   game_end, new_game, top_inning, inn_end, r1_name, r2_name, r3_name,
#   outs_on_play, new_inn, outs_before, outs_after, base_cd_before, bat_order,
#   hit_type, runs_on_play, away_score_before, home_score_before,
#   away_score_after, home_score_after, runs_this_inn, runs_roi, int_bb_fl,
#   sh_fl, sf_fl.
#
# The work is split into several mutate() stages; columns are still created
# in the same order and from the same expressions as a single mutate() would.
ncaa_parse <- function(pbp_data_frame) {

  # Outs recorded on a play. bat/r1/r2/r3 texts are each checked for an out;
  # the number of texts with an out is the number of outs (1-3). When all four
  # texts match, the result is 3 only if the batter text matched solely
  # through 'infield fly', otherwise 0.
  count_outs <- function(event_cd, bat_text, r1_text, r2_text, r3_text) {
    n_out <- str_detect(bat_text, '( out |popped|infield fly)') +
      str_detect(r1_text, '( out |popped)') +
      str_detect(r2_text, '( out |popped)') +
      str_detect(r3_text, '( out |popped)')

    case_when(
      event_cd %in% c(0, 1) ~ 0,
      str_count(bat_text, 'triple play') == 1 ~ 3,
      str_count(bat_text, 'double play') == 1 ~ 2,
      str_detect(bat_text, '( out|popped)') & str_detect(bat_text, '(reached)') ~ 0,
      n_out >= 1 & n_out <= 3 ~ as.numeric(n_out),
      n_out == 4 & !str_detect(bat_text, '( out |popped)') ~ 3,
      TRUE ~ 0)
  }

  # ---- Stage 1: split the text and classify the event ----
  pbp_data_frame <- pbp_data_frame %>%
    mutate(
      tmp_text = paste(away_text, home_text),

      # Substitution flag: 1 for position changes and pinch hitters/runners
      sub_fl = case_when(
        str_detect(tmp_text, '(singled|doubled|tripled|homered|walked|reached|struck out|grounded|flied|lined|popped| hit|infield fly|infield fly|out|double play|triple play)') &
          !str_detect(tmp_text, 'pinch hit') ~ 0,
        str_detect(tmp_text, 'to (p|c|1b|2b|3b|ss|lf|rf|cf|dh)') ~ 1,
        str_detect(tmp_text, 'pinch hit') ~ 1,
        str_detect(tmp_text, 'pinch ran') ~ 1,
        TRUE ~ 0),

      # Split the text up: batter part first, then up to three runner parts,
      # separated by ';', '3a' or ':'
      bat_text = gsub('(;|3a|:).*$', '', tmp_text),

      r1_text = case_when(
        str_detect(tmp_text, '(;|3a|:)') ~ stripwhite(gsub('^.*?(;|3a|:)', '', tmp_text)),
        TRUE ~ ''),

      r2_text = case_when(
        str_detect(r1_text, '(;|3a|:)') ~ stripwhite(gsub('^.*?(;|3a|:)', '', r1_text)),
        TRUE ~ ''),

      r3_text = case_when(
        str_detect(r2_text, '(;|3a|:)') ~ stripwhite(gsub('^.*?(;|3a|:)', '', r2_text)),
        TRUE ~ ''),

      # Cut each runner part back to its own segment
      r2_text = stripwhite(gsub('(;|3a|:).*$', '', r2_text)),
      r1_text = stripwhite(gsub('(;|3a|:).*$', '', r1_text)),

      # Event code: same as retrosheet
      event_cd = case_when(
        sub_fl == 1 ~ 1,
        str_sub(stripwhite(tmp_text), 1, 1) == '(' ~ 1,
        str_detect(tmp_text, '(hitting out of turn| for |No play|halted|delay|postponed|ejected|suspended|coach|sunny|review|challenged|HC|\\*\\*)') ~ 1,
        str_detect(tmp_text, 'struck out') ~ 3,
        str_detect(tmp_text, 'stole') ~ 4,
        str_detect(tmp_text, '(caught stealing|out at second c to|out at third c to)') &
          !str_detect(tmp_text, '(bunt|grounded)') ~ 6,
        str_detect(tmp_text, 'picked off') ~ 8,
        str_detect(tmp_text, 'wild pitch') ~ 9,
        str_detect(tmp_text, 'passed ball') ~ 10,
        str_detect(tmp_text, 'balk') ~ 11,
        str_detect(tmp_text, 'Dropped foul') ~ 13,
        str_detect(tmp_text, 'walked') ~ 14,
        str_detect(tmp_text, 'hit by pitch') ~ 16,
        str_detect(tmp_text, 'interference') ~ 17,
        str_detect(tmp_text, 'error') ~ 18,
        str_detect(tmp_text, 'muffed') ~ 18,
        str_detect(tmp_text, 'dropped') ~ 18,
        str_detect(tmp_text, 'fielder\'s choice') ~ 19,
        str_detect(tmp_text, 'singled') ~ 20,
        str_detect(tmp_text, 'doubled') ~ 21,
        str_detect(tmp_text, 'tripled') ~ 22,
        str_detect(tmp_text, 'homered') ~ 23,
        str_detect(tmp_text, '(flied out|grounded out|popped|fouled out|lined out| infield fly|double play|triple play|out at (first|second|third|home))') ~ 2,
        str_detect(tmp_text, 'advanced') ~ 12,
        TRUE ~ 0)
    )

  # ---- Stage 2: batter and substitution names ----
  pbp_data_frame <- pbp_data_frame %>%
    mutate(
      # Bat name
      bat_name = case_when(
        event_cd %in% c(0, 1) ~ '',
        str_detect(bat_text, '(Batter|Runner\'s interference)') ~ '',
        !str_detect(bat_text, '(walked|singled|doubled|tripled|reached|struck out|grounded out)') &
          str_detect(bat_text, '(advanced|caught stealing|stole|picked off|out at (first|second|third|home)|tagged out)') ~ '',
        str_detect(bat_text, '(singled|doubled|tripled|homered|walked|reached|struck out|grounded|flied|lined|popped|hit | out |fouled out|pinch hit|infield fly|intentionally walked|was intentionally walked|fouled into double play)') ~
          gsub('((singled|doubled|tripled|homered|walked|reached|struck out|grounded|flied|lined|popped|hit | out |fouled out|pinch hit|infield fly|intentionally walked|was intentionally walked|fouled into double play).*$)', '', bat_text),
        str_detect(stripwhite(r1_text), 'caught stealing c to (2b|3b), double play.') ~ bat_text,
        TRUE ~ ''),

      # Sub in
      sub_in = case_when(
        sub_fl == 1 & str_detect(bat_text, 'to (p|c|1b|2b|3b|ss|lf|rf|cf|dh)') ~
          stripwhite(gsub('(to (p|c|1b|2b|3b|ss|lf|rf|cf|dh).*$)', '', bat_text)),
        sub_fl == 1 & str_detect(bat_text, 'pinch ran for') ~
          stripwhite(gsub('pinch ran for.*$', '', bat_text)),
        sub_fl == 1 & str_detect(bat_text, 'pinch hit for') ~
          stripwhite(gsub('pinch hit for.*$', '', bat_text)),
        TRUE ~ ''),

      # Sub out
      sub_out = case_when(
        sub_fl == 1 & str_detect(bat_text, 'to (p|c|1b|2b|3b|ss|lf|rf|cf|dh) for') ~
          gsub('^.*to (p|c|1b|2b|3b|ss|lf|rf|cf|dh) for', '', bat_text),
        sub_fl == 1 & str_detect(bat_text, 'pinch ran for') ~
          gsub('^.*pinch ran for', '', bat_text),
        sub_fl == 1 & str_detect(bat_text, 'pinch hit') ~
          gsub('^.*pinch hit for', '', bat_text),
        TRUE ~ ''),
      # Clean sub out
      sub_out = strip_punc(sub_out)
    )

  # ---- Stage 3: game / inning boundaries, runners, outs, base state ----
  pbp_data_frame <- pbp_data_frame %>%
    mutate(
      # Game end
      game_end = game_end(game_id),
      # New game
      new_game = new_game(game_end),
      # Top inning
      top_inning = ifelse(away_text == '', 0, 1),
      # End of inning
      inn_end = inn_end(top_inning),

      # Runner names. Each helper ignores the argument that shares its own
      # name, so it is passed through untouched.
      r1_name = r1_name(bat_text, bat_name, r1_text, r1_name, inn_end, game_end, sub_in, sub_out),
      r2_name = r2_name(bat_text, bat_name, r1_text, r1_name, r2_text, r2_name, inn_end, game_end, sub_in, sub_out),
      r3_name = r3_name(bat_text, bat_name, r1_text, r1_name, r2_text, r2_name, r3_text, r3_name, inn_end, game_end, sub_in, sub_out),
      # Clean runner names
      r1_name = replace(r1_name, is.na(r1_name), ''),
      r2_name = replace(r2_name, is.na(r2_name), ''),
      r3_name = replace(r3_name, is.na(r3_name), ''),

      # Fix repeat bat names: a "batter" who is already on base is not batting
      bat_name = case_when(
        bat_name != '' & stripwhite(bat_name) == stripwhite(r1_name) ~ '',
        bat_name != '' & stripwhite(bat_name) == stripwhite(r2_name) ~ '',
        bat_name != '' & stripwhite(bat_name) == stripwhite(r3_name) ~ '',
        TRUE ~ bat_name),

      # Outs on play
      outs_on_play = count_outs(event_cd, bat_text, r1_text, r2_text, r3_text),

      # New inning
      new_inn = new_inn(inn_end),
      # Outs before
      outs_before = outs_before(outs_on_play, new_game, new_inn),
      # Outs after
      outs_after = outs_before + outs_on_play,

      # Base code: 1 = first, 2 = second, 4 = third, summed for combinations
      base_cd_before = case_when(
        stripwhite(r1_name) != '' & r2_name == '' & r3_name == '' ~ 1,
        r1_name == '' & r2_name != '' & r3_name == '' ~ 2,
        r1_name != '' & r2_name != '' & r3_name == '' ~ 3,
        r1_name == '' & r2_name == '' & r3_name != '' ~ 4,
        r1_name != '' & r2_name == '' & r3_name != '' ~ 5,
        r1_name == '' & r2_name != '' & r3_name != '' ~ 6,
        r1_name != '' & r2_name != '' & r3_name != '' ~ 7,
        TRUE ~ 0),

      # Batting order
      bat_order = bat_order_id(new_game, top_inning, bat_name),

      # Hit type
      hit_type = case_when(
        event_cd == 3 ~ 'K',
        str_detect(bat_text, '(bunt)') ~ 'B',
        !str_detect(bat_text, '(bunt)') & str_detect(bat_text, '(SAC)') &
          !str_detect(bat_text, '(flied|popped)') ~ 'B',
        str_detect(bat_text, '(grounded out|(p|3b|2b|ss|1b) to (p|3b|2b|ss|1b|c))') ~ 'GO',
        str_detect(bat_text, '(flied|fouled out to (lf|rf))') ~ 'FO',
        str_detect(bat_text, '(lined)') ~ 'LO',
        str_detect(bat_text, '(popped|infield fly|fouled out to (p|3b|2b|ss|1b|c))') ~ 'PO',
        TRUE ~ '')
    )

  # ---- Stage 4: runs, scores and situational flags ----
  pbp_data_frame <- pbp_data_frame %>%
    mutate(
      # Runs on play: count scoring phrases, correcting for 'scored, scored'
      runs_on_play = (as.numeric(str_count(tmp_text, '(advanced to home)')) +
        as.numeric(str_count(tmp_text, '(scored)')) +
        as.numeric(str_count(tmp_text, '(homered)')) +
        as.numeric(str_count(tmp_text, '(stole home)')) -
        as.numeric(str_count(tmp_text, '(scored, scored)'))),

      # Away score
      away_score_before = score_before(new_game, runs_on_play, top_inning, home_team = 0),
      # Home score
      home_score_before = score_before(new_game, runs_on_play, top_inning, home_team = 1),

      # Away score after
      away_score_after = case_when(
        top_inning == 1 ~ away_score_before + runs_on_play,
        TRUE ~ away_score_before),

      # Home score after
      home_score_after = case_when(
        top_inning == 0 ~ home_score_before + runs_on_play,
        TRUE ~ home_score_before),

      # Runs this inning
      runs_this_inn = runs_this_inn(inn_end, runs_on_play),
      # Runs rest of inning
      runs_roi = runs_rest_of_inn(inn_end, runs_on_play, runs_this_inn),

      # Intentional walk
      int_bb_fl = case_when(
        str_detect(tmp_text, 'intentionally ') ~ 1,
        TRUE ~ 0),

      # Sac bunts
      sh_fl = case_when(
        str_detect(bat_text, '(SAC)') & !str_detect(bat_text, '(flied|popped)') ~ 1,
        TRUE ~ 0),

      # Sac flys
      sf_fl = case_when(
        str_detect(bat_text, '(SAC)') & str_detect(bat_text, '(flied|popped)') ~ 1,
        !str_detect(bat_text, '(SAC)') & str_detect(bat_text, '(flied|popped)') &
          str_detect(bat_text, '(RBI)') ~ 1,
        TRUE ~ 0)
    )

  # Fill batting-order slots that are still NA from neighbouring rows.
  pbp_data_frame %>%
    mutate(bat_order = bat_order_fill(bat_order, game_end))
}
861
-
862
# Regex fragments for name prefixes ("St\\." has its period escaped).
# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably used for name clean-up elsewhere. Confirm before removing.
prefixes <- c(
  "St\\.",
  "Mc",
  "De",
  "Di",
  "Van",
  "Von"
)
863
-
864
- #Get NCAA Game IDs
865
# Fetch the D1 baseball scoreboard for one date from the ncaa-api service.
#
# Args:
#   date: value accepted by as.Date().
# Returns:
#   A tibble with one row per game after the conference list-columns are
#   unnested: Date, GameID (digits following "/game/" in the game URL),
#   HomeTeam, AwayTeam, StartTime, HomeScore, AwayScore, HomeRecord,
#   AwayRecord, HomeConference, AwayConference.
# Raises:
#   An error from stop_for_status() when the HTTP request fails.
get_ncaa_schedule <- function(date) {
  date <- as.Date(date)

  ymd <- as.integer(c(format(date, "%Y"), format(date, "%m"), format(date, "%d")))
  api_url <- sprintf(
    "https://ncaa-api.henrygd.me/scoreboard/baseball/d1/%04d/%02d/%02d/all-conf",
    ymd[1], ymd[2], ymd[3]
  )

  res <- GET(api_url, user_agent("Mozilla/5.0"))
  stop_for_status(res)

  dat <- fromJSON(content(res, "text", encoding = "UTF-8"), flatten = TRUE)

  # Expand the away and home conference list-columns in turn, renaming the
  # conference name after each step so the two do not collide.
  games <- dat$games %>%
    tidyr::unnest(game.away.conferences) %>%
    rename(away_conference = conferenceName) %>%
    dplyr::select(-conferenceSeo) %>%
    tidyr::unnest(game.home.conferences) %>%
    rename(home_conference = conferenceName)

  tibble(
    Date = games$game.startDate,
    GameID = str_extract(games$game.url, "(?<=/game/)\\d+"),
    HomeTeam = games$game.home.names.short,
    AwayTeam = games$game.away.names.short,
    StartTime = games$game.startTime,
    HomeScore = games$game.home.score,
    AwayScore = games$game.away.score,
    HomeRecord = games$game.home.description,
    AwayRecord = games$game.away.description,
    HomeConference = games$home_conference,
    AwayConference = games$away_conference
  )
}
905
-
906
-
907
- #Scrapes NCAA PBP based on game IDs
908
# Fetch the play-by-play feed of one NCAA game and flatten it to one row per
# play.
#
# Args:
#   game_id: game identifier used in the ncaa-api URL.
# Returns:
#   A data frame with the columns game_id, Inning, PlayNumber, BatterTeam,
#   PitcherTeam, away_team, home_team, away_text, home_text, Score,
#   home_name_id, away_name_id. A play's text lands in away_text or home_text
#   depending on which team id the play is filed under; the other column
#   holds "".
# Raises:
#   An error from stop_for_status() when the HTTP request fails.
get_ncaa_pbp <- function(game_id) {
  res <- GET(sprintf("https://ncaa-api.henrygd.me/game/%s/play-by-play", game_id))
  stop_for_status(res)

  dat <- fromJSON(content(res, "text", encoding = "UTF-8"), flatten = TRUE)

  home_row <- filter(dat$teams, isHome == TRUE)
  away_row <- filter(dat$teams, isHome == FALSE)

  # "<first 3 letters of short name>_<first 3 letters of team name>", upper-cased
  name_code <- function(team) {
    paste0(
      toupper(str_sub(pull(team, nameShort), 1, 3)),
      "_",
      toupper(str_sub(pull(team, teamName), 1, 3))
    )
  }

  team_home <- pull(home_row, nameShort)
  team_visitor <- pull(away_row, nameShort)
  home_id <- pull(home_row, teamId)
  visitor_id <- pull(away_row, teamId)
  home_name_id <- name_code(home_row)
  away_name_id <- name_code(away_row)

  # periods -> per-team stats -> individual plays
  plays <- dat$periods %>%
    unnest(playbyplayStats, names_sep = "_") %>%
    unnest(playbyplayStats_plays, names_sep = "_")

  plays %>%
    mutate(
      game_id = game_id,
      away_team = team_visitor,
      home_team = team_home,
      PlayNumber = row_number(),
      BatterTeam = ifelse(playbyplayStats_teamId == home_id, team_home, team_visitor),
      PitcherTeam = ifelse(playbyplayStats_teamId == home_id, team_visitor, team_home),
      away_text = ifelse(playbyplayStats_teamId == visitor_id, playbyplayStats_plays_playText, ""),
      home_text = ifelse(playbyplayStats_teamId == home_id, playbyplayStats_plays_playText, ""),
      # "<visitor>-<home>", or "" when neither score is present
      Score = ifelse(
        is.na(playbyplayStats_plays_homeScore) & is.na(playbyplayStats_plays_visitorScore),
        "",
        paste0(playbyplayStats_plays_visitorScore, "-", playbyplayStats_plays_homeScore)
      ),
      home_name_id = home_name_id,
      away_name_id = away_name_id
    ) %>%
    dplyr::select(
      game_id, Inning = periodNumber, PlayNumber, BatterTeam, PitcherTeam,
      away_team, home_team, away_text, home_text, Score, home_name_id, away_name_id
    )
}
957
-
958
-
959
# Scrape and parse NCAA play-by-play for every game in a date range.
#
# Step 1 collects the schedule (game ids) for each day via
# get_ncaa_schedule(); step 2 downloads each game's play-by-play via
# get_ncaa_pbp() and tags it with the game's Date; step 3 runs ncaa_parse()
# on the stacked plays. Days or games whose request fails or is empty are
# skipped with a message. Progress is reported with message().
#
# Args:
#   start_date, end_date: values accepted by as.Date(); both ends inclusive.
# Returns:
#   The data frame produced by ncaa_parse(), or an empty tibble when no games
#   or no plays were found.
#
# Fixes over the previous version:
#   * parsed the undefined object `df2` instead of the scraped `df`;
#   * `for (date in dates_list)` stripped the Date class, handing a bare
#     number to as.Date() / get_ncaa_schedule();
#   * the result was only returned invisibly through an assignment;
#   * tables were grown with rbind() inside the loops;
#   * an empty scrape was passed straight to ncaa_parse().
scrape_clean_ncaa_pbp <- function(start_date, end_date) {
  dates_list <- seq(as.Date(start_date), as.Date(end_date), by = "day")
  n_dates <- length(dates_list)

  # Get Schedule Data for Game IDs
  schedules <- vector("list", n_dates)
  for (k in seq_len(n_dates)) {
    # Index into the vector so the element keeps its Date class.
    date <- dates_list[k]

    message(sprintf(
      "%5.1f%% | %d / %d | %s",
      100 * k / n_dates, k, n_dates, format(date)
    ))

    new_game_ids <- tryCatch(
      get_ncaa_schedule(date),
      error = function(e) NULL
    )

    if (is.null(new_game_ids) || nrow(new_game_ids) == 0) {
      message(" -- no games, skipping")
      next
    }

    schedules[[k]] <- distinct(new_game_ids, GameID, .keep_all = TRUE)

    Sys.sleep(0.2)
  }
  games_list <- bind_rows(schedules)

  n_games <- nrow(games_list)
  if (n_games == 0) {
    message("No games found between ", format(dates_list[1]), " and ",
            format(dates_list[n_dates]))
    return(tibble())
  }

  # Get PBP Data
  n_unique <- length(unique(games_list$GameID))
  pbp_chunks <- vector("list", n_games)
  for (i in seq_len(n_games)) {
    new_data <- tryCatch(
      get_ncaa_pbp(games_list$GameID[i]),
      error = function(e) NULL
    )

    if (is.null(new_data) || nrow(new_data) == 0) {
      message(" -- no data, skipping")
      next
    }

    pbp_chunks[[i]] <- new_data %>%
      mutate(Date = games_list$Date[i])

    message(sprintf(
      "%5.1f%% | %d / %d | %s",
      100 * i / n_unique, i, n_unique, games_list$GameID[i]
    ))

    Sys.sleep(0.2)
  }

  # The previous version prepended each game (rbind(new_data, df)); rev()
  # keeps that most-recently-scraped-first row order.
  df <- bind_rows(rev(pbp_chunks))

  if (nrow(df) == 0) {
    message("No play-by-play rows were downloaded")
    return(tibble())
  }

  ncaa_parse(df)
}
1030
-
1031
-
1032
 
1033
 
1034
  # UI
@@ -1470,6 +656,7 @@ ui <- fluidPage(
1470
 
1471
  br(),
1472
  actionButton("scrape_btn", "Scrape Data", class = "btn-primary"),
 
1473
  br(), br(),
1474
  downloadButton("download_scrape", "Download CSV")
1475
  ),
@@ -2374,20 +1561,77 @@ removeModal()
2374
 
2375
  # Scrape button
2376
  observeEvent(input$scrape_btn, {
2377
- scrape_status_msg("Testing FTP connection...")
 
 
 
2378
 
2379
  result <- tryCatch({
2380
- ftp_url <- paste0("ftp://", Sys.getenv("FTP_USERNAME"), ":",
2381
- Sys.getenv("FTP_PASSWORD"),
2382
- "@ftp.trackmanbaseball.com/v3/2025/04/01/CSV/")
2383
- file_list <- RCurl::getURL(ftp_url, ftp.use.epsv = FALSE,
2384
- dirlistonly = TRUE, connecttimeout = 10)
2385
- paste("Connected! Files found:", nchar(file_list), "chars")
 
 
 
 
 
 
 
 
 
 
2386
  }, error = function(e) {
2387
- paste("FTP failed:", e$message)
 
2388
  })
2389
 
2390
- scrape_status_msg(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2391
  })
2392
 
2393
  # Status text
 
215
  ))
216
  }
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
 
220
  # UI
 
656
 
657
  br(),
658
  actionButton("scrape_btn", "Scrape Data", class = "btn-primary"),
659
+ actionButton("fetch_btn", "Fetch Results", class = "btn-primary"),
660
  br(), br(),
661
  downloadButton("download_scrape", "Download CSV")
662
  ),
 
1561
 
1562
  # Scrape button
1563
  observeEvent(input$scrape_btn, {
1564
+ scrape_status_msg("Triggering scrape on GitHub...")
1565
+
1566
+ gh_token <- Sys.getenv("GITHUB_TOKEN")
1567
+ gh_repo <- Sys.getenv("GITHUB_REPO")
1568
 
1569
  result <- tryCatch({
1570
+ httr::POST(
1571
+ paste0("https://api.github.com/repos/", gh_repo, "/actions/workflows/scrape.yml/dispatches"),
1572
+ httr::add_headers(
1573
+ Authorization = paste("Bearer", gh_token),
1574
+ Accept = "application/vnd.github.v3+json"
1575
+ ),
1576
+ body = jsonlite::toJSON(list(
1577
+ ref = "main",
1578
+ inputs = list(
1579
+ start_date = as.character(input$start_date),
1580
+ end_date = as.character(input$end_date),
1581
+ data_type = input$scrape_source
1582
+ )
1583
+ ), auto_unbox = TRUE),
1584
+ encode = "raw"
1585
+ )
1586
  }, error = function(e) {
1587
+ scrape_status_msg(paste("Failed:", e$message))
1588
+ return(NULL)
1589
  })
1590
 
1591
+ if (is.null(result)) return()
1592
+
1593
+ if (httr::status_code(result) == 204) {
1594
+ scrape_status_msg("Scrape triggered! GitHub is running it now. Wait a few minutes then click 'Fetch Results'.")
1595
+ } else {
1596
+ scrape_status_msg(paste("GitHub API error:", httr::status_code(result)))
1597
+ }
1598
+ })
1599
+
1600
+ # Fetch results from GitHub
1601
# Downloads the CSV produced by the scrape workflow from the repository's
# data/ directory (GitHub contents API, raw media type), stores it in
# scraped_data(), and reports progress through scrape_status_msg().
# The file name is "<source>_<start>_to_<end>.csv".
observeEvent(input$fetch_btn, {
  # Do nothing until all three inputs have a value.
  req(input$scrape_source, input$start_date, input$end_date)

  gh_token <- Sys.getenv("GITHUB_TOKEN")
  gh_repo <- Sys.getenv("GITHUB_REPO")

  if (!nzchar(gh_token) || !nzchar(gh_repo)) {
    scrape_status_msg("Error: GITHUB_TOKEN and GITHUB_REPO must be set.")
    return()
  }

  scrape_status_msg("Fetching results...")

  filename <- paste0(
    input$scrape_source, "_", input$start_date, "_to_", input$end_date, ".csv"
  )

  # Encode the file name so spaces or reserved characters in the source name
  # cannot corrupt the request path.
  url <- paste0(
    "https://api.github.com/repos/", gh_repo, "/contents/data/",
    utils::URLencode(filename, reserved = TRUE)
  )

  resp <- tryCatch(
    httr::GET(
      url,
      httr::add_headers(
        Authorization = paste("Bearer", gh_token),
        Accept = "application/vnd.github.v3.raw"
      ),
      httr::timeout(60)
    ),
    error = function(e) {
      scrape_status_msg(paste("Error:", conditionMessage(e)))
      NULL
    }
  )

  # Stop here so the error message set above is not overwritten.
  if (is.null(resp)) return()

  status <- httr::status_code(resp)

  # 404 means the workflow has not committed the file yet; any other
  # non-200 status (bad token, rate limit, ...) is a real error.
  if (status == 404) {
    scrape_status_msg("Not ready yet. Wait a minute and try again.")
    return()
  }
  if (status != 200) {
    scrape_status_msg(paste("GitHub API error:", status))
    return()
  }

  result <- tryCatch(
    read.csv(text = httr::content(resp, as = "text", encoding = "UTF-8")),
    error = function(e) {
      scrape_status_msg(paste("Error:", conditionMessage(e)))
      NULL
    }
  )

  if (is.null(result)) return()

  if (nrow(result) > 0) {
    scraped_data(result)
    scrape_status_msg(paste0("Done! ", nrow(result), " rows × ", ncol(result), " columns."))
  } else {
    scrape_status_msg("Scrape finished but returned no rows.")
  }
})
1636
 
1637
  # Status text