OwenStOnge committed on
Commit
cc75eed
·
verified ·
1 Parent(s): e8d4205

Update app.R

Browse files
Files changed (1) hide show
  1. app.R +69 -0
app.R CHANGED
@@ -1939,6 +1939,75 @@ observe({
1939
  write.csv(scraped_data(), file, row.names = FALSE)
1940
  }
1941
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1942
 
1943
  }
1944
 
 
1939
  write.csv(scraped_data(), file, row.names = FALSE)
1940
  }
1941
  )
1942
+
1943
+
1944
# Upload handler: merge the freshly scraped rows into the master parquet file
# stored in the Hugging Face dataset repo, then upload the merged file.
#
# Steps:
#   1. Download the current master file for the selected scrape source.
#   2. Append the scraped rows and de-duplicate (by PitchUID when present,
#      otherwise on whole rows).
#   3. Write the result to a temp parquet file and PUT it back to the repo.
#
# Progress and the final outcome are reported through scrape_status_msg().
#
# Safety: the upload replaces the remote file, so the merge only proceeds when
# the existing file was read successfully (HTTP 200) or is confirmed absent
# (HTTP 404). Any other download outcome aborts the upload instead of
# overwriting the master dataset with only the new rows.
observeEvent(input$upload_hf_btn, {
  req(scraped_data())
  # Read the reactive once so every step works on the same snapshot.
  new_rows <- scraped_data()

  scrape_status_msg("Downloading existing dataset...")

  hf_token <- Sys.getenv("HF_WRITE_TOKEN")
  repo_id <- "CoastalBaseball/_2026MasterData"
  filename <- paste0(input$scrape_source, "2026_master_pbp.parquet")
  auth_header <- paste("Bearer", hf_token)

  # Temp files for the downloaded and the re-uploaded parquet; both are
  # removed when the handler exits, whether it succeeds or fails.
  tmp_dl <- tempfile(fileext = ".parquet")
  tmp <- tempfile(fileext = ".parquet")
  on.exit(unlink(c(tmp_dl, tmp)), add = TRUE)

  result <- tryCatch({
    if (!nzchar(hf_token)) {
      stop("HF_WRITE_TOKEN is not set; nothing was uploaded.", call. = FALSE)
    }

    # Try to download existing data
    dl_resp <- httr::GET(
      paste0(
        "https://huggingface.co/datasets/", repo_id,
        "/resolve/main/", filename
      ),
      httr::add_headers(Authorization = auth_header)
    )
    dl_status <- httr::status_code(dl_resp)

    existing <- if (dl_status == 200) {
      writeBin(httr::content(dl_resp, as = "raw"), tmp_dl)
      arrow::read_parquet(tmp_dl)
    } else if (dl_status == 404) {
      # No master file yet for this source: start a new one.
      NULL
    } else {
      stop(
        "could not read existing dataset (HTTP ", dl_status,
        "); nothing was uploaded.",
        call. = FALSE
      )
    }

    # Combine
    if (!is.null(existing) && nrow(existing) > 0) {
      scrape_status_msg("Merging with existing data...")
      combined <- bind_rows(existing, new_rows)

      # distinct() keeps the first occurrence, so on a PitchUID collision the
      # row already in the dataset wins over the newly scraped one.
      # NOTE(review): rows with a missing PitchUID would collapse into a
      # single row here -- confirm PitchUID is always populated.
      if ("PitchUID" %in% names(combined)) {
        combined <- combined %>% distinct(PitchUID, .keep_all = TRUE)
      } else {
        combined <- combined %>% distinct()
      }
    } else {
      combined <- new_rows
    }

    # Upload combined data as parquet
    scrape_status_msg(paste0("Uploading ", nrow(combined), " total rows..."))
    arrow::write_parquet(combined, tmp)

    # NOTE(review): confirm this endpoint and HTTP method against the current
    # Hugging Face Hub API; left exactly as in the original handler.
    up_resp <- httr::PUT(
      paste0(
        "https://huggingface.co/api/datasets/", repo_id,
        "/upload/main/", filename
      ),
      httr::add_headers(
        Authorization = auth_header,
        `Content-Type` = "application/octet-stream"
      ),
      body = httr::upload_file(tmp)
    )

    if (httr::status_code(up_resp) %in% c(200, 201)) {
      paste0("Done! ", nrow(combined), " total rows in dataset.")
    } else {
      paste0("Upload failed: ", httr::status_code(up_resp))
    }
  }, error = function(e) {
    # Any failure (download, parquet read, merge, write, upload) ends up here
    # so it is shown to the user instead of escaping the observer.
    paste("Upload error:", conditionMessage(e))
  })

  scrape_status_msg(result)
})
2011
 
2012
  }
2013