OwenStOnge committed on
Commit
a251a67
·
verified ·
1 Parent(s): 2b69e06

Update app.R

Browse files
Files changed (1) hide show
  1. app.R +66 -50
app.R CHANGED
@@ -2026,71 +2026,87 @@ observe({
2026
  }
2027
  )
2028
 
 
2029
  observeEvent(input$upload_hf_btn, {
2030
  req(scraped_data())
2031
 
2032
- scrape_status_msg("Downloading existing dataset...")
2033
-
2034
  hf_token <- Sys.getenv("HF_WRITE_TOKEN")
2035
  repo_id <- "CoastalBaseball/2026MasterDataset"
2036
- filename <- paste0(input$scrape_source, "_2026_master.parquet")
2037
 
2038
- # Try to download existing data
2039
- existing <- tryCatch({
2040
- resp <- httr::GET(
2041
- paste0("https://huggingface.co/datasets/", repo_id, "/resolve/main/", filename),
2042
- httr::add_headers(Authorization = paste("Bearer", hf_token))
2043
- )
2044
 
2045
- if (httr::status_code(resp) == 200) {
2046
- tmp_dl <- tempfile(fileext = ".parquet")
2047
- writeBin(httr::content(resp, as = "raw"), tmp_dl)
2048
- arrow::read_parquet(tmp_dl)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2049
  } else {
2050
- NULL
2051
  }
2052
- }, error = function(e) { NULL })
2053
-
2054
- # Combine
2055
- if (!is.null(existing) && nrow(existing) > 0) {
2056
- scrape_status_msg("Merging with existing data...")
2057
- combined <- bind_rows(existing, scraped_data())
2058
 
2059
- if ("PitchUID" %in% names(combined)) {
2060
- combined <- combined %>% distinct(PitchUID, .keep_all = TRUE)
2061
- } else {
2062
- combined <- combined %>% distinct()
2063
- }
2064
- } else {
2065
- combined <- scraped_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2066
  }
2067
 
2068
- # Upload via commit API
2069
- scrape_status_msg(paste0("Uploading ", nrow(combined), " total rows..."))
 
2070
 
2071
- tmp <- tempfile(fileext = ".parquet")
2072
- arrow::write_parquet(combined, tmp)
2073
-
2074
- result <- tryCatch({
2075
- hf <- import("huggingface_hub")
2076
- api <- hf$HfApi()
2077
-
2078
 
2079
- api$upload_file(
2080
- path_or_fileobj = tmp,
2081
- path_in_repo = filename,
2082
- repo_id = repo_id,
2083
- repo_type = "dataset",
2084
- token = hf_token
2085
- )
2086
 
2087
- paste0("Done! ", nrow(combined), " total rows in dataset.")
2088
- }, error = function(e) {
2089
- paste("Upload error:", e$message)
2090
- })
2091
-
2092
- file.remove(tmp)
2093
- scrape_status_msg(result)
 
2094
  })
2095
 
2096
  }
 
2026
  }
2027
  )
2028
 
2029
+
2030
  observeEvent(input$upload_hf_btn, {
2031
  req(scraped_data())
2032
 
 
 
2033
  hf_token <- Sys.getenv("HF_WRITE_TOKEN")
2034
  repo_id <- "CoastalBaseball/2026MasterDataset"
 
2035
 
2036
+ # Helper: download existing, merge, dedupe, upload
2037
+ upload_to_hf <- function(new_data, filename, label) {
2038
+ scrape_status_msg(paste0("Downloading existing ", label, "..."))
 
 
 
2039
 
2040
+ existing <- tryCatch({
2041
+ resp <- httr::GET(
2042
+ paste0("https://huggingface.co/datasets/", repo_id, "/resolve/main/", filename),
2043
+ httr::add_headers(Authorization = paste("Bearer", hf_token))
2044
+ )
2045
+ if (httr::status_code(resp) == 200) {
2046
+ tmp_dl <- tempfile(fileext = ".parquet")
2047
+ writeBin(httr::content(resp, as = "raw"), tmp_dl)
2048
+ arrow::read_parquet(tmp_dl)
2049
+ } else { NULL }
2050
+ }, error = function(e) { NULL })
2051
+
2052
+ if (!is.null(existing) && nrow(existing) > 0) {
2053
+ scrape_status_msg(paste0("Merging ", label, "..."))
2054
+ combined <- bind_rows(existing, new_data)
2055
+ if ("PitchUID" %in% names(combined)) {
2056
+ combined <- combined %>% distinct(PitchUID, .keep_all = TRUE)
2057
+ } else {
2058
+ combined <- combined %>% distinct()
2059
+ }
2060
  } else {
2061
+ combined <- new_data
2062
  }
 
 
 
 
 
 
2063
 
2064
+ scrape_status_msg(paste0("Uploading ", label, " (", nrow(combined), " rows)..."))
2065
+
2066
+ tmp <- tempfile(fileext = ".parquet")
2067
+ arrow::write_parquet(combined, tmp)
2068
+
2069
+ result <- tryCatch({
2070
+ hf <- import("huggingface_hub")
2071
+ api <- hf$HfApi()
2072
+ api$upload_file(
2073
+ path_or_fileobj = tmp,
2074
+ path_in_repo = filename,
2075
+ repo_id = repo_id,
2076
+ repo_type = "dataset",
2077
+ token = hf_token
2078
+ )
2079
+ paste0(label, ": ", nrow(combined), " rows uploaded.")
2080
+ }, error = function(e) {
2081
+ paste0(label, " upload error: ", e$message)
2082
+ })
2083
+
2084
+ file.remove(tmp)
2085
+ gc()
2086
+ return(result)
2087
  }
2088
 
2089
+ # 1. Upload main dataset
2090
+ main_file <- paste0(input$scrape_source, "_2026_master.parquet")
2091
+ msg1 <- upload_to_hf(scraped_data(), main_file, "Master Dataset")
2092
 
2093
+ # 2. If PBP, also upload Coastal filtered datasets
2094
+ if (input$scrape_source == "pbp") {
2095
+ coastal_pitchers <- scraped_data() %>% filter(PitcherTeam == "COA_CHA")
2096
+ coastal_hitters <- scraped_data() %>% filter(BatterTeam == "COA_CHA")
 
 
 
2097
 
2098
+ msg2 <- if (nrow(coastal_pitchers) > 0) {
2099
+ upload_to_hf(coastal_pitchers, "CoastalPitchers2026.parquet", "Coastal Pitchers")
2100
+ } else { "Coastal Pitchers: No matching rows" }
 
 
 
 
2101
 
2102
+ msg3 <- if (nrow(coastal_hitters) > 0) {
2103
+ upload_to_hf(coastal_hitters, "CoastalHitters2026.parquet", "Coastal Hitters")
2104
+ } else { "Coastal Hitters: No matching rows" }
2105
+
2106
+ scrape_status_msg(paste(msg1, msg2, msg3, sep = "\n"))
2107
+ } else {
2108
+ scrape_status_msg(msg1)
2109
+ }
2110
  })
2111
 
2112
  }