Spaces:
Running
Running
Update app.R
Browse files
app.R
CHANGED
|
@@ -2026,71 +2026,87 @@ observe({
|
|
| 2026 |
}
|
| 2027 |
)
|
| 2028 |
|
|
|
|
| 2029 |
observeEvent(input$upload_hf_btn, {
|
| 2030 |
req(scraped_data())
|
| 2031 |
|
| 2032 |
-
scrape_status_msg("Downloading existing dataset...")
|
| 2033 |
-
|
| 2034 |
hf_token <- Sys.getenv("HF_WRITE_TOKEN")
|
| 2035 |
repo_id <- "CoastalBaseball/2026MasterDataset"
|
| 2036 |
-
filename <- paste0(input$scrape_source, "_2026_master.parquet")
|
| 2037 |
|
| 2038 |
-
#
|
| 2039 |
-
|
| 2040 |
-
|
| 2041 |
-
paste0("https://huggingface.co/datasets/", repo_id, "/resolve/main/", filename),
|
| 2042 |
-
httr::add_headers(Authorization = paste("Bearer", hf_token))
|
| 2043 |
-
)
|
| 2044 |
|
| 2045 |
-
|
| 2046 |
-
|
| 2047 |
-
|
| 2048 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2049 |
} else {
|
| 2050 |
-
|
| 2051 |
}
|
| 2052 |
-
}, error = function(e) { NULL })
|
| 2053 |
-
|
| 2054 |
-
# Combine
|
| 2055 |
-
if (!is.null(existing) && nrow(existing) > 0) {
|
| 2056 |
-
scrape_status_msg("Merging with existing data...")
|
| 2057 |
-
combined <- bind_rows(existing, scraped_data())
|
| 2058 |
|
| 2059 |
-
|
| 2060 |
-
|
| 2061 |
-
|
| 2062 |
-
|
| 2063 |
-
|
| 2064 |
-
|
| 2065 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2066 |
}
|
| 2067 |
|
| 2068 |
-
# Upload
|
| 2069 |
-
|
|
|
|
| 2070 |
|
| 2071 |
-
|
| 2072 |
-
|
| 2073 |
-
|
| 2074 |
-
|
| 2075 |
-
hf <- import("huggingface_hub")
|
| 2076 |
-
api <- hf$HfApi()
|
| 2077 |
-
|
| 2078 |
|
| 2079 |
-
|
| 2080 |
-
|
| 2081 |
-
|
| 2082 |
-
repo_id = repo_id,
|
| 2083 |
-
repo_type = "dataset",
|
| 2084 |
-
token = hf_token
|
| 2085 |
-
)
|
| 2086 |
|
| 2087 |
-
|
| 2088 |
-
|
| 2089 |
-
|
| 2090 |
-
|
| 2091 |
-
|
| 2092 |
-
|
| 2093 |
-
|
|
|
|
| 2094 |
})
|
| 2095 |
|
| 2096 |
}
|
|
|
|
| 2026 |
}
|
| 2027 |
)
|
| 2028 |
|
| 2029 |
+
|
| 2030 |
# Upload handler for the "upload to Hugging Face" button.
#
# For each target parquet file in the dataset repo this:
#   1. downloads the current remote copy (if one exists),
#   2. appends the freshly scraped rows and de-duplicates them,
#   3. uploads the merged file back to the repo.
# When the scrape source is "pbp", two additional Coastal-only subsets
# (pitchers / hitters) are merged and uploaded the same way.
# The final outcome of every upload is reported through scrape_status_msg().
observeEvent(input$upload_hf_btn, {
  req(scraped_data())

  hf_token <- Sys.getenv("HF_WRITE_TOKEN")
  repo_id <- "CoastalBaseball/2026MasterDataset"

  # Without a token neither the download nor the upload can authenticate,
  # so stop early with a clear message.
  if (!nzchar(hf_token)) {
    scrape_status_msg("Upload error: HF_WRITE_TOKEN is not set.")
    return(invisible(NULL))
  }

  # Read the reactive once so every upload below works on the same snapshot.
  new_rows <- scraped_data()

  # Helper: fetch the current remote copy of `filename`.
  # Returns the data frame read from the parquet file, or NULL when the file
  # does not exist in the repo yet (HTTP 404). Any other failure raises an
  # error so the caller never mistakes "download failed" for "nothing to
  # merge" and overwrites the remote data with only the new rows.
  download_existing <- function(filename) {
    tmp_dl <- tempfile(fileext = ".parquet")
    on.exit(unlink(tmp_dl), add = TRUE)

    # Stream the response straight to disk instead of buffering the whole
    # file in memory as a raw vector.
    resp <- httr::GET(
      paste0("https://huggingface.co/datasets/", repo_id, "/resolve/main/", filename),
      httr::add_headers(Authorization = paste("Bearer", hf_token)),
      httr::write_disk(tmp_dl, overwrite = TRUE)
    )

    status <- httr::status_code(resp)
    if (status == 404) {
      return(NULL)
    }
    if (status != 200) {
      stop(
        "could not download existing file (HTTP ", status,
        "); upload skipped to avoid overwriting remote data",
        call. = FALSE
      )
    }

    arrow::read_parquet(tmp_dl)
  }

  # Helper: download existing, merge, dedupe, upload.
  #   new_data  data frame of rows to add
  #   filename  path of the parquet file inside the dataset repo
  #   label     human-readable name used in status messages
  # Returns a single status string (success or error); never throws.
  upload_to_hf <- function(new_data, filename, label) {
    tmp <- tempfile(fileext = ".parquet")
    on.exit(unlink(tmp), add = TRUE)

    result <- tryCatch({
      scrape_status_msg(paste0("Downloading existing ", label, "..."))
      existing <- download_existing(filename)

      if (!is.null(existing) && nrow(existing) > 0) {
        scrape_status_msg(paste0("Merging ", label, "..."))
        combined <- bind_rows(existing, new_data)
        # Existing rows come first, so on a PitchUID collision the row that
        # is already in the remote file is the one that is kept.
        if ("PitchUID" %in% names(combined)) {
          combined <- combined %>% distinct(PitchUID, .keep_all = TRUE)
        } else {
          combined <- combined %>% distinct()
        }
      } else {
        combined <- new_data
      }

      scrape_status_msg(paste0("Uploading ", label, " (", nrow(combined), " rows)..."))
      arrow::write_parquet(combined, tmp)

      # `import` is presumably reticulate::import, attached elsewhere in
      # this file -- confirm.
      hf <- import("huggingface_hub")
      api <- hf$HfApi()
      api$upload_file(
        path_or_fileobj = tmp,
        path_in_repo = filename,
        repo_id = repo_id,
        repo_type = "dataset",
        token = hf_token
      )

      paste0(label, ": ", nrow(combined), " rows uploaded.")
    }, error = function(e) {
      paste0(label, " upload error: ", conditionMessage(e))
    })

    # Release the (potentially large) merged frames before the next upload.
    gc()
    result
  }

  # 1. Upload main dataset
  main_file <- paste0(input$scrape_source, "_2026_master.parquet")
  msg1 <- upload_to_hf(new_rows, main_file, "Master Dataset")

  # 2. If PBP, also upload Coastal filtered datasets
  if (input$scrape_source == "pbp") {
    coastal_pitchers <- new_rows %>% filter(PitcherTeam == "COA_CHA")
    coastal_hitters <- new_rows %>% filter(BatterTeam == "COA_CHA")

    msg2 <- if (nrow(coastal_pitchers) > 0) {
      upload_to_hf(coastal_pitchers, "CoastalPitchers2026.parquet", "Coastal Pitchers")
    } else {
      "Coastal Pitchers: No matching rows"
    }

    msg3 <- if (nrow(coastal_hitters) > 0) {
      upload_to_hf(coastal_hitters, "CoastalHitters2026.parquet", "Coastal Hitters")
    } else {
      "Coastal Hitters: No matching rows"
    }

    scrape_status_msg(paste(msg1, msg2, msg3, sep = "\n"))
  } else {
    scrape_status_msg(msg1)
  }
})
|
| 2111 |
|
| 2112 |
}
|