Spaces:
Running
Running
Update app.R
Browse files
app.R
CHANGED
|
@@ -2034,7 +2034,6 @@ observeEvent(input$upload_hf_btn, {
|
|
| 2034 |
hf_token <- Sys.getenv("HF_WRITE_TOKEN")
|
| 2035 |
repo_id <- "CoastalBaseball/2026MasterDataset"
|
| 2036 |
|
| 2037 |
-
# Helper: download existing, merge, dedupe, upload
|
| 2038 |
upload_to_hf <- function(new_data, filename, label) {
|
| 2039 |
scrape_status_msg(paste0("Downloading existing ", label, "..."))
|
| 2040 |
|
|
@@ -2050,7 +2049,10 @@ observeEvent(input$upload_hf_btn, {
|
|
| 2050 |
} else { NULL }
|
| 2051 |
}, error = function(e) { NULL })
|
| 2052 |
|
| 2053 |
-
if (!is.null(existing)
|
|
|
|
|
|
|
|
|
|
| 2054 |
scrape_status_msg(paste0("Merging ", label, "..."))
|
| 2055 |
combined <- bind_rows(existing, new_data)
|
| 2056 |
if ("PitchUID" %in% names(combined)) {
|
|
@@ -2062,6 +2064,8 @@ observeEvent(input$upload_hf_btn, {
|
|
| 2062 |
combined <- new_data
|
| 2063 |
}
|
| 2064 |
|
|
|
|
|
|
|
| 2065 |
scrape_status_msg(paste0("Uploading ", label, " (", nrow(combined), " rows)..."))
|
| 2066 |
|
| 2067 |
tmp <- tempfile(fileext = ".parquet")
|
|
@@ -2077,7 +2081,7 @@ observeEvent(input$upload_hf_btn, {
|
|
| 2077 |
repo_type = "dataset",
|
| 2078 |
token = hf_token
|
| 2079 |
)
|
| 2080 |
-
paste0(label, ": ", nrow(combined), "
|
| 2081 |
}, error = function(e) {
|
| 2082 |
paste0(label, " upload error: ", e$message)
|
| 2083 |
})
|
|
@@ -2087,11 +2091,9 @@ observeEvent(input$upload_hf_btn, {
|
|
| 2087 |
return(result)
|
| 2088 |
}
|
| 2089 |
|
| 2090 |
-
# 1. Upload main dataset
|
| 2091 |
main_file <- paste0(input$scrape_source, "_2026_master.parquet")
|
| 2092 |
msg1 <- upload_to_hf(scraped_data(), main_file, "Master Dataset")
|
| 2093 |
|
| 2094 |
-
# 2. If PBP, also upload Coastal filtered datasets
|
| 2095 |
if (input$scrape_source == "pbp") {
|
| 2096 |
coastal_pitchers <- scraped_data() %>% filter(PitcherTeam == "COA_CHA")
|
| 2097 |
coastal_hitters <- scraped_data() %>% filter(BatterTeam == "COA_CHA")
|
|
|
|
| 2034 |
hf_token <- Sys.getenv("HF_WRITE_TOKEN")
|
| 2035 |
repo_id <- "CoastalBaseball/2026MasterDataset"
|
| 2036 |
|
|
|
|
| 2037 |
upload_to_hf <- function(new_data, filename, label) {
|
| 2038 |
scrape_status_msg(paste0("Downloading existing ", label, "..."))
|
| 2039 |
|
|
|
|
| 2049 |
} else { NULL }
|
| 2050 |
}, error = function(e) { NULL })
|
| 2051 |
|
| 2052 |
+
existing_rows <- if (!is.null(existing)) nrow(existing) else 0
|
| 2053 |
+
scraped_rows <- nrow(new_data)
|
| 2054 |
+
|
| 2055 |
+
if (existing_rows > 0) {
|
| 2056 |
scrape_status_msg(paste0("Merging ", label, "..."))
|
| 2057 |
combined <- bind_rows(existing, new_data)
|
| 2058 |
if ("PitchUID" %in% names(combined)) {
|
|
|
|
| 2064 |
combined <- new_data
|
| 2065 |
}
|
| 2066 |
|
| 2067 |
+
new_rows <- nrow(combined) - existing_rows
|
| 2068 |
+
|
| 2069 |
scrape_status_msg(paste0("Uploading ", label, " (", nrow(combined), " rows)..."))
|
| 2070 |
|
| 2071 |
tmp <- tempfile(fileext = ".parquet")
|
|
|
|
| 2081 |
repo_type = "dataset",
|
| 2082 |
token = hf_token
|
| 2083 |
)
|
| 2084 |
+
paste0(label, ": ", scraped_rows, " rows scraped, ", new_rows, " new rows added (", nrow(combined), " total)")
|
| 2085 |
}, error = function(e) {
|
| 2086 |
paste0(label, " upload error: ", e$message)
|
| 2087 |
})
|
|
|
|
| 2091 |
return(result)
|
| 2092 |
}
|
| 2093 |
|
|
|
|
| 2094 |
main_file <- paste0(input$scrape_source, "_2026_master.parquet")
|
| 2095 |
msg1 <- upload_to_hf(scraped_data(), main_file, "Master Dataset")
|
| 2096 |
|
|
|
|
| 2097 |
if (input$scrape_source == "pbp") {
|
| 2098 |
coastal_pitchers <- scraped_data() %>% filter(PitcherTeam == "COA_CHA")
|
| 2099 |
coastal_hitters <- scraped_data() %>% filter(BatterTeam == "COA_CHA")
|