Spaces:
Running
Running
Update app.R
Browse files
app.R
CHANGED
|
@@ -1939,6 +1939,75 @@ observe({
|
|
| 1939 |
write.csv(scraped_data(), file, row.names = FALSE)
|
| 1940 |
}
|
| 1941 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1942 |
|
| 1943 |
}
|
| 1944 |
|
|
|
|
| 1939 |
write.csv(scraped_data(), file, row.names = FALSE)
|
| 1940 |
}
|
| 1941 |
)
|
| 1942 |
+
|
| 1943 |
+
|
| 1944 |
+
# Upload handler for the "upload to Hugging Face" button.
#
# Steps:
#   1. Download the current master parquet file for the selected scrape
#      source from the dataset repo (HTTP 404 means "no file yet").
#   2. Append the freshly scraped rows and drop duplicates (by PitchUID when
#      that column exists, otherwise by whole row). distinct() keeps the first
#      occurrence, so rows already in the master file win over re-scraped ones.
#   3. Write the merged table to a temp parquet file and PUT it to the repo.
#
# Safety: if the existing file cannot be downloaded or read for any reason
# other than "not found", the upload is aborted. Uploading anyway would
# replace the master file with only the newly scraped rows.
#
# Progress and the final outcome are reported through scrape_status_msg().
# NOTE(review): if scrape_status_msg is a reactiveVal, the intermediate
# messages are presumably not rendered until this observer finishes -- confirm.
observeEvent(input$upload_hf_btn, {
  req(scraped_data())

  # Read the reactive once so every step below works on the same snapshot.
  new_rows <- scraped_data()

  hf_token <- Sys.getenv("HF_WRITE_TOKEN")
  repo_id <- "CoastalBaseball/_2026MasterData"
  filename <- paste0(input$scrape_source, "2026_master_pbp.parquet")

  # Both temp paths are created up front so `finally` can always clean them,
  # whichever step fails.
  tmp_dl <- tempfile(fileext = ".parquet")
  tmp <- tempfile(fileext = ".parquet")

  result <- tryCatch(
    {
      if (!nzchar(hf_token)) {
        stop("HF_WRITE_TOKEN is not set", call. = FALSE)
      }
      auth_value <- paste("Bearer", hf_token)

      # ---- 1. Download existing data --------------------------------------
      scrape_status_msg("Downloading existing dataset...")

      dl_resp <- httr::GET(
        paste0(
          "https://huggingface.co/datasets/", repo_id,
          "/resolve/main/", filename
        ),
        httr::add_headers(Authorization = auth_value)
      )
      dl_code <- httr::status_code(dl_resp)

      if (dl_code == 200) {
        writeBin(httr::content(dl_resp, as = "raw"), tmp_dl)
        existing <- arrow::read_parquet(tmp_dl)
      } else if (dl_code == 404) {
        # No master file yet: the scraped rows become the initial dataset.
        existing <- NULL
      } else {
        # Auth failure, rate limit, server error, ...: do not overwrite.
        stop(
          "could not download existing dataset (HTTP ", dl_code,
          "); nothing was uploaded",
          call. = FALSE
        )
      }

      # ---- 2. Combine -----------------------------------------------------
      if (!is.null(existing) && nrow(existing) > 0) {
        scrape_status_msg("Merging with existing data...")
        combined <- bind_rows(existing, new_rows)

        if ("PitchUID" %in% names(combined)) {
          combined <- combined %>% distinct(PitchUID, .keep_all = TRUE)
        } else {
          combined <- combined %>% distinct()
        }
      } else {
        combined <- new_rows
      }

      # ---- 3. Upload combined data as parquet -----------------------------
      scrape_status_msg(paste0("Uploading ", nrow(combined), " total rows..."))
      arrow::write_parquet(combined, tmp)

      # NOTE(review): endpoint kept exactly as in the original code; confirm
      # it is still supported by the current Hugging Face Hub API.
      up_resp <- httr::PUT(
        paste0(
          "https://huggingface.co/api/datasets/", repo_id,
          "/upload/main/", filename
        ),
        httr::add_headers(
          Authorization = auth_value,
          `Content-Type` = "application/octet-stream"
        ),
        body = httr::upload_file(tmp)
      )
      up_code <- httr::status_code(up_resp)

      if (up_code %in% c(200, 201)) {
        paste0("Done! ", nrow(combined), " total rows in dataset.")
      } else {
        paste0("Upload failed: ", up_code)
      }
    },
    error = function(e) {
      paste("Upload error:", conditionMessage(e))
    },
    # unlink() silently ignores paths that were never created.
    finally = unlink(c(tmp_dl, tmp))
  )

  scrape_status_msg(result)
})
|
| 2011 |
|
| 2012 |
}
|
| 2013 |
|