# jma / parse_start_year.R
# alexdum's picture
# first commit
# 57537fb
#!/usr/bin/env Rscript
# Print the earliest "start" year found in the HTML tables of a JMA
# availability page.
#
# Steps:
#   1. Verify that 'rvest' and 'xml2' are installed (they are called via `::`).
#   2. Download `url` and parse every <table> into a data frame.
#   3. In each non-empty table, pick the columns whose header contains "start"
#      (case-insensitive) and pull a four-digit year out of every cell.
#   4. Write the minimum year to stdout, followed by " \n".
#
# Stops with an error when a required package is missing or when no year can
# be extracted from any table.
url <- "https://www.data.jma.go.jp/obd/stats/etrn/view/available_table.php?prec_no=44&block_no=47662"

# Fail fast with a readable message instead of a cryptic `::` lookup error.
# suppressMessages() only wraps the namespace checks so that load-time chatter
# from the packages stays off the console.
packages_available <- suppressMessages(
  requireNamespace("rvest", quietly = TRUE) &&
    requireNamespace("xml2", quietly = TRUE)
)
if (!packages_available) {
  stop("Missing packages: please install 'rvest' and 'xml2'.", call. = FALSE)
}

# Extract the first run of four consecutive digits from each cell.
#
# Stripping all non-digits and converting the remainder (the previous approach)
# turns a cell such as "1961/1" into 19611; taking the first four-digit run
# yields 1961 instead. Cells without such a run (including NA) are dropped.
extract_years <- function(cells) {
  cells <- as.character(cells)
  as.numeric(regmatches(cells, regexpr("[0-9]{4}", cells)))
}

# Collect the years from every "start" column of a single table.
# Returns NULL/empty when the table has no rows or no matching column.
years_in_table <- function(tbl) {
  if (nrow(tbl) == 0) {
    return(numeric(0))
  }
  headers <- tolower(gsub("\\s+", " ", names(tbl)))
  start_cols <- which(grepl("start", headers))
  # Index by position so duplicated or empty column names cannot collide.
  unlist(lapply(start_cols, function(idx) extract_years(tbl[[idx]])))
}

doc <- xml2::read_html(url)
tables <- rvest::html_table(doc, fill = TRUE)

# unlist() over per-table results avoids growing a vector inside nested loops.
start_years <- unlist(lapply(tables, years_in_table))

if (length(start_years) == 0) {
  stop("No start year values found in tables.", call. = FALSE)
}

cat(min(start_years), "\n")