# Rebuild data/jma_stations_full.csv: back-fill missing prec_no values,
# collapse duplicate stations, and populate Name_EN from the JMA AMeDAS table.
# The script reads the CSV and overwrites it in place.

library(dplyr)
library(readr)
library(jsonlite)
library(stringr)

# 1. Read the AMeDAS table for English names ----

amedas_raw <- fromJSON("https://www.jma.go.jp/bosai/amedas/const/amedastable.json")

#' Convert a degrees / minutes pair to decimal degrees.
#'
#' @param deg Numeric vector of whole degrees.
#' @param minutes Numeric vector of minutes.
#' @return Numeric vector of decimal degrees; `NA` where either input is `NA`.
to_decimal_degrees <- function(deg, minutes) {
  ifelse(
    is.na(deg) | is.na(minutes),
    NA_real_,
    # Minutes extend the magnitude, so they are subtracted for negative degrees.
    ifelse(deg < 0, deg - minutes / 60, deg + minutes / 60)
  )
}

#' Parse one coordinate of an AMeDAS entry into decimal degrees.
#'
#' @param coord A vector whose first two elements are passed to
#'   `to_decimal_degrees()` as degrees and minutes, or `NULL`.
#' @return A single numeric value; `NA_real_` when `coord` is `NULL` or has
#'   fewer than two elements.
parse_amedas_coord <- function(coord) {
  if (is.null(coord) || length(coord) < 2) {
    return(NA_real_)
  }
  to_decimal_degrees(as.numeric(coord[[1]]), as.numeric(coord[[2]]))
}

#' Normalise a station name for matching.
#'
#' Removes parenthesised annotations and collapses whitespace.
#'
#' @param name Character vector of station names.
#' @return Character vector of normalised names.
normalize_name <- function(name) {
  # NOTE(review): the previous pattern listed the ASCII parenthesis twice in
  # each character class, which looks like a full-width parenthesis that was
  # lost in transcoding. Both forms are accepted here -- confirm against the
  # station names in the data.
  name %>%
    str_replace_all("[\\(\uFF08].*?[\\)\uFF09]", "") %>%
    str_squish()
}

#' Round a coordinate to 4 decimal places so it can be used as a join key.
coord_round <- function(x) round(x, 4)

# One row per AMeDAS entry. Built with base Map() + bind_rows() so that no
# package beyond the ones attached above is needed (imap_dfr() lives in purrr,
# which this script never loads).
amedas_entries <- bind_rows(unname(Map(
  function(entry, station_id) {
    has_jp <- !is.null(entry$kjName) && nzchar(entry$kjName)
    has_en <- !is.null(entry$enName) && nzchar(entry$enName)
    tibble(
      ID = as.character(station_id),
      # Fall back to the station id when no Japanese name is present.
      Name_JP = if (has_jp) entry$kjName else station_id,
      Name_EN = if (has_en) entry$enName else NA_character_,
      Lat = parse_amedas_coord(entry$lat),
      Lon = parse_amedas_coord(entry$lon)
    )
  },
  amedas_raw,
  names(amedas_raw)
)))

# 2. Read the current stations list ----

# ID is forced to character: the AMeDAS ids built above are character, and a
# guessed numeric ID column would make the join on ID in step 5 fail.
stations <- read_csv(
  "data/jma_stations_full.csv",
  col_types = cols(ID = col_character()),
  show_col_types = FALSE
)

# 3. Fill in missing prec_no ----

# For rows where prec_no is NA, borrow the value from another row that has the
# same Name_JP. The lookup keeps one prec_no per name (the first one seen) so
# the join below cannot multiply rows.
mappings <- stations %>%
  filter(!is.na(prec_no)) %>%
  select(Name_JP, prec_no) %>%
  distinct(Name_JP, .keep_all = TRUE)

stations_fixed <- stations %>%
  left_join(mappings, by = "Name_JP") %>%
  mutate(prec_no = coalesce(prec_no.x, prec_no.y)) %>%
  select(-prec_no.x, -prec_no.y)

# 4. Deduplicate ----

# Keep one row per Name_JP; sorting Type in descending order first decides
# which duplicate wins (the original note says S1 metadata is preferred).
# NOTE(review): the original note also mentions deduplicating by coordinates
# and an expected total of 1,445 stations, but only Name_JP is used as the
# key here -- confirm that distinct stations never share a Japanese name.
stations_final <- stations_fixed %>%
  arrange(Name_JP, desc(Type)) %>%
  distinct(Name_JP, .keep_all = TRUE)

# 5. Ensure Name_EN is populated ----

# Lookup keyed on normalised name + rounded coordinates, used for stations
# whose ID does not match an AMeDAS entry.
amedas_match <- amedas_entries %>%
  mutate(
    Name_JP_norm = normalize_name(Name_JP),
    Lat_round = coord_round(Lat),
    Lon_round = coord_round(Lon)
  ) %>%
  distinct(Name_JP_norm, Lat_round, Lon_round, .keep_all = TRUE)

amedas_by_id <- amedas_entries %>%
  select(ID, Name_EN_json = Name_EN)

amedas_by_place <- amedas_match %>%
  select(Name_JP_norm, Lat_round, Lon_round, Name_EN_match = Name_EN)

stations_final <- stations_final %>%
  left_join(amedas_by_id, by = "ID") %>%
  mutate(
    Name_JP_norm = normalize_name(Name_JP),
    Lat_round = coord_round(Lat),
    Lon_round = coord_round(Lon)
  ) %>%
  left_join(
    amedas_by_place,
    by = c("Name_JP_norm", "Lat_round", "Lon_round")
  ) %>%
  # Priority: existing value, then the ID match, then the name/coordinate match.
  mutate(Name_EN = coalesce(Name_EN, Name_EN_json, Name_EN_match)) %>%
  select(-Name_EN_json, -Name_EN_match, -Name_JP_norm, -Lat_round, -Lon_round)

# 6. Save final file ----

write_csv(stations_final, "data/jma_stations_full.csv")
message(sprintf("Final station count: %d", nrow(stations_final)))