library(dplyr)
library(readr)
library(jsonlite)
library(stringr)

# 1. Download the AMeDAS station table
# It is used further down as the source of English station names.
amedas_raw <- fromJSON(
  txt = "https://www.jma.go.jp/bosai/amedas/const/amedastable.json"
)

# Convert a (degrees, minutes) pair into decimal degrees.
#
# deg     : whole degrees; a negative value makes the minutes count away
#           from zero as well.
# minutes : arc minutes belonging to `deg`.
#
# Both arguments are vectorised. Any position where either input is missing
# yields NA_real_.
to_decimal_degrees <- function(deg, minutes) {
  has_gap <- is.na(deg) | is.na(minutes)
  ifelse(
    has_gap,
    NA_real_,
    ifelse(deg >= 0, deg + minutes / 60, deg - minutes / 60)
  )
}

# Turn one AMeDAS coordinate entry into decimal degrees.
#
# coord : a vector whose first element is degrees and second is minutes.
#
# Returns NA_real_ when `coord` is NULL or has fewer than two elements
# (length(NULL) is 0, so a single length test covers both cases).
parse_amedas_coord <- function(coord) {
  if (length(coord) >= 2) {
    to_decimal_degrees(as.numeric(coord[1]), as.numeric(coord[2]))
  } else {
    NA_real_
  }
}

# Normalise a station name for matching: drop every parenthesised segment
# (ASCII or full-width brackets, matched non-greedily) and then squish the
# remaining whitespace with str_squish().
normalize_name <- function(name) {
  without_brackets <- str_replace_all(name, "[\\(（].*?[\\)）]", "")
  str_squish(without_brackets)
}

# Round a coordinate to four decimal places so it can serve as a join key.
coord_round <- function(x) {
  round(x, digits = 4)
}

# Flatten the AMeDAS table (a list with one element per station, keyed by
# station ID) into a tibble with one row per station.
#
# Columns:
#   ID      - station ID taken from the element name, as character
#   Name_JP - entry$kjName, falling back to the station ID when absent/empty
#   Name_EN - entry$enName, or NA when absent/empty
#   Lat/Lon - entry$lat / entry$lon converted via parse_amedas_coord()
#
# Base Map() plus dplyr's bind_rows() is used instead of purrr::imap_dfr():
# purrr is never attached by this script, so the imap_dfr() call failed with
# "could not find function".
amedas_entries <- bind_rows(Map(
  function(entry, station_id) {
    # Use a field only when it is present and non-empty.
    field_or <- function(value, fallback) {
      if (!is.null(value) && nzchar(value)) value else fallback
    }
    station_id <- as.character(station_id)
    tibble(
      ID = station_id,
      Name_JP = field_or(entry$kjName, station_id),
      Name_EN = field_or(entry$enName, NA_character_),
      Lat = parse_amedas_coord(entry$lat),
      Lon = parse_amedas_coord(entry$lon)
    )
  },
  amedas_raw,
  # Same index rule as imap: element names when present, positions otherwise.
  if (is.null(names(amedas_raw))) seq_along(amedas_raw) else names(amedas_raw)
))

# 2. Load the full station list obtained from the map (1,445 stations expected)
# The current file is read as-is; duplicate rows are collapsed later in this
# script rather than by going back to an earlier copy of the file.
stations <- read_csv(
  file = "data/jma_stations_full.csv",
  show_col_types = FALSE
)

# 3. Fill in missing prec_no values
# A row whose prec_no is NA borrows the prec_no of another row that has the
# same Name_JP.
#
# `mappings` keeps exactly one prec_no per Name_JP (the first one seen). With
# distinct() over both columns a name carrying several different prec_no values
# produced several lookup rows, so the join below emitted one copy of every
# matching station row per candidate value.
mappings <- stations %>%
  filter(!is.na(prec_no)) %>%
  select(Name_JP, prec_no) %>%
  distinct(Name_JP, .keep_all = TRUE)

# The .x/.y columns are dropped and prec_no re-created so that the column ends
# up in the same (last) position as before.
stations_fixed <- stations %>%
  left_join(mappings, by = "Name_JP") %>%
  mutate(prec_no = coalesce(prec_no.x, prec_no.y)) %>%
  select(-prec_no.x, -prec_no.y)

# 4. Collapse to one row per Japanese station name
# Rows are ordered by Name_JP and then by Type descending; distinct() keeps the
# first row of each name, i.e. the one whose Type sorts highest.
# NOTE(review): the intent appears to be preferring "S1" metadata and landing
# on exactly 1,445 stations - confirm that "S1" sorts above the other Type
# values and that no two distinct stations share a Name_JP.
stations_final <- distinct(
  arrange(stations_fixed, Name_JP, desc(Type)),
  Name_JP,
  .keep_all = TRUE
)

# 5. Ensure Name_EN is populated
# Lookup table for matching by name and position: one row per combination of
# normalised Japanese name and coordinates rounded by coord_round().
amedas_match <- distinct(
  mutate(
    amedas_entries,
    Name_JP_norm = normalize_name(Name_JP),
    Lat_round = coord_round(Lat),
    Lon_round = coord_round(Lon)
  ),
  Name_JP_norm, Lat_round, Lon_round,
  .keep_all = TRUE
)

# Fill gaps in Name_EN from the AMeDAS table. Priority order:
#   1. the Name_EN already present in the station list,
#   2. the AMeDAS entry with the same station ID,
#   3. the AMeDAS entry with the same normalised Japanese name and the same
#      rounded coordinates.

# coalesce() below needs a Name_EN column to exist; create an empty one when
# the input file does not carry it.
if (!"Name_EN" %in% names(stations_final)) {
  stations_final <- mutate(stations_final, Name_EN = NA_character_)
}

stations_final <- stations_final %>%
  mutate(
    # read_csv() guesses column types, so an all-digit ID column arrives as
    # numeric while the AMeDAS IDs are character. Joining on a character copy
    # keeps the key types compatible without touching the ID column itself.
    ID_key = as.character(ID),
    Name_JP_norm = normalize_name(Name_JP),
    Lat_round = coord_round(Lat),
    Lon_round = coord_round(Lon)
  ) %>%
  left_join(
    amedas_entries %>%
      select(ID_key = ID, Name_EN_json = Name_EN),
    by = "ID_key"
  ) %>%
  left_join(
    amedas_match %>%
      select(Name_JP_norm, Lat_round, Lon_round, Name_EN_match = Name_EN),
    by = c("Name_JP_norm", "Lat_round", "Lon_round")
  ) %>%
  # as.character() guards against an all-empty Name_EN column having been
  # read as logical.
  mutate(
    Name_EN = coalesce(as.character(Name_EN), Name_EN_json, Name_EN_match)
  ) %>%
  # Drop the helper columns so only the original columns are written out.
  select(
    -ID_key, -Name_EN_json, -Name_EN_match,
    -Name_JP_norm, -Lat_round, -Lon_round
  )

# 6. Write the result back over the input file and report the row count
write_csv(x = stations_final, file = "data/jma_stations_full.csv")
message(
  sprintf(fmt = "Final station count: %d", nrow(stations_final))
)