Spaces:

alexdum
/

jma

Running

App Files Files Community

jma / funs /finalize_metadata.R

alexdum

first commit

57537fb about 1 month ago

raw

history blame contribute delete

3.09 kB

	library(dplyr)
	library(readr)
	library(jsonlite)
	library(stringr)

	# 1. Download the JMA AMeDAS station table; it is the source of the
	#    English station names used further down.
	amedas_raw <- "https://www.jma.go.jp/bosai/amedas/const/amedastable.json" %>%
	  fromJSON()

	# Convert a (degrees, arc-minutes) pair to decimal degrees.
	#
	# deg:     numeric vector of whole degrees; may be negative.
	# minutes: numeric vector of arc-minutes paired element-wise with `deg`.
	# Returns a numeric vector. Elements where either component is missing
	# are NA.
	to_decimal_degrees <- function(deg, minutes) {
	  fraction <- minutes / 60
	  # Minutes extend the magnitude of the angle, so they are subtracted when
	  # the degrees part is negative.
	  ifelse(
	    is.na(deg) | is.na(minutes),
	    NA_real_,
	    ifelse(deg < 0, deg - fraction, deg + fraction)
	  )
	}

	# Turn one AMeDAS coordinate entry into decimal degrees.
	#
	# coord: a length-2 (or longer) vector whose first element is degrees and
	#        whose second element is arc-minutes.
	# Returns a single numeric value, or NA_real_ when `coord` is NULL or has
	# fewer than two elements.
	parse_amedas_coord <- function(coord) {
	  if (is.null(coord) || length(coord) < 2) {
	    return(NA_real_)
	  }
	  to_decimal_degrees(as.numeric(coord[[1]]), as.numeric(coord[[2]]))
	}

	# Normalize a station name so that spelling variants compare equal.
	#
	# Drops every parenthesised part, written with either ASCII "(...)" or
	# full-width "（...）" brackets, then trims the ends and collapses runs of
	# whitespace.
	#
	# name: character vector of station names.
	# Returns a character vector of the same length.
	#
	# NOTE(review): the bracket pattern was reconstructed from a garbled copy
	# that contained "\$", which is not a valid R string escape. Confirm that
	# matching "(" / ")" and "（" / "）" is the intended behaviour.
	normalize_name <- function(name) {
	  name %>%
	    str_replace_all("[\\(（].*?[\\)）]", "") %>%
	    str_squish()
	}

	# Round coordinates to four decimal places; the rounded values are used as
	# join keys when matching stations by position.
	coord_round <- function(x) {
	  round(x, digits = 4)
	}

	# Flatten the AMeDAS table (a list with one entry per station ID) into a
	# tibble with one row per station: ID, Japanese name, English name and
	# decimal-degree coordinates.
	#
	# Built with base Map() + dplyr::bind_rows() rather than purrr::imap_dfr(),
	# because purrr is not among the packages this script loads.
	amedas_entries <- local({
	  station_ids <- names(amedas_raw)
	  if (is.null(station_ids)) {
	    # Unnamed input: fall back to positional indices as IDs.
	    station_ids <- seq_along(amedas_raw)
	  }
	  station_rows <- Map(
	    function(entry, station_id) {
	      has_jp <- !is.null(entry$kjName) && nzchar(entry$kjName)
	      has_en <- !is.null(entry$enName) && nzchar(entry$enName)
	      tibble(
	        ID = as.character(station_id),
	        # Without a Japanese name the station ID stands in for it.
	        Name_JP = if (has_jp) entry$kjName else as.character(station_id),
	        Name_EN = if (has_en) entry$enName else NA_character_,
	        Lat = parse_amedas_coord(entry$lat),
	        Lon = parse_amedas_coord(entry$lon)
	      )
	    },
	    amedas_raw,
	    station_ids
	  )
	  bind_rows(unname(station_rows))
	})

	# 2. Read the full station list collected from the map (expected: 1,445
	#    stations). The file may contain duplicates; they are removed further
	#    down.
	# ID is read as character so that it has the same type as the AMeDAS IDs
	# it is joined against and keeps any leading zeros; every other column is
	# still type-guessed.
	stations <- read_csv(
	  "data/jma_stations_full.csv",
	  col_types = cols(ID = col_character(), .default = col_guess()),
	  show_col_types = FALSE
	)

	# 3. Fill in missing prec_no values.
	# A station whose prec_no is NA borrows the prec_no of another row with the
	# same Name_JP, when one exists.
	#
	# The lookup keeps exactly one prec_no per Name_JP (the first one seen).
	# If a name carried several prec_no values, the join would otherwise
	# duplicate every station row with that name.
	mappings <- stations %>%
	  filter(!is.na(prec_no)) %>%
	  distinct(Name_JP, .keep_all = TRUE) %>%
	  select(Name_JP, prec_no)
	
	stations_fixed <- stations %>%
	  left_join(mappings, by = "Name_JP") %>%
	  # prec_no.x is the station's own value, prec_no.y the borrowed one.
	  mutate(prec_no = coalesce(prec_no.x, prec_no.y)) %>%
	  select(-prec_no.x, -prec_no.y)

	# 4. Keep a single row per Japanese station name (target: 1,445 stations).
	# Rows are sorted so that, within a name, the highest Type value comes
	# first and is therefore the one retained.
	stations_final <- stations_fixed %>%
	  arrange(Name_JP, desc(Type)) %>%
	  filter(!duplicated(Name_JP))

	# 5. Fill in missing English names.
	# Lookup table keyed on normalized Japanese name + rounded coordinates,
	# holding one AMeDAS row per key.
	amedas_match <- amedas_entries %>%
	  mutate(
	    Name_JP_norm = normalize_name(Name_JP),
	    Lat_round = coord_round(Lat),
	    Lon_round = coord_round(Lon)
	  ) %>%
	  distinct(Name_JP_norm, Lat_round, Lon_round, .keep_all = TRUE)
	
	# Candidates for Name_EN, in order of preference: the value already in the
	# station list, the AMeDAS entry with the same ID, then the AMeDAS entry
	# with the same normalized name and rounded position.
	stations_final <- stations_final %>%
	  mutate(
	    Name_JP_norm = normalize_name(Name_JP),
	    Lat_round = coord_round(Lat),
	    Lon_round = coord_round(Lon)
	  ) %>%
	  left_join(
	    select(amedas_entries, ID, Name_EN_json = Name_EN),
	    by = "ID"
	  ) %>%
	  left_join(
	    select(
	      amedas_match,
	      Name_JP_norm, Lat_round, Lon_round,
	      Name_EN_match = Name_EN
	    ),
	    by = c("Name_JP_norm", "Lat_round", "Lon_round")
	  ) %>%
	  mutate(Name_EN = coalesce(Name_EN, Name_EN_json, Name_EN_match)) %>%
	  # Drop the temporary join keys and candidate columns.
	  select(-c(Name_EN_json, Name_EN_match, Name_JP_norm, Lat_round, Lon_round))

	# 6. Overwrite the station list with the cleaned result and report how
	#    many stations it contains.
	stations_final %>%
	  write_csv("data/jma_stations_full.csv")
	sprintf("Final station count: %d", nrow(stations_final)) %>%
	  message()