# scripts/export_seo_metadata.R
#
# Exports enriched DWD station metadata from all 5 resolution caches
# into a single JSON file for use by the Netlify Edge Function.
#
# Output: dwd-seo-metadata.json
#
# Usage:
#   Rscript scripts/export_seo_metadata.R
#   (run from the DWD app root directory)

# Ensure UTF-8 output
invisible(Sys.setlocale("LC_CTYPE", "en_US.UTF-8"))

library(jsonlite)

# ── Slugification ────────────────────────────────────────────────────────

#' Turn a display name into a URL slug.
#'
#' German umlauts and sharp s are spelled out (ue/oe/ae/ss) before the
#' generic transliteration, the text is lowercased, every run of characters
#' outside [a-z0-9] becomes a single hyphen, and leading/trailing hyphens
#' are stripped.
#'
#' @param text Value to slugify; coerced with `as.character()`.
#' @return Character vector of slugs, same length as `text`.
slugify <- function(text) {
  s <- as.character(text)
  # German umlaut / special char replacements (before transliteration)
  s <- gsub("\u00fc", "ue", s) # ü
  s <- gsub("\u00f6", "oe", s) # ö
  s <- gsub("\u00e4", "ae", s) # ä
  s <- gsub("\u00dc", "Ue", s) # Ü
  s <- gsub("\u00d6", "Oe", s) # Ö
  s <- gsub("\u00c4", "Ae", s) # Ä
  s <- gsub("\u00df", "ss", s) # ß
  # Lowercase
  s <- tolower(s)
  # Remove accents via iconv transliteration
  s <- iconv(s, from = "UTF-8", to = "ASCII//TRANSLIT", sub = "")
  # Replace non-alphanumeric with hyphens
  s <- gsub("[^a-z0-9]+", "-", s)
  # Trim leading/trailing hyphens
  s <- gsub("^-+|-+$", "", s)
  s
}

# ── Active-range helpers ─────────────────────────────────────────────────

#' Start of the UI's default date range for a resolution.
#'
#' Must match the server.R URL param handler for initial load.
#'
#' @param resolution_key Resolution key such as "daily" or "10_minutes".
#'   Unknown keys fall back to six 365-day years before `today`.
#' @param today Reference date; defaults to `Sys.Date()`.
#' @return A `Date`.
default_range_start <- function(resolution_key, today = Sys.Date()) {
  switch(resolution_key,
    "10_minutes" = today - 30,
    "hourly" = today - 366,
    "monthly" = today - (365 * 6),
    "daily" = today - (365 * 6),
    "annual" = as.Date("1991-01-01"),
    today - (365 * 6)
  )
}

#' Does a station's overall period overlap the default date range?
#'
#' Must match server.R filtered_stations():
#'   as.numeric(station_overall_start) <= range_end &
#'   as.numeric(station_overall_end) >= range_start
#'
#' @param start_str,end_str Station period bounds as "YYYYMMDD" strings.
#'   An end of "99999999" is treated as open-ended. Missing or non-numeric
#'   bounds make the station inactive.
#' @param range_start,range_end Range bounds as YYYYMMDD numbers.
#' @return `TRUE` or `FALSE`.
is_station_active <- function(start_str, end_str, range_start, range_end) {
  start_num <- suppressWarnings(as.numeric(start_str))
  # isTRUE() keeps an NA end date from aborting the `if`; it then falls
  # through to as.numeric(NA) and is rejected by the is.na() check below.
  end_num <- if (isTRUE(end_str == "99999999")) {
    Inf
  } else {
    suppressWarnings(as.numeric(end_str))
  }
  !is.na(start_num) && !is.na(end_num) &&
    start_num <= range_end && end_num >= range_start
}

# ── Resolution config ────────────────────────────────────────────────────
resolution_config <- list(
  list(
    key = "daily",
    label = "Daily",
    slug = "daily",
    file = "www/tabs/dwd_stations_enriched_daily.rds"
  ),
  list(
    key = "hourly",
    label = "Hourly",
    slug = "hourly",
    file = "www/tabs/dwd_stations_enriched.rds"
  ),
  list(
    key = "monthly",
    label = "Monthly",
    slug = "monthly",
    file = "www/tabs/dwd_stations_enriched_monthly.rds"
  ),
  list(
    key = "annual",
    label = "Annual",
    slug = "annual",
    file = "www/tabs/dwd_stations_enriched_annual.rds"
  ),
  list(
    key = "10_minutes",
    label = "10 Minutes",
    slug = "10-minutes",
    file = "www/tabs/dwd_stations_enriched_10min.rds"
  )
)

# ── Main export ──────────────────────────────────────────────────────────
stations_out <- list()
states_out <- list()
resolutions_out <- list()
slug_map <- list()

for (rc in resolution_config) {
  if (!file.exists(rc$file)) {
    message("Skipping ", rc$key, ": file not found (", rc$file, ")")
    next
  }

  df <- readRDS(rc$file)
  message(rc$label, ": ", nrow(df), " stations loaded")

  res_slug <- rc$slug

  # Per-resolution invariants, computed once instead of once per row.
  today <- Sys.Date()
  default_start_num <- as.numeric(
    format(default_range_start(rc$key, today), "%Y%m%d")
  )
  default_end_num <- as.numeric(format(today, "%Y%m%d"))
  has_detailed_summary <- "detailed_summary" %in% names(df)

  # ── Per-state aggregation ────────────────────────────────────────────
  state_counts <- list()
  n_collisions <- 0L

  for (i in seq_len(nrow(df))) {
    row <- df[i, ]

    station_name <- enc2utf8(as.character(row$name))
    station_id <- as.character(row$id)
    state_name <- enc2utf8(as.character(row$state))
    station_slug <- slugify(station_name)
    state_slug <- slugify(state_name)

    overall_start_str <- as.character(row$station_overall_start)
    overall_end_str <- as.character(row$station_overall_end)

    # Build path key: "daily/bayern/muenchen-flughafen"
    path_key <- paste0(res_slug, "/", state_slug, "/", station_slug)

    # Station entry
    entry <- list(
      id = station_id,
      name = station_name,
      state = state_name,
      stateSlug = state_slug,
      elevation = as.numeric(row$elevation),
      lat = as.numeric(row$latitude),
      lon = as.numeric(row$longitude),
      resolution = rc$key,
      resolutionLabel = rc$label,
      resolutionSlug = res_slug,
      overallStart = overall_start_str,
      overallEnd = overall_end_str
    )

    # Add detailed_summary if available
    if (has_detailed_summary) {
      entry$availableParams <- as.character(row$detailed_summary)
    }

    # Two stations can slugify to the same path; the later one wins (as
    # before), but the overwrite is now counted and reported below.
    if (!is.null(stations_out[[path_key]])) {
      n_collisions <- n_collisions + 1L
    }
    stations_out[[path_key]] <- entry

    # Track state counts
    state_key <- paste0(res_slug, "/", state_slug)
    if (is.null(state_counts[[state_key]])) {
      state_counts[[state_key]] <- list(
        state = state_name,
        stateSlug = state_slug,
        resolution = rc$key,
        resolutionLabel = rc$label,
        resolutionSlug = res_slug,
        stationCount = 0L,
        activeStationCount = 0L
      )
    }
    state_counts[[state_key]]$stationCount <-
      state_counts[[state_key]]$stationCount + 1L

    if (is_station_active(overall_start_str, overall_end_str,
                          default_start_num, default_end_num)) {
      state_counts[[state_key]]$activeStationCount <-
        state_counts[[state_key]]$activeStationCount + 1L
    }

    # Slug map (only need unique entries)
    if (is.null(slug_map[[station_name]])) {
      slug_map[[station_name]] <- station_slug
    }
    if (is.null(slug_map[[state_name]])) {
      slug_map[[state_name]] <- state_slug
    }
  }

  if (n_collisions > 0L) {
    message("  ", n_collisions,
            " station(s) overwrote an earlier entry with the same path key")
  }

  # Merge state entries
  for (sk in names(state_counts)) {
    states_out[[sk]] <- state_counts[[sk]]
  }

  # Resolution summary (including active count summed from states).
  # vapply() keeps this an integer vector even when the cache had no rows;
  # sapply() would return list() there and make sum() fail.
  total_active <- sum(vapply(
    state_counts,
    function(sc) sc$activeStationCount,
    integer(1)
  ))
  resolutions_out[[res_slug]] <- list(
    key = rc$key,
    label = rc$label,
    slug = res_slug,
    stationCount = nrow(df),
    activeStationCount = total_active
  )
}

# ── Assemble and write JSON ──────────────────────────────────────────────
output <- list(
  stations = stations_out,
  states = states_out,
  resolutions = resolutions_out,
  slugMap = slug_map
)

output_path <- "dwd-seo-metadata.json"
json_str <- toJSON(output, pretty = TRUE, auto_unbox = TRUE)
# useBytes = TRUE writes the UTF-8 JSON bytes as-is instead of re-encoding
# them to the native encoding when the UTF-8 locale could not be set.
writeLines(json_str, output_path, useBytes = TRUE)

message("\nExported ", length(stations_out), " station entries, ",
        length(states_out), " state entries, ",
        length(resolutions_out), " resolutions")
message("Output: ", normalizePath(output_path))