# dwd / global.R
# Author: alexdum
# feat: Extend default UI date range to six years and refactor URL parameter
# application for improved deep-linking. (commit e347af4)
# global.R for DWD App
library(shiny)
library(bslib)
library(bsicons)
library(mapgl)
library(sf)
library(jsonlite)
library(dplyr)
library(readr)
library(DT)
library(plotly)
library(lubridate)
library(shinycssloaders)
library(curl)
library(stringr)
library(purrr)
library(shinyjs)
library(tibble)
library(data.table)
library(writexl)
# OpenFreeMap Style URLs (vector basemap styles for the map widget)
ofm_positron_style <- "https://tiles.openfreemap.org/styles/positron"
ofm_bright_style <- "https://tiles.openfreemap.org/styles/bright"
# EOX Sentinel-2 Cloudless (Free for commercial use with attribution)
# {z}/{y}/{x} are template placeholders — presumably substituted by the
# mapping library at tile-request time; verify against the mapgl usage.
sentinel_url <- "https://tiles.maps.eox.at/wmts/1.0.0/s2cloudless-2023_3857/default/GoogleMapsCompatible/{z}/{y}/{x}.jpg"
sentinel_attribution <- '<a href="https://s2maps.eu" target="_blank">Sentinel-2 cloudless - by EOX IT Services GmbH</a> (Contains modified Copernicus Sentinel data 2023)'
# --- Configuration ---
# Base URLs
# DWD open-data (CDC) roots, one per supported time resolution. Parameter
# subdirectories (e.g. "air_temperature/recent/") are appended by
# get_dwd_resolution_config() and friends below.
dwd_10min_base_url <- "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/"
dwd_base_url <- "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/"
dwd_daily_base_url <- "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/"
dwd_monthly_base_url <- "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/monthly/"
dwd_annual_base_url <- "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/annual/"
# Cache TTL (days)
# Cached station/index RDS files older than this are rebuilt
# (see get_dwd_stations() and load_enriched_stations()).
cache_ttl_days <- 30
# Valid parameters to fetch (10 Minutes)
# Names are the app-side keys; values are DWD directory names under
# dwd_10min_base_url.
dwd_10min_params <- list(
  air_temperature = "air_temperature",
  extreme_wind = "extreme_wind",
  precipitation = "precipitation",
  solar = "solar", # global radiation
  wind = "wind",
  extreme_temperature = "extreme_temperature"
)
# Valid parameters to fetch (Hourly)
# Names are the app-side keys; values are DWD directory names under
# dwd_base_url (hourly).
dwd_params <- list(
  temp = "air_temperature",
  dew_point = "dew_point",
  moisture = "moisture",
  precip = "precipitation",
  wind = "wind",
  visibility = "visibility",
  solar = "solar",
  pressure = "pressure",
  cloudiness = "cloudiness",
  cloud_type = "cloud_type",
  extreme_wind = "extreme_wind",
  weather_phenomena = "weather_phenomena",
  soil = "soil_temperature",
  sun = "sun"
)
# Valid parameters to fetch (Daily)
# Names are the app-side keys; values are DWD directory names under
# dwd_daily_base_url.
dwd_daily_params <- list(
  kl = "kl", # Daily Climate Data (Temp, Precip, Wind, etc.)
  precip = "more_precip", # Precipitation only (thousands of stations)
  solar = "solar", # global radiation, sunshine duration
  soil = "soil_temperature",
  water_equiv = "water_equiv",
  weather = "weather_phenomena",
  more_weather = "more_weather_phenomena"
)
# Valid parameters to fetch (Monthly)
dwd_monthly_params <- list(
  kl = "kl",
  precip = "more_precip",
  weather = "weather_phenomena"
)
# Valid parameters to fetch (Annual)
dwd_annual_params <- list(
  kl = "kl",
  precip = "more_precip",
  weather = "weather_phenomena"
)
# Resolution Helpers
#' Normalize a user/URL-supplied resolution string to canonical form
#'
#' Canonical values: "10_minutes", "hourly", "daily", "monthly", "annual".
#' Unknown non-empty strings pass through lowercased/underscored, so callers
#' can still fall back on their own defaults.
#'
#' @param resolution Character scalar (a longer vector uses only its first
#'   element; previously length > 1 input could error in `||`).
#' @return Canonical resolution string, or NULL for NULL/NA/empty input.
normalize_dwd_resolution <- function(resolution) {
  if (is.null(resolution) || length(resolution) == 0) {
    return(NULL)
  }
  # Use only the first element so vector input cannot crash scalar `||` checks
  resolution <- as.character(resolution)[[1]]
  if (is.na(resolution) || !nzchar(resolution)) {
    return(NULL)
  }
  # Collapse whitespace/hyphen runs to "_" and lowercase: "10 Minutes" -> "10_minutes"
  res <- tolower(gsub("[[:space:]-]+", "_", resolution))
  # Accept common aliases for the 10-minute resolution (incl. bare "10min")
  if (res %in% c("10_minutes", "10_minute", "10_min", "10min", "10minutes")) {
    return("10_minutes")
  }
  if (res == "yearly") {
    return("annual")
  }
  res
}
#' File-name suffix for a resolution's cache files
#'
#' Used to build paths like "www/tabs/dwd_stations_daily.rds".
#'
#' @param resolution Resolution name in any form normalize_dwd_resolution()
#'   accepts.
#' @return "_daily", "_monthly", "_annual", "_10min", or "" (hourly/unknown).
get_dwd_resolution_suffix <- function(resolution) {
  res <- normalize_dwd_resolution(resolution)
  # switch() errors on a NULL EXPR, so NULL/empty input must be guarded
  # explicitly (previously this crashed instead of returning "").
  if (is.null(res)) {
    return("")
  }
  switch(res,
    "daily" = "_daily",
    "monthly" = "_monthly",
    "annual" = "_annual",
    "10_minutes" = "_10min",
    ""
  )
}
#' Human-readable label for a resolution
#'
#' @param resolution Resolution name in any form normalize_dwd_resolution()
#'   accepts.
#' @return Display label, e.g. "Daily"; "Data" for NULL/empty input.
format_dwd_resolution <- function(resolution) {
  # Normalize once; the original normalized again inside the default branch,
  # and switch(NULL, ...) errored before that branch could handle NULL.
  res <- normalize_dwd_resolution(resolution)
  if (is.null(res)) {
    return("Data")
  }
  switch(res,
    "10_minutes" = "10 Minutes",
    "hourly" = "Hourly",
    "daily" = "Daily",
    "monthly" = "Monthly",
    "annual" = "Annual",
    # Fallback: title-case any other non-empty resolution ("some_res" -> "Some Res")
    str_to_title(gsub("_", " ", res))
  )
}
#' Resolve the download configuration for a DWD time resolution
#'
#' @param resolution Resolution name; NULL/unknown falls back to hourly.
#' @return List with base_url, params (app key -> DWD directory),
#'   station_url (directory holding the station description file) and
#'   station_desc (pattern/name of that description file).
get_dwd_resolution_config <- function(resolution = "hourly") {
  res <- normalize_dwd_resolution(resolution)
  if (is.null(res)) {
    res <- "hourly"
  }
  if (res == "daily") {
    return(list(
      base_url = dwd_daily_base_url,
      params = dwd_daily_params,
      station_url = paste0(dwd_daily_base_url, "kl/historical/"),
      station_desc = "KL_Tageswerte_Beschreibung_Stationen.txt"
    ))
  }
  if (res == "monthly") {
    return(list(
      base_url = dwd_monthly_base_url,
      params = dwd_monthly_params,
      station_url = paste0(dwd_monthly_base_url, "kl/recent/"),
      station_desc = "KL_Monatswerte_Beschreibung_Stationen.txt"
    ))
  }
  if (res == "annual") {
    return(list(
      base_url = dwd_annual_base_url,
      params = dwd_annual_params,
      station_url = paste0(dwd_annual_base_url, "kl/recent/"),
      station_desc = "KL_Jahreswerte_Beschreibung_Stationen.txt"
    ))
  }
  if (res == "10_minutes") {
    # NOTE(review): station_url points at the *hourly* tree (dwd_base_url),
    # not dwd_10min_base_url — looks intentional (shared station list) but
    # worth confirming.
    return(list(
      base_url = dwd_10min_base_url,
      params = dwd_10min_params,
      station_url = paste0(dwd_base_url, "air_temperature/recent/"),
      station_desc = "Beschreibung_Stationen.txt"
    ))
  }
  # Default: hourly
  list(
    base_url = dwd_base_url,
    params = dwd_params,
    station_url = paste0(dwd_base_url, "air_temperature/recent/"),
    station_desc = "Beschreibung_Stationen.txt"
  )
}
# Column Labels with Units
# Named lookup: internal column name -> human-readable display label
# (with units where applicable). Presumably used to relabel table/plot
# columns in the UI — confirm against server/UI code.
dwd_column_labels <- c(
  # --- Identifiers ---
  "datetime" = "Date/Time",
  "datetime_end" = "Period End",
  # --- Temperature ---
  "temp" = "Air Temperature [°C]",
  "temp_min" = "Min Temperature [°C]",
  "temp_max" = "Max Temperature [°C]",
  "temp_min_avg" = "Avg Min Temperature [°C]",
  "temp_max_avg" = "Avg Max Temperature [°C]",
  "dew_point" = "Dew Point [°C]",
  "wet_bulb_temp" = "Wet Bulb Temperature [°C]",
  "temp_5cm" = "Temp at 5cm [°C]",
  # --- Humidity ---
  "rh" = "Relative Humidity [%]",
  "abs_humidity" = "Absolute Humidity [g/m³]",
  "vapor_pressure" = "Vapor Pressure [hPa]",
  # --- Precipitation ---
  "precip" = "Precipitation [mm]",
  "precip_max_day" = "Max Daily Precipitation [mm]",
  "precip_ind" = "Precipitation Indicator",
  # --- Wind ---
  "wind_speed" = "Wind Speed [m/s]",
  "wind_dir" = "Wind Direction [°]",
  "wind_gust_max" = "Max Wind Gust [m/s]",
  "wind_gust_min" = "Min Wind Gust [m/s]",
  # --- Pressure ---
  "pressure" = "Pressure (Sea Level) [hPa]",
  "station_pressure" = "Pressure (Station Level) [hPa]",
  # --- Cloud / Sun / Radiation ---
  "cloud_cover" = "Cloud Cover [oktas]",
  "solar_global" = "Global Radiation [J/cm²]",
  "sunshine_duration" = "Sunshine Duration [min]",
  "diffuse_radiation" = "Diffuse Radiation [J/cm²]",
  "longwave_radiation" = "Longwave Radiation [J/cm²]",
  # --- Soil ---
  "soil_temp_2cm" = "Soil Temp 2cm [°C]",
  "soil_temp_5cm" = "Soil Temp 5cm [°C]",
  "soil_temp_10cm" = "Soil Temp 10cm [°C]",
  "soil_temp_20cm" = "Soil Temp 20cm [°C]",
  "soil_temp_50cm" = "Soil Temp 50cm [°C]",
  "soil_temp_100cm" = "Soil Temp 100cm [°C]",
  "soil_temp_min_5cm" = "Min Soil Temp 5cm [°C]",
  # --- Snow ---
  "snow_depth" = "Snow Depth [cm]",
  "snow_water_equiv" = "Snow Water Equivalent [mm]",
  "snow_fresh_sum" = "Fresh Snow Sum [cm]",
  "snow_depth_sum" = "Snow Depth Sum [cm]",
  # --- Weather Phenomena (Binary/Code) ---
  "weather_code" = "Weather Code",
  "weather_text" = "Weather Description",
  "visibility" = "Visibility [m]",
  # --- Phenomena Flags (0/1) ---
  "thunderstorm" = "Thunderstorm",
  "glaze" = "Glaze",
  "graupel" = "Graupel",
  "hail" = "Hail",
  "fog" = "Fog",
  "frost" = "Frost",
  "storm_6" = "Storm (Bft 6)",
  "storm_8" = "Storm (Bft 8)",
  "dew" = "Dew"
)
# --- Source Helper Functions ---
# Source all R helper files from the fun/ directory
# (note: the code reads "fun", not the conventional "R/" directory)
for (file in list.files("fun", pattern = "\\.R$", full.names = TRUE)) {
  source(file)
}
# --- Initialization ---
# Ensure data directory exists (cache RDS files matching "dwd_*.rds" are
# written here — see log_cache_status() below)
if (!dir.exists("data")) dir.create("data")
# Log cache TTL and current cache ages
#' Report the cache TTL and the age of every cached DWD dataset in data/
#'
#' Output goes through message(); nothing is returned.
#' @return invisible(NULL)
log_cache_status <- function() {
  cache_files <- list.files("data", pattern = "^dwd_.*[.]rds$", full.names = TRUE)
  if (length(cache_files) == 0) {
    message("Cache TTL: ", cache_ttl_days, " days (no cache files found).")
    return(invisible(NULL))
  }
  # Age of each cache file, in fractional days since last modification
  ages <- as.numeric(difftime(Sys.time(), file.info(cache_files)$mtime, units = "days"))
  labels <- paste0(basename(cache_files), "=", sprintf("%.1f", ages), "d")
  message("Cache TTL: ", cache_ttl_days, " days; cache ages: ", paste(labels, collapse = ", "))
  invisible(NULL)
}
# Emit the cache-age report once at app startup
log_cache_status()
# --- Granular Metadata Cache ---
# As requested, storing in www/tabs
# Single RDS file holding per-station/per-resolution availability metadata;
# read by get_granular_cache() and written by update_granular_cache().
granular_cache_path <- "www/tabs/dwd_granular_metadata.rds"
#' Get Granular Metadata Cache
#'
#' Reads the on-disk granular metadata cache; a missing or unreadable file
#' degrades gracefully to an empty cache.
#' @return Named list keyed by "resolution_stationid" or empty list
get_granular_cache <- function() {
  # No cache file yet -> empty cache
  if (!file.exists(granular_cache_path)) {
    return(list())
  }
  # A corrupt file is reported but treated the same as an empty cache
  tryCatch(
    readRDS(granular_cache_path),
    error = function(e) {
      message("Failed to read granular cache: ", e$message)
      list()
    }
  )
}
#' Update Granular Metadata Cache
#'
#' Adds or updates the metadata entry for a specific station/resolution and
#' persists the whole cache back to disk. A save failure is logged, not raised.
#' @param station_id Station ID string
#' @param resolution "daily" or "monthly"
#' @param meta_df Data frame with start_date, end_date per parameter
#' @return invisible(NULL)
update_granular_cache <- function(station_id, resolution, meta_df) {
  # Nothing to record for a missing or empty metadata frame
  if (is.null(meta_df) || nrow(meta_df) == 0) {
    return(invisible(NULL))
  }
  # Keys are "<resolution>_<station>", resolution lowercased for consistency
  key <- paste0(tolower(resolution), "_", station_id)
  updated_cache <- get_granular_cache()
  # Store the metadata along with a freshness timestamp
  updated_cache[[key]] <- list(
    station_id = station_id,
    resolution = resolution,
    params = meta_df,
    updated = Sys.time()
  )
  tryCatch(
    {
      saveRDS(updated_cache, granular_cache_path)
      message("Updated granular cache for ", key)
    },
    error = function(e) {
      message("Failed to save granular cache: ", e$message)
    }
  )
  invisible(NULL)
}
#' Load DWD File Index from Cache (Read-Only)
#'
#' Used for looking up ZIP URLs for data downloading. A missing or unreadable
#' index file triggers an on-demand rebuild via get_dwd_index().
#' @param resolution "hourly", "daily", "monthly", "annual", or "10_minutes"
#' @return Data frame of file index or NULL
load_dwd_index <- function(resolution = "hourly") {
  res <- normalize_dwd_resolution(resolution)
  if (is.null(res)) {
    res <- "hourly"
  }
  index_file <- paste0("www/tabs/dwd_file_index", get_dwd_resolution_suffix(res), ".rds")
  # No cached index at all -> build it from scratch
  if (!file.exists(index_file)) {
    message("File index not found: ", index_file, " - building on demand...")
    return(get_dwd_index(res))
  }
  # Cached index exists; fall back to a rebuild if it cannot be read
  tryCatch(
    readRDS(index_file),
    error = function(e) {
      message("Failed to read file index: ", e$message)
      message("Attempting to rebuild file index for ", res, "...")
      get_dwd_index(res)
    }
  )
}
#' Get DWD Station List (with Caching)
#'
#' Serves the station list from an RDS cache when it is younger than
#' cache_ttl_days; otherwise fetches fresh metadata and refreshes the cache.
#' @param resolution "hourly", "daily", "monthly", "annual", or "10_minutes"
#' @return Data frame of stations (NULL if the fetch fails)
get_dwd_stations <- function(resolution = "hourly") {
  res <- normalize_dwd_resolution(resolution)
  if (is.null(res)) {
    res <- "hourly"
  }
  station_file <- paste0("www/tabs/dwd_stations", get_dwd_resolution_suffix(res), ".rds")
  if (file.exists(station_file)) {
    # Serve from cache while it is within the TTL
    cache_age <- difftime(Sys.time(), file.info(station_file)$mtime, units = "days")
    if (cache_age <= cache_ttl_days) {
      return(readRDS(station_file))
    }
    message(paste(res, "station metadata is old. Rebuilding..."))
  }
  message(paste("Fetching", res, "Station Metadata..."))
  stations <- fetch_dwd_stations(res)
  # Only cache successful fetches; a NULL result leaves the old file in place
  if (!is.null(stations)) {
    saveRDS(stations, station_file)
  }
  stations
}
#' Load Enriched Stations from Granular Metadata Cache
#'
#' Merges base station data with pre-generated availability metadata.
#' Availability comes from two sources: (A) the granular per-station cache
#' (precise per-parameter date ranges) and (B) the DWD file index (coarser
#' fallback for parameters the granular cache does not cover). The merged
#' result is cached to "www/tabs/dwd_stations_enriched<suffix>.rds" for
#' cache_ttl_days days.
#'
#' Date columns are assumed to be "YYYYMMDD" strings — TODO confirm against
#' the index/cache builders in fun/. Sentinels: "99999999" = still reporting
#' (or unknown start), "00000000" = no known end.
#'
#' @param resolution "hourly", "daily", "monthly", "annual", or "10_minutes"
#' @return Data frame of enriched stations (base columns plus
#'   detailed_summary, station_overall_start, station_overall_end,
#'   available_params), or NULL when no base station data is available.
load_enriched_stations <- function(resolution = "daily") {
  resolution <- normalize_dwd_resolution(resolution)
  if (is.null(resolution)) resolution <- "daily"
  # Check for Enriched Cache
  suffix <- get_dwd_resolution_suffix(resolution)
  cache_file <- paste0("www/tabs/dwd_stations_enriched", suffix, ".rds")
  if (file.exists(cache_file)) {
    info <- file.info(cache_file)
    if (difftime(Sys.time(), info$mtime, units = "days") <= cache_ttl_days) {
      # Verify it's a valid data frame; an empty or corrupt cache falls
      # through to a full rebuild below
      tryCatch(
        {
          res <- readRDS(cache_file)
          if (is.data.frame(res) && nrow(res) > 0) {
            return(res)
          }
        },
        error = function(e) message("Enriched cache corrupted, rebuilding...")
      )
    }
  }
  # 1. Get base station list (name, lat, lon, state, etc.)
  st_df <- get_dwd_stations(resolution)
  if (is.null(st_df) || nrow(st_df) == 0) {
    message("No base station data available for ", resolution)
    return(NULL)
  }
  # 2. Load granular cache and file index
  cache <- get_granular_cache()
  file_index <- load_dwd_index(resolution)
  # Pre-split index by ID for O(1) lookup inside loop
  index_by_id <- if (!is.null(file_index)) split(file_index, file_index$id) else list()
  # Directory Param to Result Label Map
  # (DWD directory name -> display label used in detailed_summary)
  dir_map <- c(
    "air_temperature" = "Temperature",
    "cloudiness" = "Cloudiness",
    "precipitation" = "Precipitation",
    "pressure" = "Pressure",
    "soil_temperature" = "Soil Temp",
    "solar" = "Solar",
    "sun" = "Sunshine",
    "visibility" = "Visibility",
    "wind" = "Wind",
    "kl" = "Climate Data (KL)",
    "more_precip" = "Precipitation (More)",
    "water_equiv" = "Water Equiv",
    "weather_phenomena" = "Weather Phenomena",
    "more_weather" = "More Weather Phenomena",
    "moisture" = "Moisture",
    "dew_point" = "Dew Point",
    "soil" = "Soil Temp",
    "cloud_type" = "Cloud Type"
  )
  # Specific override for 10-minutes: Air Temperature file includes Station Pressure
  if (resolution == "10_minutes") {
    dir_map["air_temperature"] <- "Temperature, Pressure"
  }
  # 4. Build summary data from cache/index
  # Iterate over base stations to ensure filtering logic applies to all
  summary_list <- lapply(st_df$id, function(sid) {
    # --- A. Granular Metadata ---
    # Keys match update_granular_cache(): "<resolution>_<station id>"
    cache_key <- paste0(resolution, "_", sid)
    entry <- cache[[cache_key]]
    gran_labels <- character(0)
    gran_text <- character(0)
    overall_starts <- character(0)
    overall_ends <- character(0)
    if (!is.null(entry) && !is.null(entry$params) && nrow(entry$params) > 0) {
      params_df <- entry$params
      # get_dwd_param_map() is defined in fun/ (not visible here);
      # presumably maps raw DWD parameter codes to display labels — verify.
      param_map <- get_dwd_param_map()
      # Map to readable labels
      params_df$label <- sapply(params_df$param, function(p) {
        if (p %in% names(param_map)) param_map[[p]] else p
      })
      # Clean junk rows and NA labels (dates shorter than 4 chars cannot
      # even yield a year)
      params_df <- params_df[nchar(params_df$start_date) >= 4 & nchar(params_df$end_date) >= 4, ]
      params_df <- params_df[!is.na(params_df$label), ]
      if (nrow(params_df) > 0) {
        # Aggregate: one overall date range per display label
        agg_summary <- params_df %>%
          group_by(label) %>%
          summarise(
            date_start = min(start_date, na.rm = TRUE),
            date_end = max(end_date, na.rm = TRUE),
            .groups = "drop"
          )
        # Format as "Label (startYear-endYear)"; the end is shown as
        # "Present" unless the data ended more than one calendar year ago
        gran_text <- mapply(function(lbl, sd, ed) {
          s_yr <- if (grepl("^[0-9]+$", substr(sd, 1, 4))) substr(sd, 1, 4) else "Unknown"
          e_yr <- "Present"
          if (grepl("^[0-9]+$", substr(ed, 1, 4))) {
            if (as.numeric(substr(ed, 1, 4)) < as.numeric(format(Sys.Date(), "%Y")) - 1) {
              e_yr <- substr(ed, 1, 4)
            }
          }
          paste0(lbl, " (", s_yr, "-", e_yr, ")")
        }, agg_summary$label, agg_summary$date_start, agg_summary$date_end)
        gran_labels <- agg_summary$label
        # Only well-formed YYYYMMDD dates contribute to the overall range
        overall_starts <- params_df$start_date[grepl("^\\d{8}$", params_df$start_date)]
        overall_ends <- params_df$end_date[grepl("^\\d{8}$", params_df$end_date)]
      }
    }
    # --- B. Index Metadata (Fallback) ---
    # Cover parameters the granular cache did not describe
    index_text <- character(0)
    idx_entry <- index_by_id[[sid]]
    if (!is.null(idx_entry)) {
      idx_params <- unique(idx_entry$param)
      # For each index param, check if we already have a granular label for it
      for (ip in idx_params) {
        # Map directory name to Label
        ip_label <- if (ip %in% names(dir_map)) dir_map[[ip]] else str_to_title(ip)
        # Fuzzy Match: If we already have "Temperature" from granular, skip "Temperature" from index
        if (any(grepl(ip_label, gran_labels, fixed = TRUE))) {
          next
        }
        # Suppress "Climate Data (KL)" if we already have constituent parameters
        if (ip_label == "Climate Data (KL)") {
          # Check if we have any of the main parameters that make up KL
          has_kl_content <- any(c("Temperature", "Precipitation", "Wind", "Sunshine") %in% gran_labels)
          if (has_kl_content) {
            next
          }
        }
        # Get generic dates from index if possible
        # (min/max over possibly-empty vectors can yield Inf with a warning,
        # hence suppressWarnings; the nchar/grepl checks below filter that)
        ip_rows <- idx_entry[idx_entry$param == ip, ]
        min_s <- suppressWarnings(min(ip_rows$start_date, na.rm = TRUE))
        max_e <- suppressWarnings(max(ip_rows$end_date, na.rm = TRUE))
        s_yr <- "Unknown"
        if (!is.na(min_s) && nchar(min_s) >= 4) s_yr <- substr(min_s, 1, 4)
        e_yr <- "Present"
        if (any(ip_rows$type == "recent")) {
          e_yr <- "Present"
        } else {
          if (!is.na(max_e) && nchar(max_e) >= 4) e_yr <- substr(max_e, 1, 4)
        }
        index_text <- c(index_text, paste0(ip_label, " (", s_yr, "-", e_yr, ")"))
        # Add to overall dates for summary
        if (!is.na(min_s) && grepl("^\\d{8}$", min_s)) overall_starts <- c(overall_starts, min_s)
        # If this param has 'recent' files, mark as ongoing (99999999)
        if (any(ip_rows$type == "recent")) {
          overall_ends <- c(overall_ends, "99999999")
        } else if (!is.na(max_e) && grepl("^\\d{8}$", max_e)) {
          overall_ends <- c(overall_ends, max_e)
        }
      }
    }
    # Combine text
    full_summary <- c(gran_text, index_text)
    detailed_summary <- paste(sort(unique(full_summary)), collapse = ", ")
    # Overall Dates ("99999999"/"00000000" sentinels when nothing is known,
    # so date-range filters never exclude such stations)
    overall_start <- if (length(overall_starts) > 0) min(overall_starts) else "99999999"
    overall_end <- if (length(overall_ends) > 0) max(overall_ends) else "00000000"
    # Params List (raw DWD directory names, comma-separated)
    all_params <- character(0)
    if (!is.null(idx_entry)) all_params <- unique(idx_entry$param)
    available_params <- paste(sort(all_params), collapse = ", ")
    data.frame(
      id = sid,
      detailed_summary = detailed_summary,
      station_overall_start = overall_start,
      station_overall_end = overall_end,
      available_params = available_params,
      stringsAsFactors = FALSE
    )
  })
  summary_df <- do.call(rbind, summary_list)
  # 5. Join with base stations
  st_df <- st_df %>%
    left_join(summary_df, by = "id") %>%
    mutate(
      # Handle both NA and empty strings
      detailed_summary = ifelse(is.na(detailed_summary) | detailed_summary == "", "No Data", detailed_summary),
      station_overall_start = ifelse(is.na(station_overall_start) | station_overall_start == "", "18000101", station_overall_start),
      station_overall_end = ifelse(is.na(station_overall_end) | station_overall_end == "", "99999999", station_overall_end),
      available_params = ifelse(is.na(available_params) | available_params == "", "Unknown", available_params)
    )
  # Persist the freshly built enrichment and return it
  res <- as.data.frame(st_df)
  saveRDS(res, cache_file)
  res
}
# --- App Defaults ---
# UI starts on Daily resolution, so default to a multi-year window that
# still catches stations whose daily coverage ended recently.
# Base-R year extraction (equivalent to lubridate::year(Sys.Date()), but with
# no package dependency at load time).
max_year_data <- as.integer(format(Sys.Date(), "%Y"))
default_end_date <- Sys.Date()
# Calendar-accurate "six years ago": 365 * 6 drifted by a day or two across
# leap years; seq.Date with by = "-6 years" lands on the same month/day.
default_start_date <- seq(default_end_date, length.out = 2, by = "-6 years")[2]
# Note: All R functions are sourced from fun/ directory above