| |
|
|
| |
| |
| |
# Fetch one URL into memory, retrying on transport errors.
#
# A fresh curl handle is created for every attempt. Between failed attempts
# the function sleeps for one second; the error of the last attempt is
# re-raised unchanged.
.dwd_fetch_with_retry <- function(url, max_retries = 3) {
  for (attempt in seq_len(max_retries)) {
    resp <- tryCatch(
      {
        h <- new_handle()
        handle_setopt(h, followlocation = TRUE, timeout = 60)
        curl_fetch_memory(url, handle = h)
      },
      error = function(e) {
        if (attempt == max_retries) stop(e)
        Sys.sleep(1)
        NULL
      }
    )
    if (!is.null(resp)) {
      return(resp)
    }
  }
}

# Index the zip archives linked from a single directory listing.
#
# Returns a data frame (one row per archive whose name matches `id_pattern`)
# or NULL when the listing answers with a non-200 status or links no matching
# archive. Transport errors are raised to the caller.
.dwd_index_listing <- function(url, param, subtype, id_pattern) {
  resp <- .dwd_fetch_with_retry(url)

  if (resp$status_code != 200) {
    # Previously dropped silently; report it like any other indexing failure.
    warning(
      paste("Failed to index", url, ": HTTP status", resp$status_code),
      call. = FALSE
    )
    return(NULL)
  }

  content <- rawToChar(resp$content)

  # Collect every href that ends in ".zip" and strip the attribute wrapper.
  zip_files <- str_extract_all(content, 'href="([^"]+\\.zip)"')[[1]]
  zip_files <- gsub('href="', "", zip_files)
  zip_files <- gsub('"', "", zip_files)

  # Keep only archives whose name carries a station id.
  zip_files <- zip_files[grepl(id_pattern, zip_files)]
  if (length(zip_files) == 0) {
    return(NULL)
  }

  # The matched fragment is "_00003_" or "_ST_00003_"; keep the digits only.
  ids <- gsub("[^0-9]", "", str_extract(zip_files, id_pattern))

  # Two consecutive 8-digit groups starting with 17/18/19/20 are taken as the
  # start and end date; files without such a pair get NA for both.
  date_pattern <- "_((?:17|18|19|20)\\d{6})_((?:17|18|19|20)\\d{6})"
  date_parts <- str_match(zip_files, date_pattern)

  data.frame(
    id = ids,
    filename = zip_files,
    url = paste0(url, zip_files),
    param = param,
    type = subtype,
    start_date = date_parts[, 2],
    end_date = date_parts[, 3],
    stringsAsFactors = FALSE
  )
}

# Build an index of downloadable DWD zip archives for one resolution.
#
# For every parameter in the resolution's configuration the matching
# directory listings are fetched, every linked `.zip` file is collected, and
# the station id plus (when present) the date range are parsed from the file
# name.
#
# Args:
#   resolution: Resolution key; passed through normalize_dwd_resolution()
#     before use. Defaults to "hourly".
#
# Returns:
#   A data frame with the columns `id`, `filename`, `url`, `param`, `type`,
#   `start_date` and `end_date` (one row per archive), or NULL when nothing
#   could be indexed. Listings that cannot be fetched, or that answer with a
#   non-200 status, are skipped with a warning.
fetch_dwd_file_index <- function(resolution = "hourly") {
  resolution <- normalize_dwd_resolution(resolution)
  config <- get_dwd_resolution_config(resolution)
  base_url_use <- config$base_url
  params_use <- config$params

  index_list <- list()

  for (param in names(params_use)) {
    folder <- params_use[[param]]

    # "solar" is served from a single flat folder with "_ST_<id>_" file
    # names, except for the 10-minute resolution, which uses the same
    # recent/historical layout and "_<id>_" naming as every other parameter.
    flat_solar <- param == "solar" && resolution != "10_minutes"

    if (flat_solar) {
      crawl_list <- list(
        list(url = paste0(base_url_use, folder, "/"), type = "solar")
      )
      id_pattern <- "_ST_\\d{5}_"
    } else {
      crawl_list <- list(
        list(url = paste0(base_url_use, folder, "/recent/"), type = "recent"),
        list(
          url = paste0(base_url_use, folder, "/historical/"),
          type = "historical"
        )
      )
      id_pattern <- "_\\d{5}_"
    }

    for (item in crawl_list) {
      entry <- tryCatch(
        .dwd_index_listing(item$url, param, item$type, id_pattern),
        error = function(e) {
          warning(
            paste("Failed to index", item$url, ":", conditionMessage(e)),
            call. = FALSE
          )
          NULL
        }
      )

      # Assigning NULL into a list would delete the slot, so guard explicitly.
      if (!is.null(entry)) {
        index_list[[paste(param, item$type, sep = "_")]] <- entry
      }
    }
  }

  if (length(index_list) > 0) {
    bind_rows(index_list)
  } else {
    NULL
  }
}
|
|
| |
| |
| |
# Download and parse the DWD station description list for one resolution.
#
# The directory listing at `config$station_url` is fetched, the link to the
# description file matching `config$station_desc` is located, and that file
# is read as fixed-width text (ISO-8859-1, first two lines skipped).
#
# Args:
#   resolution: Resolution key; passed through normalize_dwd_resolution()
#     before use. Defaults to "hourly".
#
# Returns:
#   A data frame with one row per station id and the columns `id`
#   (zero-padded to five digits), `name`, `latitude`, `longitude`,
#   `elevation`, `state`, `country_name`, `start_date` and `end_date`.
#   NULL, with a warning, when the listing cannot be fetched, answers with a
#   non-200 status, links no description file, or the file cannot be parsed.
fetch_dwd_stations <- function(resolution = "hourly") {
  resolution <- normalize_dwd_resolution(resolution)
  config <- get_dwd_resolution_config(resolution)
  url <- config$station_url
  desc_pattern <- config$station_desc

  tryCatch(
    {
      h <- new_handle()
      handle_setopt(h, followlocation = TRUE, timeout = 60)
      resp <- curl_fetch_memory(url, handle = h)

      # An error page must not be parsed as if it were the listing.
      if (resp$status_code != 200) {
        stop("HTTP status ", resp$status_code, " for ", url, call. = FALSE)
      }
      content <- rawToChar(resp$content)

      # First try: an href that ends with the configured file name (dots in
      # the name are escaped so they match literally).
      pat <- paste0('href="([^"]+', gsub("\\.", "\\\\.", desc_pattern), ')"')
      desc_file_match <- str_extract(content, pat)

      if (is.na(desc_file_match)) {
        # Fallback: any link that contains the configured pattern.
        all_links <- str_extract_all(content, 'href="([^"]+)"')[[1]]
        target_link <- all_links[grepl(desc_pattern, all_links)]
        if (length(target_link) == 0) {
          stop(
            "no link matching '", desc_pattern, "' found at ", url,
            call. = FALSE
          )
        }
        desc_file_match <- target_link[1]
      }

      # Strip the attribute wrapper and any directory part of the link; the
      # file is then addressed relative to the listing URL.
      desc_file <- gsub('href="', "", desc_file_match)
      desc_file <- gsub('"', "", desc_file)
      desc_file <- basename(desc_file)
      full_url <- paste0(url, desc_file)

      col_names <- c(
        "id", "start_date", "end_date", "elevation", "lat", "lon",
        "name", "state", "abgabe"
      )

      # The last width is NA so the final column runs to the end of the line.
      st_df <- read_fwf(
        full_url,
        fwf_widths(c(5, 9, 9, 15, 12, 10, 41, 40, NA), col_names),
        skip = 2,
        locale = locale(encoding = "ISO-8859-1"),
        show_col_types = FALSE
      )

      st_df %>%
        mutate(
          id = sprintf("%05d", as.numeric(id)),
          name = str_trim(name),
          state = str_trim(state),
          country_name = "Germany",
          latitude = lat,
          longitude = lon
        ) %>%
        select(
          id, name, latitude, longitude, elevation, state, country_name,
          start_date, end_date
        ) %>%
        distinct(id, .keep_all = TRUE)
    },
    error = function(e) {
      warning("Failed to fetch station list: ", conditionMessage(e))
      NULL
    }
  )
}
|
|
| |
| |
# Lookup table from DWD data column codes to display labels.
#
# Returns:
#   A named character vector; names are the column codes and values are the
#   human-readable labels. Entries are grouped by code family, in the order
#   JA_*, MO_*/MX_*, the three-letter daily-style codes, the hourly-style
#   codes, and finally visibility, cloud-layer, soil-temperature and
#   precipitation extras.
#   NOTE(review): the family-to-resolution association (JA = annual,
#   MO = monthly, ...) is inferred from the prefixes -- confirm.
get_dwd_param_map <- function() {
  ja_codes <- c(
    JA_TT = "Temperature",
    JA_TX = "Avg Max Temp",
    JA_TN = "Avg Min Temp",
    JA_RR = "Precipitation",
    JA_FK = "Wind",
    JA_N = "Cloud Cover",
    JA_SD_S = "Sunshine",
    JA_MX_TX = "Abs Max Temp",
    JA_MX_TN = "Abs Min Temp",
    JA_MX_RS = "Max Daily Precip",
    JA_MX_FX = "Max Wind Gust",
    JA_NSH = "Fresh Snow Sum",
    JA_SH_S = "Snow Depth Sum",
    JA_GEWITTER = "Thunderstorm Days",
    JA_GLATTEIS = "Glaze Days",
    JA_GRAUPEL = "Graupel Days",
    JA_HAGEL = "Hail Days",
    JA_NEBEL = "Fog Days",
    JA_REIF = "Frost Days",
    JA_STURM_6 = "Storm Days (Bft 6)",
    JA_STURM_8 = "Storm Days (Bft 8)",
    JA_TAU = "Dew Days"
  )

  mo_codes <- c(
    MO_TT = "Temperature",
    MO_TX = "Max Temp",
    MO_TN = "Min Temp",
    MO_RR = "Precipitation",
    MO_FK = "Wind",
    MO_N = "Cloud Cover",
    MO_SD_S = "Sunshine",
    MX_TX = "Abs Max Temp",
    MX_TN = "Abs Min Temp",
    MX_RS = "Max Daily Precip",
    MX_FX = "Max Wind Gust",
    MO_NSH = "Fresh Snow Sum",
    MO_SH_S = "Snow Depth Sum"
  )

  daily_codes <- c(
    TMK = "Temperature",
    TNK = "Min Temp",
    TXK = "Max Temp",
    RSK = "Precipitation",
    RS = "Precipitation (Synop)",
    RSF = "Precipitation Form",
    RSKF = "Precipitation Form",
    FM = "Wind Speed",
    FX = "Wind Gust",
    SDK = "Sunshine",
    NM = "Cloud Cover",
    NSH_TAG = "Fresh Snow",
    PM = "Pressure",
    UPM = "Humidity",
    VPM = "Vapor Pressure",
    SHK_TAG = "Snow Depth",
    SH_TAG = "Snow Depth (Synop)",
    TGK = "Min Ground Temp (5cm)"
  )

  hourly_codes <- c(
    TT_TU = "Temperature",
    RF_TU = "Humidity",
    P0 = "Pressure (Station)",
    P = "Pressure (Sea Level)",
    V_N = "Cloud Cover",
    SD_SO = "Sunshine",
    F = "Wind Speed",
    D = "Wind Direction",
    R1 = "Precipitation",
    WRTR = "Weather Text",
    WW = "Weather Code"
  )

  # Cloud layers 1-4, each with a type (CS), height (HHS) and amount (NS)
  # code, ordered layer by layer.
  layer_no <- rep(1:4, each = 3)
  cloud_codes <- paste("Cloud Layer", layer_no, c("Type", "Height", "Amount"))
  names(cloud_codes) <- paste0("V_S", layer_no, "_", c("CS", "HHS", "NS"))

  # Soil temperatures exist under two naming schemes (ST_<depth> and
  # TS<depth>) that share the same labels.
  depths_cm <- c(2, 5, 10, 20, 50, 100)
  soil_codes <- rep(paste0("Soil Temp (", depths_cm, "cm)"), times = 2)
  names(soil_codes) <- c(paste0("ST_", depths_cm), paste0("TS", depths_cm))

  c(
    ja_codes,
    mo_codes,
    daily_codes,
    hourly_codes,
    c(V_VV = "Visibility"),
    cloud_codes,
    soil_codes,
    c(
      R_R1 = "Precipitation (Hourly)",
      RS_IND = "Precipitation Indicator"
    )
  )
}
|
|