# Build a mapping between Meteo-France SYNOP station IDs and climatological
# station IDs by matching rounded coordinates across two public datasets:
#   - the official station metadata published on data.gouv.fr
#   - the SYNOP station list from donneespubliques.meteofrance.fr
# Output: station_mapping_auto.csv (semicolon-delimited) in the working dir.
library(jsonlite)
library(dplyr)
library(readr)
library(curl)  # backend used by readr for https downloads

# 1. Get Official Station List Resources ----
dataset_slug <- "informations-sur-les-stations-meteo-france-metadonnees"
api_url <- paste0("https://www.data.gouv.fr/api/1/datasets/", dataset_slug, "/")

message("Fetching metadata dataset info...")
meta <- fromJSON(api_url)

resources <- meta$resources
message("All Resource Titles:")
print(resources$title)

# Regex to find the metadata file: looking for "liste" AND "stations",
# or "postes", or "metadonnees" in the resource title.
meta_idx <- grep("liste.*stations|postes|metadonnees", resources$title,
                 ignore.case = TRUE)
# FIX: data.gouv.fr reports formats as "csv" or "CSV" depending on the
# resource; compare case-insensitively so uppercase entries are not missed.
csv_idx <- which(tolower(resources$format) == "csv")

# Intersect title matches with CSV-format resources.
target_indices <- intersect(meta_idx, csv_idx)
if (length(target_indices) == 0) {
  message("No obvious metadata CSV found. Checking ANY CSV with 'stations'...")
  target_indices <- intersect(
    grep("stations", resources$title, ignore.case = TRUE),
    csv_idx
  )
}

# Initialize so the object exists even when no resource is found.
official_stations <- NULL

if (length(target_indices) > 0) {
  target_res <- resources[target_indices[1], ]
  message("Selected Resource: ", target_res$title)
  message("URL: ", target_res$url)

  # The resource appears to be comma-separated.
  official_stations <- read_csv(target_res$url, show_col_types = FALSE)
  message("Official Stations Columns:")
  print(colnames(official_stations))

  # Count hourly stations. filter(is_hourly) drops FALSE and NA rows,
  # exactly like the redundant `is_hourly == TRUE` form.
  hourly_stations <- official_stations %>% filter(is_hourly)
  message("Total Stations: ", nrow(official_stations))
  message("Hourly Stations: ", nrow(hourly_stations))
  message("Open Hourly Stations: ",
          nrow(hourly_stations %>% filter(is_open)))

  # 2. Get Synop List ----
  synop_url <- paste0(
    "https://donneespubliques.meteofrance.fr/",
    "donnees_libres/Txt/Synop/postesSynop.csv"
  )
  synop_stations <- read_delim(
    synop_url,
    delim = ";",
    show_col_types = FALSE,
    # Read everything as character so station IDs keep leading zeros.
    col_types = cols(.default = col_character())
  )

  # 3. Join Attempt ----
  message("Attempting Fuzzy Join on Location...")

  # Expected official columns: id, lat, lon, department_id, is_hourly.
  if (!all(c("lat", "lon") %in% names(official_stations))) {
    message("Expected lat/lon columns not found.")
  } else {
    # Prepare SYNOP side: round coordinates to 3 decimals (~110 m) so that
    # minor precision differences between the two sources still match.
    synop_join <- synop_stations %>%
      mutate(
        lat_round = round(as.numeric(Latitude), 3),
        lon_round = round(as.numeric(Longitude), 3)
      )

    # Prepare official side the same way; only keep stations with hourly data.
    official_join <- official_stations %>%
      mutate(
        lat_round = round(as.numeric(lat), 3),
        lon_round = round(as.numeric(lon), 3)
      ) %>%
      filter(is_hourly)

    joined <- inner_join(synop_join, official_join,
                         by = c("lat_round", "lon_round"))
    message("Matches found: ", nrow(joined))

    # Select columns for the mapping file; keep one row per SYNOP ID in case
    # coordinate rounding produced duplicate matches.
    mapping <- joined %>%
      select(
        id_synop = ID,
        id_clim = id,
        name_synop = Nom,
        name_clim = name,
        department = department_id
      ) %>%
      distinct(id_synop, .keep_all = TRUE)
    print(head(mapping))

    write_delim(mapping, "station_mapping_auto.csv", delim = ";")
    message("Mapping saved to station_mapping_auto.csv")
  }
} else {
  message("No suitable metadata resource found.")
}