|
|
library(jsonlite) |
|
|
library(dplyr) |
|
|
library(readr) |
|
|
library(curl) |
|
|
|
|
|
|
|
|
# ---- Dataset lookup on data.gouv.fr ----
# Slug of the dataset whose resource list is inspected below.
dataset_slug <- "informations-sur-les-stations-meteo-france-metadonnees"

# Dataset endpoint: API root + slug + trailing slash.
api_root <- "https://www.data.gouv.fr/api/1/datasets/"
api_url <- paste0(api_root, dataset_slug, "/")

print("Fetching metadata dataset info...")
meta <- api_url %>% fromJSON()

# Keep the resource listing and print every title so the selection made
# later in the script can be checked by eye.
resources <- meta$resources
print("All Resource Titles:")
print(resources$title)
|
|
|
|
|
|
|
|
|
|
|
# ---- Pick the station-metadata CSV among the dataset resources ----
# Rows whose title matches a station list, "postes" or "metadonnees"
# (case-insensitive).
meta_idx <- grep(
  pattern = "liste.*stations|postes|metadonnees",
  x = resources$title,
  ignore.case = TRUE
)

# Rows whose declared format is exactly "csv".
csv_idx <- which(resources$format == "csv")

# A candidate has to satisfy both criteria.
target_indices <- meta_idx[meta_idx %in% csv_idx]

# Fallback: accept any CSV whose title mentions "stations".
if (length(target_indices) == 0) {
  print("No obvious metadata CSV found. Checking ANY CSV with 'stations'...")
  stations_idx <- grep("stations", resources$title, ignore.case = TRUE)
  target_indices <- stations_idx[stations_idx %in% csv_idx]
}
|
|
|
|
|
|
|
|
# ---- Download station metadata and build the SYNOP mapping ----
# Reads the first selected metadata CSV, prints station counts, then matches
# SYNOP stations to hourly stations on coordinates rounded to 3 decimals and
# writes the result to station_mapping_auto.csv.

# Stop with a readable message when `data` lacks any of `cols`.
# `label` names the table in the error message. Returns `data` invisibly.
require_columns <- function(data, cols, label) {
  absent <- setdiff(cols, names(data))
  if (length(absent) > 0) {
    stop(
      label, " is missing expected column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  invisible(data)
}

# Stays NULL when no suitable resource was selected earlier in the script.
official_stations <- NULL

if (length(target_indices) > 0) {
  # Only the first candidate resource is used.
  target_res <- resources[target_indices[1], ]
  print(paste("Selected Resource:", target_res$title))
  print(paste("URL:", target_res$url))

  official_stations <- read_csv(target_res$url, show_col_types = FALSE)
  print("Official Stations Columns:")
  print(colnames(official_stations))

  # The counts below filter on these flags; check them up front so a schema
  # change fails with a clear message instead of "object not found".
  require_columns(
    official_stations,
    c("is_hourly", "is_open"),
    "Station metadata CSV"
  )

  hourly_stations <- official_stations %>% filter(is_hourly == TRUE)
  print(paste("Total Stations:", nrow(official_stations)))
  print(paste("Hourly Stations:", nrow(hourly_stations)))
  print(paste(
    "Open Hourly Stations:",
    nrow(hourly_stations %>% filter(is_open == TRUE))
  ))

  synop_url <- "https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv"

  # Every column is read as character; coordinates are converted explicitly
  # further down.
  synop_stations <- read_delim(
    synop_url,
    delim = ";",
    show_col_types = FALSE,
    col_types = cols(.default = col_character())
  )

  print("Attempting Fuzzy Join on Location...")

  if (!all(c("lat", "lon") %in% names(official_stations))) {
    print("Expected lat/lon columns not found.")
  } else {
    # Columns read by the join and by the final select().
    require_columns(
      official_stations,
      c("id", "name", "department_id"),
      "Station metadata CSV"
    )
    require_columns(
      synop_stations,
      c("ID", "Nom", "Latitude", "Longitude"),
      "SYNOP station list"
    )

    # Rounded coordinates act as the join key. Rows whose coordinates are
    # missing or not numeric are dropped: inner_join() treats NA keys as
    # equal by default, which would pair unrelated stations.
    synop_join <- synop_stations %>%
      mutate(
        lat_round = round(as.numeric(Latitude), 3),
        lon_round = round(as.numeric(Longitude), 3)
      ) %>%
      filter(!is.na(lat_round), !is.na(lon_round))

    # Only hourly stations take part in the match; reuse the subset that
    # was already computed for the counts above.
    official_join <- hourly_stations %>%
      mutate(
        lat_round = round(as.numeric(lat), 3),
        lon_round = round(as.numeric(lon), 3)
      ) %>%
      filter(!is.na(lat_round), !is.na(lon_round))

    joined <- inner_join(
      synop_join,
      official_join,
      by = c("lat_round", "lon_round")
    )
    print(paste("Matches found:", nrow(joined)))

    # One row per SYNOP station: when several hourly stations share the
    # rounded coordinates, the first match is kept.
    mapping <- joined %>%
      select(
        id_synop = ID,
        id_clim = id,
        name_synop = Nom,
        name_clim = name,
        department = department_id
      ) %>%
      distinct(id_synop, .keep_all = TRUE)

    print(head(mapping))

    write_delim(mapping, "station_mapping_auto.csv", delim = ";")
    print("Mapping saved to station_mapping_auto.csv")
  }
} else {
  print("No suitable metadata resource found.")
}
|
|
|