Spaces:

alexdum
/

meteogate

Running

File size: 3,641 Bytes

bfea3af

library(jsonlite)
library(dplyr)
library(readr)
library(curl)

# 1. Locate the official station-list resource on data.gouv.fr
dataset_slug <- "informations-sur-les-stations-meteo-france-metadonnees"
api_base <- "https://www.data.gouv.fr/api/1/datasets/"
api_url <- paste0(api_base, dataset_slug, "/")

print("Fetching metadata dataset info...")
meta <- fromJSON(api_url)

# Show every resource title so the automatic selection can be checked by eye
resources <- meta$resources
print("All Resource Titles:")
print(resources$title)

# Row numbers of the resources whose format field is exactly "csv"
csv_idx <- which(resources$format == "csv")

# Helper: row numbers of CSV resources whose title matches `pattern`
# (case-insensitive regular expression).
find_csv_by_title <- function(pattern) {
    title_hits <- grep(pattern, resources$title, ignore.case = TRUE)
    intersect(title_hits, csv_idx)
}

# First choice: titles containing "liste ... stations", "postes" or
# "metadonnees"
target_indices <- find_csv_by_title("liste.*stations|postes|metadonnees")

# Fallback: any CSV resource whose title mentions "stations"
if (length(target_indices) == 0) {
    print("No obvious metadata CSV found. Checking ANY CSV with 'stations'...")
    target_indices <- find_csv_by_title("stations")
}

# Remains NULL unless a resource is selected and read further down
official_stations <- NULL

# When a metadata CSV resource was found: download it, download the SYNOP
# station list, match the two on coordinates rounded to 3 decimals, and write
# the resulting ID mapping to station_mapping_auto.csv.
if (length(target_indices) > 0) {
    # Use the first matching resource
    target_res <- resources[target_indices[1], ]
    print(paste("Selected Resource:", target_res$title))
    print(paste("URL:", target_res$url))

    # It appears to be comma-separated
    official_stations <- read_csv(target_res$url, show_col_types = FALSE)
    official_cols <- names(official_stations)
    print("Official Stations Columns:")
    print(official_cols)

    # Station counts are diagnostics only, so a missing flag column skips the
    # corresponding count instead of aborting the script with a filter error.
    print(paste("Total Stations:", nrow(official_stations)))
    if ("is_hourly" %in% official_cols) {
        hourly_stations <- official_stations %>% filter(is_hourly == TRUE)
        print(paste("Hourly Stations:", nrow(hourly_stations)))
        if ("is_open" %in% official_cols) {
            print(paste("Open Hourly Stations:", nrow(hourly_stations %>% filter(is_open == TRUE))))
        }
    }

    # 2. Get Synop List
    # Every column is read as text (presumably so station IDs keep their
    # exact textual form); coordinates are converted to numeric below.
    synop_url <- "https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv"
    synop_stations <- read_delim(synop_url,
        delim = ";", show_col_types = FALSE,
        col_types = cols(.default = col_character())
    )

    # 3. Join Attempt
    print("Attempting Fuzzy Join on Location...")

    # Validate every column the join and the mapping rely on, on both sides,
    # before touching them.
    official_required <- c("id", "name", "lat", "lon", "department_id", "is_hourly")
    synop_required <- c("ID", "Nom", "Latitude", "Longitude")
    missing_official <- setdiff(official_required, official_cols)
    missing_synop <- setdiff(synop_required, names(synop_stations))

    if (any(c("lat", "lon") %in% missing_official)) {
        print("Expected lat/lon columns not found.")
    } else if (length(missing_official) > 0 || length(missing_synop) > 0) {
        print(paste(
            "Required columns not found:",
            paste(c(missing_official, missing_synop), collapse = ", ")
        ))
    } else {
        # Prepare Synop: keep only the columns needed for the mapping so the
        # join cannot produce .x/.y suffixes on unrelated shared names.
        synop_join <- synop_stations %>%
            mutate(
                lat_round = round(as.numeric(Latitude), 3),
                lon_round = round(as.numeric(Longitude), 3)
            ) %>%
            select(id_synop = ID, name_synop = Nom, lat_round, lon_round)

        # Prepare Official: only stations with hourly data take part
        official_join <- hourly_stations %>%
            mutate(
                lat_round = round(as.numeric(lat), 3),
                lon_round = round(as.numeric(lon), 3)
            ) %>%
            select(
                id_clim = id,
                name_clim = name,
                department = department_id,
                lat_round,
                lon_round
            )

        # Join on the rounded coordinates. na_matches = "never" prevents
        # stations with missing/unparseable coordinates (NA keys) on both
        # sides from being matched to each other.
        joined <- inner_join(
            synop_join, official_join,
            by = c("lat_round", "lon_round"),
            na_matches = "never"
        )

        print(paste("Matches found:", nrow(joined)))

        # One row per SYNOP station: when several official stations share the
        # same rounded coordinates, the first match is kept.
        mapping <- joined %>%
            select(id_synop, id_clim, name_synop, name_clim, department) %>%
            distinct(id_synop, .keep_all = TRUE)

        print(head(mapping))

        # Write to file
        write_delim(mapping, "station_mapping_auto.csv", delim = ";")
        print("Mapping saved to station_mapping_auto.csv")
    }
} else {
    print("No suitable metadata resource found.")
}