Spaces:

ncsdecoopman
/

ExtremePrecipit

Sleeping

App Files Files Community

ncsdecoopman committed on Nov 6, 2025

Commit

0ab0788

0 Parent(s):

Déploiement Docker depuis workflow (structure corrigée)

Browse files

Files changed (25) hide show

.gitignore +26 -0
.huggingface.yaml +1 -0
.streamlit/config.toml +15 -0
Dockerfile +25 -0
README.md +1 -0
app/__init__.py +0 -0
app/config/config.yaml +12 -0
app/pipelines/import_config.py +94 -0
app/pipelines/import_data.py +265 -0
app/pipelines/import_map.py +119 -0
app/pipelines/import_scatter.py +36 -0
app/utils/__init__.py +0 -0
app/utils/config_utils.py +165 -0
app/utils/data_utils.py +223 -0
app/utils/gev_utils.py +171 -0
app/utils/hist_utils.py +127 -0
app/utils/legends_utils.py +221 -0
app/utils/map_utils.py +223 -0
app/utils/menus_utils.py +224 -0
app/utils/scatter_plot_utils.py +432 -0
app/utils/show_info.py +19 -0
app/utils/stats_utils.py +147 -0
download_data.py +52 -0
main.py +372 -0
requirements.txt +21 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,26 @@

+# Ignorer les fichiers de cache Python
+__pycache__/
+*.py[cod]
+*.parquet
+*.zarr
+*.nc
+*.csv
+# On garde les .keep
+!data/**/.keep
+# Ajouter les config streamlit au git
+!/.streamlit/*
+# Ignore aussi d'éventuels fichiers temporaires et artefacts
+*.tmp
+*.bak
+*.DS_Store
+__pycache__/
+*.pyc
+*.cache
+*.log
+# Ignorer les caches Jupyter
+.jupyter_cache/
+presentation_files/
+assets/

.huggingface.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ sdk: docker

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,15 @@

+# .streamlit/config.toml
+[theme]
+base = "light"
+primaryColor = "#5A7BFF"
+backgroundColor = "#EEF2FF"
+secondaryBackgroundColor = "#FFFFFF"
+textColor = "#1F2D3D"
+font = "sans serif"   # "serif" | "sans serif" | "monospace"
+[server]
+headless = true
+runOnSave = true
+[browser]
+gatherUsageStats = false

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+# Base image: slim Python 3.11
+FROM python:3.11-slim
+# Install the C toolchain and git, then delete the apt package lists in the same layer
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Only the requirements file and the download script are copied before the install/download steps
+COPY requirements.txt .
+COPY download_data.py .
+RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install huggingface_hub
+# Remove any existing Hugging Face hub cache, then run the download script at build time
+RUN rm -rf ~/.cache/huggingface/hub && \
+    python download_data.py
+# Copy the rest of the project into /app
+COPY . .
+# Port on which the Streamlit server is started by CMD below
+EXPOSE 7860
+ENV MPLCONFIGDIR=/tmp/matplotlib
+ENV PYTHONPATH=/app
+CMD ["streamlit", "run", "main.py", "--server.port=7860", "--server.address=0.0.0.0"]

README.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Déploiement avec snapshot_download depuis le dataset Hugging Face

app/__init__.py ADDED Viewed

File without changes

app/config/config.yaml ADDED Viewed

	@@ -0,0 +1,12 @@

+years:
+  min: 1959
+  max: 2022 # choix dans le menu
+  rupture: 1985
+statisticals:
+  modelised: data/statisticals/modelised
+  observed: data/statisticals/observed
+gev:
+  modelised: data/gev/modelised
+  observed: data/gev/observed

app/pipelines/import_config.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import streamlit as st
+from app.utils.config_utils import load_config, menu_config_statisticals, menu_config_gev
+from app.utils.menus_utils import menu_statisticals, menu_gev
+import streamlit as st
+from pathlib import Path
+from functools import reduce
+from app.utils.config_utils import *
+from app.utils.menus_utils import *
+from app.utils.data_utils import *
+from app.utils.stats_utils import *
+from app.utils.map_utils import *
+from app.utils.legends_utils import *
+from app.utils.hist_utils import *
+from app.utils.scatter_plot_utils import *
+from app.utils.show_info import show_info_metric
+from app.utils.gev_utils import compute_return_levels_ns
+import pydeck as pdk
+import polars as pl
+import numpy as np
+from app.pipelines.import_data import pipeline_data
+from app.pipelines.import_map import pipeline_map
+from app.utils.data_utils import standardize_year, filter_nan
+def pipeline_config(config_path: str, type: str, show_param: bool=False):
+    # Chargement de la configuration
+    config = load_config(config_path)
+    min_years = config["years"]["min"]
+    max_years = config["years"]["max"]
+    if type == "stat":
+        STATS, SEASON, SCALE = menu_config_statisticals()
+        params = menu_statisticals(
+            min_years,
+            max_years,
+            STATS,
+            SEASON
+        )
+        if params is None:
+            st.info("Les paramètres d’analyse ne sont pas encore définis. Merci de les configurer pour lancer l’analyse.")
+            st.stop()
+        stat_choice, quantile_choice, min_year_choice, max_year_choice, season_choice, scale_choice, missing_rate, show_relief, show_stations = params
+        return {
+            "config": config,
+            "stat_choice": stat_choice,
+            "season_choice": season_choice,
+            "stat_choice_key": STATS[stat_choice],
+            "scale_choice_key": SCALE[scale_choice],
+            "min_year_choice": min_year_choice,
+            "max_year_choice": max_year_choice,
+            "season_choice_key": SEASON[season_choice],
+            "missing_rate": missing_rate,
+            "quantile_choice": quantile_choice,
+            "scale_choice": scale_choice,
+            "show_relief": show_relief,
+            "show_stations": show_stations
+        }
+    elif type == "gev":
+        MODEL_PARAM, MODEL_NAME = menu_config_gev()
+        _, SEASON, _ = menu_config_statisticals()
+        params = menu_gev(
+            config,
+            MODEL_NAME,
+            MODEL_PARAM,
+            SEASON,
+            show_param=show_param
+        )
+        if params is None:
+            st.info("Les paramètres d’analyse ne sont pas encore définis. Merci de les configurer pour lancer l’analyse.")
+            st.stop()
+        return {
+            "config": config,
+            **params
+        }

app/pipelines/import_data.py ADDED Viewed

	@@ -0,0 +1,265 @@

+import streamlit as st
+from app.utils.data_utils import (
+    load_data,
+    cleaning_data_observed,
+    dont_show_extreme,
+    add_metadata,
+    get_column_load,
+    filter_nan
+)
+from app.utils.stats_utils import compute_statistic_per_point
+from app.utils.gev_utils import safe_compute_return_df, compute_delta_qT, compute_delta_stat
+from app.utils.legends_utils import get_stat_column_name
+import polars as pl
+def load_data_cached(use_cache: bool):
+    if use_cache:
+        return st.cache_data(load_data_inner) # Version cachée qui retourne un DataFrame pour la sérialisation.
+    else:
+        return load_data_inner
+def load_data_inner(type_data: str, echelle: str, min_year: int, max_year: int, season_key: str, col_to_load: list, config) -> pl.DataFrame:
+    return load_data(type_data, echelle, min_year, max_year, season_key, col_to_load, config)
+def pipeline_data(params, config, use_cache=False):
+    stat_choice_key, scale_choice_key, min_year_choice, max_year_choice, season_choice_key, missing_rate, quantile_choice, scale_choice = params
+    loader = load_data_cached(use_cache)
+    # Colonne de statistique nécessaire au chargement
+    col_to_load, col_important = get_column_load(stat_choice_key, scale_choice_key)
+    if scale_choice == "Journalière":
+        scale_choice = "quotidien"
+    elif scale_choice == "Horaire":
+        scale_choice = "horaire"
+    try:
+        modelised_load = loader(
+            'modelised', scale_choice if scale_choice != "quotidien" else "horaire",
+            min_year_choice,
+            max_year_choice,
+            season_choice_key,
+            col_to_load,
+            config
+        )
+    except Exception as e:
+        raise RuntimeError(f"Erreur lors du chargement des données modélisées : {e}")
+    try:
+        observed_load = loader(
+            'observed', scale_choice,
+            min_year_choice,
+            max_year_choice,
+            season_choice_key,
+            col_to_load + ["nan_ratio"],
+            config
+        )
+    except Exception as e:
+        raise RuntimeError(f"Erreur lors du chargement des données observées : {e}")
+    # Selection des données observées
+    len_series = 0.75*(max_year_choice-min_year_choice+1)
+    df_observed_cleaning = cleaning_data_observed(observed_load, len_series, missing_rate)
+    # Calcul des statistiques
+    modelised = compute_statistic_per_point(modelised_load, stat_choice_key)
+    observed = compute_statistic_per_point(df_observed_cleaning, stat_choice_key)
+    # Ajout de l'altitude et des lat lon
+    modelised = add_metadata(modelised, scale_choice_key, type='modelised')
+    observed = add_metadata(observed, scale_choice_key, type='observed')
+    # Obtention de la colonne étudiée
+    column = get_stat_column_name(stat_choice_key, scale_choice_key)
+    # Retrait des extrêmes pour l'affichage uniquement
+    modelised_show, observed_show = dont_show_extreme(modelised, observed, column, quantile_choice, stat_choice_key)
+    return {
+        "modelised_load": modelised_load,
+        "observed_load": observed_load,
+        "observed_cleaning": df_observed_cleaning,
+        "modelised_show": modelised_show,
+        "observed_show": observed_show,
+        "modelised": modelised,
+        "observed": observed,
+        "column": column
+    }
+def pipeline_data_gev(params):
+    column = params["param_choice"]
+    BOOTSTRAP = False
+    if "_bootstrap" in params['model_name']: # dans le cas des modèles avec bootstrap
+        BOOTSTRAP = True
+        # On repasse sur les fichiers non boostrapés
+        params['model_name'] = params['model_name'].replace('_bootstrap', '')
+    df_modelised_load = pl.read_parquet(params["mod_dir"] / f"gev_param_{params['model_name']}.parquet")
+    df_observed_load = pl.read_parquet(params["obs_dir"] / f"gev_param_{params['model_name']}.parquet")
+    df_modelised = filter_nan(df_modelised_load, "xi") # xi est toujours valable
+    df_observed = filter_nan(df_observed_load, "xi") # xi est toujours valable
+    df_modelised = add_metadata(df_modelised, "mm_h" if params["echelle"] == "horaire" else "mm_j", type="modelised")
+    df_observed = add_metadata(df_observed, "mm_h" if params["echelle"] == "horaire" else "mm_j", type="observed")
+    # Étape 1 : créer une colonne avec les paramètres nettoyés
+    df_modelised = safe_compute_return_df(df_modelised)
+    df_observed = safe_compute_return_df(df_observed)
+    # Étape 2 : appliquer delta_qT_decennale (avec numpy)
+    T_choice = params["T_choice"]  # ou récupéré dynamiquement via Streamlit
+    if "_break_year" in params['model_name']: # dans le cas des modèles avec point de rupture
+        year_range = params["max_year_choice"] - params["config"]["years"]["rupture"] # Δa+ = a_max - a_rupture
+    else:
+        year_range = params["max_year_choice"] - params["min_year_choice"] # Δa = a_max - a_min
+    if column == "Δqᵀ":
+        # Calcul du delta qT
+        df_modelised = df_modelised.with_columns([
+            pl.struct(["mu1", "sigma1", "xi"])
+            .map_elements(lambda row: compute_delta_qT(row, T_choice, year_range, params["par_X_annees"]), return_dtype=pl.Float64)
+            .alias("Δqᵀ")
+        ])
+        df_observed = df_observed.with_columns([
+            pl.struct(["mu1", "sigma1", "xi"])
+            .map_elements(lambda row: compute_delta_qT(row, T_choice, year_range, params["par_X_annees"]), return_dtype=pl.Float64)
+            .alias("Δqᵀ")
+        ])
+    elif column in ["ΔE", "ΔVar", "ΔCV"]:
+        t_start = params["min_year_choice"]
+        t_end = params["max_year_choice"]
+        t0 = params["config"]["years"]["rupture"]
+        df_modelised = df_modelised.with_columns([
+            pl.struct(["mu0", "mu1", "sigma0", "sigma1", "xi"])
+            .map_elements(lambda row: compute_delta_stat(row, column, t_start, t0 , t_end, params["par_X_annees"]), return_dtype=pl.Float64)
+            .alias(column)
+        ])
+        df_observed = df_observed.with_columns([
+            pl.struct(["mu0", "mu1", "sigma0", "sigma1", "xi"])
+            .map_elements(lambda row: compute_delta_stat(row, column, t_start, t0, t_end, params["par_X_annees"]), return_dtype=pl.Float64)
+            .alias(column)
+        ])
+    if BOOTSTRAP:
+        df_mod_bootstrap = pl.read_parquet(params["mod_dir"] / f"gev_param_{params['model_name']}_bootstrap.parquet")
+        df_obs_bootstrap = pl.read_parquet(params["obs_dir"] / f"gev_param_{params['model_name']}_bootstrap.parquet")
+        # Recalcule delta_qT pour chaque bootstrap
+        df_mod_bootstrap = df_mod_bootstrap.with_columns([
+            pl.struct(["mu1", "sigma1", "xi"]).map_elements(
+                lambda row: compute_delta_qT(
+                    row,
+                    params["T_choice"],
+                    year_range,
+                    params["par_X_annees"]
+                ),
+                return_dtype=pl.Float64
+            ).alias("Δqᵀ")
+        ])
+        df_obs_bootstrap = df_obs_bootstrap.with_columns([
+            pl.struct(["mu1", "sigma1", "xi"]).map_elements(
+                lambda row: compute_delta_qT(
+                    row,
+                    params["T_choice"],
+                    year_range,
+                    params["par_X_annees"]
+                ),
+                return_dtype=pl.Float64
+            ).alias("Δqᵀ")
+        ])
+        # Calcule les bornes de l'intervalle de confiance
+        df_ic_mod = (
+            df_mod_bootstrap
+            .group_by("NUM_POSTE")
+            .agg([
+                pl.col("Δqᵀ").quantile(0.05, "nearest").alias("Δqᵀ_q050"),
+                pl.col("Δqᵀ").quantile(0.95, "nearest").alias("Δqᵀ_q950"),
+            ])
+        )
+        df_ic_obs = (
+            df_obs_bootstrap
+            .group_by("NUM_POSTE")
+            .agg([
+                pl.col("Δqᵀ").quantile(0.05, "nearest").alias("Δqᵀ_q050"),
+                pl.col("Δqᵀ").quantile(0.95, "nearest").alias("Δqᵀ_q950"),
+            ])
+        )
+        # Forcer NUM_POSTE à être de même type (int) dans les deux DataFrames
+        df_ic_mod = df_ic_mod.with_columns([pl.col("NUM_POSTE").cast(pl.Int64)])
+        df_ic_obs = df_ic_obs.with_columns([pl.col("NUM_POSTE").cast(pl.Int64)])
+        df_modelised = df_modelised.with_columns([pl.col("NUM_POSTE").cast(pl.Int64)])
+        df_observed = df_observed.with_columns([pl.col("NUM_POSTE").cast(pl.Int64)])
+        # Join à df_observed
+        df_modelised = df_modelised.join(df_ic_mod, on="NUM_POSTE", how="left")
+        df_observed = df_observed.join(df_ic_obs, on="NUM_POSTE", how="left")
+        # Création d'une colonne est significatif ou non (ne recoupe pas l'intervalle)
+        df_modelised = df_modelised.with_columns([
+            (
+                ~((pl.col("Δqᵀ_q050") <= 0) & (pl.col("Δqᵀ_q950") >= 0))
+            ).alias("is_significant")
+        ])
+        df_observed = df_observed.with_columns([
+            (
+                ~((pl.col("Δqᵀ_q050") <= 0) & (pl.col("Δqᵀ_q950") >= 0))
+            ).alias("is_significant")
+        ])
+    # Retrait des percentiles
+    modelised_show = dont_show_extreme(df_modelised, column, params["quantile_choice"])
+    observed_show = dont_show_extreme(df_observed, column, params["quantile_choice"])
+    if column in ["Δqᵀ", "ΔE", "ΔVar", "ΔCV"]:
+        val_max = max(modelised_show[column].max(), observed_show[column].max())
+        val_min = min(modelised_show[column].min(), observed_show[column].min())
+        abs_max = max(abs(val_min), abs(val_max))
+        return {
+            "modelised_load": df_modelised_load,
+            "observed_load": df_observed_load,
+            "modelised": df_modelised,
+            "observed": df_observed,
+            "modelised_show": modelised_show,
+            "observed_show": observed_show,
+            "column": column,
+            "vmin": -abs_max,
+            "vmax": abs_max,
+            "echelle": "diverging_zero_white",
+            "continu": True
+        }
+    else:
+        return {
+            "modelised_load": df_modelised_load,
+            "observed_load": df_observed_load,
+            "modelised": df_modelised,
+            "observed": df_observed,
+            "modelised_show": modelised_show,
+            "observed_show": observed_show,
+            "column": column
+        }

app/pipelines/import_map.py ADDED Viewed

	@@ -0,0 +1,119 @@

+from app.utils.config_utils import echelle_config
+from app.utils.map_utils import create_layer, create_scatter_layer, create_tooltip
+from app.utils.legends_utils import formalised_legend, display_vertical_color_legend
+import pydeck as pdk
+def safe_min(*args):
+    return min(x for x in args if x is not None) if any(x is not None for x in args) else None
+def safe_max(*args):
+    return max(x for x in args if x is not None) if any(x is not None for x in args) else None
+def pipeline_map(
+    params_load,
+    n_colors:int = 15,
+    param_view: dict = {"latitude": 46.9, "longitude": 1.7, "zoom": 5}
+):
+    # Déballage des paramètres
+    stat_choice_key, result, unit_label, height = params_load
+    # Echelle continue ou discrète
+    if "continu" in result:
+        continu = result["continu"]
+    elif stat_choice_key == "month":
+        continu = False
+    else:
+        continu = True
+    # Nombre de couleurs
+    if "categories" in result: # Discret
+        categories = result["categories"]
+        n_colors = len(categories)
+    else:
+        categories = None
+        n_colors = n_colors
+    # Echelle paramétrée par l'utilisateur
+    if "echelle" not in result: # Choix d'une échelle personnalisée
+        result["echelle"] = None
+    # On trouve alors la représéntation de la légende
+    colormap = echelle_config(continu, echelle=result["echelle"], n_colors=n_colors)
+    result_df_modelised_show = result["modelised_show"]
+    result_df_observed_show = result["observed_show"]
+    # Normalisation des valeurs modélisées
+    result_df_modelised_show, vmin_mod, vmax_mod = formalised_legend(
+        result["modelised_show"],
+        column_to_show=result["column"],
+        colormap=colormap,
+        is_categorical=not continu,
+        categories=categories
+    )
+    # Normalisation des observations avec les mêmes bornes
+    result_df_observed_show, vmin_obs, vmax_obs = formalised_legend(
+        result["observed_show"],
+        column_to_show=result["column"],
+        colormap=colormap,
+        is_categorical=not continu,
+        categories=categories
+    )
+    # Calcul des bornes communes
+    if "vmin" in result and "vmax" in result:
+        vmin_commun, vmax_commun = result["vmin"], result["vmax"]
+    else:
+        vmin_commun = safe_min(vmin_mod, vmin_obs)
+        vmax_commun = safe_max(vmax_mod, vmax_obs)
+    # Mise à jour de la normalisation pour les deux ensembles de données avec les bornes communes
+    result_df_modelised_show, _, _ = formalised_legend(
+        result["modelised_show"],
+        column_to_show=result["column"],
+        colormap=colormap,
+        vmin=vmin_commun,
+        vmax=vmax_commun,
+        is_categorical=not continu,
+        categories=categories
+    )
+    result_df_observed_show, _, _ = formalised_legend(
+        result["observed_show"],
+        column_to_show=result["column"],
+        colormap=colormap,
+        vmin=vmin_commun,
+        vmax=vmax_commun,
+        is_categorical=not continu,
+        categories=categories
+    )
+    # Création du layer modélisé et observé
+    layer = create_layer(result_df_modelised_show)
+    scatter_layer = create_scatter_layer(result_df_observed_show)
+    # Tooltip
+    tooltip = create_tooltip(unit_label)
+    # View par défaut
+    view_state = pdk.ViewState(
+        latitude=param_view["latitude"],
+        longitude=param_view["longitude"],
+        zoom=param_view["zoom"]
+    )
+    # Légende vertical
+    legend = display_vertical_color_legend(
+        height,
+        colormap,
+        vmin_commun,
+        vmax_commun,
+        n_ticks=n_colors,
+        label=unit_label,
+        model_labels=categories
+    )
+    return layer, scatter_layer, tooltip, view_state, legend

app/pipelines/import_scatter.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import streamlit as st
+import polars as pl
+from app.utils.hist_utils import plot_histogramme, plot_histogramme_comparatif
+from app.utils.scatter_plot_utils import generate_scatter_plot_interactive
+from app.utils.data_utils import match_and_compare
+from app.utils.stats_utils import generate_metrics
+def pipeline_scatter(params_load):
+    result, stat_choice_key, scale_choice_key, stat_choice, unit_label, height = params_load
+    df_modelised_load = result["modelised_load"]
+    df_observed_load = result["observed_load"]
+    n_tot_mod = df_modelised_load.select(pl.col("NUM_POSTE").n_unique()).item()
+    n_tot_obs = df_observed_load.select(pl.col("NUM_POSTE").n_unique()).item()
+    if stat_choice_key not in ["date", "month"]:
+        echelle = "horaire" if scale_choice_key == "mm_h" else "quotidien"
+        df_obs_vs_mod = pl.read_csv(f"data/metadonnees/obs_vs_mod/obs_vs_mod_{echelle}.csv")
+        obs_vs_mod = match_and_compare(result["observed"], result["modelised"], result["column"], df_obs_vs_mod)
+        if obs_vs_mod is not None and obs_vs_mod.height > 0:
+            fig = generate_scatter_plot_interactive(obs_vs_mod, stat_choice, unit_label, height)
+            me, mae, rmse, r2 = generate_metrics(obs_vs_mod)
+            return n_tot_mod, n_tot_obs, me, mae, rmse, r2, fig
+        else:
+            fig = plot_histogramme(result["modelised"], result["column"], stat_choice, stat_choice_key, unit_label, height)
+            return n_tot_mod, n_tot_obs, None, None, None, None, fig
+    else:
+        fig = plot_histogramme_comparatif(result["observed"], result["modelised"], result["column"], stat_choice, stat_choice_key, unit_label, height)
+        return n_tot_mod, n_tot_obs, None, None, None, None, fig

app/utils/__init__.py ADDED Viewed

File without changes

app/utils/config_utils.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import yaml
+from matplotlib import colors as mcolors
+from matplotlib.colors import ListedColormap
+def menu_config_statisticals():
+    STATS = {
+        "Moyenne": "mean",
+        "Maximum": "max",
+        "Moyenne des maxima": "mean-max",
+        "Mois comptabilisant le plus de maximas": "month",
+        "Jour de pluie": "numday",
+    }
+    SEASON = {
+        "Année hydrologique": "hydro",
+        "Hiver": "djf",
+        "Printemps": "mam",
+        "Été": "jja",
+        "Automne": "son",
+    }
+    SCALE = {
+        "Horaire": "mm_h",
+        "Journalière": "mm_j"
+    }
+    return STATS, SEASON, SCALE
+def menu_config_gev():
+    MODEL_PARAM = {
+        "s_gev": {"mu0": "μ₀", "sigma0": "σ₀", "xi": "ξ"},
+        "ns_gev_m1": {"mu0": "μ₀", "mu1": "μ₁", "sigma0": "σ₀", "xi": "ξ"},
+        "ns_gev_m2": {"mu0": "μ₀", "sigma0": "σ₀", "sigma1": "σ₁", "xi": "ξ"},
+        "ns_gev_m3": {"mu0": "μ₀", "mu1": "μ₁", "sigma0": "σ₀", "sigma1": "σ₁", "xi": "ξ"},
+        "ns_gev_m1_break_year": {"mu0": "μ₀", "mu1": "μ₁", "sigma0": "σ₀", "xi": "ξ"},
+        "ns_gev_m2_break_year": {"mu0": "μ₀", "sigma0": "σ₀", "sigma1": "σ₁", "xi": "ξ"},
+        "ns_gev_m3_break_year": {"mu0": "μ₀", "mu1": "μ₁", "sigma0": "σ₀", "sigma1": "σ₁", "xi": "ξ"},
+        "best_model": {"mu0": "μ₀", "mu1": "μ₁", "sigma0": "σ₀", "sigma1": "σ₁", "xi": "ξ"}
+    }
+    # Liste complète des modèles avec leurs équations explicites
+    MODEL_NAME = {
+        # Stationnaire
+        "M₀(μ₀, σ₀) : μ(t) = μ₀ ; σ(t) = σ₀ ; ξ(t) = ξ": "s_gev",
+        # Non stationnaires simples
+        "M₁(μ, σ₀) : μ(t) = μ₀ + μ₁·t ; σ(t) = σ₀ ; ξ(t) = ξ": "ns_gev_m1",
+        "M₂(μ₀, σ) : μ(t) = μ₀ ; σ(t) = σ₀ + σ₁·t ; ξ(t) = ξ": "ns_gev_m2",
+        "M₃(μ, σ) : μ(t) = μ₀ + μ₁·t ; σ(t) = σ₀ + σ₁·t ; ξ(t) = ξ": "ns_gev_m3",
+        # Non stationnaires avec rupture
+        "M₁⋆(μ, σ₀) : μ(t) = μ₀ + μ₁·t₊ ; σ(t) = σ₀ ; ξ(t) = ξ en notant t₊ = t · 𝟙_{t > t₀} avec t₀ = 1985": "ns_gev_m1_break_year",
+        "M₂⋆(μ₀, σ) : μ(t) = μ₀ ; σ(t) = σ₀ + σ₁·t₊ ; ξ(t) = ξ en notant t₊ = t · 𝟙_{t > t₀} avec t₀ = 1985": "ns_gev_m2_break_year",
+        "M₃⋆(μ, σ) : μ(t) = μ₀ + μ₁·t₊ ; σ(t) = σ₀ + σ₁·t₊ ; ξ(t) = ξ en notant t₊ = t · 𝟙_{t > t₀} avec t₀ = 1985": "ns_gev_m3_break_year",
+        "M₃⋆ᵇ(μ, σ) : μ(t) = μ₀ + μ₁·t₊ ; σ(t) = σ₀ + σ₁·t₊ ; ξ(t) = ξ en notant t₊ = t · 𝟙_{t > t₀} avec t₀ = 1985": "ns_gev_m3_break_year_bootstrap",
+        # Autres
+        "M(minimisant AIC)": "best_model",
+        "M(minimisant pval)": "best_model_lrt"
+    }
+    return MODEL_PARAM, MODEL_NAME
+def reverse_param_label(param_label: str, model_name: str, model_param_map: dict) -> str:
+    """
+    Convertit un label unicode (e.g. 'μ₀') en nom de paramètre interne (e.g. 'mu0'),
+    en utilisant le mapping inverse de model_param_map.
+    """
+    if model_name not in model_param_map:
+        raise ValueError(f"Modèle {model_name} non trouvé dans le mapping.")
+    reverse_map = {v: k for k, v in model_param_map[model_name].items()}
+    if param_label not in reverse_map:
+        raise ValueError(f"Label {param_label} non trouvé pour le modèle {model_name}.")
+    return reverse_map[param_label]
+def load_config(config_path: str) -> dict:
+    with open(config_path, "r") as f:
+        return yaml.safe_load(f)
+def echelle_config(type_: bool, echelle: str = None, n_colors: int = 256):
+    if type_: # Continu
+        if echelle == "diverging_zero_white": # Choix personnalisé
+            # Dégradé négatif (bleu) → 0 (blanc) → positif (jaune à rouge)
+            custom_colorscale = [
+                (0.0, "#08306B"),   # bleu foncé
+                (0.1, "#2171B5"),
+                (0.2, "#6BAED6"),
+                (0.3, "#C6DBEF"),
+                (0.49, "#ffffff"),  # blanc à 0
+                (0.5, "#ffffff"),
+                (0.6, "#ffffb2"),   # jaune clair
+                (0.7, "#fecc5c"),
+                (0.8, "#fd8d3c"),
+                (0.9, "#f03b20"),
+                (1.0, "#bd0026"),   # rouge foncé
+            ]
+            cmap = mcolors.LinearSegmentedColormap.from_list("diverging_zero_white", custom_colorscale)
+            if n_colors is not None:
+                # Retourne une version discrète avec n couleurs
+                return ListedColormap([cmap(i / (n_colors - 1)) for i in range(n_colors)])
+            else:
+                return cmap
+        custom_colorscale = [
+            (0.0, "#FFFFE5"),  # blanc
+            (0.1, "#DDEED6"),
+            (0.2, "#BCDDC8"),
+            (0.3, "#9BCCBA"),
+            (0.4, "#7ABBAC"),
+            (0.5, "#59AA9E"),
+            (0.6, "#389990"),
+            (0.7, "#29837A"),
+            (0.8, "#1C6D63"),
+            (0.9, "#0F564B"),
+            (1.0, "#003C30"),
+        ]
+        cmap = mcolors.LinearSegmentedColormap.from_list("custom", custom_colorscale)
+        if n_colors is not None:
+            # Retourne une version discrète avec n couleurs
+            return ListedColormap([cmap(i / (n_colors - 1)) for i in range(n_colors)])
+        else:
+            return cmap
+    else: # Discret
+        couleurs_par_mois = [
+            "#ffffff",  # Janvier
+            "blue",     # Février
+            "green",    # Mars
+            "red",      # Avril
+            "orange",   # Mai
+            "#00CED1",  # Juin
+            "yellow",   # Juillet
+            "#f781bf",  # Août
+            "purple",   # Septembre
+            "#654321",  # Octobre
+            "darkblue", # Novembre
+            "black",    # Décembre
+        ]
+        return ListedColormap(couleurs_par_mois)
+def get_readable_season(season_code: str) -> str:
+    """
+    Retourne le nom humainement lisible d'une saison à partir de son code ("hydro", "djf", etc.).
+    Résultat en minuscules.
+    """
+    _, SEASON, _ = menu_config_statisticals()
+    reverse_season = {v: k.lower() for k, v in SEASON.items()}
+    if season_code not in reverse_season:
+        raise ValueError(f"Code saison inconnu : {season_code}")
+    return reverse_season[season_code]

app/utils/data_utils.py ADDED Viewed

	@@ -0,0 +1,223 @@

+import numpy as np
+import polars as pl
+import streamlit as st
+from scipy.spatial import cKDTree
+from app.utils.config_utils import menu_config_statisticals
+def get_column_load(stat: str, scale: str):
+    if stat == "mean":
+        col = "mean_mm_h"
+    elif stat == "max":
+        col = f"max_{scale}"
+    elif stat == "mean-max":
+        col = f"max_{scale}"
+    elif stat == "month":
+        col = f"max_date_{scale}"
+    elif stat == "numday":
+        col = "n_days_gt1mm"
+    else:
+        raise ValueError(f"Stat '{stat}' is not recognized")
+    return ["NUM_POSTE", col], col
+def load_season(year: int, season_key: str, base_path: str, col_to_load: str) -> pl.DataFrame:
+    filename = f"{base_path}/{year:04d}/{season_key}.parquet"
+    return pl.read_parquet(filename, columns=col_to_load)
+def load_data(type_data: str, echelle: str, min_year: int, max_year: int, season: str, col_to_load: str, config) -> pl.DataFrame:
+    _, SEASON, _ = menu_config_statisticals()
+    if season not in SEASON.values():
+        raise ValueError(f"Saison inconnue : {season}")
+    base_path = f'{config["statisticals"][type_data]}/{echelle}'
+    dataframes = []
+    errors = []
+    for year in range(min_year, max_year + 1):
+        try:
+            df = load_season(year, season, base_path, col_to_load)
+            # Conversion explicite des colonnes dates uniquement si elles existent
+            for col in ["max_date_mm_h", "max_date_mm_j"]:
+                if col in df.columns:
+                    df = df.with_columns(
+                        pl.col(col)
+                        .cast(pl.Utf8)  # s'assure qu'on peut parser avec str.strptime
+                        .str.strptime(pl.Datetime, format="%Y-%m-%d", strict=False)
+                        .cast(pl.Utf8)  # retour sous forme de string (comme dans l'ancien code Pandas)
+                    )
+            # Ajout de la colonne year
+            df = df.with_columns(pl.lit(year).alias("year"))
+            dataframes.append(df)
+        except Exception as e:
+            errors.append(f"{year} ({season}) : {e}")
+    if errors:
+        for err in errors:
+            st.warning(f"Erreur : {err}")
+    if not dataframes:
+        raise ValueError("Aucune donnée chargée.")
+    return pl.concat(dataframes, how="vertical")
+def cleaning_data_observed(
+    df: pl.DataFrame,
+    len_serie: float = None,
+    nan_limit: float = 0.10
+) -> pl.DataFrame:
+    """
+    Filtre les maxima par deux critères :
+      1) on annule les valeurs d’une année si nan_ratio > nan_limit
+      2) on ne garde que les stations ayant au moins n années valides
+    """
+    # ——— règles dépendant de l’échelle ———
+    if len_serie is None:
+        raise ValueError('Paramètre len_serie à préciser')
+    # Selection des saisons avec nan_limit au maximum
+    df_filter = df.filter(pl.col("nan_ratio") <= nan_limit)
+    # Calcul du nombre d'années valides par station NUM_POSTE
+    station_counts = (
+        df_filter.group_by("NUM_POSTE")
+        .agg(pl.col("year").n_unique().alias("num_years"))
+    )
+    # Sélection des NUM_POSTE avec au moins len_serie d'années valides
+    valid_stations = station_counts.filter(pl.col("num_years") >= len_serie)
+    # Jointure pour ne garder que les stations valides
+    df_final = df_filter.filter(
+        pl.col("NUM_POSTE").is_in(valid_stations["NUM_POSTE"])
+    )
+    return df_final
def dont_show_extreme(
    modelised: pl.DataFrame,
    observed:   pl.DataFrame,
    column:     str,
    quantile_choice: float,
    stat_choice_key: str = None
) -> tuple[pl.DataFrame, pl.DataFrame]:
    """
    Saturate extreme values of ``column`` so they do not dominate the colour scale.

    The threshold is the largest of the ``quantile_choice`` quantiles of
    ``column`` computed on ``modelised`` and on ``observed``. Values whose
    absolute value exceeds the threshold are replaced by the threshold with
    the sign of the original value.

    Nothing is clamped when ``stat_choice_key`` is ``"month"`` or ``"date"``,
    or when no quantile can be computed (both frames missing, empty or
    entirely null).

    Args:
        modelised: model values; may be None.
        observed: observed values; may be None or empty.
        column: name of the column to clamp.
        quantile_choice: quantile level passed to ``Expr.quantile``.
        stat_choice_key: statistic key; ``"month"`` and ``"date"`` bypass
            the clamping.

    Returns:
        ``(modelised_show, observed_show)``; a frame given as None is
        returned as None.
    """
    if stat_choice_key in ("month", "date"):
        return modelised, observed

    def _quantile(frame):
        # None when the frame cannot provide a quantile at all.
        if frame is None or frame.height == 0:
            return None
        return frame.select(
            pl.col(column).quantile(quantile_choice, interpolation="nearest")
        ).item()

    # 1) Quantiles: keep only those that could actually be computed.
    candidates = [
        q for q in (_quantile(modelised), _quantile(observed)) if q is not None
    ]
    if not candidates:
        return modelised, observed
    seuil = max(candidates)

    # 2) Colour saturation: clamp |value| to the threshold, keeping the sign.
    clamp_expr = (
        pl.when(pl.col(column).abs() > seuil)
          .then(pl.lit(seuil) * pl.col(column).sign())
          .otherwise(pl.col(column))
          .alias(column)
    )

    # 3) A missing frame stays None instead of raising AttributeError.
    modelised_show = modelised if modelised is None else modelised.with_columns(clamp_expr)
    observed_show = observed if observed is None else observed.with_columns(clamp_expr)
    return modelised_show, observed_show
def add_metadata(df: pl.DataFrame, scale: str, type: str) -> pl.DataFrame:
    """
    Attach station metadata to ``df`` with a left join on ``NUM_POSTE``.

    The metadata CSV is selected from ``type`` (sub-directory) and ``scale``
    (the hourly file when ``scale == 'mm_h'``, the daily file otherwise).
    """
    echelle = 'quotidien'
    if scale == 'mm_h':
        echelle = 'horaire'
    meta_path = f"data/metadonnees/{type}/postes_{echelle}.csv"

    # Fixed dtypes on the metadata side; NUM_POSTE is cast to Int32 on both
    # sides so the join keys share the same dtype.
    meta_casts = [
        pl.col("NUM_POSTE").cast(pl.Int32),
        pl.col("lat").cast(pl.Float32),
        pl.col("lon").cast(pl.Float32),
        pl.col("altitude").cast(pl.Int32),  # altitude stored as an integer
    ]
    df_meta = pl.read_csv(meta_path).with_columns(meta_casts)
    df_keyed = df.with_columns([pl.col("NUM_POSTE").cast(pl.Int32)])

    return df_keyed.join(df_meta, on=["NUM_POSTE"], how="left")
def find_matching_point(df_model: pl.DataFrame, lat_obs: float, lon_obs: float):
    """
    Return the ``(lat, lon)`` of the row of ``df_model`` closest to a point.

    Closeness is the Euclidean distance computed directly on the ``lat`` and
    ``lon`` columns. When several rows share the minimum distance, the first
    one is returned.
    """
    d_lat = pl.col("lat") - lat_obs
    d_lon = pl.col("lon") - lon_obs
    distance = (d_lat ** 2 + d_lon ** 2).sqrt().alias("dist")

    with_dist = df_model.with_columns([distance])
    nearest = with_dist.filter(pl.col("dist") == pl.col("dist").min())
    return nearest.select(["lat", "lon"]).row(0)  # (lat, lon)
def match_and_compare(
    obs_df: pl.DataFrame,
    mod_df: pl.DataFrame,
    column_to_show: str,
    obs_vs_mod: pl.DataFrame | None = None
) -> pl.DataFrame:
    """
    Pair observed and modelled values of ``column_to_show`` station by station.

    ``obs_vs_mod`` is the correspondence table linking an observation station
    (``NUM_POSTE_obs``) to a model point (``NUM_POSTE_mod``). The observed
    value, with its ``lat``/``lon``, and the modelled value are looked up
    through left joins, then every row containing a null is dropped.

    Returns:
        A frame with columns ``NUM_POSTE_obs``, ``lat``, ``lon``,
        ``NUM_POSTE_mod``, ``Station`` (observed value) and ``AROME``
        (modelled value).

    Raises:
        ValueError: if ``obs_vs_mod`` is not provided.
    """
    if obs_vs_mod is None:
        raise ValueError("obs_vs_mod must be provided with NUM_POSTE_obs and NUM_POSTE_mod columns")

    # Station identifiers are cast to Int32 everywhere so the join keys match.
    obs = obs_df.with_columns(
        pl.col("NUM_POSTE").cast(pl.Int32)
    ).rename({"NUM_POSTE": "NUM_POSTE_obs"})
    mod = mod_df.with_columns(
        pl.col("NUM_POSTE").cast(pl.Int32)
    ).rename({"NUM_POSTE": "NUM_POSTE_mod"})

    # Keep only the correspondences whose observation station is in obs_df.
    pairs = obs_vs_mod.with_columns(
        pl.col("NUM_POSTE_obs").cast(pl.Int32),
        pl.col("NUM_POSTE_mod").cast(pl.Int32)
    ).filter(
        pl.col("NUM_POSTE_obs").is_in(obs["NUM_POSTE_obs"])
    )

    # Both joins bring a column named `column_to_show`; the second one gets
    # the "_mod" suffix, which is what the rename below relies on.
    matched = (
        pairs
        .join(obs.select(["NUM_POSTE_obs", "lat", "lon", column_to_show]), on="NUM_POSTE_obs", how="left")
        .join(mod.select(["NUM_POSTE_mod", column_to_show]), on="NUM_POSTE_mod", how="left", suffix="_mod")
        .rename({column_to_show: "Station", f"{column_to_show}_mod": "AROME"})
    )

    return matched.select(
        ["NUM_POSTE_obs", "lat", "lon", "NUM_POSTE_mod", "Station", "AROME"]
    ).drop_nulls()
def standardize_year(year: float, min_year: int, max_year: int) -> float:
    """
    Min-max scale ``year`` so that ``min_year`` maps to 0 and ``max_year`` to 1.

    Years outside ``[min_year, max_year]`` map outside ``[0, 1]``. With plain
    Python numbers, ``min_year == max_year`` raises ``ZeroDivisionError``.
    """
    offset = year - min_year
    span = max_year - min_year
    return offset / span
def filter_nan(df: pl.DataFrame, columns: list[str]):
    """
    Return ``df`` without the rows that have a null in any of ``columns``.

    This relies on ``DataFrame.drop_nulls``, so it removes null entries.
    """
    without_nulls = df.drop_nulls(subset=columns)
    return without_nulls

app/utils/gev_utils.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import numpy as np
+import polars as pl
+from scipy.special import gamma
+# --- Quantile GEV ---
+# Soit :
+#   μ(t)     = μ₀ + μ₁ × t                  # localisation dépendante du temps
+#   σ(t)     = σ₀ + σ₁ × t                  # échelle dépendante du temps
+#   ξ        = constante                    # forme
+#   T        = période de retour (années)
+#   p        = 1 − 1 / T                    # probabilité non-excédée associée
+# Avec t : année (ou covariable normalisée dans l'intervalle [0; 1])
+# t = (annee - min_year) / (max_year - min_year) = (annee - min_year) / delta_year
+# Une unité de t (normalisée) = Δa années (max_year - min_year)
+# En notant Δa = max_year - min_year et a = annee, on a :
+# t = (a − aₘᵢₙ) / Δa  ⇒   a = aₘᵢₙ + t ⋅ Δa
+# La quantile notée qᵀ(t) (précipitation pour une période de retour T à l’année t) s’écrit :
+#   qᵀ(t) = μ(t) + [σ(t) / ξ] × [ (−log(p))^(−ξ) − 1 ]
+#   qᵀ(t) = (μ₀ + μ₁ × t) + [(σ₀ + σ₁ × t) / ξ] × [ (−log(1 − (1/T)))^(−ξ) − 1 ]
+# Soit : z_T = [ -log(1 - 1/T) ]^(−ξ) − 1   ← constante pour un T donné
+# Donc : qᵀ(t) = μ₀ + μ₁·t + [(σ₀ + σ₁·t) / ξ] · z_T
+# Ou : qᵀ(t) = μ(t) + [σ(t) / ξ] · z_T
+# En dérivant qᵀ par rapport à t on a :
+# dqᵀ/dt = μ₁ + σ₁ / ξ · z_T
+# On rappelle :  a = aₘᵢₙ + t ⋅ Δa
+# Donc : dt/da = 1 / Δa
+# Alors dqᵀ/da = dqᵀ/dt · dt/da = (μ₁ + (σ₁ / ξ) · z_T) · (1 / Δa)
+# LA VARIATION PAR AN de qᵀ :
+# dqᵀ/da = 1 / Δa · (μ₁ + σ₁ / ξ · z_T)
+# DONC PAR 10 ANS :
+# Δqᵀ₁₀ₐₙₛ = (10 / Δa) ⋅ (μ₁ + (σ₁ / ξ) ⋅ zᵀ)
def safe_compute_return_df(df: pl.DataFrame) -> pl.DataFrame:
    """
    Guarantee that the five GEV parameter columns exist and hold no gaps.

    Any of ``mu0``, ``mu1``, ``sigma0``, ``sigma1``, ``xi`` missing from
    ``df`` is created and filled with 0.0; in all five columns, null and NaN
    entries are then replaced by 0.0.
    """
    gev_cols = ("mu0", "mu1", "sigma0", "sigma1", "xi")

    # Create the absent columns, in the canonical order.
    absent = [name for name in gev_cols if name not in df.columns]
    if absent:
        df = df.with_columns([pl.lit(0.0).alias(name) for name in absent])

    # Replace nulls first, then NaNs, by 0.0 in every parameter column.
    sanitized = [pl.col(name).fill_null(0.0).fill_nan(0.0) for name in gev_cols]
    return df.with_columns(sanitized)
def compute_return_levels_ns(params: dict, T: np.ndarray, t_norm: float) -> np.ndarray:
    """
    Compute the return levels of a (possibly non-stationary) GEV model.

    Args:
        params: GEV parameters of one point. ``mu0``, ``sigma0`` and ``xi``
            default to 0 when absent; ``mu1`` and ``sigma1`` add a linear
            dependence on ``t_norm`` only when present.
        T: return periods.
        t_norm: value of the temporal covariate at which the location and
            scale are evaluated.

    Returns:
        The return level for each period in ``T``; the Gumbel expression is
        used when ``xi`` equals 0.
    """
    # Location mu(t)
    mu = params.get("mu0", 0)
    if "mu1" in params:
        mu = mu + params["mu1"] * t_norm

    # Scale sigma(t)
    sigma = params.get("sigma0", 0)
    if "sigma1" in params:
        sigma = sigma + params["sigma1"] * t_norm

    # Shape, constant in time
    xi = params.get("xi", 0)

    # -log(p), with p = 1 - 1/T
    reduced = -np.log(1 - 1 / T)
    if xi == 0:
        return mu - sigma * np.log(reduced)
    return mu + (sigma / xi) * (reduced ** (-xi) - 1)
def delta_qT_X_years(mu1, sigma1, xi, T, year_range, par_X_annees):
    """
    Change of the return level q_T over ``par_X_annees`` years.

    In the non-stationary GEV model with a covariate t in [0, 1] spanning
    ``year_range`` calendar years:

        delta_q = (par_X_annees / year_range) * (mu1 + sigma1 * g_T)

    where, with p = 1 - 1/T,

        g_T = ((-log p)^(-xi) - 1) / xi    if xi != 0
        g_T = -log(-log p)                 if xi == 0 (Gumbel)

    The Gumbel expression is the limit of the general one as xi -> 0, so the
    result is continuous in xi.

    Args:
        mu1: slope of the location parameter with respect to t.
        sigma1: slope of the scale parameter with respect to t.
        xi: shape parameter.
        T: return period.
        year_range: number of calendar years covered by t in [0, 1].
        par_X_annees: number of years the change is expressed over
            (e.g. 10 for a decadal change).

    Returns:
        The change as a float, or NaN when it cannot be computed (for
        instance ``year_range == 0`` or a missing parameter).
    """
    try:
        p = 1 - 1 / T
        if xi == 0:
            # Gumbel limit: note the leading minus sign.
            z_T = -np.log(-np.log(p))
            delta_q = (par_X_annees / year_range) * (mu1 + sigma1 * z_T)
        else:
            z_T = (-np.log(p))**(-xi) - 1
            delta_q = (par_X_annees / year_range) * (mu1 + (sigma1 / xi) * z_T)
        return float(delta_q)
    except (ArithmeticError, TypeError, ValueError):
        # Best effort: an uncomputable point yields NaN instead of aborting
        # the whole computation.
        return np.nan
def compute_delta_qT(row, T_choice, year_range, par_X_annees):
    """
    Apply ``delta_qT_X_years`` to the GEV parameters stored in ``row``.

    ``row`` must provide ``mu1``, ``sigma1`` and ``xi``; ``T_choice`` is the
    return period passed on as ``T``.
    """
    mu1, sigma1, xi = row["mu1"], row["sigma1"], row["xi"]
    return delta_qT_X_years(mu1, sigma1, xi, T_choice, year_range, par_X_annees)
+# --- Espérance, variance, CV de GEV ---
def gev_moments(mu, sigma, xi):
    """
    Mean, variance and coefficient of variation of a GEV(mu, sigma, xi).

    For xi != 0:
        mean = mu + sigma / xi * (Gamma(1 - xi) - 1)
        var  = sigma^2 / xi^2 * (Gamma(1 - 2 xi) - Gamma(1 - xi)^2)
    For xi == 0 (Gumbel, the limit of the expressions above):
        mean = mu + sigma * gamma_E      (Euler-Mascheroni constant)
        var  = sigma^2 * pi^2 / 6

    Returns:
        ``(mean, var, cv)`` with ``cv = sqrt(var) / mean`` (NaN when the mean
        is 0). All three are NaN when ``xi >= 0.5``, where the variance is
        not finite, or when the computation fails.
    """
    if xi >= 0.5:
        return np.nan, np.nan, np.nan  # variance is not defined
    try:
        if xi == 0:
            # Gumbel case: the general formulas would divide by zero.
            mean = mu + sigma * np.euler_gamma
            var = (sigma ** 2) * (np.pi ** 2) / 6
        else:
            mean = mu + sigma / xi * (gamma(1 - xi) - 1)
            var = (sigma ** 2) / (xi ** 2) * (gamma(1 - 2 * xi) - gamma(1 - xi) ** 2)
        cv = np.sqrt(var) / mean if mean != 0 else np.nan
        return mean, var, cv
    except (ArithmeticError, TypeError, ValueError):
        # Best effort: an uncomputable point yields NaNs.
        return np.nan, np.nan, np.nan
def eval_params_nsgev(mu0, mu1, sigma0, sigma1, xi, t, t0):
    """
    GEV moments at time ``t`` for location and scale linear in ``t - t0``.

    The location ``mu0 + mu1 * (t - t0)`` and the scale
    ``sigma0 + sigma1 * (t - t0)`` are passed to ``gev_moments`` together
    with the constant shape ``xi``.
    """
    elapsed = t - t0
    location = mu0 + mu1 * elapsed
    scale = sigma0 + sigma1 * elapsed
    return gev_moments(location, scale, xi)
def compute_delta_stat(row, stat: str, year_start: int, year_ref: int, year_end: int, par_X_annees: int) -> float:
    """
    Average change of a GEV moment per ``par_X_annees`` years.

    The moments are evaluated at ``year_start`` and ``year_end`` (both taken
    relative to ``year_ref``) and their difference is rescaled by
    ``par_X_annees / (year_end - year_start)``.

    Parameters:
    - row : mapping with ``mu0``, ``sigma0``, ``xi`` and optionally ``mu1``
      and ``sigma1`` (read with ``.get`` and defaulting to 0.0)
    - stat : "ΔE" (mean), "ΔVar" (variance) or "ΔCV" (coefficient of variation)
    - year_start, year_end : first and last year of the period
    - year_ref : reference year (t0)
    - par_X_annees : number of years the change is expressed over

    Returns:
    - The rescaled change of the selected moment; NaN when the period has
      zero length or ``stat`` is not one of the three keys above
    """
    span = year_end - year_start
    if span == 0:
        return np.nan  # avoids a division by zero

    def moments_at(year):
        # (mean, variance, cv) of the model evaluated at `year`.
        return eval_params_nsgev(
            mu0=row["mu0"], mu1=row.get("mu1", 0.0),
            sigma0=row["sigma0"], sigma1=row.get("sigma1", 0.0),
            xi=row["xi"], t=year, t0=year_ref
        )

    at_start = moments_at(year_start)
    at_end = moments_at(year_end)

    # Position of each statistic in the (mean, variance, cv) tuple.
    position = {"ΔE": 0, "ΔVar": 1, "ΔCV": 2}.get(stat)
    if position is None:
        return np.nan
    return (at_end[position] - at_start[position]) * par_X_annees / span

app/utils/hist_utils.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import plotly.express as px
+import streamlit as st
+import polars as pl
+import pandas as pd
def plot_histogramme(df: pl.DataFrame, var, stat, stat_key, unit, height):
    """
    Build the distribution plot of column ``var``.

    For ``stat_key == 'month'`` the column holds month numbers (1-12): the
    figure is a bar chart of the share (in percent) of each month, with all
    twelve months shown in calendar order. Otherwise the figure is a 50-bin
    histogram normalised in percent, with the x axis titled from ``stat``
    and ``unit``.

    Returns:
        The Plotly figure.
    """
    frame = df.to_pandas()

    if stat_key != 'month':
        # General case: plain histogram.
        fig = px.histogram(
            frame,
            x=var,
            nbins=50,
            histnorm='percent'
        )
        fig.update_layout(
            bargap=0.1,
            xaxis_title=f"{stat} ({unit})" if unit else f"{stat}",
            yaxis_title="Pourcentage de stations",
            height=height
        )
        return fig

    # Full calendar order, also used to order the x axis.
    month_order = ['Janvier', 'Février', 'Mars', 'Avril', 'Mai', 'Juin',
                   'Juillet', 'Août', 'Septembre', 'Octobre', 'Novembre', 'Décembre']
    number_to_label = dict(enumerate(month_order, start=1))

    # Work on a copy and turn month numbers into their labels.
    frame = frame.copy()
    frame[var] = frame[var].astype(int).map(number_to_label)

    # Rows per month, with absent months forced to 0.
    counts = frame[var].value_counts().reindex(month_order, fill_value=0)
    total = counts.sum()
    freq_percent = (counts / total * 100) if total > 0 else counts

    hist_df = pd.DataFrame({var: freq_percent.index, 'Pourcentage': freq_percent.values})
    fig = px.bar(
        hist_df,
        x=var,
        y='Pourcentage'
    )
    fig.update_layout(
        bargap=0.1,           # spacing between bars
        xaxis_title="",       # no horizontal title
        yaxis_title="Pourcentage de stations",
        height=height,
        xaxis=dict(
            categoryorder='array',
            categoryarray=month_order
        )
    )
    return fig
def plot_histogramme_comparatif(df_observed: pl.DataFrame, df_modelised: pl.DataFrame, var, stat, stat_key, unit, height):
    """
    Build the plot comparing the distributions of ``var`` in two datasets.

    For ``stat_key == 'month'`` the column holds month numbers (1-12): each
    dataset is turned into per-month percentages and the two are drawn as
    grouped bars in calendar order. Otherwise both datasets are drawn as
    overlaid 50-bin histograms normalised in percent. The two sources are
    labelled "Observé" and "Modélisé".

    Returns:
        The Plotly figure.
    """
    obs_frame = df_observed.to_pandas()
    mod_frame = df_modelised.to_pandas()

    if stat_key != 'month':
        # Standard display for every other statistic.
        df_all = pd.concat(
            [obs_frame.assign(Source="Observé"), mod_frame.assign(Source="Modélisé")],
            ignore_index=True
        )
        fig = px.histogram(
            df_all,
            x=var,
            color='Source',
            nbins=50,
            histnorm='percent',
            barmode='overlay'  # 'group' would show them side by side
        )
        fig.update_layout(
            bargap=0.1,
            xaxis_title=f"{stat} ({unit})" if unit else f"{stat}",
            yaxis_title="Pourcentage de stations",
            height=height
        )
        return fig

    month_order = ['Janvier', 'Février', 'Mars', 'Avril', 'Mai', 'Juin',
                   'Juillet', 'Août', 'Septembre', 'Octobre', 'Novembre', 'Décembre']
    number_to_label = dict(enumerate(month_order, start=1))

    def monthly_share(frame, source):
        # Percentage of rows per month for one dataset, all 12 months present.
        labelled = frame.copy()
        labelled[var] = labelled[var].astype(int).map(number_to_label)
        counts = labelled[var].value_counts().reindex(month_order, fill_value=0)
        total = counts.sum()
        freq_percent = (counts / total * 100) if total > 0 else counts
        return pd.DataFrame({
            var: freq_percent.index,
            'Pourcentage': freq_percent.values,
            'Source': source
        })

    hist_df = pd.concat(
        [monthly_share(obs_frame, "Observé"), monthly_share(mod_frame, "Modélisé")],
        ignore_index=True
    )
    fig = px.bar(
        hist_df,
        x=var,
        y='Pourcentage',
        color='Source',
        barmode='group'  # side-by-side bars
    )
    fig.update_layout(
        bargap=0.15,
        xaxis_title="",
        yaxis_title="Pourcentage de stations",
        height=height,
        xaxis=dict(
            categoryorder='array',
            categoryarray=month_order
        )
    )
    return fig

app/utils/legends_utils.py ADDED Viewed

	@@ -0,0 +1,221 @@

+from io import BytesIO
+import base64
+import polars as pl
+import numpy as np
+import datetime as dt
+import matplotlib.pyplot as plt
def get_stat_column_name(stat_key: str, scale_key: str) -> str:
    """
    Return the name of the data column holding a statistic at a given scale.

    ``mean``, ``max`` and ``mean-max`` embed ``scale_key`` in the name;
    ``date`` and ``month`` use the ``_h`` suffix when ``scale_key`` is
    ``"mm_h"`` and ``_j`` otherwise; ``numday`` has a single column.

    Raises:
        ValueError: if ``stat_key`` is not one of the keys above.
    """
    suffix = "h" if scale_key == "mm_h" else "j"
    columns = (
        ("mean", f"mean_all_{scale_key}"),
        ("max", f"max_all_{scale_key}"),
        ("mean-max", f"max_mean_{scale_key}"),
        ("date", "date_max_" + suffix),
        ("month", "mois_pluvieux_" + suffix),
        ("numday", "jours_pluie_moyen"),
    )
    for key, column in columns:
        if stat_key == key:
            return column
    raise ValueError(f"Statistique inconnue : {stat_key}")
def get_stat_unit(stat_key: str, scale_key: str) -> str:
    """
    Return the display unit of a statistic, or "" when it has none.

    ``mean``, ``max`` and ``mean-max`` are intensities: "mm/h" when
    ``scale_key`` is ``"mm_h"``, "mm/j" otherwise. ``sum`` is in "mm" and
    ``numday`` in "jours".
    """
    intensity = "mm/h" if scale_key == "mm_h" else "mm/j"
    units = (
        ("mean", intensity),
        ("max", intensity),
        ("mean-max", intensity),
        ("sum", "mm"),
        ("numday", "jours"),
    )
    for key, unit in units:
        if stat_key == key:
            return unit
    return ""
def formalised_legend(df: pl.DataFrame, column_to_show: str, colormap, vmin=None, vmax=None, is_categorical=False, categories=None):
    """
    Add colour and tooltip columns to ``df`` for the column being mapped.

    The column is first normalised to [0, 1] (``value_norm``) according to
    its kind, then passed through ``colormap``:
      - categorical (``is_categorical`` with ``categories``): index of the
        category divided by the last index;
      - name starting with ``"date"``: strings parsed as datetimes and scaled
        between ``vmin`` and ``vmax`` (column min/max by default);
      - name starting with ``"mois_pluvieux"``: month number 1-12 scaled to
        [0, 1];
      - otherwise: numeric values scaled between ``vmin`` and ``vmax``. The
        defaults are the column max, and the column min lowered to 0 when it
        is positive.

    Args:
        df: data to decorate; it must contain ``column_to_show``, ``lat``
            and ``lon``.
        column_to_show: name of the column to colour.
        colormap: callable mapping an array of values in [0, 1] to RGBA rows
            with components in [0, 1].
        vmin, vmax: optional bounds of the colour scale.
        is_categorical: treat the column as categorical.
        categories: ordered category values (categorical case only).

    Returns:
        ``(df, vmin, vmax)`` where ``df`` gained ``value_norm``,
        ``fill_color`` (RGBA, 0-255), ``val_fmt``, ``lat_fmt`` and
        ``lon_fmt``. When a bound cannot be derived because the column holds
        no value, ``df`` is returned without those columns and both bounds
        are None.
    """
    df = df.clone()

    if is_categorical and categories is not None:
        # Special categorical case, e.g. best_model: category -> index.
        mapping = {cat: i for i, cat in enumerate(categories)}
        df = df.with_columns([
            pl.col(column_to_show).map_elements(lambda x: mapping.get(x, None), return_dtype=pl.Float64).alias("value_norm")
        ])
        vals = df["value_norm"].to_numpy()
        # With a single category the last index is 0: divide by 1 instead.
        last_index = max(len(categories) - 1, 1)
        colors = (255 * np.array(colormap(vals / last_index))[:, :3]).astype(np.uint8)
        alpha = np.full((colors.shape[0], 1), 255, dtype=np.uint8)
        rgba = np.hstack([colors, alpha])

        df = df.with_columns([
            pl.Series("fill_color", rgba.tolist(), dtype=pl.List(pl.UInt8)),
            pl.col(column_to_show).alias("val_fmt"),  # keep the category name as text
            pl.col("lat").round(3).cast(pl.Utf8).alias("lat_fmt"),
            pl.col("lon").round(3).cast(pl.Utf8).alias("lon_fmt"),
        ])

        return df, 0, len(categories) - 1

    if column_to_show.startswith("date"):
        # Parse the strings into Polars datetimes.
        df = df.with_columns(
            pl.col(column_to_show).str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S%.6f", strict=False)
        )

        def as_datetime(value):
            # Promote a pure date to a datetime at midnight. A datetime is
            # also a `date` instance, so it is excluded explicitly to avoid
            # truncating its time of day.
            if isinstance(value, dt.date) and not isinstance(value, dt.datetime):
                return dt.datetime.combine(value, dt.time.min)
            return value

        if vmin is None:
            vmin = as_datetime(df[column_to_show].min())
        if vmax is None:
            vmax = as_datetime(df[column_to_show].max())
        if vmin is None or vmax is None:  # no parsable date at all
            return df, None, None

        # Seconds since the epoch by subtraction rather than
        # datetime.timestamp(), which is unsafe for pre-1970 dates on Windows.
        epoch = dt.datetime(1970, 1, 1)
        vmin_ts = (vmin - epoch).total_seconds()
        vmax_ts = (vmax - epoch).total_seconds()
        span = (vmax_ts - vmin_ts) or 1.0  # single date: avoid 0/0

        # The column timestamp is requested in microseconds and converted to
        # seconds so that it shares the unit of vmin_ts / vmax_ts.
        seconds = pl.col(column_to_show).cast(pl.Datetime).dt.timestamp("us") / 1_000_000
        df = df.with_columns([
            ((seconds - vmin_ts) / span)
            .clip(0.0, 1.0)
            .alias("value_norm")
        ])

        val_fmt_func = lambda x: x.strftime("%Y-%m-%d")

    elif column_to_show.startswith("mois_pluvieux"):
        df = df.with_columns(pl.col(column_to_show).cast(pl.Int32))
        value_norm = ((df[column_to_show] - 1) / 11).clip(0.0, 1.0)
        df = df.with_columns(value_norm.alias("value_norm"))

        mois_labels = [
            "Janvier", "Février", "Mars", "Avril", "Mai", "Juin",
            "Juillet", "Août", "Septembre", "Octobre", "Novembre", "Décembre"
        ]
        val_fmt_func = lambda x: mois_labels[int(x) - 1] if 1 <= int(x) <= 12 else "Inconnu"

        vmin, vmax = 1, 12

    else:  # general (continuous) case
        if vmax is None:
            vmax = df[column_to_show].max()
            if vmax is None:  # the column holds no value
                return df, None, None
        if vmin is None:
            vmin = df[column_to_show].min()
            if vmin is None:  # the column holds no value
                return df, None, None
            if vmin > 0:
                vmin = 0

        span = (vmax - vmin) or 1  # constant column: avoid 0/0
        value_norm = ((df[column_to_show] - vmin) / span).clip(0.0, 1.0)
        df = df.with_columns(value_norm.alias("value_norm"))

        val_fmt_func = lambda x: f"{x:.2f}"

    # Apply the colormap to the whole column at once (N x 4 RGBA array),
    # keep RGB scaled to 0-255 and append a fully opaque alpha channel.
    vals = df["value_norm"].to_numpy()
    colors = (255 * np.array(colormap(vals))[:, :3]).astype(np.uint8)
    alpha = np.full((colors.shape[0], 1), 255, dtype=np.uint8)
    rgba = np.hstack([colors, alpha])

    # Inject the colours and the formatted tooltip fields back into Polars.
    fill_color = pl.Series("fill_color", rgba.tolist(), dtype=pl.List(pl.UInt8))
    df = df.with_columns([
        fill_color,
        df[column_to_show].map_elements(val_fmt_func, return_dtype=pl.String).alias("val_fmt"),
        pl.col("lat").round(3).cast(pl.Utf8).alias("lat_fmt"),
        pl.col("lon").round(3).cast(pl.Utf8).alias("lon_fmt")
    ])

    return df, vmin, vmax
def display_vertical_color_legend(height, colormap, vmin, vmax, n_ticks=5, label="", model_labels=None):
    """
    Build the HTML of a vertical colour legend.

    Three layouts are produced:
      - ``model_labels`` given: a discrete legend with one coloured box per
        label, colours sampled evenly along ``colormap``;
      - ``vmin`` and ``vmax`` both integers within 1-12: a discrete legend
        with one box per month from ``vmin`` to ``vmax``;
      - otherwise: a gradient image with ``n_ticks`` tick labels from
        ``vmax`` (top) to ``vmin`` (bottom). Ticks are dates when both
        bounds are datetimes, and numbers with two decimals otherwise.

    Args:
        height: height in pixels available to the legend (gradient layout).
        colormap: callable mapping a value in [0, 1] to an RGBA tuple with
            components in [0, 1]; it is also used as the Matplotlib ``cmap``
            of the gradient.
        vmin, vmax: bounds of the colour scale.
        n_ticks: number of tick labels of the gradient layout.
        label: title displayed above the legend.
        model_labels: optional list of labels for the discrete layout.

    Returns:
        The legend as an HTML string.
    """
    def css_rgb(position):
        # CSS colour string of the colormap at `position` in [0, 1].
        rgb = [int(255 * c) for c in colormap(position)[:3]]
        return f"rgb({rgb[0]}, {rgb[1]}, {rgb[2]})"

    if model_labels is not None:
        # Discrete legend: one box per model label.
        # With a single label the last index is 0: divide by 1 instead.
        last_index = max(len(model_labels) - 1, 1)
        boxes = []
        for idx, name in enumerate(model_labels):
            color = css_rgb(idx / last_index)
            boxes.append(
                f'<div style="display: flex; align-items: center; margin-bottom: 6px;">'
                f'  <div style="width: 18px; height: 18px; background-color: {color}; margin-right: 8px; border: 1px solid #ccc;"></div>'
                f'  <div style="font-size: 12px;">{name}</div>'
                f'</div>'
            )
        color_boxes = "".join(boxes)
        html_legend = (
            f'<div style="text-align: left; font-size: 13px; margin-bottom: 4px;">{label}</div>'
            f'<div style="display: flex; flex-direction: column;">{color_boxes}</div>'
        )
        return html_legend

    if isinstance(vmin, int) and isinstance(vmax, int) and (1 <= vmin <= 12) and (1 <= vmax <= 12):
        # Discrete legend: one box per month.
        mois_labels = [
            "Janvier", "Février", "Mars", "Avril", "Mai", "Juin",
            "Juillet", "Août", "Septembre", "Octobre", "Novembre", "Décembre"
        ]
        boxes = []
        for mois in range(vmin, vmax + 1):
            color = css_rgb((mois - 1) / 11)
            label_mois = mois_labels[mois - 1]
            boxes.append(
                f'<div style="display: flex; align-items: center; margin-bottom: 4px;">'
                f'  <div style="width: 14px; height: 14px; background-color: {color}; '
                f'border: 1px solid #ccc; margin-right: 6px;"></div>'
                f'  <div style="font-size: 12px;">{label_mois}</div>'
                f'</div>'
            )
        color_boxes = "".join(boxes)
        html_mois = (
            f'<div style="text-align: left; font-size: 13px; margin-bottom: 4px;">{label}</div>'
            f'<div style="display: flex; flex-direction: column;">{color_boxes}</div>'
        )
        return html_mois

    # Continuous legend: render the gradient (1 at the top, 0 at the bottom)
    # as a PNG embedded in the HTML.
    gradient = np.linspace(1, 0, 64).reshape(64, 1)
    fig, ax = plt.subplots(figsize=(1, 3), dpi=30)
    ax.imshow(gradient, aspect='auto', cmap=colormap)
    ax.axis('off')

    buf = BytesIO()
    # Save this figure explicitly instead of whichever figure is current.
    fig.savefig(buf, format="png", bbox_inches='tight', pad_inches=0, transparent=True)
    plt.close(fig)
    base64_img = base64.b64encode(buf.getvalue()).decode()

    if isinstance(vmin, dt.datetime) and isinstance(vmax, dt.datetime):
        # Interpolate with timedelta arithmetic: unlike timestamp() and
        # fromtimestamp(), it does not depend on the local time zone and
        # works for pre-1970 dates on every platform.
        span = vmax - vmin
        ticks = [
            (vmax - span * float(fraction)).strftime("%Y-%m-%d")
            for fraction in np.linspace(0.0, 1.0, n_ticks)
        ]
    else:
        ticks_vals = np.linspace(vmax, vmin, n_ticks)
        ticks = [f"{val:.2f}" for val in ticks_vals]

    ticks_html = ''.join(f'<div>{tick}</div>' for tick in ticks)
    html_gradient = f"""
        <div style="text-align: left; font-size: 13px;">{label}</div>
        <div style="display: flex; flex-direction: row; align-items: center; height: {height-30}px;">
            <img src="data:image/png;base64,{base64_img}"
                 style="height: 100%; width: 20px; border: 1px solid #ccc; border-radius: 5px;"/>
            <div style="display: flex; flex-direction: column; justify-content: space-between;
                        margin-left: 8px; height: 100%; font-size: 12px;">
                {ticks_html}
            </div>
        </div>
    """

    return html_gradient

app/utils/map_utils.py ADDED Viewed

	@@ -0,0 +1,223 @@

+from pathlib import Path
+import pydeck as pdk
+import streamlit as st
+import polars as pl
+import geopandas as gpd
def prepare_layer(df: pl.DataFrame) -> pl.DataFrame:
    """
    Keep only the columns needed to draw a map layer and its tooltip.

    ``is_significant`` is carried over when ``df`` has it.
    """
    base = ("lat", "lon", "lat_fmt", "lon_fmt", "altitude", "val_fmt", "fill_color")
    optional = ("is_significant",) if "is_significant" in df.columns else ()
    return df.select([*base, *optional])
def fast_to_dicts(df: pl.DataFrame) -> list[dict]:
    """
    Convert ``df`` into a list of row dictionaries keyed by column name.

    Each column is first converted as a whole to a Python list: ``to_list``
    for List columns, ``to_numpy().tolist()`` for the others. The rows are
    then assembled from those lists.
    """
    names = df.columns

    def as_python_list(series):
        if series.dtype == pl.List:
            return series.to_list()
        return series.to_numpy().tolist()

    column_values = [as_python_list(df[name]) for name in names]

    # Transpose the column lists into one dict per row.
    return [dict(zip(names, row)) for row in zip(*column_values)]
def create_layer(df: pl.DataFrame) -> list[pdk.Layer]:
    """
    Build the grid-cell map layers for ``df``.

    When ``df`` has an ``is_significant`` column, the rows where it is true
    get a "*" text layer followed by a grid-cell layer, and the rows where
    it is false get a grid-cell layer of their own. Without that column all
    rows go into a single grid-cell layer.

    Returns:
        The list of layers; it is empty when ``df`` has no row to draw.
    """
    def grid_cells(subset: pl.DataFrame) -> pdk.Layer:
        # Flat, pickable 2500-unit cells coloured from `fill_color`.
        return pdk.Layer(
            "GridCellLayer",
            data=fast_to_dicts(subset),
            get_position=["lon", "lat"],
            get_fill_color="fill_color",
            cell_size=2500,
            elevation=0,
            elevation_scale=0,
            lighting=None,
            pickable=True,
            opacity=0.2,
            extruded=False
        )

    layers = []
    df = prepare_layer(df)

    if "is_significant" in df.columns:
        df_sig = df.filter(pl.col("is_significant"))
        df_non_sig = df.filter(~pl.col("is_significant"))
    else:
        df_sig = pl.DataFrame()
        df_non_sig = df

    # Significant points: a star marker on top of the coloured cell.
    if len(df_sig) > 0:
        df_sig = df_sig.with_columns(pl.lit("*").alias("star_text"))
        layers.append(
            pdk.Layer(
                "TextLayer",
                data=fast_to_dicts(df_sig),
                get_position=["lon", "lat"],
                get_text="star_text",
                get_size=5,
                get_color=[0, 0, 0, 255],  # black
                get_angle=0,
                get_text_anchor="center",
                get_alignment_baseline="bottom",
                pickable=False
            )
        )
        layers.append(grid_cells(df_sig))

    # Non-significant points: coloured cell only.
    if len(df_non_sig) > 0:
        layers.append(grid_cells(df_non_sig))

    return layers
def create_scatter_layer(df: pl.DataFrame, radius=1500) -> list[pdk.Layer]:
    """
    Build the scatter-plot map layers for ``df``.

    When ``df`` has an ``is_significant`` column, the rows where it is true
    are drawn first with a radius scale of 3, then the rows where it is
    false with a radius scale of 1. Without that column all rows are drawn
    with a radius scale of 1. Empty groups produce no layer.
    """
    table = prepare_layer(df)

    if "is_significant" in table.columns:
        significant = table.filter(pl.col("is_significant"))
        others = table.filter(~pl.col("is_significant"))
    else:
        significant = pl.DataFrame()
        others = table

    # (rows, radius_scale): significant points are drawn three times larger.
    groups = ((significant, 3), (others, 1))

    return [
        pdk.Layer(
            "ScatterplotLayer",
            data=fast_to_dicts(rows),
            get_position=["lon", "lat"],
            get_fill_color="fill_color",
            get_line_color=[0, 0, 0],
            line_width_min_pixels=0.2,
            get_radius=radius,
            radius_scale=scale,
            radius_min_pixels=2,
            pickable=True,
            stroked=False
        )
        for rows, scale in groups
        if len(rows) > 0
    ]
def create_tooltip(label: str) -> dict:
    """
    Build the tooltip configuration shown when hovering a map point.

    The HTML template keeps the ``{lat_fmt}``, ``{lon_fmt}``, ``{altitude}``
    and ``{val_fmt}`` placeholders verbatim and appends ``label`` after the
    value.
    """
    html = f"""
            ({{lat_fmt}}, {{lon_fmt}})<br>
            {{altitude}} m<br>
            {{val_fmt}} {label}
        """
    style = {
        "backgroundColor": "steelblue",
        "color": "white"
    }
    tooltip = {"html": html, "style": style}
    tooltip["condition"] = "altitude !== 'undefined'"
    return tooltip
def relief():
    """
    Build the map layer drawing the contour lines.

    The shapefile is read and reprojected to EPSG:4326, every LineString
    (including each part of a MultiLineString) becomes one path, and the
    paths are drawn with the fixed colour ``[0, 0, 0, 100]``.
    """
    shapefile = Path("data/external/niveaux/selection_courbes_niveau_france.shp").resolve()
    contours = gpd.read_file(shapefile).to_crs(epsg=4326)

    path_data = []
    for _, feature in contours.iterrows():
        shape = feature.geometry
        # NOTE(review): the altitude is read from the "coordonnees" column —
        # confirm this is the right attribute of the shapefile.
        altitude = feature["coordonnees"]

        if shape.geom_type == "LineString":
            segments = [shape]
        elif shape.geom_type == "MultiLineString":
            segments = shape.geoms
        else:
            segments = []  # other geometry types are ignored

        path_data.extend(
            {"path": list(segment.coords), "altitude": altitude}
            for segment in segments
        )

    return pdk.Layer(
        "PathLayer",
        data=path_data,
        get_path="path",
        get_color="[0, 0, 0, 100]",
        width_scale=1,
        width_min_pixels=0.5,
        pickable=False
    )
def plot_map(layers, view_state, tooltip, activate_relief: bool=False):
    """
    Assemble the pydeck map from the given layers.

    ``layers`` may be a single layer or a list; ``None`` entries are
    discarded. When ``activate_relief`` is true, the layer returned by
    ``relief()`` is appended last.

    Returns:
        The ``pdk.Deck``, or None when its creation fails (the error is
        then reported with ``st.error``).
    """
    candidates = layers if isinstance(layers, list) else [layers]
    active = [item for item in candidates if item is not None]

    if activate_relief:
        contour_layer = relief()
        if contour_layer is not None:
            active.append(contour_layer)

    try:
        deck = pdk.Deck(
            layers=active,
            initial_view_state=view_state,
            tooltip=tooltip,
            map_style=None
        )
    except Exception as e:
        st.error(f"Erreur lors de la création de la carte : {e}")
        return None
    return deck

app/utils/menus_utils.py ADDED Viewed

	@@ -0,0 +1,224 @@

+import streamlit as st
+from pathlib import Path
+from app.utils.config_utils import reverse_param_label
def menu_statisticals(min_years: int, max_years: int, STATS, SEASON):
    """
    Render the option bar of the statistics page and return the selection.

    Eight columns hold, in order: the statistic, the colour-saturation
    quantile, the season, the period, the time scale, the tolerated rate of
    missing data, two display check boxes and the launch button. Every
    widget stores its value in ``st.session_state`` under its own key.

    Args:
        min_years, max_years: bounds of the period slider. The lower bound
            is raised by one year for the "Année hydrologique" and "Hiver"
            seasons.
        STATS: mapping whose keys are the selectable statistics.
        SEASON: mapping whose keys are the selectable seasons.

    Returns:
        None until the launch button has been pressed once; afterwards the
        tuple ``(stat_choice, quantile_choice, first_year, last_year,
        season_choice, scale_choice, missing_rate, show_relief,
        show_stations)``.
    """
    if "selected_point" not in st.session_state:
        st.session_state["selected_point"] = None
    if "run_analysis" not in st.session_state:
        st.session_state["run_analysis"] = False

    col0, col1, col2, col3, col4, col5, col6, col7 = st.columns([0.3, 0.3, 0.2, 0.25, 0.25, 0.25, 0.2, 0.2])

    with col0:
        st.selectbox("Statistique étudiée", list(STATS.keys()), key="stat_choice")

    with col1:
        st.slider(
            "Saturation couleurs",
            min_value=0.950,
            max_value=1.00,
            value=0.995,
            step=0.001,
            format="%.3f",
            key="quantile_choice"
        )

    with col2:
        st.selectbox("Saison", list(SEASON.keys()), key="season_choice")

    with col3:
        season = st.session_state["season_choice"]
        # For these two seasons the period starts one year after min_years.
        if season in ["Année hydrologique", "Hiver"]:
            first_year = min_years + 1
        else:
            first_year = min_years
        st.slider(
            "Période",
            min_value=first_year,
            max_value=max_years,
            value=(first_year, max_years),
            key="year_range"
        )

    with col4:
        # These two statistics are only offered at the daily scale.
        if st.session_state["stat_choice"] in ["Cumul", "Jour de pluie"]:
            scales = ["Journalière"]
        else:
            scales = ["Journalière", "Horaire"]
        st.selectbox("Echelle temporelle", scales, key="scale_choice")

    with col5:
        st.slider(
            "Données manquantes",
            min_value=0.0,
            max_value=1.0,
            value=0.1,
            step=0.01,
            key="missing_rate"
        )

    with col6:
        st.checkbox("Courbes de niveaux", value=False, key="show_relief")
        st.checkbox("Afficher les stations", value=False, key="show_stations")

    with col7:
        if st.button("Lancer l’analyse"):
            st.session_state["run_analysis"] = True

    if not st.session_state["run_analysis"]:
        return None

    first, last = st.session_state["year_range"][0], st.session_state["year_range"][1]
    return (
        st.session_state["stat_choice"],
        st.session_state["quantile_choice"],
        first,
        last,
        st.session_state["season_choice"],
        st.session_state["scale_choice"],
        st.session_state["missing_rate"],
        st.session_state["show_relief"],
        st.session_state["show_stations"]
    )
+def menu_gev(config: dict, model_options: dict, ns_param_map: dict, SEASON, show_param: bool):
+    # Render the GEV toolbar (eight Streamlit columns) and return the selection.
+    #
+    # Parameters:
+    #   config        -- dict read for config["years"]["min"/"max"] and
+    #                    config["gev"]["modelised"/"observed"].
+    #   model_options -- maps the model label shown to the user to the internal model name.
+    #   ns_param_map  -- maps an internal model name to a mapping whose values are the
+    #                    parameter labels offered when show_param is True.
+    #   SEASON        -- maps the season label shown to the user to a season key.
+    #   show_param    -- True: choose a GEV parameter; False: choose a delta quantity.
+    #
+    # Returns None while no model is selected or while the analysis button has
+    # never been clicked; otherwise a dict of the choices plus derived values.
+    # Order matters: widgets are created top to bottom and several of them are
+    # read back from st.session_state right after being drawn.
+    if "run_analysis" not in st.session_state:
+        st.session_state["run_analysis"] = False
+    col0, col1, col2, col3, col4, col5, col6, col7 = st.columns([0.6, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5])
+    # Time scale; also derives the internal scale name and the display unit.
+    with col0:
+        Echelle = st.selectbox("Echelle temporelle", ["Journalière", "Horaire"], key="scale_choice")
+        st.session_state["echelle"] = "quotidien" if Echelle.lower() == "journalière" else "horaire"
+        st.session_state["unit"] = "mm/j" if st.session_state["echelle"] == "quotidien" else "mm/h"
+    # Model; the leading None entry is rendered as a placeholder.
+    with col1:
+        selected_model = st.selectbox(
+            "Modèle GEV",
+            [None] + list(model_options.keys()),
+            format_func=lambda x: "— Choisir un modèle —" if x is None else x,
+            key="model_type"
+        )
+    # The remaining controls are only drawn once a model has been picked.
+    if st.session_state["model_type"] is not None:
+        model_name = model_options[st.session_state["model_type"]]
+        st.session_state["model_name"] = model_name
+        # Season
+        with col2:
+            st.selectbox("Choix de la saison", list(SEASON.keys()), key="season_choice")
+        # Quantile
+        with col3:
+            st.slider(
+                "Percentile de retrait",
+                min_value=0.950,
+                max_value=1.000,
+                value=1.000,
+                step=0.001,
+                format="%.3f",
+                key="quantile_choice"
+            )
+        # GEV parameter (show_param) or delta quantity (otherwise)
+        with col4:
+            if show_param:
+                param_map = ns_param_map[model_name]
+                available_params = list(param_map.values())  # unicode labels
+                selected_label = st.selectbox(
+                    "Paramètre GEV à afficher",
+                    available_params,
+                    index=0,
+                    key="gev_param_choice"
+                )
+                # Convert the displayed label back to its internal name.
+                st.session_state["param_choice"] = reverse_param_label(
+                    selected_label, model_name, ns_param_map
+                )
+            else:
+                # st.session_state["param_choice"] = "Δqᵀ"
+                # selected_label = "Δqᵀ"
+                selected_label = st.selectbox(
+                    "Quantité à afficher",
+                    ["Δqᵀ", "ΔE", "ΔVar", "ΔCV"],
+                    index=0,
+                    key="delta_param_choice"
+                )
+                st.session_state["param_choice"] = selected_label
+        # The return-level slider is only drawn for Δqᵀ; otherwise the key is
+        # written as None so the lookup in the returned dict cannot fail.
+        if selected_label in ["Δqᵀ"]:
+            with col5:
+                st.slider(
+                    "Niveau de retour",
+                    min_value=10,
+                    max_value=100,
+                    value=10,
+                    step=10,
+                    key="T_choice"
+                )
+        else:
+            st.session_state["T_choice"] = None
+        # Same pattern for the year span, drawn for every delta quantity.
+        if selected_label in ["Δqᵀ", "ΔE", "ΔVar", "ΔCV"]:
+            with col6:
+                st.slider(
+                    "Delta annees",
+                    min_value=1,
+                    max_value=60,
+                    value=10,
+                    step=1,
+                    key="par_X_annees"
+                )
+        else:
+            st.session_state["par_X_annees"] = None
+        # Analysis button; the flag stays True once set.
+        with col7:
+            if st.button("Lancer l’analyse"):
+                st.session_state["run_analysis"] = True
+        if st.session_state["run_analysis"]:
+            # Fixed defaults that have no widget in this menu.
+            stat_choice_key = "max"
+            scale_choice_key = "mm_j" if st.session_state["echelle"] == "quotidien" else "mm_h"
+            season_choice_key = SEASON[st.session_state["season_choice"]]
+            # The "hydro" and "djf" seasons start one year after config["years"]["min"].
+            min_year_choice = config["years"]["min"] + 1 if season_choice_key in ["hydro", "djf"] else config["years"]["min"]
+            max_year_choice = config["years"]["max"]
+            missing_rate = 0.15
+            # Directories: <configured root>/<scale>/<season key>
+            mod_dir = Path(config["gev"]["modelised"]) / st.session_state["echelle"] / season_choice_key
+            obs_dir = Path(config["gev"]["observed"]) / st.session_state["echelle"] / season_choice_key
+            return {
+                "echelle": st.session_state["echelle"],
+                "unit": st.session_state["unit"],
+                "model_name": st.session_state["model_name"],
+                "model_name_pres": selected_model,
+                "param_choice": st.session_state["param_choice"],
+                "param_choice_pres": selected_label,
+                "quantile_choice": st.session_state["quantile_choice"],
+                "stat_choice_key": stat_choice_key,
+                "scale_choice_key": scale_choice_key,
+                "season_choice_key": season_choice_key,
+                "season_choice": st.session_state["season_choice"],
+                "min_year_choice": min_year_choice,
+                "max_year_choice": max_year_choice,
+                "missing_rate": missing_rate,
+                "mod_dir": mod_dir,
+                "obs_dir": obs_dir,
+                "T_choice": st.session_state["T_choice"],
+                "par_X_annees": st.session_state["par_X_annees"]
+            }
+    # No model chosen yet, or the analysis has not been launched.
+    return None

app/utils/scatter_plot_utils.py ADDED Viewed

	@@ -0,0 +1,432 @@

+import polars as pl
+import plotly.express as px
+import plotly.graph_objects as go
+import numpy as np
+from scipy.stats import genextreme
+def generate_scatter_plot_interactive(df: pl.DataFrame, stat_choice: str, unit_label: str, height: int,
+                                      x_label: str = "AROME", y_label: str = "Station"):
+    df_pd = df.select(["NUM_POSTE_obs", "NUM_POSTE_mod", "lat", "lon", x_label, y_label]).to_pandas()
+    fig = px.scatter(
+        df_pd,
+        x=x_label,
+        y=y_label,
+        title="",
+        opacity=0.5,
+        width=height,
+        height=height,
+        labels={
+            x_label: f"{stat_choice} du modèle AROME ({unit_label})",
+            y_label: f"{stat_choice} des stations ({unit_label})"
+        },
+        hover_data={"lat": True, "lon": True}
+    )
+    precision = ".1f" if unit_label == "mm/j" else ".2f"
+    fig.update_traces(
+        hovertemplate=
+        "Lat: %{customdata[2]:.4f}<br>Lon: %{customdata[3]:.4f}<br>"
+        f"{x_label} : %{{x:{precision}}}<br>{y_label} : %{{y:{precision}}}<extra></extra>",
+        customdata=df_pd[["NUM_POSTE_obs", "NUM_POSTE_mod", "lat", "lon"]].values
+    )
+    x_range = [df_pd[x_label].min(), df_pd[x_label].max()]
+    y_range = [df_pd[y_label].min(), df_pd[y_label].max()]
+    min_diag = min(x_range[0], y_range[0])
+    max_diag = min(x_range[1], y_range[1])
+    # Ajouter le trait y = x sans légende
+    fig.add_trace(
+        go.Scatter(
+            x=[min_diag, max_diag],
+            y=[min_diag, max_diag],
+            mode='lines',
+            line=dict(color='red', dash='dash'),
+            showlegend=False,
+            hoverinfo='skip'
+        )
+    )
+    # Ajouter une annotation "y = x" en bout de ligne
+    fig.add_annotation(
+        x=max_diag,
+        y=max_diag,
+        text="y = x",
+        showarrow=False,
+        font=dict(color='red'),
+        xanchor="left",
+        yanchor="bottom"
+    )
+    return fig
+def generate_return_period_plot_interactive(
+    T, y_obs, y_mod,
+    label_obs="Stations", label_mod="AROME",
+    unit: str = "mm/j", height: int = 600,
+    points_obs: dict | None = None,
+    points_mod: dict | None = None
+):
+    fig = go.Figure()
+    # Courbe observations
+    fig.add_trace(go.Scatter(
+        x=T,
+        y=y_obs,
+        mode="lines",
+        name=label_obs,
+        line=dict(color="blue"),
+        hovertemplate="Période : %{x:.1f} ans<br>Précipitation : %{y:.1f} " + unit + "<extra></extra>"
+    ))
+    # Courbe modèle
+    fig.add_trace(go.Scatter(
+        x=T,
+        y=y_mod,
+        mode="lines",
+        name=label_mod,
+        line=dict(color="orange"),
+        hovertemplate="Période : %{x:.1f} ans<br>Précipitation : %{y:.1f} " + unit + "<extra></extra>"
+    ))
+    # Points maximas observés (facultatif)
+    if points_obs is not None:
+        fig.add_trace(go.Scatter(
+            x=points_obs["year"],
+            y=points_obs["value"],
+            mode="markers",
+            name="Maximas mesurés",
+            marker=dict(color="blue", size=4, symbol="x"),
+            hovertemplate="Période : %{x:.1f} ans<br>Max observé : %{y:.1f} " + unit + "<extra></extra>"
+        ))
+    # Maximas annuels bruts (facultatif)
+    if points_mod is not None:
+        fig.add_trace(go.Scatter(
+            x=points_mod["year"],
+            y=points_mod["value"],
+            mode="markers",
+            name="Maximas modélisés",
+            marker=dict(color="orange", size=4, symbol="x"),
+            hovertemplate="Année : %{x:.1f}<br>Max : %{y:.1f} " + unit + "<extra></extra>"
+        ))
+    fig.update_layout(
+        xaxis=dict(
+            title="Période de retour (ans)",
+            type="log",
+            showgrid=True,
+            minor=dict(ticklen=4, showgrid=True),
+        ),
+        yaxis=dict(
+            title=f"Précipitation ({unit})",
+            showgrid=True,
+            minor=dict(ticklen=4, showgrid=True),
+        ),
+        template="plotly_white",
+        height=height
+    )
+    return fig
+def generate_gev_density_comparison_interactive(
+    maxima_obs: np.ndarray,
+    maxima_mod: np.ndarray,
+    params_obs: dict,
+    params_mod: dict,
+    unit: str = "mm/j",
+    height: int = 500,
+    t_norm: float = 0.0,  # Covariable normalisée (ex: 0 pour année médiane)
+):
+    """
+    Trace deux courbes de densité GEV (observée et modélisée) superposées, sans histogramme.
+    """
+    # --- Récupération des paramètres observés ---
+    mu_obs = params_obs.get("mu0", 0) + params_obs.get("mu1", 0) * t_norm
+    sigma_obs = params_obs.get("sigma0", 0) + params_obs.get("sigma1", 0) * t_norm
+    xi_obs = params_obs.get("xi", 0)
+    # --- Récupération des paramètres modélisés ---
+    mu_mod = params_mod.get("mu0", 0) + params_mod.get("mu1", 0) * t_norm
+    sigma_mod = params_mod.get("sigma0", 0) + params_mod.get("sigma1", 0) * t_norm
+    xi_mod = params_mod.get("xi", 0)
+    # --- Domaine commun pour tracer ---
+    minima = min(maxima_obs.min(), maxima_mod.min()) * 0.9
+    maxima = max(maxima_obs.max(), maxima_mod.max()) * 1.1
+    x = np.linspace(minima, maxima, 500)
+    # --- Densités ---
+    density_obs = genextreme.pdf(x, c=-xi_obs, loc=mu_obs, scale=sigma_obs)
+    density_mod = genextreme.pdf(x, c=-xi_mod, loc=mu_mod, scale=sigma_mod)
+    # --- Création figure ---
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(
+        x=x,
+        y=density_obs,
+        mode="lines",
+        name="GEV observée",
+        line=dict(color="blue"),
+        hovertemplate="Maxima : %{x:.1f} " + unit + "<br>Densité : %{y:.3f}<extra></extra>",
+    ))
+    fig.add_trace(go.Scatter(
+        x=x,
+        y=density_mod,
+        mode="lines",
+        name="GEV modélisée",
+        line=dict(color="orange"),
+        hovertemplate="Maxima : %{x:.1f} " + unit + "<br>Densité : %{y:.3f}<extra></extra>",
+    ))
+    fig.update_layout(
+        title="",
+        xaxis_title=f"Maximum journalier ({unit})",
+        yaxis_title="Densité",
+        template="plotly_white",
+        height=height,
+    )
+    return fig
+import numpy as np
+import plotly.graph_objects as go
+from scipy.stats import genextreme
+import matplotlib.cm as cm
+import matplotlib.colors as mcolors
+def generate_gev_density_comparison_interactive_3D(
+    maxima_obs: np.ndarray,
+    maxima_mod: np.ndarray,
+    params_obs: dict,
+    params_mod: dict,
+    unit: str = "mm/j",
+    height: int = 500,
+    min_year: int = 1960,
+    max_year: int = 2015,
+):
+    """
+    Trace deux ensembles de courbes de densité GEV (observée et modélisée) superposées,
+    en faisant varier la couleur de violet (min_year) à jaune (max_year).
+    """
+    # --- Génération des années ---
+    years = np.arange(min_year, max_year + 1)
+    # --- Couleurs violet -> jaune ---
+    cmap = cm.get_cmap('plasma')
+    norm = mcolors.Normalize(vmin=min_year, vmax=max_year)
+    colors = [mcolors.to_hex(cmap(norm(year))) for year in years]
+    # --- Domaine commun pour tracer ---
+    minima = min(maxima_obs.min(), maxima_mod.min()) * 0.9
+    maxima = max(maxima_obs.max(), maxima_mod.max()) * 1.1
+    x = np.linspace(minima, maxima, 500)
+    # --- Création de la figure ---
+    fig = go.Figure()
+    for i, year in enumerate(years):
+        t_norm = (year - (min_year + max_year) / 2) / (max_year - min_year)
+        # Densité observée
+        mu_obs = params_obs.get("mu0", 0) + params_obs.get("mu1", 0) * t_norm
+        sigma_obs = params_obs.get("sigma0", 0) + params_obs.get("sigma1", 0) * t_norm
+        xi_obs = params_obs.get("xi", 0)
+        density_obs = genextreme.pdf(x, c=-xi_obs, loc=mu_obs, scale=sigma_obs)
+        fig.add_trace(go.Scatter(
+            x=x,
+            y=density_obs,
+            mode="lines",
+            line=dict(color=colors[i]),
+            name=f"Obs {year}",
+            hovertemplate=f"Obs {year}<br>Maxima : %{{x:.1f}} {unit}<br>Densité : %{{y:.3f}}<extra></extra>",
+            showlegend=False,
+        ))
+        # Densité modélisée
+        mu_mod = params_mod.get("mu0", 0) + params_mod.get("mu1", 0) * t_norm
+        sigma_mod = params_mod.get("sigma0", 0) + params_mod.get("sigma1", 0) * t_norm
+        xi_mod = params_mod.get("xi", 0)
+        density_mod = genextreme.pdf(x, c=-xi_mod, loc=mu_mod, scale=sigma_mod)
+        fig.add_trace(go.Scatter(
+            x=x,
+            y=density_mod,
+            mode="lines",
+            line=dict(color=colors[i]),
+            name=f"Mod {year}",
+            hovertemplate=f"Mod {year}<br>Maxima : %{{x:.1f}} {unit}<br>Densité : %{{y:.3f}}<extra></extra>",
+            showlegend=False,
+        ))
+    # --- Layout final ---
+    fig.update_layout(
+        title="",
+        xaxis_title=f"Maximum journalier ({unit})",
+        yaxis_title="Densité",
+        template="plotly_white",
+        height=height,
+    )
+    return fig
+def generate_time_series_maxima_interactive(
+    years_obs: np.ndarray,
+    max_obs: np.ndarray,
+    years_mod: np.ndarray,
+    max_mod: np.ndarray,
+    unit: str = "mm/j",
+    height: int = 500,
+    nr_year: int = 20,
+    return_levels_obs: float | None = None,
+    return_levels_mod: float | None = None
+):
+    fig_time_series = go.Figure()
+    # --- Observations (seulement en 'x' sans lignes)
+    fig_time_series.add_trace(go.Scatter(
+        x=years_obs,
+        y=max_obs,
+        mode='markers',
+        name='Maximas observés',
+        marker=dict(symbol='x', size=4, color="blue")
+    ))
+    # --- Modèle (seulement en 'x' sans lignes)
+    fig_time_series.add_trace(go.Scatter(
+        x=years_mod,
+        y=max_mod,
+        mode='markers',
+        name='Maximas modélisés',
+        marker=dict(symbol='x', size=4, color="orange")
+    ))
+    # --- Niveau de retour 20 ans observé
+    if return_levels_obs is not None:
+        fig_time_series.add_trace(go.Scatter(
+            x=years_obs,   # ➔ Utilise toutes les années observées !
+            y=return_levels_obs,
+            mode='lines',
+            name=f'NR observé {nr_year} ans',
+            line=dict(color='blue', dash='solid')
+        ))
+    # --- Niveau de retour 20 ans modélisé
+    if return_levels_mod is not None:
+        fig_time_series.add_trace(go.Scatter(
+            x=years_mod,   # ➔ Utilise toutes les années modélisées !
+            y=return_levels_mod,
+            mode='lines',
+            name=f'NR modélisé {nr_year} ans',
+            line=dict(color='orange', dash='solid')
+        ))
+    fig_time_series.update_layout(
+        title="",
+        xaxis_title="Année",
+        yaxis_title=f"Maxima annuel ({unit})",
+        height=height,
+        template="plotly_white"
+    )
+    return fig_time_series
+import numpy as np
+import plotly.graph_objects as go
+from scipy.stats import genextreme
+def generate_loglikelihood_profile_xi(
+    maxima: np.ndarray,
+    params: dict,
+    unit: str = "mm/j",
+    xi_range: float = 3,
+    height: int = 500,
+    t_norm: float = 0.0
+):
+    """
+    Trace le profil de log-vraisemblance autour de ξ ajusté.
+    - maxima : valeurs maximales (array)
+    - params : dictionnaire des paramètres GEV
+    - unit : unité des maxima
+    - xi_range : +/- intervalle autour de ξ pour tracer
+    - height : hauteur de la figure
+    - t_norm : covariable temporelle normalisée
+    """
+    # Récupération des paramètres (à t_norm donné)
+    mu = params.get("mu0", 0) + params.get("mu1", 0) * t_norm
+    sigma = params.get("sigma0", 0) + params.get("sigma1", 0) * t_norm
+    xi_fit = params.get("xi", 0)
+    def compute_nllh(x, mu, sigma, xi):
+        if sigma <= 0:
+            return np.inf
+        try:
+            return -np.sum(genextreme.logpdf(x, c=-xi, loc=mu, scale=sigma))
+        except Exception:
+            return np.inf
+    # Points autour du ξ ajusté
+    xis = np.linspace(xi_fit - xi_range, xi_fit + xi_range, 200)
+    logliks = [-compute_nllh(maxima, mu, sigma, xi) for xi in xis]
+    # --- Création figure Plotly ---
+    fig = go.Figure()
+    fig.add_trace(go.Scatter(
+        x=xis,
+        y=logliks,
+        mode="lines",
+        line=dict(color="blue"),
+        name="Log-vraisemblance",
+        hovertemplate="ξ : %{x:.3f}<br>Log-likelihood : %{y:.1f}<extra></extra>"
+    ))
+    # Conversion en array pour traitement
+    logliks = np.array(logliks)
+    # Filtrage des valeurs finies
+    finite_logliks = logliks[np.isfinite(logliks)]
+    if finite_logliks.size > 0:
+        ymin = finite_logliks.min() - 1  # Marge sous le min réel
+        ymax = finite_logliks.max()
+    else:
+        ymin, ymax = -10, 0  # Valeurs par défaut si tout est -inf
+    # Ajout de la ligne verticale
+    fig.add_trace(go.Scatter(
+        x=[xi_fit, xi_fit],
+        y=[ymin, ymax],
+        mode="lines",
+        line=dict(color="red", dash="dash"),
+        name=f"ξ ajusté ({xi_fit:.3f})"
+    ))
+    fig.update_layout(
+        title="",
+        xaxis_title="ξ",
+        yaxis_title="Log-vraisemblance",
+        template="plotly_white",
+        height=height
+    )
+    return fig

app/utils/show_info.py ADDED Viewed

	@@ -0,0 +1,19 @@

+def show_info_data(col, label, n_points_valides, n_points_total):
+    both_defined = n_points_valides is not None and n_points_total is not None
+    if both_defined:
+        return col.markdown(f"""
+                **{label}**
+                {n_points_valides} / {n_points_total}
+                Tx couverture : {(n_points_valides / n_points_total * 100):.1f}%
+                """)
+    else:
+        return None
+def show_info_metric(col, label, metric):
+    if metric is not None:
+        return col.markdown(f"""
+                **{label}**
+                {metric:.3f}
+                """)
+    else:
+        return None

app/utils/stats_utils.py ADDED Viewed

	@@ -0,0 +1,147 @@

+import streamlit as st
+import polars as pl
+import numpy as np
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+def compute_statistic_per_point(df: pl.DataFrame, stat_key: str) -> pl.DataFrame:
+    cols = df.columns
+    if stat_key == "mean":
+        has_h = "mean_mm_h" in cols
+        if has_h:
+            df = df.with_columns(
+                (pl.col("mean_mm_h") * 24).alias("mean_mm_j")
+            )
+        return df.group_by("NUM_POSTE").agg([
+            *( [pl.col("mean_mm_h").mean().alias("mean_all_mm_h")] if has_h else [] ),
+            *( [pl.col("mean_mm_j").mean().alias("mean_all_mm_j")] if has_h else [] ),
+        ])
+    elif stat_key == "max":
+        return df.group_by("NUM_POSTE").agg([
+            *( [pl.col("max_mm_h").max().alias("max_all_mm_h")] if "max_mm_h" in cols else [] ),
+            *( [pl.col("max_mm_j").max().alias("max_all_mm_j")] if "max_mm_j" in cols else [] ),
+        ])
+    elif stat_key == "mean-max":
+        return df.group_by("NUM_POSTE").agg([
+            *( [pl.col("max_mm_h").mean().alias("max_mean_mm_h")] if "max_mm_h" in cols else [] ),
+            *( [pl.col("max_mm_j").mean().alias("max_mean_mm_j")] if "max_mm_j" in cols else [] ),
+        ])
+    elif stat_key == "date":
+        res = []
+        if "max_mm_h" in cols and "max_date_mm_h" in cols:
+            df_h = (
+                df.sort("max_mm_h", descending=True)
+                .group_by("NUM_POSTE")
+                .agg(pl.col("max_date_mm_h").first().alias("date_max_h"))
+            )
+            res.append(df_h)
+        if "max_mm_j" in cols and "max_date_mm_j" in cols:
+            df_j = (
+                df.sort("max_mm_j", descending=True)
+                .group_by("NUM_POSTE")
+                .agg(pl.col("max_date_mm_j").first().alias("date_max_j"))
+            )
+            res.append(df_j)
+        if not res:
+            raise ValueError("Aucune date de maximum disponible.")
+        elif len(res) == 1:
+            return res[0]
+        else:
+            return res[0].join(res[1], on="NUM_POSTE", how="outer")
+    elif stat_key == "month":
+        exprs = []
+        if "max_date_mm_h" in cols:
+            exprs.append(
+                pl.col("max_date_mm_h")
+                .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S%.f", strict=False)
+                .dt.month()
+                .alias("mois_max_h")
+            )
+        if "max_date_mm_j" in cols:
+            exprs.append(
+                pl.col("max_date_mm_j")
+                .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S%.f", strict=False)
+                .dt.month()
+                .alias("mois_max_j")
+            )
+        if not exprs:
+            raise ValueError("Aucune date de maximum pour extraire les mois.")
+        df = df.with_columns(exprs)
+        mois_h = mois_j = None
+        if "mois_max_h" in df.columns:
+            mois_h = (
+                df.drop_nulls("mois_max_h")
+                .group_by(["NUM_POSTE", "mois_max_h"])
+                .len()
+                .sort(["NUM_POSTE", "len"], descending=[False, True])
+                .unique(subset=["NUM_POSTE"])
+                .select(["NUM_POSTE", "mois_max_h"])
+                .rename({"mois_max_h": "mois_pluvieux_h"})
+            )
+        if "mois_max_j" in df.columns:
+            mois_j = (
+                df.drop_nulls("mois_max_j")
+                .group_by(["NUM_POSTE", "mois_max_j"])
+                .len()
+                .sort(["NUM_POSTE", "len"], descending=[False, True])
+                .unique(subset=["NUM_POSTE"])
+                .select(["NUM_POSTE", "mois_max_j"])
+                .rename({"mois_max_j": "mois_pluvieux_j"})
+            )
+        if mois_h is None and mois_j is None:
+            return pl.DataFrame(schema={"NUM_POSTE": pl.Int64, "mois_pluvieux_h": pl.Int32, "mois_pluvieux_j": pl.Int32})
+        elif mois_h is None:
+            return mois_j.with_columns([pl.lit(None, dtype=pl.Int32).alias("mois_pluvieux_h")])
+        elif mois_j is None:
+            return mois_h.with_columns([pl.lit(None, dtype=pl.Int32).alias("mois_pluvieux_j")])
+        else:
+            return mois_h.join(mois_j, on="NUM_POSTE", how="outer")
+    elif stat_key == "numday":
+        if "n_days_gt1mm" not in df.columns:
+            raise ValueError("Colonne `n_days_gt1mm` manquante.")
+        return (
+            df.group_by("NUM_POSTE")
+            .agg(pl.col("n_days_gt1mm").mean().alias("jours_pluie_moyen"))
+        )
+    else:
+        raise ValueError(f"Statistique inconnue : {stat_key}")
+def generate_metrics(df: pl.DataFrame, x_label: str = "AROME", y_label: str = "Station"):
+    x = df[x_label].to_numpy()
+    y = df[y_label].to_numpy()
+    if len(x) != len(y):
+        st.error("Longueur x et y différente")
+        return np.nan, np.nan, np.nan, np.nan
+    # Filtrage des NaNs sur les deux colonnes
+    mask = ~np.isnan(x) & ~np.isnan(y)
+    x_valid = x[mask]
+    y_valid = y[mask]
+    if len(x_valid) == 0:
+        st.warning("Aucune donnée valide après suppression des NaN.")
+        return np.nan, np.nan, np.nan, np.nan
+    rmse = np.sqrt(mean_squared_error(y_valid, x_valid))
+    mae = mean_absolute_error(y_valid, x_valid)
+    me = np.mean(x_valid - y_valid)
+    corr = np.corrcoef(x_valid, y_valid)[0, 1] if len(x_valid) > 1 else np.nan
+    r2_corr = corr**2 if not np.isnan(corr) else np.nan
+    return me, mae, rmse, r2_corr

download_data.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from huggingface_hub import snapshot_download
+import os
+import traceback
+cache_path = os.path.expanduser("~/.cache/huggingface/hub")
+try:
+    print("Téléchargement des métadonnées...")
+    snapshot_download(
+        repo_id="ncsdecoopman/ExtremePrecipit",
+        repo_type="dataset",
+        revision="main",
+        local_dir="data",
+        cache_dir=cache_path,
+        allow_patterns=["metadonnees/*"]
+    )
+    print("Téléchargement des reliefs...")
+    snapshot_download(
+        repo_id="ncsdecoopman/ExtremePrecipit",
+        repo_type="dataset",
+        revision="main",
+        local_dir="data",
+        cache_dir=cache_path,
+        allow_patterns=["external/*"]
+    )
+    for echelle in ["quotidien", "horaire"]:
+        print(f"Téléchargement des statistiques AROMES (mod)... - Echelle {echelle}")
+        snapshot_download(
+            repo_id="ncsdecoopman/ExtremePrecipit",
+            repo_type="dataset",
+            revision="main",
+            local_dir="data",
+            cache_dir=cache_path,
+            allow_patterns=["statisticals/modelised*"]
+        )
+        print(f"Téléchargement des statistiques STATIONS observées... - Echelle {echelle}")
+        snapshot_download(
+            repo_id="ncsdecoopman/ExtremePrecipit",
+            repo_type="dataset",
+            revision="main",
+            local_dir="data",
+            cache_dir=cache_path,
+            allow_patterns=["statisticals/observed*"]
+        )
+except Exception as e:
+    print("Erreur pendant le téléchargement :")
+    traceback.print_exc()
+    raise SystemExit(1)

main.py ADDED Viewed

	@@ -0,0 +1,372 @@

+import streamlit as st
+from app.utils.map_utils import plot_map
+from app.utils.legends_utils import get_stat_unit
+from app.pipelines.import_data import pipeline_data
+from app.pipelines.import_config import pipeline_config
+from app.pipelines.import_map import pipeline_map
+from app.pipelines.import_scatter import pipeline_scatter
+from app.utils.show_info import show_info_data, show_info_metric
+st.set_page_config(layout="wide", page_title="Analyse interactive des précipitations en France (1959–2022)", page_icon="🌧️")
+st.markdown("""
+<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
+""", unsafe_allow_html=True)
+st.markdown("""
+    <style>
+        * {
+            font-size: 10px !important;
+        }
+        /* Responsive layout des colonnes */
+        @media screen and (max-width: 1000px) {
+            .element-container:has(> .stColumn) {
+                display: flex;
+                flex-wrap: wrap;
+            }
+            .element-container:has(> .stColumn) .stColumn {
+                width: 48% !important;
+                min-width: 48% !important;
+            }
+        }
+        @media screen and (max-width: 600px) {
+            .element-container:has(> .stColumn) .stColumn {
+                width: 100% !important;
+                min-width: 100% !important;
+            }
+        }
+    </style>
+""", unsafe_allow_html=True)
+css = """
+<style>
+/* -------------------- VARIABLES GLOBALES -------------------- */
+:root{
+  --primary:#5A7BFF;
+  --primary-light:#8FA0FF;
+  --accent:#FF7A59;
+  --bg:rgba(245,247,250,0.65);
+  --card:rgba(255,255,255,0.35);
+  --text:#1F2D3D;
+  --text-light:#6B7C93;
+  --radius:18px;
+  --shadow:0 12px 28px rgba(0,0,0,.12);
+  --blur:18px;
+  --font:"Inter", system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+}
+/* -------------------- RESET & BODY -------------------- */
+html, body, [class*="stAppViewContainer"]{
+  font-family: var(--font) !important;
+  color: var(--text);
+}
+body{
+  background: linear-gradient(135deg,#EEF2FF 0%,#FDFBFF 60%,#F0F4FF 100%) fixed !important;
+}
+/* Conteneur principal */
+.block-container{
+  padding-top: 2.5rem !important;
+  padding-bottom: 3rem !important;
+  max-width: 98%;
+}
+/* -------------------- EN-TÊTES -------------------- */
+h1,h2,h3,h4{
+  font-weight: 600 !important;
+  letter-spacing: -0.01em;
+  color: var(--text);
+}
+h1{
+  font-size: 2.1rem !important;
+  margin-bottom: 1.2rem;
+}
+/* -------------------- CARTES / WIDGETS -------------------- */
+section.main > div{
+  backdrop-filter: blur(var(--blur));
+  background: var(--card);
+  border-radius: var(--radius);
+  box-shadow: var(--shadow);
+  padding: 1.5rem 1.8rem;
+}
+/* -------------------- LABELS DES WIDGETS -------------------- */
+.css-10trblm, .stSlider label, .stSelectbox label, .stNumberInput label, .stMultiSelect label{
+  font-size: 0.88rem !important;
+  font-weight: 500 !important;
+  color: var(--text-light) !important;
+  margin-bottom: .4rem !important;
+  text-transform: uppercase;
+  letter-spacing: .04em;
+}
+/* -------------------- SELECTBOX -------------------- */
+.stSelectbox > div div[data-baseweb="select"]{
+  background: rgba(255,255,255,0.55);
+  border-radius: var(--radius) !important;
+  border: 1px solid rgba(0,0,0,.05);
+  box-shadow: inset 0 2px 4px rgba(0,0,0,.04);
+}
+.stSelectbox > div div[data-baseweb="select"]:hover{
+  border-color: var(--primary-light);
+}
+.stSelectbox svg{
+  stroke: var(--primary) !important;
+}
+/* -------------------- SLIDER -------------------- */
+[data-testid="stSlider"] > div{
+  padding-top: .6rem;
+}
+[data-testid="stSlider"] [data-testid="stThumbValue"]{
+  background: var(--primary);
+  color: #fff;
+  border-radius: 10px;
+  padding: 2px 8px;
+  font-size: .75rem;
+  box-shadow: var(--shadow);
+}
+[data-testid="stSlider"] [data-testid="stTickBar"]{
+  background: rgba(0,0,0,.08);
+}
+[data-testid="stSlider"] [data-testid="stTrack"]{
+  background: rgba(0,0,0,.12);
+}
+[data-testid="stSlider"] [data-testid="stTrack"] > div{
+  background: var(--primary);
+}
+/* -------------------- INPUT NUMBER / TEXT -------------------- */
+.stNumberInput input, .stTextInput input{
+  background: rgba(255,255,255,0.6) !important;
+  border-radius: var(--radius) !important;
+  border: 1px solid rgba(0,0,0,.05) !important;
+  box-shadow: inset 0 2px 4px rgba(0,0,0,.05) !important;
+}
+/* -------------------- BOUTON -------------------- */
+.stButton>button{
+  background: var(--primary) !important;
+  color: #fff !important;
+  border: none !important;
+  border-radius: var(--radius) !important;
+  padding: .65rem 1.4rem !important;
+  font-weight: 600 !important;
+  letter-spacing: .02em;
+  transition: all .22s ease;
+  box-shadow: 0 8px 18px rgba(90,123,255,.28);
+}
+.stButton>button:hover{
+  background: var(--primary-light) !important;
+  transform: translateY(-2px) !important;
+  box-shadow: 0 12px 24px rgba(90,123,255,.32);
+}
+.stButton>button:active{
+  transform: translateY(0) scale(.98) !important;
+}
+/* Petit bouton (col6) */
+[class*="stColumn"]:nth-child(7) .stButton>button{
+  padding: .55rem .9rem !important;
+  font-size: .85rem !important;
+}
+/* -------------------- TOOLTIPS -------------------- */
+[data-baseweb="tooltip"]{
+  backdrop-filter: blur(12px);
+  background: rgba(0,0,0,.75);
+  color: #fff;
+  border-radius: 8px;
+  font-size: .75rem;
+  padding: .4rem .65rem;
+}
+/* -------------------- SIDEBAR -------------------- */
+.sidebar .block-container{
+  padding: 1rem 1rem 2rem 1rem !important;
+}
+[class*="stSidebar"]{
+  background: rgba(255,255,255,0.7) !important;
+  backdrop-filter: blur(18px);
+  box-shadow: var(--shadow);
+}
+/* -------------------- CHARTS -------------------- */
+.js-plotly-plot .plotly .main-svg{
+  border-radius: var(--radius);
+  box-shadow: var(--shadow);
+}
+/* -------------------- SCROLLBAR -------------------- */
+::-webkit-scrollbar{
+  width: 8px;
+  height: 8px;
+}
+::-webkit-scrollbar-thumb{
+  background: var(--primary-light);
+  border-radius: 10px;
+}
+::-webkit-scrollbar-track{
+  background: transparent;
+}
+/* -------------------- SLIDER -------------------- */
+/* Cacher totalement les chiffres min/max + graduations */
+[data-testid="stSliderTickBarMin"], [data-testid="stSliderTickBarMax"]{
+    display:none !important;
+}
+/* -------------------- MASQUER MENU & FOOTER -------------------- */
+#MainMenu{visibility:hidden;}
+footer{visibility:hidden;}
+header{visibility:hidden;}
+/* -------------------- GRADIENT TEXT -------------------- */
+.gradient-premium {
+  font-size: 2.5rem !important;             /* Titre XXL */
+  font-weight: 800 !important;              /* Plus de présence */
+  letter-spacing: -0.025em !important;      /* Ajustement espacement */
+  /* Dégradé en trois couleurs */
+  background: linear-gradient(
+    360deg,
+    #5A7BFF 10%,
+    #5A7BFF 100%,
+    #F0F4FF 150%
+  ) !important;
+  color: transparent !important;
+  -webkit-text-fill-color: transparent !important;
+  -webkit-background-clip: text !important;
+  background-clip: text !important;
+  /* contour/glow léger */
+  /* Glow applied through filter: drop-shadow() instead of text-shadow: the
+     glyphs are transparent and filled by the clipped background, so a
+     text-shadow would be painted over the fill and wash it out. */
+  filter: drop-shadow(0 0 2px rgba(255,255,255,0.8));
+  display: inline-block;
+}
+</style>
+"""
+st.markdown(css, unsafe_allow_html=True)
+def show(
+    config_path: dict,
+    height: int=600
+):
+    # Chargement des config
+    params_config = pipeline_config(config_path, type="stat")
+    config = params_config["config"]
+    stat_choice = params_config["stat_choice"]
+    season_choice = params_config["season_choice"]
+    stat_choice_key = params_config["stat_choice_key"]
+    scale_choice_key = params_config["scale_choice_key"]
+    min_year_choice = params_config["min_year_choice"]
+    max_year_choice = params_config["max_year_choice"]
+    season_choice_key = params_config["season_choice_key"]
+    missing_rate = params_config["missing_rate"]
+    quantile_choice = params_config["quantile_choice"]
+    scale_choice = params_config["scale_choice"]
+    show_relief = params_config["show_relief"]
+    show_stations = params_config["show_stations"]
+    # Préparation des paramètres pour pipeline_data
+    params_load = (
+        stat_choice_key,
+        scale_choice_key,
+        min_year_choice,
+        max_year_choice,
+        season_choice_key,
+        missing_rate,
+        quantile_choice,
+        scale_choice
+    )
+    # Obtention des données
+    result = pipeline_data(params_load, config, use_cache=True)
+    # Chargement des affichages graphiques
+    unit_label = get_stat_unit(stat_choice_key, scale_choice_key)
+    params_map = (
+        stat_choice_key,
+        result,
+        unit_label,
+        height
+    )
+    layer, scatter_layer, tooltip, view_state, html_legend = pipeline_map(params_map)
+    col1, col2, col3 = st.columns([1, 0.15, 1])
+    with col1:
+        scatter_layer = None if not show_stations else scatter_layer
+        deck = plot_map([layer, scatter_layer], view_state, tooltip, activate_relief=show_relief)
+        st.markdown(
+            f"""
+            <div style='text-align: left; margin-bottom: 10px;'>
+                <b>{stat_choice} des précipitations de {min_year_choice} à {max_year_choice} ({season_choice.lower()})</b>
+            </div>
+            """,
+            unsafe_allow_html=True
+        )
+        if deck:
+            st.pydeck_chart(deck, use_container_width=True, height=height)
+    with col2:
+        st.markdown(html_legend, unsafe_allow_html=True)
+    with col3:
+        params_scatter = (
+            result,
+            stat_choice_key,
+            scale_choice_key,
+            stat_choice,unit_label,
+            height
+        )
+        n_tot_mod, n_tot_obs, me, mae, rmse, r2, scatter = pipeline_scatter(params_scatter)
+        st.markdown(
+            """
+            <div style='text-align: left; font-size: 0.8em; color: grey; margin-top: 0px;'>
+                Données CP-RCM, 2.5 km, forçage ERA5, réanalyse ECMWF
+            </div>
+            """,
+            unsafe_allow_html=True
+        )
+        st.plotly_chart(scatter, use_container_width=True)
+    col0bis, col1bis, col2bis, col3bis, col4bis, col5bis, col6bis = st.columns(7)
+    show_info_data(col0bis, "CP-AROME map", result["modelised_show"].shape[0], n_tot_mod)
+    show_info_data(col1bis, "Stations", result["observed_show"].shape[0], n_tot_obs)
+    show_info_data(col2bis, "CP-AROME plot", result["modelised"].shape[0], n_tot_mod)
+    show_info_metric(col3bis, "ME", me)
+    show_info_metric(col4bis, "MAE", mae)
+    show_info_metric(col5bis, "RMSE", rmse)
+    show_info_metric(col6bis, "r²", r2)
+if __name__ == "__main__":
+    config_path = "app/config/config.yaml"
+    st.markdown("""
+      <div style="text-align: center; margin-bottom: 2rem;">
+        <h1 style="
+          font-family: var(--font);
+          margin: 0;
+        ">
+          <span class="gradient-premium">
+            Analyse interactive des précipitations en France — 1959 – 2022
+          </span>
+        </h1>
+      </div>
+    """, unsafe_allow_html=True)
+    show(config_path)

requirements.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+# Core UI and plotting
+streamlit==1.43.2
+pydeck==0.9.1
+matplotlib==3.10.0
+plotly==5.24.1
+streamlit_plotly_events==0.0.6
+streamlit_folium==0.24.0
+# NOTE(review): geopandas is the only requirement without a version pin;
+# consider pinning it like the others so builds stay reproducible.
+geopandas
+# Data manipulation
+pandas==2.2.3
+polars==1.26.0
+numpy==2.2.2
+scipy==1.15.1
+# File and compression utils
+huggingface_hub==0.29.2
+pyyaml==6.0.2
+# Computation
+scikit-learn==1.6.1