| """GFS → ERA5 variable name, unit, and pressure-level conversion tables. | |
| These are pure data. Tests exercise them directly. The fetcher in gfs.py | |
| consumes them. | |
| ERA5 (via ARCO Zarr, what GraphCast + GenCast are trained on) uses long | |
| human-readable names: ``2m_temperature``, ``10m_u_component_of_wind``, etc. | |
| GFS GRIB2, as exposed through cfgrib, uses short codes: ``t2m``, ``u10``, etc. | |
| Both encode the same physical quantities but with different conventions. | |
| Everything here is single-source-of-truth: if GFS introduces a new short-code | |
| convention, it only gets added here and the fetcher picks it up automatically. | |
| """ | |
| from __future__ import annotations | |
| from typing import Dict, Tuple | |
| # --------------------------------------------------------------------------- | |
| # Surface variables (single-level) | |
| # --------------------------------------------------------------------------- | |
| # Each entry: ERA5 canonical name → (GFS ecCodes shortName, unit conversion) | |
| # Unit conversion is a (scale, offset) tuple applied as gfs_value * scale + offset. | |
| # When no conversion is needed (same units), use (1.0, 0.0). | |
| # | |
| # GFS units from cfgrib: | |
| # t2m: K (matches ERA5) | |
| # u10: m/s (matches ERA5) | |
| # v10: m/s (matches ERA5) | |
| # prmsl:Pa (matches ERA5) | |
| # tp: kg/m^2 (≡ mm of liquid water, ERA5 has m — divide by 1000) | |
| # | |
| # GFS GRIB2 has total_precipitation as an accumulated quantity over the forecast | |
| # step — cfgrib exposes ``tp`` in kg/m² which is numerically the same as mm of | |
| # liquid water. ERA5 reports precipitation in m. We divide by 1000 to match. | |
| # | |
| # ``ecCodes`` shortName differs from cfgrib's output variable name for a | |
| # handful of surface fields. The GRIB *message* is indexed by the ecCodes | |
| # shortName (``2t``, ``10u``, ``10v``, ``prmsl``, ``tp``) which is what | |
| # ``filter_by_keys`` takes. When cfgrib materialises the filtered message as | |
| # an xarray variable, it uses its own naming (``t2m``, ``u10``, ``v10``, | |
| # ``prmsl``, ``tp``). We carry the ecCodes shortName here because the filter | |
| # is load-bearing — get this wrong and filter_by_keys returns an empty | |
| # dataset, which was the Phase-2-take-1 failure mode. | |
# ERA5 name → (ecCodes shortName passed to ``filter_by_keys``, (scale, offset)).
# Fields whose GFS units already match ERA5 share the no-op conversion
# (1.0, 0.0); precipitation is the only surface field that is rescaled.
SURFACE_VARS: Dict[str, Tuple[str, Tuple[float, float]]] = {
    **{
        era5_name: (short_name, (1.0, 0.0))
        for era5_name, short_name in (
            ("2m_temperature", "2t"),
            ("10m_u_component_of_wind", "10u"),
            ("10m_v_component_of_wind", "10v"),
            ("mean_sea_level_pressure", "prmsl"),
        )
    },
    # kg/m² (≡ mm of liquid water) → m
    "total_precipitation_6hr": ("tp", (1e-3, 0.0)),
}
| # --------------------------------------------------------------------------- | |
| # Pressure-level variables (3D: time × level × lat × lon) | |
| # --------------------------------------------------------------------------- | |
| # | |
| # GFS units on pressure levels: | |
| # t: K (matches ERA5) | |
| # q: kg/kg (matches ERA5) | |
| # u: m/s (matches ERA5) | |
| # v: m/s (matches ERA5) | |
| # w: Pa/s (matches ERA5 vertical_velocity) | |
| # gh: gpm (geopotential HEIGHT in meters — ERA5 has geopotential in m²/s²) | |
| # → multiply by g = 9.80665 to get geopotential. | |
# ERA5 name → (GFS short name, (scale, offset)), with the conversion applied
# as gfs_value * scale + offset. Every field except geopotential shares the
# no-op conversion (1.0, 0.0).
PRESSURE_LEVEL_VARS: Dict[str, Tuple[str, Tuple[float, float]]] = {
    **{
        era5_name: (short_name, (1.0, 0.0))
        for era5_name, short_name in (
            ("temperature", "t"),
            ("specific_humidity", "q"),
            ("u_component_of_wind", "u"),
            ("v_component_of_wind", "v"),
            ("vertical_velocity", "w"),
        )
    },
    # geopotential height (gpm) × g = 9.80665 → geopotential (m²/s²)
    "geopotential": ("gh", (9.80665, 0.0)),
}
| # --------------------------------------------------------------------------- | |
| # Static variables (no time dim) | |
| # --------------------------------------------------------------------------- | |
| # | |
| # These aren't in standard GFS pgrb2 files. They're loaded from a small ERA5 | |
| # snapshot cached in the repo. See static_vars.py. | |
# ERA5 names of the time-invariant fields, kept as an immutable tuple.
STATIC_VARS: Tuple[str, ...] = ("geopotential_at_surface", "land_sea_mask")
| # --------------------------------------------------------------------------- | |
| # Pressure levels | |
| # --------------------------------------------------------------------------- | |
| # | |
| # GraphCast operational uses 13 levels. GenCast 1.0° uses the same 13 (the | |
| # checkpoint's task_config lists them explicitly). Both models' task_configs | |
| # carry the level list, so the fetcher never hard-codes it — but the levels | |
| # here are what we select DOWN to from GFS's richer level set. | |
| # | |
| # GFS pgrb2.0p25 natively provides: 10, 20, 30, 40, 50, 70, 100, 150, 200, | |
| # 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 925, | |
| # 950, 975, 1000 hPa — all 13 GraphCast levels are in there. | |
# The 13 pressure levels (hPa) selected from GFS, in ascending order.
GRAPHCAST_PRESSURE_LEVELS: Tuple[int, ...] = (
    50,
    100,
    150,
    200,
    250,
    300,
    400,
    500,
    600,
    700,
    850,
    925,
    1000,
)
| # --------------------------------------------------------------------------- | |
| # Convenience helpers | |
| # --------------------------------------------------------------------------- | |
def gfs_short_name(era5_name: str) -> str:
    """Look up the GFS short name stored for an ERA5 variable.

    The surface table is consulted first, then the pressure-level table;
    the first element of the matching entry is returned.

    Args:
        era5_name: ERA5 canonical variable name, e.g. ``"2m_temperature"``.

    Returns:
        The short name recorded for ``era5_name``.

    Raises:
        KeyError: If ``era5_name`` is in neither table.
    """
    # Tuple order fixes lookup priority: surface before pressure-level.
    for table in (SURFACE_VARS, PRESSURE_LEVEL_VARS):
        if era5_name in table:
            return table[era5_name][0]
    raise KeyError(f"No GFS mapping for ERA5 variable {era5_name!r}")
def unit_convert(era5_name: str, gfs_values):
    """Convert GFS values to ERA5 units using the stored linear conversion.

    Args:
        era5_name: ERA5 canonical variable name used to find the conversion.
        gfs_values: A value or array that supports ``* float`` and
            ``+ float``.

    Returns:
        ``gfs_values * scale + offset`` for the ``(scale, offset)`` pair
        stored under ``era5_name``.

    Raises:
        KeyError: If ``era5_name`` is in neither the surface nor the
            pressure-level table.
    """
    # First table containing the name wins; surface is checked first.
    source = next(
        (
            table
            for table in (SURFACE_VARS, PRESSURE_LEVEL_VARS)
            if era5_name in table
        ),
        None,
    )
    if source is None:
        raise KeyError(f"No unit conversion for {era5_name!r}")
    factor, shift = source[era5_name][1]
    return gfs_values * factor + shift
def all_era5_names() -> Tuple[str, ...]:
    """Return every ERA5 name that has a GFS mapping.

    Returns:
        The surface-table keys followed by the pressure-level-table keys,
        each in dict insertion order.
    """
    # Unpacking a dict yields its keys in insertion order.
    return (*SURFACE_VARS, *PRESSURE_LEVEL_VARS)