climate-risk-engine / tests /test_init_sources /test_variable_mapping.py
jtlevine's picture
Fix GFS shortName regression: use ecCodes names, not cfgrib names
edf30d2
"""Tests for the GFS ↔ ERA5 variable mapping tables.
These are pure-data tests — no network, no cfgrib, no xarray I/O. They
protect against accidental breakage of the lookup tables that the GFS
fetcher in ``src/init_sources/gfs.py`` depends on.
"""
from __future__ import annotations
import numpy as np
import pytest
from src.init_sources import variable_mapping as vm
class TestSurfaceVarsLookup:
    """Sanity checks on the ``vm.SURFACE_VARS`` lookup table."""

    def test_all_surface_vars_return_short_name_and_scale(self):
        """Each entry unpacks as ``(short_name, (scale, offset))`` with sane types."""
        for era5_name, entry in vm.SURFACE_VARS.items():
            short, (scale, offset) = entry
            # Both the key and the short name must be non-empty strings.
            for label in (era5_name, short):
                assert isinstance(label, str) and label
            # Both conversion factors must be real floats (not ints).
            for factor in (scale, offset):
                assert isinstance(factor, float)

    def test_expected_surface_vars_present(self):
        """The five surface fields GraphCast init relies on are all in the table."""
        # These 5 surface fields are load-bearing for GraphCast init.
        required = {
            "2m_temperature",
            "10m_u_component_of_wind",
            "10m_v_component_of_wind",
            "mean_sea_level_pressure",
            "total_precipitation_6hr",
        }
        missing = required - set(vm.SURFACE_VARS.keys())
        assert not missing, (
            "Missing surface vars — GraphCast will refuse to init without them"
        )

    def test_precipitation_converts_mm_to_m(self):
        """Precipitation uses a 1e-3 scale and no offset."""
        # GFS ``tp`` is kg/m² (≡ mm of liquid water); ERA5 is m.
        # 12 mm of rain → 0.012 m.
        conversion = vm.SURFACE_VARS["total_precipitation_6hr"][1]
        scale, offset = conversion
        assert scale == pytest.approx(1e-3)
        assert offset == 0.0

    def test_temperature_is_already_kelvin(self):
        """2 m temperature passes through unchanged (scale 1, offset 0)."""
        # Both GFS and ERA5 ship t2m in Kelvin — no conversion.
        conversion = vm.SURFACE_VARS["2m_temperature"][1]
        scale, offset = conversion
        assert scale == 1.0
        assert offset == 0.0

    def test_surface_short_names_are_eccodes_not_cfgrib(self):
        """Short names must be ecCodes shortNames, not cfgrib output names."""
        # Phase 2 regression: cfgrib's ``filter_by_keys`` takes the ecCodes
        # shortName (``2t``, ``10u``, ``10v``, etc.), NOT cfgrib's output
        # xarray variable name (``t2m``, ``u10``, ``v10``). Getting this
        # wrong returns an empty Dataset silently — the first smoke run
        # burned ~$3 of A100 discovering it.
        eccodes_by_era5 = {
            "2m_temperature": "2t",
            "10m_u_component_of_wind": "10u",
            "10m_v_component_of_wind": "10v",
            "mean_sea_level_pressure": "prmsl",
            "total_precipitation_6hr": "tp",
        }
        for era5_name, eccodes in eccodes_by_era5.items():
            actual = vm.SURFACE_VARS[era5_name][0]
            assert actual == eccodes, (
                f"{era5_name!r}: must use ecCodes shortName "
                f"{eccodes!r}, not cfgrib output name"
            )
class TestPressureLevelVarsLookup:
    """Sanity checks on the ``vm.PRESSURE_LEVEL_VARS`` lookup table."""

    def test_geopotential_converts_gpm_to_m2s2(self):
        """Geopotential is scaled by standard gravity with no offset."""
        # GFS `gh` is geopotential HEIGHT in geopotential metres; ERA5
        # stores geopotential (height × g) in m²/s². Check the scale is g.
        conversion = vm.PRESSURE_LEVEL_VARS["geopotential"][1]
        scale, offset = conversion
        assert scale == pytest.approx(9.80665)
        assert offset == 0.0

    def test_six_pressure_level_variables_present(self):
        """All six expected pressure-level variables are keys of the table."""
        required = {
            "temperature",
            "specific_humidity",
            "u_component_of_wind",
            "v_component_of_wind",
            "vertical_velocity",
            "geopotential",
        }
        missing = required - set(vm.PRESSURE_LEVEL_VARS.keys())
        assert not missing
class TestPressureLevelsSelection:
    """Checks on ``vm.GRAPHCAST_PRESSURE_LEVELS``: count, ordering, range, GFS coverage."""

    def test_graphcast_has_13_canonical_levels(self):
        """The level list has exactly 13 entries."""
        assert len(vm.GRAPHCAST_PRESSURE_LEVELS) == 13

    def test_levels_in_descending_model_atmosphere(self):
        """Levels are strictly increasing in pressure and lie within 50..1000 hPa."""
        # GraphCast and GenCast list their levels ascending in pressure
        # (top of atmosphere → surface). 50 .. 1000 hPa strictly increasing.
        levels = list(vm.GRAPHCAST_PRESSURE_LEVELS)
        # Compare each neighbouring pair so that duplicated levels fail too;
        # ``levels == sorted(levels)`` alone would accept repeats.
        assert all(upper < lower for upper, lower in zip(levels, levels[1:])), (
            "levels must be strictly ascending pressure"
        )
        assert min(levels) >= 50, "lowest level must be ≥ 50 hPa"
        assert max(levels) <= 1000, "highest level must be ≤ 1000 hPa"

    def test_levels_available_in_gfs_pgrb2_native(self):
        """Every GraphCast level is one of the natively provided GFS levels."""
        # GFS pgrb2.0p25 natively provides these levels. If any GraphCast
        # level isn't in this list, the select-to-13 step would need to
        # interpolate instead of just subsetting.
        gfs_native = {
            10, 20, 30, 40, 50, 70, 100, 150, 200, 250, 300, 350, 400, 450,
            500, 550, 600, 650, 700, 750, 800, 850, 900, 925, 950, 975, 1000,
        }
        missing = set(vm.GRAPHCAST_PRESSURE_LEVELS) - gfs_native
        # Name the offending levels so a failure is actionable at a glance.
        assert not missing, (
            f"levels not natively available in GFS pgrb2.0p25: {sorted(missing)}"
        )
class TestHelpers:
    """Checks on the helpers ``gfs_short_name``, ``unit_convert`` and ``all_era5_names``."""

    def test_gfs_short_name_surface(self):
        """Surface names resolve to their ecCodes short names."""
        # ecCodes shortName convention — see test_surface_short_names_are_eccodes_not_cfgrib.
        expected = {
            "2m_temperature": "2t",
            "mean_sea_level_pressure": "prmsl",
        }
        for era5_name, short in expected.items():
            assert vm.gfs_short_name(era5_name) == short

    def test_gfs_short_name_pressure_level(self):
        """Pressure-level names resolve to their short names."""
        expected = {
            "temperature": "t",
            "geopotential": "gh",
        }
        for era5_name, short in expected.items():
            assert vm.gfs_short_name(era5_name) == short

    def test_gfs_short_name_raises_on_unknown(self):
        """An unknown variable name raises ``KeyError``."""
        with pytest.raises(KeyError):
            vm.gfs_short_name("not_a_variable")

    def test_unit_convert_temperature_is_identity(self):
        """Converting 2 m temperature leaves the values unchanged."""
        kelvin = np.array([270.0, 290.0, 310.0], dtype=np.float32)
        converted = vm.unit_convert("2m_temperature", kelvin)
        np.testing.assert_allclose(converted, kelvin)

    def test_unit_convert_precipitation_mm_to_m(self):
        """Precipitation values are scaled from mm to m."""
        millimetres = np.array([0.0, 5.0, 12.5], dtype=np.float32)
        expected_metres = [0.0, 0.005, 0.0125]
        converted = vm.unit_convert("total_precipitation_6hr", millimetres)
        np.testing.assert_allclose(converted, expected_metres, rtol=1e-5)

    def test_unit_convert_geopotential_applies_g(self):
        """Geopotential height values are multiplied by 9.80665."""
        gravity = 9.80665
        heights = (0.0, 5000.0, 10_000.0)
        # Expected values are computed in Python floats, independent of the
        # float32 input array handed to the converter.
        expected = [height * gravity for height in heights]
        converted = vm.unit_convert(
            "geopotential", np.array(heights, dtype=np.float32)
        )
        np.testing.assert_allclose(converted, expected, rtol=1e-6)

    def test_all_era5_names_union(self):
        """``all_era5_names`` covers exactly the keys of both lookup tables."""
        reported = set(vm.all_era5_names())
        expected = set(vm.SURFACE_VARS).union(vm.PRESSURE_LEVEL_VARS)
        assert reported == expected