"""Smoke tests for ``POST /evaluate``. These run the analytical mission evaluator end-to-end. Unlike the predict tests they do *not* depend on the quantile-calibration artifact. """ from __future__ import annotations import pytest from fastapi.testclient import TestClient PRIMARY_TARGETS = { "range_km", "energy_margin_raw_pct", "slope_capability_deg", "total_mass_kg", } def test_evaluate_returns_all_primary_targets( client: TestClient, sample_design: dict[str, float | int], ) -> None: payload = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"} response = client.post("/evaluate", json=payload) assert response.status_code == 200, response.text body = response.json() assert body["scenario_name"] == "equatorial_mare_traverse" targets = {m["target"] for m in body["metrics"]} assert targets == PRIMARY_TARGETS for metric in body["metrics"]: assert isinstance(metric["value"], (int, float)) thermal = body["thermal"] for key in ( "survives", "peak_sun_temp_c", "lunar_night_temp_c", "min_operating_temp_c", "max_operating_temp_c", "rhu_power_w", "hibernation_power_w", "surface_area_m2", "hot_case_ok", "cold_case_ok", ): assert key in thermal # The default architecture has a -30/+50 °C envelope and these # are the limits the survival flag is judged against. assert thermal["min_operating_temp_c"] == -30.0 assert thermal["max_operating_temp_c"] == 50.0 # Schema v6 (v6 schema update): the per-evaluation drivetrain diagnostic # was renamed from ``motor_torque`` to ``stall`` and exposes the # explicit slip / capacity headroom rather than the v5 OK/NOT-OK # composite. See ``StallDiagnosticOut`` in webapp.backend.schemas. stall = body["stall"] for key in ( "stalled", "peak_torque_demand_nm", "peak_torque_capacity_nm", ): assert key in stall assert stall["peak_torque_demand_nm"] >= 0.0 assert stall["peak_torque_capacity_nm"] > 0.0 arch = body["architecture"] for key in ( "mobility_architecture", "obstacle_capability_m", "required_obstacle_height_m", "obstacle_margin_m", "obstacle_requirement_met", "architecture_mass_kg", ): assert key in arch # Schema v6 also surfaces the runtime-derived effective duty cycle # and cruise speed at the top level so the frontend can show what # the evaluator actually used (vs. the design's δ_des). assert "effective_duty_cycle" in body assert 0.0 <= body["effective_duty_cycle"] <= 0.6 assert "cruise_speed_mps" in body assert body["cruise_speed_mps"] >= 0.0 assert body["elapsed_ms"] > 0 def test_evaluate_thermal_cold_case_drives_failure_for_no_rhu_design( client: TestClient, sample_design: dict[str, float | int], ) -> None: """The default architecture has 0 W RHU; cold case should be the failing one. With no RHU and 2 W of hibernation power, a 0.2-ish m² enclosure radiates to ~133 K (well below the −30 °C limit) and the hot case sits comfortably under +50 °C at any latitude. The dialog leans on this distinction to explain *why* survival fails, so we pin it here. """ payload = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"} response = client.post("/evaluate", json=payload) assert response.status_code == 200 thermal = response.json()["thermal"] if not thermal["survives"]: assert not thermal["cold_case_ok"] # Hot case should never be the failure for this sample design at # equatorial latitude (sanity guard against a regression that # silently flips the model). assert thermal["hot_case_ok"] def test_evaluate_payload_override_increases_total_mass( client: TestClient, sample_design: dict[str, float | int], ) -> None: """Schema v9: the ``payload_mass_kg`` override is a top-level mass line item, so a non-zero override raises ``total_mass_kg`` ~one-for-one and leaves the other primary targets at or below their no-payload values. """ base = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"} base_resp = client.post("/evaluate", json={**base, "payload_mass_kg": 0.0}) heavy_resp = client.post("/evaluate", json={**base, "payload_mass_kg": 10.0}) assert base_resp.status_code == 200, base_resp.text assert heavy_resp.status_code == 200, heavy_resp.text base_mass = {m["target"]: m["value"] for m in base_resp.json()["metrics"]}[ "total_mass_kg" ] heavy_mass = {m["target"]: m["value"] for m in heavy_resp.json()["metrics"]}[ "total_mass_kg" ] # Payload sits outside the dry-mass growth margin, so the delta is the # payload itself (no extra margin applied on top). assert heavy_mass == pytest.approx(base_mass + 10.0, abs=1e-6) def test_evaluate_payload_power_override_reduces_range( client: TestClient, sample_design: dict[str, float | int], ) -> None: """Schema v9: ``payload_power_w`` adds to the continuous ops-time load, so a non-zero override never increases range and typically shrinks it. """ base = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"} quiet = client.post("/evaluate", json={**base, "payload_power_w": 0.0}) noisy = client.post("/evaluate", json={**base, "payload_power_w": 25.0}) assert quiet.status_code == 200, quiet.text assert noisy.status_code == 200, noisy.text quiet_range = {m["target"]: m["value"] for m in quiet.json()["metrics"]}["range_km"] noisy_range = {m["target"]: m["value"] for m in noisy.json()["metrics"]}["range_km"] assert noisy_range <= quiet_range + 1e-9 def test_evaluate_rejects_out_of_bounds_payload( client: TestClient, sample_design: dict[str, float | int], ) -> None: """Payload overrides are bounded ``[0, 30]`` at the HTTP boundary.""" response = client.post( "/evaluate", json={ "design": sample_design, "scenario_name": "equatorial_mare_traverse", "payload_mass_kg": 999.0, }, ) assert response.status_code == 422 def test_evaluate_mission_duration_override_increases_range( client: TestClient, sample_design: dict[str, float | int], ) -> None: """Longer ``mission_duration_earth_days`` extends the simulation window, so range_km should not decrease when duration doubles on a non-binding mare traverse (energy-limited, not cap-limited). """ base = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"} short_resp = client.post( "/evaluate", json={**base, "mission_duration_earth_days": 7.0} ) long_resp = client.post( "/evaluate", json={**base, "mission_duration_earth_days": 28.0} ) assert short_resp.status_code == 200, short_resp.text assert long_resp.status_code == 200, long_resp.text short_range = {m["target"]: m["value"] for m in short_resp.json()["metrics"]}[ "range_km" ] long_range = {m["target"]: m["value"] for m in long_resp.json()["metrics"]}[ "range_km" ] assert long_range > short_range + 1e-9 def test_evaluate_rejects_out_of_bounds_mission_duration( client: TestClient, sample_design: dict[str, float | int], ) -> None: """Mission-duration overrides are bounded ``[0.5, 90]`` at the HTTP boundary.""" response = client.post( "/evaluate", json={ "design": sample_design, "scenario_name": "equatorial_mare_traverse", "mission_duration_earth_days": 0.1, }, ) assert response.status_code == 422 def test_evaluate_unknown_scenario_returns_404( client: TestClient, sample_design: dict[str, float | int], ) -> None: payload = {"design": sample_design, "scenario_name": "no_such_scenario"} response = client.post("/evaluate", json=payload) assert response.status_code == 404 def test_evaluate_rejects_out_of_bounds_design( client: TestClient, sample_design: dict[str, float | int], ) -> None: bad = dict(sample_design) bad["wheel_radius_m"] = 5.0 response = client.post( "/evaluate", json={"design": bad, "scenario_name": "equatorial_mare_traverse"}, ) assert response.status_code == 422 def test_evaluate_values_match_primary_metrics_shape( client: TestClient, sample_design: dict[str, float | int], ) -> None: """Sanity-check the projection of ``MissionMetrics`` onto the four primary targets. Range and total mass are strictly positive for every well-formed scenario; slope is bounded above by 90°; energy margin is unbounded but should be finite. This is a coarse "no NaN snuck through" guard. """ payload = {"design": sample_design, "scenario_name": "polar_prospecting"} response = client.post("/evaluate", json=payload) assert response.status_code == 200 body = response.json() by_target = {m["target"]: m["value"] for m in body["metrics"]} assert by_target["total_mass_kg"] > 0 assert by_target["range_km"] >= 0 assert 0 <= by_target["slope_capability_deg"] <= 90 assert by_target["energy_margin_raw_pct"] == by_target["energy_margin_raw_pct"] # not NaN def test_evaluate_and_predict_agree_within_surrogate_noise_floor( client: TestClient, sample_design: dict[str, float | int], surrogate_v7_1_compatible: bool, ) -> None: """The surrogate's median should track the evaluator within R²-noise. On the canonical equatorial-mare scenario for the Yutu-2-ish sample design, the tuned-median tuned median has R² ≥ 0.99 on every primary target. We pick a generous tolerance per target rather than assert exact equality so this test does not flake on XGBoost-version churn or harmless quantile-head retrains. """ if not surrogate_v7_1_compatible: pytest.skip( "schema-v7_1 quantile_bundles.joblib not on disk; pre-v7_1 " "bundles lack scenario_operational_duty_cycle and KeyError " "on the v7_1 feature row." ) payload = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"} eval_resp = client.post("/evaluate", json=payload) pred_resp = client.post("/predict", json=payload) assert eval_resp.status_code == 200 if pred_resp.status_code == 503: # Quantile bundles missing (mirrors the predict-test skip path). return assert pred_resp.status_code == 200 evaluator = {m["target"]: m["value"] for m in eval_resp.json()["metrics"]} surrogate = {p["target"]: p["q50"] for p in pred_resp.json()["predictions"]} # Per-target relative tolerance on the median. Energy margin runs # large positive on equatorial-mare so we use absolute tolerance # (a 5 pp gap on a 600 % margin is still <1 % relative error). # The slope tolerance is set to ~2x the v9 surrogate's overall test # RMSE (0.930 deg) divided by a typical equatorial-mare sample-design # slope_capability (~22 deg) — i.e. tight enough to catch wiring bugs # but loose enough not to flake on a single-point tail residual at # the surrogate noise floor. Widened from 0.08 (v6) to 0.10 (v9) # because the v9 median head is marginally noisier on slope after the # payload-feature retrain (test R² 0.978). See # ``reports/surrogate_v9/median_sanity.csv``. # total_mass is a near-analytic function of design + payload, so the # median head learns it to high precision (test R² 0.999, RMSE # 0.567 kg). The 0.04 rel tol (~1.8 kg at this ~44 kg sample design) # is ~3x RMSE — a single-point tail allowance now that payload is an # extra LHS input adding a little variance, still tight enough to # catch a units / wiring regression. rel_tol = { "range_km": 0.10, "slope_capability_deg": 0.10, "total_mass_kg": 0.05, } for tgt, tol in rel_tol.items(): e = evaluator[tgt] s = surrogate[tgt] assert abs(e - s) <= max(tol * abs(e), 1e-3), (tgt, e, s) # Energy margin: tolerate a 50-pp gap in absolute terms. assert abs(evaluator["energy_margin_raw_pct"] - surrogate["energy_margin_raw_pct"]) <= 50