Spaces:
Running
Running
| """Smoke tests for ``POST /evaluate``. | |
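| These run the analytical mission evaluator end-to-end. Unlike the | |
| predict tests, most of them do *not* depend on the quantile-calibration | |
| artifact; the evaluate-vs-predict agreement check is the exception and | |
| skips itself when that artifact is unavailable. | |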
| """ | |
| from __future__ import annotations | |
| import pytest | |
| from fastapi.testclient import TestClient | |
# Metric targets that ``POST /evaluate`` is expected to report. Kept as a
# frozenset so that no test can accidentally mutate the shared expectation;
# it still compares equal to a plain ``set`` of the same names.
PRIMARY_TARGETS = frozenset(
    {
        "range_km",
        "energy_margin_raw_pct",
        "slope_capability_deg",
        "total_mass_kg",
    }
)
def test_evaluate_returns_all_primary_targets(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """Pin the response shape of ``POST /evaluate`` for the sample design.

    Checks, in order: the echoed scenario name, that the reported metric
    targets are exactly ``PRIMARY_TARGETS`` with numeric values, that the
    ``thermal``, ``stall`` and ``architecture`` sub-documents carry the keys
    listed below, and that the top-level runtime fields are present and in
    range.
    """
    scenario = "equatorial_mare_traverse"
    response = client.post(
        "/evaluate",
        json={"design": sample_design, "scenario_name": scenario},
    )
    assert response.status_code == 200, response.text
    body = response.json()
    assert body["scenario_name"] == scenario

    metrics = body["metrics"]
    assert {entry["target"] for entry in metrics} == PRIMARY_TARGETS
    non_numeric = [
        entry for entry in metrics if not isinstance(entry["value"], (int, float))
    ]
    assert not non_numeric

    thermal = body["thermal"]
    thermal_keys = (
        "survives",
        "peak_sun_temp_c",
        "lunar_night_temp_c",
        "min_operating_temp_c",
        "max_operating_temp_c",
        "rhu_power_w",
        "hibernation_power_w",
        "surface_area_m2",
        "hot_case_ok",
        "cold_case_ok",
    )
    missing_thermal = [name for name in thermal_keys if name not in thermal]
    assert not missing_thermal
    # The default architecture has a -30/+50 °C envelope and these
    # are the limits the survival flag is judged against.
    assert thermal["min_operating_temp_c"] == -30.0
    assert thermal["max_operating_temp_c"] == 50.0

    # Schema v6: the per-evaluation drivetrain diagnostic was renamed from
    # ``motor_torque`` to ``stall`` and exposes the explicit slip / capacity
    # headroom rather than the v5 OK/NOT-OK composite. See
    # ``StallDiagnosticOut`` in webapp.backend.schemas.
    stall = body["stall"]
    stall_keys = (
        "stalled",
        "peak_torque_demand_nm",
        "peak_torque_capacity_nm",
    )
    missing_stall = [name for name in stall_keys if name not in stall]
    assert not missing_stall
    assert stall["peak_torque_demand_nm"] >= 0.0
    assert stall["peak_torque_capacity_nm"] > 0.0

    arch = body["architecture"]
    arch_keys = (
        "mobility_architecture",
        "obstacle_capability_m",
        "required_obstacle_height_m",
        "obstacle_margin_m",
        "obstacle_requirement_met",
        "architecture_mass_kg",
    )
    missing_arch = [name for name in arch_keys if name not in arch]
    assert not missing_arch

    # Schema v6 also surfaces the runtime-derived effective duty cycle
    # and cruise speed at the top level so the frontend can show what
    # the evaluator actually used (vs. the design's δ_des).
    assert "effective_duty_cycle" in body
    duty_cycle = body["effective_duty_cycle"]
    assert 0.0 <= duty_cycle <= 0.6
    assert "cruise_speed_mps" in body
    assert body["cruise_speed_mps"] >= 0.0
    assert body["elapsed_ms"] > 0
def test_evaluate_thermal_cold_case_drives_failure_for_no_rhu_design(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """When survival fails for the sample design, the cold case must be why.

    The default architecture has 0 W RHU. With no RHU and 2 W of
    hibernation power, a 0.2-ish m² enclosure radiates to ~133 K (well
    below the −30 °C limit) and the hot case sits comfortably under
    +50 °C at any latitude. The dialog leans on this distinction to
    explain *why* survival fails, so it is pinned here.
    """
    response = client.post(
        "/evaluate",
        json={
            "design": sample_design,
            "scenario_name": "equatorial_mare_traverse",
        },
    )
    assert response.status_code == 200
    thermal = response.json()["thermal"]

    # Only constrain the cold case when the design is reported as not
    # surviving; a surviving design short-circuits this check.
    assert thermal["survives"] or not thermal["cold_case_ok"]

    # Hot case should never be the failure for this sample design at
    # equatorial latitude (sanity guard against a regression that
    # silently flips the model).
    assert thermal["hot_case_ok"]
def test_evaluate_payload_override_increases_total_mass(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """Schema v9: a ``payload_mass_kg`` override adds one-for-one to total mass.

    Evaluates the same design and scenario with a 0 kg and a 10 kg payload
    override and asserts that ``total_mass_kg`` differs by exactly the
    payload (within 1e-6). Only the total-mass target is checked here.
    """

    def total_mass(resp) -> float:
        # Index the metrics list by target name and pick out the mass.
        by_target = {m["target"]: m["value"] for m in resp.json()["metrics"]}
        return by_target["total_mass_kg"]

    request = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"}
    base_resp = client.post("/evaluate", json={**request, "payload_mass_kg": 0.0})
    heavy_resp = client.post("/evaluate", json={**request, "payload_mass_kg": 10.0})
    assert base_resp.status_code == 200, base_resp.text
    assert heavy_resp.status_code == 200, heavy_resp.text

    base_mass = total_mass(base_resp)
    heavy_mass = total_mass(heavy_resp)
    # Payload sits outside the dry-mass growth margin, so the delta is the
    # payload itself (no extra margin applied on top).
    assert heavy_mass == pytest.approx(base_mass + 10.0, abs=1e-6)
def test_evaluate_payload_power_override_reduces_range(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """Schema v9: a ``payload_power_w`` override must never increase range.

    ``payload_power_w`` adds to the continuous ops-time load, so the range
    with a 25 W override is asserted to be no greater than the range with
    a 0 W override (with a 1e-9 float allowance).
    """

    def range_km(resp) -> float:
        # Index the metrics list by target name and pick out the range.
        by_target = {m["target"]: m["value"] for m in resp.json()["metrics"]}
        return by_target["range_km"]

    request = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"}
    quiet = client.post("/evaluate", json={**request, "payload_power_w": 0.0})
    noisy = client.post("/evaluate", json={**request, "payload_power_w": 25.0})
    assert quiet.status_code == 200, quiet.text
    assert noisy.status_code == 200, noisy.text

    quiet_range = range_km(quiet)
    noisy_range = range_km(noisy)
    assert noisy_range <= quiet_range + 1e-9
def test_evaluate_rejects_out_of_bounds_payload(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """Payload overrides are bounded ``[0, 30]`` at the HTTP boundary.

    A 999 kg payload override must be rejected with HTTP 422.
    """
    request_body = {
        "design": sample_design,
        "scenario_name": "equatorial_mare_traverse",
        "payload_mass_kg": 999.0,
    }
    response = client.post("/evaluate", json=request_body)
    assert response.status_code == 422
def test_evaluate_mission_duration_override_increases_range(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """A longer ``mission_duration_earth_days`` must yield a longer range.

    A longer duration extends the simulation window. On the mare traverse
    (energy-limited, not cap-limited) the range for a 28-day mission is
    asserted to be strictly greater than the range for a 7-day mission.
    """

    def range_km(resp) -> float:
        # Index the metrics list by target name and pick out the range.
        by_target = {m["target"]: m["value"] for m in resp.json()["metrics"]}
        return by_target["range_km"]

    request = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"}
    short_resp = client.post(
        "/evaluate",
        json={**request, "mission_duration_earth_days": 7.0},
    )
    long_resp = client.post(
        "/evaluate",
        json={**request, "mission_duration_earth_days": 28.0},
    )
    assert short_resp.status_code == 200, short_resp.text
    assert long_resp.status_code == 200, long_resp.text

    short_range = range_km(short_resp)
    long_range = range_km(long_resp)
    assert long_range > short_range + 1e-9
def test_evaluate_rejects_out_of_bounds_mission_duration(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """Mission-duration overrides are bounded ``[0.5, 90]`` at the HTTP boundary.

    A 0.1-day override falls below the lower bound and must return HTTP 422.
    """
    request_body = {
        "design": sample_design,
        "scenario_name": "equatorial_mare_traverse",
        "mission_duration_earth_days": 0.1,
    }
    response = client.post("/evaluate", json=request_body)
    assert response.status_code == 422
def test_evaluate_unknown_scenario_returns_404(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """A scenario name the backend does not know must yield HTTP 404."""
    response = client.post(
        "/evaluate",
        json={"design": sample_design, "scenario_name": "no_such_scenario"},
    )
    assert response.status_code == 404
def test_evaluate_rejects_out_of_bounds_design(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """A design with ``wheel_radius_m`` set to 5.0 must be rejected with 422."""
    # Build a modified copy so the shared fixture dict is left untouched.
    out_of_bounds = {**sample_design, "wheel_radius_m": 5.0}
    request_body = {
        "design": out_of_bounds,
        "scenario_name": "equatorial_mare_traverse",
    }
    response = client.post("/evaluate", json=request_body)
    assert response.status_code == 422
def test_evaluate_values_match_primary_metrics_shape(
    client: TestClient,
    sample_design: dict[str, float | int],
) -> None:
    """Sanity-check the projection of ``MissionMetrics`` onto the four primary targets.

    For the ``polar_prospecting`` scenario: total mass must be strictly
    positive, range must be non-negative, slope capability must lie in
    [0°, 90°], and energy margin — unbounded in magnitude — must be a
    finite number. This is a coarse "no NaN/inf snuck through" guard.
    """
    import math  # local import: only this test needs it

    payload = {"design": sample_design, "scenario_name": "polar_prospecting"}
    response = client.post("/evaluate", json=payload)
    assert response.status_code == 200, response.text
    body = response.json()
    by_target = {m["target"]: m["value"] for m in body["metrics"]}
    assert by_target["total_mass_kg"] > 0
    assert by_target["range_km"] >= 0
    assert 0 <= by_target["slope_capability_deg"] <= 90
    # ``x == x`` only rejects NaN; ``isfinite`` also rejects +/-inf, which
    # is what "should be finite" actually requires.
    assert math.isfinite(by_target["energy_margin_raw_pct"])
def test_evaluate_and_predict_agree_within_surrogate_noise_floor(
    client: TestClient,
    sample_design: dict[str, float | int],
    surrogate_v7_1_compatible: bool,
) -> None:
    """The surrogate's median should track the evaluator within R²-noise.

    On the canonical equatorial-mare scenario for the Yutu-2-ish
    sample design, the tuned median has R² ≥ 0.99 on every primary
    target. We pick a generous tolerance per target rather than assert
    exact equality so this test does not flake on XGBoost-version churn
    or harmless quantile-head retrains.

    The test is skipped (not silently passed) when the surrogate bundle
    is incompatible or ``/predict`` answers 503.
    """
    if not surrogate_v7_1_compatible:
        pytest.skip(
            "schema-v7_1 quantile_bundles.joblib not on disk; pre-v7_1 "
            "bundles lack scenario_operational_duty_cycle and KeyError "
            "on the v7_1 feature row."
        )
    payload = {"design": sample_design, "scenario_name": "equatorial_mare_traverse"}
    eval_resp = client.post("/evaluate", json=payload)
    pred_resp = client.post("/predict", json=payload)
    assert eval_resp.status_code == 200, eval_resp.text
    if pred_resp.status_code == 503:
        # Quantile bundles missing (mirrors the predict-test skip path).
        # Report a skip instead of returning, so the run does not show a
        # pass for a comparison that never happened.
        pytest.skip("/predict returned 503; quantile bundles unavailable.")
    assert pred_resp.status_code == 200, pred_resp.text
    evaluator = {m["target"]: m["value"] for m in eval_resp.json()["metrics"]}
    surrogate = {p["target"]: p["q50"] for p in pred_resp.json()["predictions"]}
    # Per-target relative tolerance on the median. Energy margin runs
    # large positive on equatorial-mare so we use absolute tolerance
    # (a 5 pp gap on a 600 % margin is still <1 % relative error).
    # The slope tolerance is set to ~2x the v9 surrogate's overall test
    # RMSE (0.930 deg) divided by a typical equatorial-mare sample-design
    # slope_capability (~22 deg) — i.e. tight enough to catch wiring bugs
    # but loose enough not to flake on a single-point tail residual at
    # the surrogate noise floor. Widened from 0.08 (v6) to 0.10 (v9)
    # because the v9 median head is marginally noisier on slope after the
    # payload-feature retrain (test R² 0.978). See
    # ``reports/surrogate_v9/median_sanity.csv``.
    # total_mass is a near-analytic function of design + payload, so the
    # median head learns it to high precision (test R² 0.999, RMSE
    # 0.567 kg). The relative tolerance is a single-point tail allowance
    # now that payload is an extra LHS input adding a little variance,
    # still tight enough to catch a units / wiring regression.
    # NOTE(review): the value coded below is 0.05 (~2.2 kg at this ~44 kg
    # sample design), whereas this comment previously cited 0.04 (~1.8 kg,
    # ~3x RMSE) — confirm which one is intended.
    rel_tol = {
        "range_km": 0.10,
        "slope_capability_deg": 0.10,
        "total_mass_kg": 0.05,
    }
    for target, tol in rel_tol.items():
        expected = evaluator[target]
        predicted = surrogate[target]
        # The 1e-3 floor keeps the check meaningful when the evaluator
        # value is at or near zero.
        assert abs(expected - predicted) <= max(tol * abs(expected), 1e-3), (
            target,
            expected,
            predicted,
        )
    # Energy margin: tolerate a 50-pp gap in absolute terms.
    margin_gap = abs(
        evaluator["energy_margin_raw_pct"] - surrogate["energy_margin_raw_pct"]
    )
    assert margin_gap <= 50