roverdevkit / scripts /run_rediscovery_baseline.py
jjreif's picture
Deploy roverdevkit @ 2676a67
b3d14e3
Raw
History Blame Contribute Delete
13 kB
"""Generate the feasible-design null baseline for the §5.4 rediscovery check.
The rediscovery distance ratio historically rests on the unit-cube
random-pair null (the mean pairwise L2 between uniform unit-cube points,
~1.20), which is generous because the box is mostly infeasible. (The
closed-form RMS separation sqrt(9/6) ~= 1.22 is slightly larger, but the
mean is the matched analogue of the feasible-region mean reported here.)
This script builds the tougher null the paper
outline (pre-submission checklist) calls for: a feasibility-restricted
random baseline. For each registry rover it draws ``--n-samples`` uniform
designs from the optimiser box bounds, keeps the subset that is feasible
under the rover's class-generic scenario (physical viability by default;
additionally the mass-ceiling budget with ``--require-mass-ceiling``), and
reports the feasible-region pairwise-distance / centroid / nearest-design
statistics via :mod:`roverdevkit.validation.rediscovery_baseline`.
When a rediscovery summary CSV is available (``--rediscovery-summary``,
defaults to ``reports/rediscovery_loo_evaluator/summary.csv``) the script
joins it and emits both ratios per rover:
- ``ratio_vs_unit_cube`` = design_space_distance / ~1.20 (the old null)
- ``ratio_vs_feasible`` = design_space_distance / feasible_random_pair_mean
(the defensible, tougher null)
Outputs (under ``--out-dir``, defaults to ``reports/rediscovery_baseline``):
- ``feasible_baseline.csv`` — one row per rover with the null statistics
(and both ratios when the rediscovery summary is supplied).
- ``feasible_baseline_report.md`` — human-readable rollup.
Usage
-----
::
python scripts/run_rediscovery_baseline.py
python scripts/run_rediscovery_baseline.py --flown-only --n-samples 8000
"""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
from statistics import median
import pandas as pd
from roverdevkit.validation.rediscovery_baseline import (
UNIT_CUBE_RANDOM_PAIR,
FeasibleBaselineResult,
compute_feasible_baseline_all,
)
# Mirror the rediscovery sweep's per-rover mass-ceiling slop so the
# feasible region matches the budget NSGA-II actually searched. CADRE-unit
# runs at slop 0.50 (see DEFAULT_PER_ROVER_OVERRIDES); everything else at
# the 0.10 default.
# Keys are rover names, values are the slop override for that rover.
# ``main`` forwards this mapping only when ``--require-mass-ceiling`` is
# set; otherwise it passes ``None`` for the per-rover override.
_PER_ROVER_SLOP: dict[str, float] = {"CADRE-unit": 0.50}
def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options for the baseline run.

    Args:
        argv: Argument strings to parse. ``None`` is handed straight to
            ``ArgumentParser.parse_args``.

    Returns:
        Namespace with ``out_dir``, ``flown_only``, ``n_samples``,
        ``max_full_evals``, ``seed``, ``mass_ceiling_slop``,
        ``require_mass_ceiling``, ``rediscovery_summary`` and ``log_level``.
    """
    mass_ceiling_help = (
        "Stricter sensitivity mode: additionally require each feasible "
        "design to sit within the rover's mass-ceiling budget (the "
        "constraint NSGA-II carried). Default: physical viability only. "
        "Note the ultra-micro rovers (CADRE-unit, Tenacious) have no "
        "uniformly-sampleable in-budget feasible designs in this mode."
    )
    summary_help = (
        "Rediscovery summary CSV used to compute both ratios. "
        "If missing, only the null statistics are written."
    )

    # The module docstring doubles as the --help description; the raw
    # formatter keeps its line layout intact.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--out-dir",
        type=Path,
        default=Path("reports/rediscovery_baseline"),
    )
    parser.add_argument(
        "--flown-only",
        action="store_true",
        help="Restrict to flown rovers (Pragyan, Yutu-2). Default: all six.",
    )

    # Sampling controls.
    parser.add_argument("--n-samples", type=int, default=200_000)
    parser.add_argument(
        "--max-full-evals",
        type=int,
        default=3000,
        help="Cap on draws sent to the full evaluator per rover.",
    )
    parser.add_argument("--seed", type=int, default=0)

    # Feasibility definition.
    parser.add_argument("--mass-ceiling-slop", type=float, default=0.10)
    parser.add_argument(
        "--require-mass-ceiling",
        action="store_true",
        help=mass_ceiling_help,
    )

    # Optional join input and logging verbosity.
    parser.add_argument(
        "--rediscovery-summary",
        type=Path,
        default=Path("reports/rediscovery_loo_evaluator/summary.csv"),
        help=summary_help,
    )
    parser.add_argument(
        "--log-level",
        default="INFO",
        choices=("DEBUG", "INFO", "WARNING", "ERROR"),
    )

    return parser.parse_args(argv)
def _results_to_frame(results: list[FeasibleBaselineResult]) -> pd.DataFrame:
    """Flatten per-rover baseline results into one row per rover.

    Each listed attribute of a result becomes a column of the same name.
    The column set is pinned explicitly so that an empty ``results`` list
    still yields a frame with the full schema; without it the frame would
    have no columns and later ``merge(on="rover_name")`` / ``df[...]``
    lookups would raise ``KeyError``.

    Args:
        results: Baseline results, one per rover.

    Returns:
        A DataFrame with one row per result, columns in the order below.
    """
    columns = [
        "rover_name",
        "class_generic_scenario",
        "mass_budget_kg",
        "n_sampled",
        "n_mass_feasible",
        "n_full_evaluated",
        "n_feasible",
        "feasible_fraction",
        "rover_to_centroid_distance",
        "rover_to_nearest_feasible_distance",
        "feasible_random_pair_mean",
        "feasible_random_pair_median",
        "unit_cube_random_pair",
    ]
    rows = [{name: getattr(r, name) for name in columns} for r in results]
    return pd.DataFrame(rows, columns=columns)
def _join_rediscovery(df: pd.DataFrame, summary_path: Path) -> pd.DataFrame:
    """Attach the rediscovery distance and both null ratios, if possible.

    Args:
        df: Per-rover baseline table; must carry ``rover_name``,
            ``unit_cube_random_pair`` and ``feasible_random_pair_mean``.
        summary_path: Rediscovery summary CSV providing ``rover_name`` and
            ``design_space_distance``.

    Returns:
        ``df`` itself (unchanged) when the CSV does not exist, after
        logging a warning. Otherwise a new frame, left-joined on
        ``rover_name``, with ``design_space_distance``,
        ``ratio_vs_unit_cube`` and ``ratio_vs_feasible`` appended.
    """
    if not summary_path.exists():
        log = logging.getLogger(__name__)
        log.warning(
            "rediscovery summary not found at %s; skipping ratio columns",
            summary_path,
        )
        return df

    summary = pd.read_csv(summary_path)
    distances = summary[["rover_name", "design_space_distance"]]

    # Left join: every baseline rover is kept even without a summary row.
    joined = df.merge(distances, how="left", on="rover_name")

    distance = joined["design_space_distance"]
    joined["ratio_vs_unit_cube"] = distance / joined["unit_cube_random_pair"]
    joined["ratio_vs_feasible"] = distance / joined["feasible_random_pair_mean"]
    return joined
def _fmt(value: object, spec: str = "{:.3f}") -> str:
    """Format one table cell as text.

    Args:
        value: The cell value.
        spec: ``str.format`` template applied to float values only.

    Returns:
        ``"n/a"`` for ``None`` or a NaN float, ``spec.format(value)`` for
        any other float, and ``str(value)`` for everything else (bools
        and ints included).
    """
    if value is None:
        return "n/a"
    if isinstance(value, float):
        # NaN marks a missing statistic; only real numbers get the spec.
        return "n/a" if pd.isna(value) else spec.format(value)
    return str(value)
def _markdown(df: pd.DataFrame, args: argparse.Namespace) -> str:
    """Render the human-readable Markdown rollup of the baseline table.

    Args:
        df: Per-rover baseline table. The rediscovery columns are
            optional: the ratio table columns and the in-scope ratio
            medians are emitted only when ``ratio_vs_feasible`` is present.
        args: Parsed CLI namespace; reads ``require_mass_ceiling``,
            ``n_samples``, ``max_full_evals`` and ``seed``.

    Returns:
        The complete report text, terminated by a newline.
    """
    with_ratios = "ratio_vs_feasible" in df.columns

    # The run-description bullets depend on the feasibility mode.
    if args.require_mass_ceiling:
        draws_bullet = (
            f"- Draws per rover: `{args.n_samples}` uniform draws, "
            f"mass-pre-filtered on the bottom-up mass model, then up to "
            f"`{args.max_full_evals}` full evaluations of the mass-feasible subset"
        )
        feasibility = "physical viability + mass ceiling"
    else:
        draws_bullet = (
            f"- Draws per rover: up to `{args.max_full_evals}` uniform draws "
            "from the optimiser box bounds, full-evaluated directly"
        )
        feasibility = "physical viability (not stalled, energy >= 0, range > 0)"

    report: list[str] = [
        "# §5.4 feasible-design null baseline",
        "",
        draws_bullet,
        f"- Seed: `{args.seed}`",
        f"- Feasibility: `{feasibility}`",
        f"- Unit-cube random-pair null (reference): "
        f"`{UNIT_CUBE_RANDOM_PAIR:.3f}` (mean pairwise L2; RMS sqrt(9/6)=1.225)",
        "",
        "## What this is",
        "",
        "The historical rediscovery ratio divides each rover's nearest-",
        "Pareto design-space distance by the **unit-cube** random-pair null",
        "(~1.20). A reviewer can object that the 9-D box includes physically",
        "infeasible designs (rovers that stall, run an energy deficit, or",
        "make no forward progress), so a null spanning it is trivially",
        "beatable. This baseline restricts the random comparison to",
        "**feasible** designs (not stalled, non-negative energy balance,",
        "non-zero range) under each rover's class-generic scenario, giving",
        "the tougher, defensible null (`feasible_random_pair_mean`).",
        "",
        "Empirically the physically-feasible region fills most of the box",
        "(`feas_frac` ~0.77-0.92), so the feasible null (~1.17) sits only",
        "marginally below ~1.20 — which is the reportable result: the",
        "rediscovery ratio is **not** an artifact of infeasible space.",
        "",
        "## Per-rover results",
        "",
    ]

    # (frame column, table header label) pairs, in display order.
    table_columns = [
        ("rover_name", "rover"),
        ("class_generic_scenario", "scenario"),
        ("n_feasible", "n_feasible"),
        ("n_full_evaluated", "n_eval"),
        ("feasible_fraction", "feas_frac"),
        ("feasible_random_pair_mean", "feas_pair_mean"),
        ("rover_to_centroid_distance", "to_centroid"),
        ("rover_to_nearest_feasible_distance", "to_nearest"),
    ]
    if with_ratios:
        table_columns.extend(
            [
                ("design_space_distance", "redisc_dist"),
                ("ratio_vs_unit_cube", "ratio_unitcube"),
                ("ratio_vs_feasible", "ratio_feasible"),
            ]
        )

    def as_row(cells: list[str]) -> str:
        # One pipe-delimited Markdown table line.
        return "| " + " | ".join(cells) + " |"

    def render_cell(key: str, value: object) -> str:
        # Only a present feasible fraction is shown as a percentage, with
        # an extra decimal below 1% so tiny fractions stay visible.
        if key != "feasible_fraction" or value is None or pd.isna(value):
            return _fmt(value)
        percent = float(value) * 100.0
        return f"{percent:.3f}%" if percent < 1.0 else f"{percent:.2f}%"

    report.append(as_row([label for _, label in table_columns]))
    report.append(as_row(["---" for _ in table_columns]))
    for _, record in df.iterrows():
        report.append(
            as_row([render_cell(key, record[key]) for key, _ in table_columns])
        )
    report.append("")

    pair_means = [
        float(x) for x in df["feasible_random_pair_mean"].tolist() if pd.notna(x)
    ]
    if pair_means:
        median_bullet = (
            f"- Median feasible-region random-pair null: "
            f"`{median(pair_means):.3f}`"
        )
    else:
        median_bullet = "- Median feasible-region random-pair null: `n/a`"
    report += [
        "## Aggregate",
        "",
        median_bullet,
        f"- Unit-cube random-pair null: `{UNIT_CUBE_RANDOM_PAIR:.3f}`",
    ]

    if with_ratios:
        # Yutu-2 is left out of the in-scope medians.
        scoped = df[df["rover_name"] != "Yutu-2"]
        feasible_ratios = [
            float(x) for x in scoped["ratio_vs_feasible"].tolist() if pd.notna(x)
        ]
        unit_cube_ratios = [
            float(x) for x in scoped["ratio_vs_unit_cube"].tolist() if pd.notna(x)
        ]
        if feasible_ratios:
            report.append(
                f"- Median in-scope (<50 kg) ratio vs feasible null: "
                f"`{median(feasible_ratios):.2f}`"
            )
        if unit_cube_ratios:
            report.append(
                f"- Median in-scope (<50 kg) ratio vs unit-cube null: "
                f"`{median(unit_cube_ratios):.2f}`"
            )

    report += [
        "",
        "## Interpretation",
        "",
        "- `feasible_random_pair_mean` is the tougher analogue of the",
        "  ~1.20 unit-cube null: the typical separation between two random",
        "  *feasible* rovers under the rover's scenario. Because physical",
        "  viability fills most of the box (`feas_frac`), this null",
        "  (~1.17) sits only marginally below ~1.20, and `ratio_vs_feasible`",
        "  stays close to `ratio_vs_unit_cube`. The takeaway is the honest",
        "  one a reviewer asked for: the rediscovery ratio survives the",
        "  feasibility-restricted null, so it is not an artifact of a null",
        "  diluted by infeasible designs.",
        "- `to_centroid` is the rover's distance to the centroid of the",
        "  feasible region (the 'typical feasible design'); every in-scope",
        "  rover's rediscovery distance is below its `to_centroid`, i.e.",
        "  the optimiser lands closer than the average feasible rover.",
        "  `to_nearest` is N-dependent and reported for context only.",
        "- Yutu-2 (out of scope, ~135 kg) is included for reference; the",
        "  in-scope aggregate excludes it.",
    ]

    return "\n".join(report) + "\n"
def main(argv: list[str] | None = None) -> int:
    """Run the feasible-design baseline and write its two artifacts.

    Parses the CLI, configures logging, computes the per-rover baseline,
    joins the rediscovery summary when it exists, then writes
    ``feasible_baseline.csv`` and ``feasible_baseline_report.md`` under
    ``--out-dir`` (created if needed).

    Args:
        argv: Argument strings forwarded to ``_parse_args``.

    Returns:
        ``0`` once both files have been written.
    """
    args = _parse_args(argv)
    logging.basicConfig(
        level=getattr(logging, args.log_level),
        format="%(asctime)s %(levelname)-7s %(name)s | %(message)s",
    )

    # Per-rover slop overrides only apply in the mass-ceiling mode.
    per_rover_slop = _PER_ROVER_SLOP if args.require_mass_ceiling else None
    results = compute_feasible_baseline_all(
        flown_only=args.flown_only,
        n_samples=args.n_samples,
        max_full_evals=args.max_full_evals,
        seed=args.seed,
        mass_ceiling_slop=args.mass_ceiling_slop,
        require_mass_ceiling=args.require_mass_ceiling,
        per_rover_mass_ceiling_slop=per_rover_slop,
    )

    df = _results_to_frame(results)
    df = _join_rediscovery(df, args.rediscovery_summary)

    args.out_dir.mkdir(parents=True, exist_ok=True)
    csv_path = args.out_dir / "feasible_baseline.csv"
    df.to_csv(csv_path, index=False)

    md_path = args.out_dir / "feasible_baseline_report.md"
    # The report contains non-ASCII characters ("§", "—"). Pin UTF-8 so the
    # file does not depend on the platform's locale default encoding.
    md_path.write_text(_markdown(df, args), encoding="utf-8")

    print(f"Wrote 2 artifact(s) to {args.out_dir}:")
    print(f" csv: {csv_path}")
    print(f" report: {md_path}")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())