"""Precompute per-module baselines for the corpus. For every (repo, module) entry in `corpus/manifest.json` whose `source == "local"`, this script: 1. Generates the full mutant set for the module (deterministic, AST-based). 2. Runs the existing test suite against each mutant — killed = exit != 0. 3. Computes line+branch coverage of the module under the existing suite. 4. Writes `corpus/_baselines/__.json` with the surviving mutants (incl. mutated_source), coverage, and a module summary. 5. Updates `manifest.json` in place with the summary stats. Run: python scripts/precompute_baselines.py python scripts/precompute_baselines.py --only-repo mini_calendar python scripts/precompute_baselines.py --only-module csv_normalizer.normalizer python scripts/precompute_baselines.py --skip-existing """ from __future__ import annotations import argparse import json import os import shutil import subprocess import sys import tempfile from pathlib import Path # Make `src/` importable when running directly from repo root. ROOT = Path(__file__).resolve().parents[1] SRC = ROOT / "src" if str(SRC) not in sys.path: sys.path.insert(0, str(SRC)) from mutant_hunter.corpus.baselines import ( # noqa: E402 BASELINE_ROOT, Baseline, LOCAL_LIBS_ROOT, MANIFEST_PATH, list_existing_tests, module_to_relpath, save_baseline, summarize_module, ) from mutant_hunter.mutation.engine import MutationEngine # noqa: E402 def _count_loc(path: Path) -> int: lines = path.read_text(encoding="utf-8").splitlines() return sum(1 for ln in lines if ln.strip()) def _atomic_write_json(path: Path, payload: dict) -> None: tmp = path.with_suffix(path.suffix + ".tmp") tmp.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") os.replace(tmp, path) def _validate_manifest(manifest: dict) -> None: if manifest.get("version") != "v1": raise ValueError("manifest.json: expected version == 'v1'") if not isinstance(manifest.get("repos"), list): raise ValueError("manifest.json: missing 'repos' list") for repo in manifest["repos"]: for k in ("name", "source", "path", "license", "modules"): if k not in repo: raise ValueError(f"manifest.json: repo missing key '{k}'") def _compute_coverage( repo_dir: Path, module_relpath: Path, test_dir: str, timeout_s: float, package_name: str | None = None, ) -> float: """Run coverage in a fresh option-B workspace. Return percent_covered or 0.0.""" from mutant_hunter.safety.sandbox import Sandbox # local import to avoid cycle workspace = Sandbox.make_workspace( repo_dir, package_name=package_name, test_dir=test_dir, ) try: env = os.environ.copy() env["PYTHONPATH"] = str(workspace) + ( os.pathsep + env["PYTHONPATH"] if env.get("PYTHONPATH") else "" ) env["PYTHONNOUSERSITE"] = "1" env["PYTHONDONTWRITEBYTECODE"] = "1" try: subprocess.run( [sys.executable, "-m", "coverage", "run", "--branch", "-m", "pytest", "-q", test_dir], cwd=str(workspace), text=True, capture_output=True, timeout=timeout_s, check=False, env=env, ) rpt = subprocess.run( [sys.executable, "-m", "coverage", "json", "-o", "coverage.json"], cwd=str(workspace), text=True, capture_output=True, timeout=timeout_s, check=False, env=env, ) if rpt.returncode != 0: return 0.0 data = json.loads((workspace / "coverage.json").read_text(encoding="utf-8")) except subprocess.TimeoutExpired: return 0.0 files = data.get("files", {}) if package_name is None: target_name = module_relpath.as_posix() else: target_name = (Path(package_name) / module_relpath).as_posix() for k, v in files.items(): kp = k.replace("\\", "/") if kp.endswith(target_name): return float(v.get("summary", {}).get("percent_covered", 0.0)) return 0.0 finally: shutil.rmtree(workspace.parent, ignore_errors=True) def main() -> int: ap = argparse.ArgumentParser(description="Compute baseline mutation scores + coverage.") ap.add_argument("--manifest", type=str, default=str(MANIFEST_PATH)) ap.add_argument("--timeout-s", type=float, default=8.0, help="Per-mutant pytest timeout") ap.add_argument("--cov-timeout-s", type=float, default=60.0) ap.add_argument("--only-repo", type=str, default=None) ap.add_argument("--only-module", type=str, default=None) ap.add_argument("--skip-existing", action="store_true", help="Skip modules with a baseline file already on disk.") args = ap.parse_args() manifest_path = Path(args.manifest) manifest = json.loads(manifest_path.read_text(encoding="utf-8")) _validate_manifest(manifest) BASELINE_ROOT.mkdir(parents=True, exist_ok=True) engine = MutationEngine() for repo in manifest["repos"]: if repo.get("source") != "local": continue repo_name = repo["name"] if args.only_repo and repo_name != args.only_repo: continue repo_path = LOCAL_LIBS_ROOT / repo_name if not repo_path.exists(): raise FileNotFoundError(f"missing local repo dir: {repo_path}") for mod in repo["modules"]: dotted = mod["module"] if args.only_module and dotted != args.only_module: continue cache_path = BASELINE_ROOT / f"{repo_name}__{dotted}.json" if args.skip_existing and cache_path.exists(): print(f"Skipping existing baseline for {repo_name}:{dotted}", flush=True) continue relpath = module_to_relpath(repo_name, dotted) source_path = repo_path / relpath if not source_path.exists(): raise FileNotFoundError(f"module source not found: {dotted} -> {source_path}") test_dir = mod.get("test_dir", "tests") loc = _count_loc(source_path) print(f"=== {repo_name}:{dotted} (loc={loc}) ===", flush=True) cov = _compute_coverage( repo_path, relpath, test_dir, args.cov_timeout_s, package_name=repo_name ) print(f" coverage_baseline = {cov:.2f}%", flush=True) print(" running mutation baseline ...", flush=True) report = engine.baseline_report( repo_dir=repo_path, module_relpath=relpath, test_dir=test_dir, package_name=repo_name, timeout_s=args.timeout_s, ) print( f" total={report.total_mutants} killed={report.killed} " f"survived={report.survived} score={report.mutation_score:.3f}", flush=True, ) module_summary = summarize_module(source_path.read_text(encoding="utf-8")) existing_tests = list_existing_tests(repo_path, test_dir) baseline = Baseline( repo=repo_name, module=dotted, module_relpath=str(relpath).replace("\\", "/"), total_mutants=report.total_mutants, surviving_mutants=report.survived_mutants, baseline_mutation_score=report.mutation_score, coverage_baseline=cov, module_summary=module_summary, existing_test_names=existing_tests, ) written = save_baseline(baseline) print(f" wrote {written}", flush=True) mod["loc"] = loc mod["total_mutants"] = report.total_mutants mod["surviving_mutants"] = report.survived mod["baseline_mutation_score"] = round(report.mutation_score, 6) mod["coverage_baseline"] = round(cov, 4) _atomic_write_json(manifest_path, manifest) print(f"Updated {manifest_path}") return 0 if __name__ == "__main__": raise SystemExit(main())