| |
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import subprocess |
| import time |
| from pathlib import Path |
|
|
|
|
def _phase_commands(profile: str) -> list[tuple[str, list[str]]]:
    """Return the ordered ``(phase_name, argv)`` plan for an acceptance profile.

    Four phases are always included. When *profile* is exactly ``"full"``,
    two more phases are appended; any other value yields only the base four.
    A fresh list (with fresh argv lists) is built on every call.
    """

    def pytest_selection(test_files: list[str], keywords: list[str]) -> list[str]:
        # The ``-k`` expression is the keywords OR-ed together, so pytest
        # selects a test matching any of them.
        return ["uv", "run", "pytest", "-q", *test_files, "-k", " or ".join(keywords)]

    plan: list[tuple[str, list[str]]] = [
        ("sim_stack_baseline", ["./scripts/test_sim.sh"]),
        (
            "voice_loop_edges",
            pytest_selection(
                ["tests/test_runtime_conversation.py", "tests/test_main_audio.py"],
                ["barge_in", "tts_barge_in_soak_harness_stability"],
            ),
        ),
        (
            "voice_repair_confirmation",
            pytest_selection(
                ["tests/test_main_lifecycle.py"],
                [
                    "requires_stt_repair",
                    "requires_confirmation_respects_voice_profile_confirmation_mode",
                    "followup_carryover",
                ],
            ),
        ),
        (
            "autonomy_checkpoint_edges",
            pytest_selection(
                ["tests/test_tools_services.py"],
                [
                    "planner_engine_autonomy_cycle_requires_checkpoint_then_executes",
                    "home_orchestrator_automation_pipeline_local_apply_and_rollback",
                ],
            ),
        ),
    ]
    if profile != "full":
        return plan

    # Phases that only run in the "full" profile.
    plan += [
        (
            "operator_contract_edges",
            pytest_selection(
                ["tests/test_tools_services.py"],
                [
                    "system_status_contract_reports_expected_fields",
                    "system_status_reports_snapshot",
                    "identity_guest_session_capability_enforced",
                ],
            ),
        ),
        (
            "recovery_replay_edges",
            pytest_selection(
                ["tests/test_tools_services.py"],
                [
                    "dead_letter_queue_captures_webhook_failure_and_replays",
                    "bind_reconciles_interrupted_recovery_entries",
                ],
            ),
        ),
    ]
    return plan
|
|
|
|
def _run_phase(name: str, command: list[str]) -> dict[str, object]:
    """Run one acceptance phase as a subprocess and return its result record.

    Args:
        name: Phase label, stored under ``"phase"``.
        command: Argument vector handed to ``subprocess.run`` (no shell).

    Returns:
        A dict with ``phase``, ``command``, ``started_at`` / ``finished_at``
        (``time.time()`` floats), ``duration_sec`` (monotonic clock),
        ``exit_code``, ``status`` (``"passed"`` when the exit code is 0,
        otherwise ``"failed"``), ``stdout_tail`` (last 4000 characters) and
        ``stderr_tail`` (last 2000 characters).

    A command that cannot be launched at all (``OSError``, e.g. the
    executable is missing) is reported as a failed phase instead of being
    raised, so the caller can still record the failure. The exit code then
    follows the shell convention: 127 for "not found", 126 for any other
    launch error, and ``stderr_tail`` carries the exception text.
    """
    started_at = time.time()
    started_mono = time.monotonic()
    try:
        # errors="replace" keeps undecodable bytes in the child's output from
        # aborting the run with a UnicodeDecodeError.
        proc = subprocess.run(command, capture_output=True, text=True, errors="replace")
    except OSError as exc:
        exit_code = 127 if isinstance(exc, FileNotFoundError) else 126
        stdout = ""
        stderr = f"{type(exc).__name__}: {exc}"
    else:
        exit_code = proc.returncode
        stdout = proc.stdout
        stderr = proc.stderr
    # Take the duration before the wall-clock stamp so it covers only the run.
    duration_sec = time.monotonic() - started_mono
    finished_at = time.time()
    return {
        "phase": name,
        "command": command,
        "started_at": started_at,
        "finished_at": finished_at,
        "duration_sec": duration_sec,
        "exit_code": exit_code,
        "status": "passed" if exit_code == 0 else "failed",
        "stdout_tail": stdout[-4000:],
        "stderr_tail": stderr[-2000:],
    }
|
|
|
|
def _artifact_checks(
    results: list[dict[str, object]],
    *,
    expected_phase_count_per_cycle: int,
    repeat: int,
) -> dict[str, object]:
    """Summarise structural sanity checks over the collected phase records.

    Args:
        results: Phase result dicts; the keys read here are ``phase``,
            ``status``, ``started_at``, ``finished_at`` and ``cycle``.
        expected_phase_count_per_cycle: Number of phases in one full cycle.
        repeat: Requested cycle count (values below 1 are treated as 1 when
            computing the expected total).

    Returns:
        A dict with the non-blank phase names in order, whether every status
        is ``"passed"``/``"failed"``, whether every record has float
        ``started_at`` and ``finished_at`` values, the expected per-cycle and
        total phase counts, and the observed record count per cycle (keyed by
        the cycle number as a string, in ascending numeric order).
    """
    allowed_statuses = {"passed", "failed"}

    def cycle_of(record: dict[str, object]) -> int:
        # A missing or unparseable cycle value is attributed to cycle 1.
        try:
            return int(record.get("cycle", 1))
        except (TypeError, ValueError):
            return 1

    phase_labels: list[str] = []
    statuses_ok = True
    stamps_ok = True
    per_cycle: dict[int, int] = {}

    for record in results:
        label = str(record.get("phase", ""))
        if label.strip():
            phase_labels.append(label)

        if str(record.get("status", "")) not in allowed_statuses:
            statuses_ok = False

        begin = record.get("started_at")
        end = record.get("finished_at")
        if not (isinstance(begin, float) and isinstance(end, float)):
            stamps_ok = False

        cycle_id = cycle_of(record)
        per_cycle[cycle_id] = per_cycle.get(cycle_id, 0) + 1

    total_expected = expected_phase_count_per_cycle * max(1, repeat)
    counts_by_cycle = {str(cycle_id): per_cycle[cycle_id] for cycle_id in sorted(per_cycle)}

    return {
        "phase_names": phase_labels,
        "all_status_valid": statuses_ok,
        "all_timestamps_present": stamps_ok,
        "expected_phase_count_per_cycle": expected_phase_count_per_cycle,
        "expected_total_phase_count": total_expected,
        "cycle_phase_counts": counts_by_cycle,
    }
|
|
|
|
def main() -> int:
    """Run the selected acceptance profile and write a JSON summary artifact.

    Parses ``--profile``, ``--repeat`` and ``--output`` from the command line,
    runs the phase plan once per cycle (stopping at the first failing phase),
    writes the summary to the output path, prints it, and returns the process
    exit status: 0 when every requested cycle completed with all phases
    passing, otherwise 1.

    Raises:
        SystemExit: If ``--repeat`` is less than 1.
    """
    cli = argparse.ArgumentParser(description="Run simulation-first acceptance profile checks.")
    cli.add_argument("--profile", choices=("fast", "full"), default="fast")
    cli.add_argument("--repeat", type=int, default=1, help="Run full phase set this many cycles.")
    # NOTE(review): the default file name says "fast-repeat1" even when another
    # profile or repeat count is selected -- confirm whether that is intended.
    cli.add_argument(
        "--output",
        default=".artifacts/quality/sim-acceptance-fast-repeat1.json",
        help="JSON artifact path",
    )
    opts = cli.parse_args()
    if opts.repeat < 1:
        raise SystemExit("--repeat must be >= 1.")

    plan = _phase_commands(opts.profile)
    per_cycle_expected = len(plan)
    records: list[dict[str, object]] = []

    for cycle_no in range(1, opts.repeat + 1):
        for position, (phase_name, argv) in enumerate(plan, start=1):
            record = _run_phase(phase_name, argv)
            record["cycle"] = cycle_no
            record["cycle_phase_index"] = position
            records.append(record)
            print(
                f"[sim-acceptance] cycle {cycle_no}/{opts.repeat} {phase_name}: "
                f"{record['status']} ({record['duration_sec']:.2f}s)"
            )
            if int(record["exit_code"]) != 0:
                break
        else:
            # Every phase of this cycle passed; move on to the next cycle.
            continue
        # A phase failed: abandon the remaining cycles as well.
        break

    # Tally how many phases ran in each cycle and which cycles saw a failure.
    ran_per_cycle: dict[int, int] = {}
    cycles_with_failure: set[int] = set()
    for row in records:
        cycle_id = int(row.get("cycle", 1) or 1)
        ran_per_cycle[cycle_id] = ran_per_cycle.get(cycle_id, 0) + 1
        if int(row.get("exit_code", 1)) != 0:
            cycles_with_failure.add(cycle_id)

    clean_cycles = [
        cycle_id
        for cycle_id in range(1, opts.repeat + 1)
        if ran_per_cycle.get(cycle_id, 0) == per_cycle_expected
        and cycle_id not in cycles_with_failure
    ]
    cycles_completed = len(clean_cycles)
    every_exit_clean = all(int(row.get("exit_code", 1)) == 0 for row in records)
    accepted = cycles_completed == opts.repeat and every_exit_clean

    passed_total = sum(1 for row in records if row.get("status") == "passed")
    summary = {
        "profile": opts.profile,
        "repeat": opts.repeat,
        "cycles_completed": cycles_completed,
        "phase_count": len(records),
        "passed_count": passed_total,
        "failed_count": len(records) - passed_total,
        "accepted": accepted,
        "expected_phase_count": per_cycle_expected,
        "artifact_checks": _artifact_checks(
            records,
            expected_phase_count_per_cycle=per_cycle_expected,
            repeat=opts.repeat,
        ),
        "results": records,
        "generated_at": time.time(),
    }

    # Serialise once; the same text goes to the artifact file and to stdout.
    rendered = json.dumps(summary, indent=2)
    artifact_path = Path(opts.output)
    artifact_path.parent.mkdir(parents=True, exist_ok=True)
    artifact_path.write_text(rendered, encoding="utf-8")
    print(rendered)
    return 0 if accepted else 1
|
|
|
|
if __name__ == "__main__":
    # Script entry point: propagate main()'s return value as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
|