""" Run Evaluation Script. Runs the offline evaluation suite (RAGAS + latency + ablation). Usage: python scripts/run_eval.py --mode S3 python scripts/run_eval.py --mode all --test-set evaluation/test_queries/general_queries.json python scripts/run_eval.py --mode S3 --multihop """ from __future__ import annotations import argparse import asyncio import json from pathlib import Path def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Run MemoryBridge evaluation suite." ) parser.add_argument( "--mode", choices=["S1", "S2", "S3", "all"], default="S3", help="Ablation condition: S1 (no retrieval), S2 (profile-only), S3 (full), all (compare)." ) parser.add_argument( "--test-set", default="memorybridge/evaluation/test_queries/general_queries.json", help="Path to JSON test query file." ) parser.add_argument( "--multihop", action="store_true", help="Also run multi-hop KG queries from test_queries/multihop_queries.json." ) parser.add_argument( "--config", default="memorybridge/config/settings.yaml", help="Path to settings.yaml." ) parser.add_argument( "--output-dir", default="evaluation/results", help="Directory to write evaluation results JSON files." ) return parser.parse_args() async def main_async(args: argparse.Namespace) -> None: from memorybridge.evaluation.ablation_runner import AblationRunner runner = AblationRunner(args.config) test_queries = json.loads(Path(args.test_set).read_text()) if args.multihop: multihop_path = "memorybridge/evaluation/test_queries/multihop_queries.json" test_queries += json.loads(Path(multihop_path).read_text()) print(f"Loaded {len(test_queries)} test queries.") print(f"Running ablation mode: {args.mode}") if args.mode == "all": results = await runner.run_all() else: results = {args.mode: await runner.run(args.mode)} # type: ignore[arg-type] output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) for mode, result in results.items(): out_path = output_dir / f"ablation_{mode}.json" out_path.write_text(json.dumps(result, indent=2)) print(f"Saved {mode} results → {out_path}") def main() -> None: args = parse_args() asyncio.run(main_async(args)) if __name__ == "__main__": main()