Spaces:
Build error
Build error
| """ | |
Run the offline evaluation suite (RAGAS + latency + ablation).

Usage:
    python scripts/run_eval.py --mode S3
    python scripts/run_eval.py --mode all --test-set evaluation/test_queries/general_queries.json
    python scripts/run_eval.py --mode S3 --multihop
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import asyncio | |
| import json | |
| from pathlib import Path | |
| def parse_args() -> argparse.Namespace: | |
| parser = argparse.ArgumentParser( | |
| description="Run MemoryBridge evaluation suite." | |
| ) | |
| parser.add_argument( | |
| "--mode", choices=["S1", "S2", "S3", "all"], default="S3", | |
| help="Ablation condition: S1 (no retrieval), S2 (profile-only), S3 (full), all (compare)." | |
| ) | |
| parser.add_argument( | |
| "--test-set", | |
| default="memorybridge/evaluation/test_queries/general_queries.json", | |
| help="Path to JSON test query file." | |
| ) | |
| parser.add_argument( | |
| "--multihop", action="store_true", | |
| help="Also run multi-hop KG queries from test_queries/multihop_queries.json." | |
| ) | |
| parser.add_argument( | |
| "--config", default="memorybridge/config/settings.yaml", | |
| help="Path to settings.yaml." | |
| ) | |
| parser.add_argument( | |
| "--output-dir", default="evaluation/results", | |
| help="Directory to write evaluation results JSON files." | |
| ) | |
| return parser.parse_args() | |
async def main_async(args: argparse.Namespace) -> None:
    """Run the selected ablation mode(s) and save one JSON file per mode.

    Args:
        args: Parsed CLI options. This coroutine reads ``config``,
            ``test_set``, ``multihop``, ``mode`` and ``output_dir``.

    Each result is written to ``<output_dir>/ablation_<mode>.json``; the
    output directory is created if it does not exist.
    """
    # Function-scope import, so importing this module does not import the
    # project package.
    from memorybridge.evaluation.ablation_runner import AblationRunner

    runner = AblationRunner(args.config)

    # JSON files are read as UTF-8 explicitly; the platform default encoding
    # is locale-dependent and can mis-decode non-ASCII query text.
    test_queries = json.loads(Path(args.test_set).read_text(encoding="utf-8"))
    if args.multihop:
        multihop_path = "memorybridge/evaluation/test_queries/multihop_queries.json"
        test_queries += json.loads(Path(multihop_path).read_text(encoding="utf-8"))

    # NOTE(review): test_queries is only counted below and never handed to
    # the runner here - presumably AblationRunner loads its own queries from
    # the config; confirm, otherwise --test-set/--multihop have no effect.
    print(f"Loaded {len(test_queries)} test queries.")
    print(f"Running ablation mode: {args.mode}")

    if args.mode == "all":
        results = await runner.run_all()
    else:
        # Wrap the single result so the save loop below handles both cases.
        results = {args.mode: await runner.run(args.mode)}  # type: ignore[arg-type]

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    for mode, result in results.items():
        out_path = output_dir / f"ablation_{mode}.json"
        out_path.write_text(json.dumps(result, indent=2), encoding="utf-8")
        print(f"Saved {mode} results → {out_path}")
def main() -> None:
    """Synchronous entry point: parse the CLI and run the async workflow to completion."""
    asyncio.run(main_async(parse_args()))
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()