memorybridge / scripts /run_eval.py
kimandrew927's picture
Initial Space deployment
1004967
"""
Run Evaluation Script.

Runs the offline evaluation suite (RAGAS + latency + ablation).

Usage:
    python scripts/run_eval.py --mode S3
    python scripts/run_eval.py --mode all --test-set evaluation/test_queries/general_queries.json
    python scripts/run_eval.py --mode S3 --multihop
"""
from __future__ import annotations
import argparse
import asyncio
import json
from pathlib import Path
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Run MemoryBridge evaluation suite."
)
parser.add_argument(
"--mode", choices=["S1", "S2", "S3", "all"], default="S3",
help="Ablation condition: S1 (no retrieval), S2 (profile-only), S3 (full), all (compare)."
)
parser.add_argument(
"--test-set",
default="memorybridge/evaluation/test_queries/general_queries.json",
help="Path to JSON test query file."
)
parser.add_argument(
"--multihop", action="store_true",
help="Also run multi-hop KG queries from test_queries/multihop_queries.json."
)
parser.add_argument(
"--config", default="memorybridge/config/settings.yaml",
help="Path to settings.yaml."
)
parser.add_argument(
"--output-dir", default="evaluation/results",
help="Directory to write evaluation results JSON files."
)
return parser.parse_args()
async def main_async(args: argparse.Namespace) -> None:
    """Run the requested ablation mode(s) and write one JSON file per mode.

    Args:
        args: Parsed command-line options. Reads ``config``, ``test_set``,
            ``multihop``, ``mode`` and ``output_dir``.

    Raises:
        FileNotFoundError: If a test query file does not exist.
        json.JSONDecodeError: If a test query file is not valid JSON.
    """
    # Imported inside the coroutine rather than at module top (presumably so
    # that ``--help`` works without loading the evaluation stack — TODO confirm).
    from memorybridge.evaluation.ablation_runner import AblationRunner

    runner = AblationRunner(args.config)

    # JSON text is UTF-8; decode explicitly instead of relying on the
    # platform's locale encoding, which would mangle non-ASCII queries.
    test_queries = json.loads(Path(args.test_set).read_text(encoding="utf-8"))
    if args.multihop:
        multihop_path = "memorybridge/evaluation/test_queries/multihop_queries.json"
        # Assumes both files hold a top-level JSON array — TODO confirm.
        test_queries += json.loads(Path(multihop_path).read_text(encoding="utf-8"))

    # NOTE(review): the loaded queries are only counted here; they are never
    # passed to the runner. Confirm AblationRunner loads its own test set.
    print(f"Loaded {len(test_queries)} test queries.")
    print(f"Running ablation mode: {args.mode}")

    if args.mode == "all":
        results = await runner.run_all()
    else:
        results = {args.mode: await runner.run(args.mode)}  # type: ignore[arg-type]

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    for mode, result in results.items():
        out_path = output_dir / f"ablation_{mode}.json"
        out_path.write_text(json.dumps(result, indent=2), encoding="utf-8")
        print(f"Saved {mode} results → {out_path}")
def main() -> None:
    """Script entry point: parse the CLI options and run the evaluation coroutine to completion."""
    asyncio.run(main_async(parse_args()))
# Run the evaluation only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()