File size: 2,489 Bytes
1004967
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""
Run Evaluation Script.

Runs the offline evaluation suite (RAGAS + latency + ablation).

Usage:
    python scripts/run_eval.py --mode S3
    python scripts/run_eval.py --mode all --test-set evaluation/test_queries/general_queries.json
    python scripts/run_eval.py --mode S3 --multihop
"""

from __future__ import annotations

import argparse
import asyncio
import json
from pathlib import Path


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description="Run MemoryBridge evaluation suite."
    )
    parser.add_argument(
        "--mode", choices=["S1", "S2", "S3", "all"], default="S3",
        help="Ablation condition: S1 (no retrieval), S2 (profile-only), S3 (full), all (compare)."
    )
    parser.add_argument(
        "--test-set",
        default="memorybridge/evaluation/test_queries/general_queries.json",
        help="Path to JSON test query file."
    )
    parser.add_argument(
        "--multihop", action="store_true",
        help="Also run multi-hop KG queries from test_queries/multihop_queries.json."
    )
    parser.add_argument(
        "--config", default="memorybridge/config/settings.yaml",
        help="Path to settings.yaml."
    )
    parser.add_argument(
        "--output-dir", default="evaluation/results",
        help="Directory to write evaluation results JSON files."
    )
    return parser.parse_args()


async def main_async(args: argparse.Namespace) -> None:
    """Run the requested ablation mode(s) and save each result as JSON.

    Args:
        args: Parsed command-line options. This function reads ``config``,
            ``test_set``, ``multihop``, ``mode`` and ``output_dir``.

    Raises:
        FileNotFoundError: If a test query file does not exist.
        json.JSONDecodeError: If a test query file is not valid JSON.
    """
    # Imported inside the function, so the package is only loaded once the
    # evaluation actually starts.
    from memorybridge.evaluation.ablation_runner import AblationRunner

    runner = AblationRunner(args.config)

    # JSON files are read as UTF-8 explicitly; the platform default encoding
    # may differ and would mis-decode non-ASCII queries.
    test_queries = json.loads(Path(args.test_set).read_text(encoding="utf-8"))
    if args.multihop:
        multihop_path = "memorybridge/evaluation/test_queries/multihop_queries.json"
        # ``+=`` assumes both files hold a top-level JSON list -- TODO confirm.
        test_queries += json.loads(Path(multihop_path).read_text(encoding="utf-8"))

    # NOTE(review): the loaded queries are only counted here; they are not
    # passed to the runner below. Confirm whether AblationRunner loads its
    # own test set or whether the queries should be handed over.
    print(f"Loaded {len(test_queries)} test queries.")
    print(f"Running ablation mode: {args.mode}")

    if args.mode == "all":
        results = await runner.run_all()
    else:
        # Wrap the single result so both branches yield a mode -> result mapping.
        results = {args.mode: await runner.run(args.mode)}  # type: ignore[arg-type]

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    for mode, result in results.items():
        out_path = output_dir / f"ablation_{mode}.json"
        out_path.write_text(json.dumps(result, indent=2), encoding="utf-8")
        print(f"Saved {mode} results → {out_path}")


def main() -> None:
    """Synchronous entry point: parse the CLI options, then run the evaluation."""
    cli_options = parse_args()
    evaluation = main_async(cli_options)
    # asyncio.run creates an event loop, drives the coroutine to completion
    # and closes the loop afterwards.
    asyncio.run(evaluation)


# Start the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()