File size: 1,160 Bytes
49d5e37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""Run Navis Web benchmark modes and emit evaluation artifacts."""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

# ROOT is the parent of the directory that contains this file (parents[0] is
# the file's own directory, parents[1] is one level above it).
# NOTE(review): presumably this is the repository root, added to sys.path so
# that the `inference` and `navis_web_env` imports below resolve when the
# script is run directly — confirm against the project layout.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    # Prepend (rather than append) so this checkout is searched first.
    sys.path.insert(0, str(ROOT))

from inference import run_benchmark_comparison
from navis_web_env.reporting import write_evaluation_artifacts


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the benchmark evaluation script.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing zero-argument callers behave exactly as before.
            Passing an explicit list allows programmatic use and testing.

    Returns:
        A namespace with two attributes:
        ``modes`` -- list of one or more mode names
        (default ``["heuristic", "oracle"]``), and
        ``output_dir`` -- the report directory as a string
        (default ``"outputs/evals"``).
    """
    parser = argparse.ArgumentParser(description="Evaluate Navis Web benchmark modes.")
    parser.add_argument(
        "--modes",
        nargs="+",
        default=["heuristic", "oracle"],
        help="Agent modes to benchmark.",
    )
    parser.add_argument(
        "--output-dir",
        default="outputs/evals",
        help="Directory where report files should be written.",
    )
    return parser.parse_args(argv)


def main() -> None:
    """Run the benchmark comparison and print where each report was written.

    Parses the command line, passes the selected modes to
    ``run_benchmark_comparison``, hands the result and the output directory
    to ``write_evaluation_artifacts``, and prints one ``[REPORT] label=path``
    line for every item in the mapping that call returns.
    """
    cli = parse_args()
    results = run_benchmark_comparison(cli.modes)
    report_dir = Path(cli.output_dir)
    artifacts = write_evaluation_artifacts(results, report_dir)
    for name, location in artifacts.items():
        line = f"[REPORT] {name}={location}"
        # Flush each line so it appears immediately, even when stdout is piped.
        print(line, flush=True)


# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()