stateshiftbench-code-review / scripts /summarize_results.py
xsx001's picture
Add StateShiftBench review code
89e75e1 verified
#!/usr/bin/env python3
"""Summarize StateShiftBench episode JSONL files."""
import argparse
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from stateshiftbench.metrics import summarize_episodes
def main():
    """Summarize an episode JSONL file and print the summary as JSON.

    Parses the ``--episodes`` command-line argument, reads that file as
    UTF-8 JSON Lines (one JSON document per physical line, blank lines
    ignored), passes the list of parsed records to ``summarize_episodes``
    and prints its return value to stdout as indented, key-sorted JSON.

    Raises:
        SystemExit: Raised by ``argparse`` when ``--episodes`` is missing.
        OSError: If the episodes file cannot be opened or read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--episodes", required=True)
    args = parser.parse_args()

    episodes = []
    # Iterate the file handle rather than calling ``str.splitlines()``:
    # ``splitlines`` also breaks on U+2028/U+2029/U+0085 and similar
    # characters, which may legally appear unescaped inside JSON strings and
    # would cut a valid record in half. File iteration splits only on real
    # newlines. The encoding is pinned to UTF-8 so parsing does not depend on
    # the platform's locale default.
    with Path(args.episodes).open(encoding="utf-8") as handle:
        for line in handle:
            # Skip empty and whitespace-only lines (e.g. a trailing blank
            # line) instead of handing them to the JSON parser.
            if line.strip():
                episodes.append(json.loads(line))

    print(json.dumps(summarize_episodes(episodes), indent=2, sort_keys=True))
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()