Spaces:

remdms
/

mediastorm

Sleeping

remdms Claude Sonnet 4.6 committed on Mar 31

Commit

a36a4f5

1 Parent(s): 51fa501

feat(eval): add cli.py eval command with diff and history

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show

cli.py CHANGED Viewed

@@ -192,5 +192,46 @@ def audit(subset: int, verbose: bool):
     asyncio.run(run_audit(subset=subset, verbose=verbose))
 if __name__ == "__main__":
     cli()

     asyncio.run(run_audit(subset=subset, verbose=verbose))
+@cli.command(name="eval")
+@click.option("--verbose", "-v", is_flag=True, help="Show per-query details.")
+@click.option("--history", is_flag=True, help="Show history of past runs.")
+def eval_cmd(verbose: bool, history: bool):
+    """Run retrieval evaluation and compare to previous run."""
+    from mediastorm.eval.runner import (
+        _build_run_data, save_run, load_previous_run, load_all_runs,
+    )
+    from mediastorm.eval.display import (
+        print_scores, print_verbose, print_diff, print_history,
+    )
+    if history:
+        runs = load_all_runs()
+        print_history(runs)
+        return
+    # Load previous run BEFORE running eval (so the new run isn't compared to itself)
+    previous = load_previous_run()
+    # run_eval is always called with verbose=False, quiet=True, even under --verbose; per-query details are printed below by print_verbose
+    from eval_retrieval import run_eval
+    click.echo("Running retrieval evaluation...")
+    eval_result = asyncio.run(run_eval(verbose=False, quiet=True))
+    # Build the run record from the eval result and save it; the path returned by save_run is echoed to the user
+    run_data = _build_run_data(eval_result)
+    path = save_run(run_data)
+    click.echo(f"Results saved to {path}")
+    # Output order: per-query details (only with --verbose), then scores, then the diff against the previous run if one was loaded
+    if verbose:
+        print_verbose(run_data)
+    print_scores(run_data)
+    if previous:
+        print_diff(run_data, previous)
+    else:
+        click.echo("\nFirst run — no comparison available.")
 if __name__ == "__main__":
     cli()