#!/usr/bin/env python3
"""Call HF Space app API for remote A/B evaluation.

This lets us run experiments remotely (for faster iteration workflow) and
store returned JSON/CSV artifacts locally.
"""

from __future__ import annotations

import argparse
import os
import shutil
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Tuple

from gradio_client import Client, handle_file

DEFAULT_SPACE = "kaier111/camera-motion-ab-eval"
DEFAULT_JUDGE_MODEL = "Qwen/Qwen2.5-7B-Instruct"

# File name of the placeholder uploaded for File inputs the caller left empty.
_STUB_FILE_NAME = "space_eval_stub.txt"


@dataclass(frozen=True)
class RunResult:
    """Outputs of one ``/run_eval`` call, as returned by :func:`call_space`."""

    # First element of the Space output, converted with ``str()``.
    summary_md: str
    # Local path of the JSON file output (second element of the Space output).
    json_path: str
    # Local path of the CSV file output (third element of the Space output).
    csv_path: str
    # Fourth element of the Space output, converted with ``str()``.
    logs: str


def _as_file_output_path(value: object) -> Optional[str]:
    """Extract a file path from one file-typed output value.

    Args:
        value: Either a plain string path or a dict carrying a ``"path"`` key.

    Returns:
        The string itself when ``value`` is a ``str``; ``str(value["path"])``
        when ``value`` is a dict with a truthy ``"path"``; otherwise ``None``.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        # gradio may return {"path": "..."} in some versions.
        p = value.get("path")
        return str(p) if p else None
    return None


def call_space(
    *,
    space_id: str,
    mode: str,
    hf_token: str,
    enable_llm_judge: bool,
    judge_model: str,
    judge_token: str,
    builtin_cases: str,
    max_shots: int,
    video: str,
    shots_jsonl: str,
    gt_json: str,
    sample_ids: str,
    timeout_sec: int,
) -> RunResult:
    """Submit one evaluation job to the Space's ``/run_eval`` endpoint.

    Args:
        space_id: Identifier passed to ``gradio_client.Client``.
        mode: Forwarded as the first positional endpoint argument.
        hf_token: Forwarded to the endpoint as an argument.
        enable_llm_judge: Forwarded to the endpoint.
        judge_model: Forwarded to the endpoint.
        judge_token: Forwarded to the endpoint.
        builtin_cases: Forwarded to the endpoint.
        max_shots: Forwarded to the endpoint after conversion to ``float``.
        video: Local path to upload; an empty string uploads a stub file.
        shots_jsonl: Local path to upload; an empty string uploads a stub file.
        gt_json: Local path to upload; an empty string uploads a stub file.
        sample_ids: Forwarded to the endpoint.
        timeout_sec: Passed to ``job.result`` as the wait timeout; the HTTP
            client timeout is this value raised to at least 120.

    Returns:
        A :class:`RunResult` built from the four endpoint outputs.

    Raises:
        RuntimeError: If the endpoint output is not a 4-element list/tuple,
            or if either file output yields no path.
    """
    httpx_timeout = max(120, int(timeout_sec))
    client = Client(space_id, httpx_kwargs={"timeout": httpx_timeout})

    # The stub lives in a private temporary directory rather than at a fixed,
    # predictable path in a shared temp location: this works on platforms
    # without /tmp, cannot collide with a file or symlink created by someone
    # else, and is removed once the call finishes.
    with tempfile.TemporaryDirectory(prefix="space_eval_") as stub_dir:
        stub_path = Path(stub_dir) / _STUB_FILE_NAME
        stub_path.write_text("stub", encoding="utf-8")

        def upload_arg(path: str):
            """Wrap ``path`` for upload, falling back to the stub if empty."""
            return handle_file(path) if path else handle_file(str(stub_path))

        # Some Gradio versions mark File params as required at API layer even
        # when builtin mode does not consume them; provide stubs as fallback.
        video_arg = upload_arg(video)
        shots_arg = upload_arg(shots_jsonl)
        gt_arg = upload_arg(gt_json)

        # Use positional args to avoid name-mapping drift across
        # gradio-client versions.
        job = client.submit(
            mode,
            hf_token,
            enable_llm_judge,
            judge_model,
            judge_token,
            builtin_cases,
            float(max_shots),
            video_arg,
            shots_arg,
            gt_arg,
            sample_ids,
            api_name="/run_eval",
        )
        # Wait inside the ``with`` block so the stub is still on disk for as
        # long as the job may need to read it.
        out = job.result(timeout=timeout_sec)

    if not isinstance(out, (list, tuple)) or len(out) != 4:
        raise RuntimeError(f"Unexpected space output: {type(out)} -> {out}")

    summary_md = str(out[0])
    json_path = _as_file_output_path(out[1])
    csv_path = _as_file_output_path(out[2])
    logs = str(out[3])
    if not json_path or not csv_path:
        raise RuntimeError(f"Space did not return output files. json={out[1]} csv={out[2]}")
    return RunResult(summary_md=summary_md, json_path=json_path, csv_path=csv_path, logs=logs)


def main() -> int:
    """Parse CLI arguments, run the remote evaluation, and save its artifacts.

    The JSON and CSV files returned by the Space are copied to ``--out-json``
    and ``--out-csv``, the logs are written to ``--out-log``, and the summary
    plus the three destination paths are printed.

    Returns:
        ``0`` on success.

    Raises:
        RuntimeError: If no HF token is supplied, or if ``--video`` or
            ``--gt-json`` is missing in ``custom`` mode.
    """
    parser = argparse.ArgumentParser(description="Run remote eval against HF Space app API")
    parser.add_argument("--space-id", default=DEFAULT_SPACE)
    parser.add_argument("--mode", choices=("builtin", "custom"), default="custom")
    parser.add_argument("--hf-token", default=os.environ.get("HF_TOKEN", ""))
    parser.add_argument("--enable-llm-judge", action="store_true")
    parser.add_argument("--judge-model", default=os.environ.get("JUDGE_MODEL", DEFAULT_JUDGE_MODEL))
    parser.add_argument("--judge-token", default=os.environ.get("JUDGE_TOKEN", ""))
    parser.add_argument("--builtin-cases", default="baseus,runner,vertical")
    parser.add_argument("--max-shots", type=int, default=1)
    parser.add_argument("--video", default="")
    parser.add_argument("--shots-jsonl", default="")
    parser.add_argument("--gt-json", default="")
    parser.add_argument("--sample-ids", default="")
    parser.add_argument("--timeout-sec", type=int, default=3600)
    parser.add_argument("--out-json", default="space_ab_report.json")
    parser.add_argument("--out-csv", default="space_ab_report.csv")
    parser.add_argument("--out-log", default="space_ab_report.log")
    args = parser.parse_args()

    if not args.hf_token:
        raise RuntimeError("HF token required: --hf-token or HF_TOKEN")

    if args.mode == "custom":
        if not args.video:
            raise RuntimeError("--video is required in custom mode")
        if not args.gt_json:
            raise RuntimeError("--gt-json is required in custom mode")

    res = call_space(
        space_id=args.space_id,
        mode=args.mode,
        hf_token=args.hf_token,
        enable_llm_judge=bool(args.enable_llm_judge),
        judge_model=args.judge_model,
        judge_token=args.judge_token,
        builtin_cases=args.builtin_cases,
        # Clamp to non-negative / at-least-one before handing off.
        max_shots=max(0, int(args.max_shots)),
        video=args.video,
        shots_jsonl=args.shots_jsonl,
        gt_json=args.gt_json,
        sample_ids=args.sample_ids,
        timeout_sec=max(1, int(args.timeout_sec)),
    )

    out_json = Path(args.out_json).resolve()
    out_csv = Path(args.out_csv).resolve()
    out_log = Path(args.out_log).resolve()
    # Make sure every destination directory exists before writing into it.
    out_json.parent.mkdir(parents=True, exist_ok=True)
    out_csv.parent.mkdir(parents=True, exist_ok=True)
    out_log.parent.mkdir(parents=True, exist_ok=True)

    shutil.copyfile(res.json_path, str(out_json))
    shutil.copyfile(res.csv_path, str(out_csv))
    out_log.write_text(res.logs, encoding="utf-8")

    print("[SPACE] summary")
    print(res.summary_md)
    print(f"[SPACE] json -> {out_json}")
    print(f"[SPACE] csv -> {out_csv}")
    print(f"[SPACE] log -> {out_log}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())