Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Call HF Space app API for remote A/B evaluation. | |
| This lets us run experiments remotely (for faster iteration workflow) and | |
| store returned JSON/CSV artifacts locally. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import os | |
| import shutil | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| from typing import Optional, Tuple | |
| from gradio_client import Client, handle_file | |
| # Space id used when --space-id is not given on the command line. | |
| DEFAULT_SPACE = "kaier111/camera-motion-ab-eval" | |
| # Judge model used when neither --judge-model nor the JUDGE_MODEL env var is set. | |
| DEFAULT_JUDGE_MODEL = "Qwen/Qwen2.5-7B-Instruct" | |
@dataclass
class RunResult:
    """Outputs of one remote evaluation run, as unpacked by ``call_space``.

    The ``@dataclass`` decorator is required: ``call_space`` builds this
    object with keyword arguments, which a plain annotated class would
    reject with ``TypeError``.

    Attributes:
        summary_md: First element of the Space output, stringified.
        json_path: Local path of the JSON report returned by the Space.
        csv_path: Local path of the CSV report returned by the Space.
        logs: Fourth element of the Space output, stringified.
    """

    summary_md: str
    json_path: str
    csv_path: str
    logs: str
| def _as_file_output_path(value: object) -> Optional[str]: | |
| if isinstance(value, str): | |
| return value | |
| if isinstance(value, dict): | |
| # gradio may return {"path": "..."} in some versions. | |
| p = value.get("path") | |
| return str(p) if p else None | |
| return None | |
def call_space(
    *,
    space_id: str,
    mode: str,
    hf_token: str,
    enable_llm_judge: bool,
    judge_model: str,
    judge_token: str,
    builtin_cases: str,
    max_shots: int,
    video: str,
    shots_jsonl: str,
    gt_json: str,
    sample_ids: str,
    timeout_sec: int,
) -> RunResult:
    """Submit one run to the Space's ``/run_eval`` endpoint and wait for it.

    All inputs are forwarded positionally to the endpoint in the order
    ``mode, hf_token, enable_llm_judge, judge_model, judge_token,
    builtin_cases, max_shots, video, shots_jsonl, gt_json, sample_ids``.

    Args:
        space_id: Identifier passed to ``gradio_client.Client``.
        mode: Evaluation mode forwarded to the Space.
        hf_token: Token forwarded to the Space as an input value. The
            client itself is constructed without it.
        enable_llm_judge: Forwarded flag.
        judge_model: Forwarded judge model name.
        judge_token: Forwarded judge token.
        builtin_cases: Forwarded case list string.
        max_shots: Forwarded as a float.
        video: Path/URL handed to ``handle_file``; empty means "use stub".
        shots_jsonl: Same convention as ``video``.
        gt_json: Same convention as ``video``.
        sample_ids: Forwarded sample id string.
        timeout_sec: Timeout for ``job.result``; also used (with a floor of
            120) as the httpx timeout of the client.

    Returns:
        A ``RunResult`` holding the summary, the JSON/CSV output paths and
        the logs returned by the Space.

    Raises:
        RuntimeError: If the output is not a 4-element list/tuple, or if a
            JSON or CSV output path is missing.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    import tempfile

    httpx_timeout = max(120, int(timeout_sec))
    client = Client(space_id, httpx_kwargs={"timeout": httpx_timeout})

    # The stub lives in a private per-call directory instead of a fixed,
    # predictable /tmp path: portable across platforms, not shared between
    # users or concurrent runs, and removed once the job has finished.
    with tempfile.TemporaryDirectory(prefix="space_eval_") as scratch_dir:
        stub_path = os.path.join(scratch_dir, "space_eval_stub.txt")
        if not (video and shots_jsonl and gt_json):
            # Only materialize the stub when at least one file is missing.
            with open(stub_path, "w", encoding="utf-8") as f:
                f.write("stub")

        # Some Gradio versions mark File params as required at API layer even
        # when builtin mode does not consume them; provide stubs as fallback.
        video_arg = handle_file(video) if video else handle_file(stub_path)
        shots_arg = handle_file(shots_jsonl) if shots_jsonl else handle_file(stub_path)
        gt_arg = handle_file(gt_json) if gt_json else handle_file(stub_path)

        # Use positional args to avoid name-mapping drift across
        # gradio-client versions.
        job = client.submit(
            mode,
            hf_token,
            enable_llm_judge,
            judge_model,
            judge_token,
            builtin_cases,
            float(max_shots),
            video_arg,
            shots_arg,
            gt_arg,
            sample_ids,
            api_name="/run_eval",
        )
        # Wait inside the ``with`` so the stub still exists while the job
        # is running.
        out = job.result(timeout=timeout_sec)

    if not isinstance(out, (list, tuple)) or len(out) != 4:
        raise RuntimeError(f"Unexpected space output: {type(out)} -> {out}")

    summary_md = str(out[0])
    json_path = _as_file_output_path(out[1])
    csv_path = _as_file_output_path(out[2])
    logs = str(out[3])
    if not json_path or not csv_path:
        raise RuntimeError(f"Space did not return output files. json={out[1]} csv={out[2]}")
    return RunResult(summary_md=summary_md, json_path=json_path, csv_path=csv_path, logs=logs)
def main(argv: Optional[list[str]] = None) -> int:
    """Parse CLI arguments, run the remote evaluation and save its artifacts.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so existing ``main()`` callers are
            unaffected; passing a list allows programmatic invocation.

    Returns:
        ``0`` after the JSON, CSV and log outputs have been written.

    Raises:
        RuntimeError: If no HF token is available, or if ``--video`` or
            ``--gt-json`` is missing in ``custom`` mode.
    """
    parser = argparse.ArgumentParser(description="Run remote eval against HF Space app API")
    parser.add_argument("--space-id", default=DEFAULT_SPACE)
    parser.add_argument("--mode", choices=("builtin", "custom"), default="custom")
    parser.add_argument("--hf-token", default=os.environ.get("HF_TOKEN", ""))
    parser.add_argument("--enable-llm-judge", action="store_true")
    parser.add_argument("--judge-model", default=os.environ.get("JUDGE_MODEL", DEFAULT_JUDGE_MODEL))
    parser.add_argument("--judge-token", default=os.environ.get("JUDGE_TOKEN", ""))
    parser.add_argument("--builtin-cases", default="baseus,runner,vertical")
    parser.add_argument("--max-shots", type=int, default=1)
    parser.add_argument("--video", default="")
    parser.add_argument("--shots-jsonl", default="")
    parser.add_argument("--gt-json", default="")
    parser.add_argument("--sample-ids", default="")
    parser.add_argument("--timeout-sec", type=int, default=3600)
    parser.add_argument("--out-json", default="space_ab_report.json")
    parser.add_argument("--out-csv", default="space_ab_report.csv")
    parser.add_argument("--out-log", default="space_ab_report.log")
    args = parser.parse_args(argv)

    # Validate before any network activity.
    if not args.hf_token:
        raise RuntimeError("HF token required: --hf-token or HF_TOKEN")
    if args.mode == "custom":
        if not args.video:
            raise RuntimeError("--video is required in custom mode")
        if not args.gt_json:
            raise RuntimeError("--gt-json is required in custom mode")

    res = call_space(
        space_id=args.space_id,
        mode=args.mode,
        hf_token=args.hf_token,
        enable_llm_judge=args.enable_llm_judge,
        judge_model=args.judge_model,
        judge_token=args.judge_token,
        builtin_cases=args.builtin_cases,
        # Clamp: negative shot counts become 0, timeouts below 1 become 1.
        max_shots=max(0, args.max_shots),
        video=args.video,
        shots_jsonl=args.shots_jsonl,
        gt_json=args.gt_json,
        sample_ids=args.sample_ids,
        timeout_sec=max(1, args.timeout_sec),
    )

    out_json = Path(args.out_json).resolve()
    out_csv = Path(args.out_csv).resolve()
    out_log = Path(args.out_log).resolve()
    for target in (out_json, out_csv, out_log):
        target.parent.mkdir(parents=True, exist_ok=True)

    # Copy the returned report files to the requested output locations and
    # persist the logs next to them.
    shutil.copyfile(res.json_path, str(out_json))
    shutil.copyfile(res.csv_path, str(out_csv))
    out_log.write_text(res.logs, encoding="utf-8")

    print("[SPACE] summary")
    print(res.summary_md)
    print(f"[SPACE] json -> {out_json}")
    print(f"[SPACE] csv -> {out_csv}")
    print(f"[SPACE] log -> {out_log}")
    return 0
| # Script entry point: exit the process with the status code returned by main(). | |
| if __name__ == "__main__": | |
| raise SystemExit(main()) | |