#!/usr/bin/env python3
"""Call HF Space app API for remote A/B evaluation.
This lets us run experiments remotely (for faster iteration workflow) and
store returned JSON/CSV artifacts locally.
"""
from __future__ import annotations
import argparse
import os
import shutil
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Tuple
from gradio_client import Client, handle_file
# Space queried when --space-id is not given on the command line.
DEFAULT_SPACE = "kaier111/camera-motion-ab-eval"
# Judge model used when neither --judge-model nor the JUDGE_MODEL
# environment variable is set.
DEFAULT_JUDGE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
@dataclass(frozen=True)
class RunResult:
    """Immutable record of the four outputs of one remote evaluation run."""

    # Summary text taken from the first output of the Space call.
    summary_md: str
    # File path extracted from the second output (the JSON report).
    json_path: str
    # File path extracted from the third output (the CSV report).
    csv_path: str
    # Log text taken from the fourth output of the Space call.
    logs: str
def _as_file_output_path(value: object) -> Optional[str]:
if isinstance(value, str):
return value
if isinstance(value, dict):
# gradio may return {"path": "..."} in some versions.
p = value.get("path")
return str(p) if p else None
return None
def call_space(
    *,
    space_id: str,
    mode: str,
    hf_token: str,
    enable_llm_judge: bool,
    judge_model: str,
    judge_token: str,
    builtin_cases: str,
    max_shots: int,
    video: str,
    shots_jsonl: str,
    gt_json: str,
    sample_ids: str,
    timeout_sec: int,
) -> RunResult:
    """Run one evaluation job on a Space and return its outputs.

    Connects to ``space_id``, submits a single job to the ``/run_eval``
    endpoint using positional arguments, and blocks until it finishes.

    Args:
        space_id: Identifier of the Space to connect to.
        mode: Evaluation mode, forwarded unchanged.
        hf_token: Token forwarded to the endpoint as an input value.
        enable_llm_judge: Forwarded unchanged.
        judge_model: Forwarded unchanged.
        judge_token: Forwarded unchanged.
        builtin_cases: Forwarded unchanged.
        max_shots: Forwarded to the endpoint converted to ``float``.
        video: File to upload as the video input; an empty string sends a
            placeholder file instead.
        shots_jsonl: File to upload as the shots input; an empty string
            sends a placeholder file instead.
        gt_json: File to upload as the ground-truth input; an empty string
            sends a placeholder file instead.
        sample_ids: Forwarded unchanged.
        timeout_sec: Seconds to wait for the job result. Also used as the
            HTTP client timeout, with a floor of 120 seconds.

    Returns:
        A ``RunResult`` holding the summary text, the JSON and CSV file
        paths, and the log text returned by the endpoint.

    Raises:
        RuntimeError: If the endpoint does not return exactly four outputs,
            or if either file output yields no usable path.
    """
    # Function-scope import: only this function needs tempfile.
    import tempfile

    httpx_timeout = max(120, int(timeout_sec))
    client = Client(space_id, httpx_kwargs={"timeout": httpx_timeout})

    # The placeholder lives in a freshly created private directory rather
    # than at a fixed "/tmp/..." location: that works on platforms without
    # /tmp and cannot be pre-planted by another user of a shared temp dir.
    stub_dir = tempfile.mkdtemp(prefix="space_eval_")
    try:
        stub_path = os.path.join(stub_dir, "space_eval_stub.txt")
        with open(stub_path, "w", encoding="utf-8") as f:
            f.write("stub")

        # Some Gradio versions mark File params as required at API layer even
        # when builtin mode does not consume them; provide stubs as fallback.
        video_arg = handle_file(video) if video else handle_file(stub_path)
        shots_arg = handle_file(shots_jsonl) if shots_jsonl else handle_file(stub_path)
        gt_arg = handle_file(gt_json) if gt_json else handle_file(stub_path)

        # Use positional args to avoid name-mapping drift across
        # gradio-client versions.
        job = client.submit(
            mode,
            hf_token,
            enable_llm_judge,
            judge_model,
            judge_token,
            builtin_cases,
            float(max_shots),
            video_arg,
            shots_arg,
            gt_arg,
            sample_ids,
            api_name="/run_eval",
        )
        out = job.result(timeout=timeout_sec)
    finally:
        # Best-effort cleanup; a leftover placeholder must never mask the
        # real outcome of the job.
        shutil.rmtree(stub_dir, ignore_errors=True)

    if not isinstance(out, (list, tuple)) or len(out) != 4:
        raise RuntimeError(f"Unexpected space output: {type(out)} -> {out}")

    summary_md = str(out[0])
    json_path = _as_file_output_path(out[1])
    csv_path = _as_file_output_path(out[2])
    logs = str(out[3])
    if not json_path or not csv_path:
        raise RuntimeError(f"Space did not return output files. json={out[1]} csv={out[2]}")
    return RunResult(summary_md=summary_md, json_path=json_path, csv_path=csv_path, logs=logs)
def main(argv: Optional[list[str]] = None) -> int:
    """Parse options, run one remote evaluation, and save its artifacts.

    Args:
        argv: Argument list to parse instead of the process command line.
            ``None`` (the default) keeps the normal command-line behaviour,
            so existing ``main()`` callers are unaffected.

    Returns:
        ``0`` after the JSON report, CSV report, and log file are written.

    Raises:
        RuntimeError: If no HF token is supplied via ``--hf-token`` or the
            ``HF_TOKEN`` environment variable, or if custom mode is
            selected without ``--video`` or ``--gt-json``.
    """
    parser = argparse.ArgumentParser(description="Run remote eval against HF Space app API")
    parser.add_argument("--space-id", default=DEFAULT_SPACE)
    parser.add_argument("--mode", choices=("builtin", "custom"), default="custom")
    parser.add_argument("--hf-token", default=os.environ.get("HF_TOKEN", ""))
    parser.add_argument("--enable-llm-judge", action="store_true")
    parser.add_argument("--judge-model", default=os.environ.get("JUDGE_MODEL", DEFAULT_JUDGE_MODEL))
    parser.add_argument("--judge-token", default=os.environ.get("JUDGE_TOKEN", ""))
    parser.add_argument("--builtin-cases", default="baseus,runner,vertical")
    parser.add_argument("--max-shots", type=int, default=1)
    parser.add_argument("--video", default="")
    parser.add_argument("--shots-jsonl", default="")
    parser.add_argument("--gt-json", default="")
    parser.add_argument("--sample-ids", default="")
    parser.add_argument("--timeout-sec", type=int, default=3600)
    parser.add_argument("--out-json", default="space_ab_report.json")
    parser.add_argument("--out-csv", default="space_ab_report.csv")
    parser.add_argument("--out-log", default="space_ab_report.log")
    args = parser.parse_args(argv)

    if not args.hf_token:
        raise RuntimeError("HF token required: --hf-token or HF_TOKEN")
    if args.mode == "custom":
        if not args.video:
            raise RuntimeError("--video is required in custom mode")
        if not args.gt_json:
            raise RuntimeError("--gt-json is required in custom mode")

    res = call_space(
        space_id=args.space_id,
        mode=args.mode,
        hf_token=args.hf_token,
        enable_llm_judge=args.enable_llm_judge,
        judge_model=args.judge_model,
        judge_token=args.judge_token,
        builtin_cases=args.builtin_cases,
        # Clamp so a negative --max-shots is sent as 0 and a non-positive
        # --timeout-sec still waits at least one second.
        max_shots=max(0, args.max_shots),
        video=args.video,
        shots_jsonl=args.shots_jsonl,
        gt_json=args.gt_json,
        sample_ids=args.sample_ids,
        timeout_sec=max(1, args.timeout_sec),
    )

    out_json = Path(args.out_json).resolve()
    out_csv = Path(args.out_csv).resolve()
    out_log = Path(args.out_log).resolve()
    # Create every destination directory before writing anything.
    for target in (out_json, out_csv, out_log):
        target.parent.mkdir(parents=True, exist_ok=True)

    shutil.copyfile(res.json_path, str(out_json))
    shutil.copyfile(res.csv_path, str(out_csv))
    out_log.write_text(res.logs, encoding="utf-8")

    print("[SPACE] summary")
    print(res.summary_md)
    print(f"[SPACE] json -> {out_json}")
    print(f"[SPACE] csv -> {out_csv}")
    print(f"[SPACE] log -> {out_log}")
    return 0
if __name__ == "__main__":
    # Use main()'s integer return value as the process exit status.
    raise SystemExit(main())
|