File size: 5,568 Bytes
3b6218c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc95314
 
9f04d42
 
3b6218c
dc95314
 
 
 
 
3b6218c
dc95314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b6218c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/env python3
"""Call HF Space app API for remote A/B evaluation.

This lets us run experiments remotely (for a faster iteration workflow) and
store the returned JSON/CSV artifacts locally.
"""

from __future__ import annotations

import argparse
import os
import shutil
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Tuple

from gradio_client import Client, handle_file


# Space identifier used when --space-id is not given on the command line.
DEFAULT_SPACE = "kaier111/camera-motion-ab-eval"
# Judge model used when neither --judge-model nor the JUDGE_MODEL environment
# variable is set.
DEFAULT_JUDGE_MODEL = "Qwen/Qwen2.5-7B-Instruct"


@dataclass(frozen=True)
class RunResult:
    """Immutable record of the four outputs of one remote run (see ``call_space``)."""

    # First Space output converted with ``str()``; presumably Markdown, per the name.
    summary_md: str
    # Path of the JSON report file, extracted from the second Space output.
    json_path: str
    # Path of the CSV report file, extracted from the third Space output.
    csv_path: str
    # Fourth Space output converted with ``str()``.
    logs: str


def _as_file_output_path(value: object) -> Optional[str]:
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        # gradio may return {"path": "..."} in some versions.
        p = value.get("path")
        return str(p) if p else None
    return None


def call_space(
    *,
    space_id: str,
    mode: str,
    hf_token: str,
    enable_llm_judge: bool,
    judge_model: str,
    judge_token: str,
    builtin_cases: str,
    max_shots: int,
    video: str,
    shots_jsonl: str,
    gt_json: str,
    sample_ids: str,
    timeout_sec: int,
) -> RunResult:
    """Submit one job to the Space's ``/run_eval`` endpoint and wait for it.

    Args:
        space_id: Identifier passed to ``gradio_client.Client``.
        mode: Forwarded unchanged to the endpoint.
        hf_token: Forwarded unchanged to the endpoint.
        enable_llm_judge: Forwarded unchanged to the endpoint.
        judge_model: Forwarded unchanged to the endpoint.
        judge_token: Forwarded unchanged to the endpoint.
        builtin_cases: Forwarded unchanged to the endpoint.
        max_shots: Forwarded to the endpoint as a ``float``.
        video: File to upload via ``handle_file``; an empty string means
            "not provided" and a small placeholder file is uploaded instead.
        shots_jsonl: Same convention as ``video``.
        gt_json: Same convention as ``video``.
        sample_ids: Forwarded unchanged to the endpoint.
        timeout_sec: Seconds to wait for the job result. The HTTP client
            timeout uses the same value but is never lower than 120 seconds.

    Returns:
        A ``RunResult`` holding the summary text, the JSON and CSV file
        paths, and the log text returned by the Space.

    Raises:
        RuntimeError: If the Space does not return exactly four outputs, or
            if either file output yields no usable path.
    """
    httpx_timeout = max(120, int(timeout_sec))
    client = Client(space_id, httpx_kwargs={"timeout": httpx_timeout})

    # Keep the placeholder in the platform's temp directory instead of a
    # hard-coded "/tmp", which does not exist on every OS.
    stub_path = os.path.join(tempfile.gettempdir(), "space_eval_stub.txt")

    def _file_arg(path: str):
        """Wrap ``path`` for upload, falling back to the placeholder stub."""
        if path:
            return handle_file(path)
        # Create the stub lazily: only when some input is actually missing.
        if not os.path.exists(stub_path):
            with open(stub_path, "w", encoding="utf-8") as f:
                f.write("stub")
        return handle_file(stub_path)

    # Some Gradio versions mark File params as required at API layer even
    # when builtin mode does not consume them; provide stubs as fallback.
    video_arg = _file_arg(video)
    shots_arg = _file_arg(shots_jsonl)
    gt_arg = _file_arg(gt_json)

    # Use positional args to avoid name-mapping drift across gradio-client versions.
    job = client.submit(
        mode,
        hf_token,
        enable_llm_judge,
        judge_model,
        judge_token,
        builtin_cases,
        float(max_shots),
        video_arg,
        shots_arg,
        gt_arg,
        sample_ids,
        api_name="/run_eval",
    )
    out = job.result(timeout=timeout_sec)
    if not isinstance(out, (list, tuple)) or len(out) != 4:
        raise RuntimeError(f"Unexpected space output: {type(out)} -> {out}")

    summary_md = str(out[0])
    json_path = _as_file_output_path(out[1])
    csv_path = _as_file_output_path(out[2])
    logs = str(out[3])
    # An empty-string path is as unusable as a missing one, hence the
    # truthiness test rather than an ``is None`` check.
    if not json_path or not csv_path:
        raise RuntimeError(f"Space did not return output files. json={out[1]} csv={out[2]}")
    return RunResult(summary_md=summary_md, json_path=json_path, csv_path=csv_path, logs=logs)


def main() -> int:
    """Parse CLI options, run the remote evaluation, and save its artifacts.

    The JSON and CSV files returned by the Space are copied to ``--out-json``
    and ``--out-csv``, the log text is written to ``--out-log``, and the
    summary plus the three destination paths are printed to stdout.

    Returns:
        ``0`` on success.

    Raises:
        RuntimeError: If no HF token is available (neither ``--hf-token`` nor
            ``HF_TOKEN``), or if ``--video`` or ``--gt-json`` is missing in
            custom mode.
    """
    env = os.environ
    cli = argparse.ArgumentParser(description="Run remote eval against HF Space app API")
    cli.add_argument("--space-id", default=DEFAULT_SPACE)
    cli.add_argument("--mode", choices=("builtin", "custom"), default="custom")
    cli.add_argument("--hf-token", default=env.get("HF_TOKEN", ""))
    cli.add_argument("--enable-llm-judge", action="store_true")
    cli.add_argument("--judge-model", default=env.get("JUDGE_MODEL", DEFAULT_JUDGE_MODEL))
    cli.add_argument("--judge-token", default=env.get("JUDGE_TOKEN", ""))
    cli.add_argument("--builtin-cases", default="baseus,runner,vertical")
    cli.add_argument("--max-shots", type=int, default=1)
    cli.add_argument("--video", default="")
    cli.add_argument("--shots-jsonl", default="")
    cli.add_argument("--gt-json", default="")
    cli.add_argument("--sample-ids", default="")
    cli.add_argument("--timeout-sec", type=int, default=3600)
    cli.add_argument("--out-json", default="space_ab_report.json")
    cli.add_argument("--out-csv", default="space_ab_report.csv")
    cli.add_argument("--out-log", default="space_ab_report.log")
    opts = cli.parse_args()

    # Validate before any network traffic happens.
    if not opts.hf_token:
        raise RuntimeError("HF token required: --hf-token or HF_TOKEN")
    custom_mode = opts.mode == "custom"
    if custom_mode and not opts.video:
        raise RuntimeError("--video is required in custom mode")
    if custom_mode and not opts.gt_json:
        raise RuntimeError("--gt-json is required in custom mode")

    # Clamp numeric options: shot count is never negative, timeout is at
    # least one second.
    shot_cap = max(0, int(opts.max_shots))
    wait_sec = max(1, int(opts.timeout_sec))

    res = call_space(
        space_id=opts.space_id,
        mode=opts.mode,
        hf_token=opts.hf_token,
        enable_llm_judge=bool(opts.enable_llm_judge),
        judge_model=opts.judge_model,
        judge_token=opts.judge_token,
        builtin_cases=opts.builtin_cases,
        max_shots=shot_cap,
        video=opts.video,
        shots_jsonl=opts.shots_jsonl,
        gt_json=opts.gt_json,
        sample_ids=opts.sample_ids,
        timeout_sec=wait_sec,
    )

    out_json = Path(opts.out_json).resolve()
    out_csv = Path(opts.out_csv).resolve()
    out_log = Path(opts.out_log).resolve()

    # Make sure every destination directory exists before writing anything.
    for destination in (out_json, out_csv, out_log):
        destination.parent.mkdir(parents=True, exist_ok=True)

    # Copy the files returned by the Space, then persist the log text.
    for returned_file, destination in ((res.json_path, out_json), (res.csv_path, out_csv)):
        shutil.copyfile(returned_file, str(destination))
    out_log.write_text(res.logs, encoding="utf-8")

    print("[SPACE] summary")
    print(res.summary_md)
    print(f"[SPACE] json -> {out_json}")
    print(f"[SPACE] csv  -> {out_csv}")
    print(f"[SPACE] log  -> {out_log}")
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())