Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Minimal Hugging Face Spaces app for VideoMAE A/B + optional LLM judge.""" | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import subprocess | |
| import sys | |
| import tempfile | |
| from typing import List, Optional | |
| import gradio as gr | |
# Model id handed to the judge; the JUDGE_MODEL environment variable, when set,
# takes precedence over the built-in default.
DEFAULT_JUDGE_MODEL = os.getenv("JUDGE_MODEL", "Qwen/Qwen2.5-7B-Instruct")

# Keys looked up in the report's "summary" mapping, one summary-table row each,
# rendered in this order.
VARIANT_KEYS: List[str] = [
    "A_baseline_locked",
    "B_replay_priority",
    "C_replay_plus_cv_shake_static",
    "D_replay_plus_compound_rescue",
]
def _build_summary_md(payload: dict) -> str:
    """Render the benchmark report's summary as a Markdown document.

    The output is a heading, a table with one row per entry of
    ``VARIANT_KEYS`` (any metric absent from the payload is shown as ``0``),
    and finally the raw ``summary`` mapping as a fenced JSON block.
    """
    stats_by_variant = payload.get("summary", {})

    # Column order of the table body; must line up with the header row below.
    metric_fields = (
        "strict_hit_rate",
        "strict_hits",
        "scored_shots",
        "llm_overall_mean",
        "llm_pass_rate",
        "llm_judged",
    )

    table_rows = []
    for variant in VARIANT_KEYS:
        stats = stats_by_variant.get(variant, {})
        cells = " | ".join(str(stats.get(field, 0)) for field in metric_fields)
        table_rows.append(f"| `{variant}` | {cells} |")

    raw_dump = json.dumps(stats_by_variant, ensure_ascii=False, indent=2)

    document = [
        "### Benchmark Summary",
        "",
        "| Variant | Strict Hit Rate | Strict Hits | Scored Shots | LLM Mean | LLM Pass Rate | LLM Judged |",
        "|---|---:|---:|---:|---:|---:|---:|",
        *table_rows,
        "",
        "```json",
        raw_dump,
        "```",
    ]
    return "\n".join(document)
def run_eval(
    mode: str,
    hf_token: str,
    enable_llm_judge: bool,
    judge_model: str,
    judge_token: str,
    builtin_cases: str,
    max_shots: int,
    video_path: Optional[str],
    shots_jsonl_path: Optional[str],
    gt_json_path: Optional[str],
    sample_ids: str,
) -> tuple[str, Optional[str], Optional[str], str]:
    """Run ``run_videomae_ab_test.py`` in a subprocess and collect its outputs.

    Args:
        mode: ``"builtin"`` runs the script's built-in cases; any other value
            is treated as custom mode and requires ``video_path`` and
            ``gt_json_path``.
        hf_token: Hugging Face token; falls back to the ``HF_TOKEN``
            environment variable when blank.
        enable_llm_judge: When true, adds ``--llm-judge`` and the judge options.
        judge_model: Judge model id; blank or whitespace-only falls back to
            ``DEFAULT_JUDGE_MODEL``.
        judge_token: Judge token; falls back to the ``JUDGE_TOKEN``
            environment variable when blank, and is omitted if still empty.
        builtin_cases: Value for ``--cases`` in builtin mode (skipped if blank).
        max_shots: Value for ``--max-shots``; negative values are clamped to 0.
        video_path: Video file path (custom mode).
        shots_jsonl_path: Optional shot-boundary JSONL path (custom mode).
        gt_json_path: Ground-truth JSON path (custom mode).
        sample_ids: Value for ``--sample-ids`` (skipped if blank).

    Returns:
        ``(markdown, json_path, csv_path, log_tail)``. On any failure the
        first element is a user-facing error message and both paths are
        ``None``. ``log_tail`` is the last 12000 characters of the combined
        stdout/stderr with the tokens masked.
    """
    import shutil  # function-scope import: only needed for failure cleanup

    # Gradio API can pass `/app` placeholder for empty File inputs.
    if video_path and os.path.isdir(video_path):
        video_path = None
    if shots_jsonl_path and os.path.isdir(shots_jsonl_path):
        shots_jsonl_path = None
    if gt_json_path and os.path.isdir(gt_json_path):
        gt_json_path = None

    token = (hf_token or "").strip() or os.environ.get("HF_TOKEN", "").strip()
    if not token:
        return ("HF token 为空。请在输入框填 `hf_...` 或在 Space Secret 设置 `HF_TOKEN`。", None, None, "")

    # Validate everything before touching the disk so that a rejected request
    # does not leave an orphaned temporary directory behind.
    if mode != "builtin":
        if not video_path:
            return ("自定义模式缺少视频文件。", None, None, "")
        if not gt_json_path:
            return ("自定义模式缺少 GT 文件(json)。", None, None, "")
    shot_limit = str(max(0, int(max_shots)))

    tmpdir = tempfile.mkdtemp(prefix="videomae_ab_")
    output_json = os.path.join(tmpdir, "ab_report.json")
    output_csv = os.path.join(tmpdir, "ab_report.csv")

    # NOTE(review): the token travels on the child's argv, which other local
    # processes may be able to read; changing that needs support in the script.
    cmd = [
        sys.executable,
        "run_videomae_ab_test.py",
        "--hf-token",
        token,
        "--output-json",
        output_json,
        "--output-csv",
        output_csv,
        "--max-shots",
        shot_limit,
    ]
    if mode == "builtin":
        if (builtin_cases or "").strip():
            cmd.extend(["--cases", builtin_cases.strip()])
    else:
        cmd.extend(["--video", video_path, "--gt-json", gt_json_path, "--case-name", "space_custom"])
        if shots_jsonl_path:
            cmd.extend(["--shots-jsonl", shots_jsonl_path])
    if (sample_ids or "").strip():
        cmd.extend(["--sample-ids", sample_ids.strip()])

    secrets_to_mask = [token]
    if enable_llm_judge:
        cmd.append("--llm-judge")
        # Strip first, then fall back: a whitespace-only textbox value must not
        # become an empty --judge-model argument.
        model_name = (judge_model or "").strip() or DEFAULT_JUDGE_MODEL.strip()
        cmd.extend(["--judge-model", model_name])
        jt = (judge_token or "").strip() or os.environ.get("JUDGE_TOKEN", "").strip()
        if jt:
            cmd.extend(["--judge-token", jt])
            secrets_to_mask.append(jt)

    proc = subprocess.run(cmd, capture_output=True, text=True)
    logs = ((proc.stdout or "") + "\n" + (proc.stderr or "")).strip()
    # The logs are shown to whoever triggered the run, while the tokens may
    # come from Space secrets; mask them before truncating so a secret cannot
    # survive as a partial prefix at the cut point.
    for secret in secrets_to_mask:
        logs = logs.replace(secret, "***")

    failure_msg = "运行失败,请检查日志。"
    if proc.returncode != 0:
        shutil.rmtree(tmpdir, ignore_errors=True)
        return (failure_msg, None, None, logs[-12000:])

    # A zero exit code does not guarantee a readable report; report that as a
    # normal failure instead of raising out of the Gradio handler.
    problem = None
    payload = None
    try:
        with open(output_json, "r", encoding="utf-8") as f:
            payload = json.load(f)
    except (OSError, ValueError) as exc:
        problem = f"could not read report {output_json}: {exc}"
    else:
        if not isinstance(payload, dict):
            problem = f"report {output_json} is not a JSON object"
    if problem is not None:
        shutil.rmtree(tmpdir, ignore_errors=True)
        logs = f"{logs}\n[app] {problem}".strip()
        return (failure_msg, None, None, logs[-12000:])

    # Only offer the CSV for download when the script actually wrote it.
    csv_result = output_csv if os.path.isfile(output_csv) else None
    return (_build_summary_md(payload), output_json, csv_result, logs[-12000:])
def run_eval_api(
    mode: str,
    hf_token: str,
    enable_llm_judge: bool,
    judge_model: str,
    judge_token: str,
    builtin_cases: str,
    max_shots: int,
    video_remote_path: str,
    shots_jsonl_remote_path: str,
    gt_json_remote_path: str,
    sample_ids: str,
) -> tuple[str, Optional[str], Optional[str], str]:
    """Variant of ``run_eval`` that takes the three file inputs as plain strings.

    Each ``*_remote_path`` value is stripped of surrounding whitespace; a
    blank (or ``None``) value is forwarded as ``None``. Every other argument
    is passed through unchanged, and the return value is whatever
    ``run_eval`` returns.
    """

    def _path_or_none(raw: str) -> Optional[str]:
        # Blank / whitespace-only / None all collapse to None.
        cleaned = (raw or "").strip()
        return cleaned or None

    video = _path_or_none(video_remote_path)
    shots_jsonl = _path_or_none(shots_jsonl_remote_path)
    gt_json = _path_or_none(gt_json_remote_path)

    return run_eval(
        mode=mode,
        hf_token=hf_token,
        enable_llm_judge=enable_llm_judge,
        judge_model=judge_model,
        judge_token=judge_token,
        builtin_cases=builtin_cases,
        max_shots=max_shots,
        video_path=video,
        shots_jsonl_path=shots_jsonl,
        gt_json_path=gt_json,
        sample_ids=sample_ids,
    )
# Gradio UI: input controls, output widgets, and the two click handlers that
# feed them into run_eval / run_eval_api.
with gr.Blocks(title="VideoMAE Camera Motion A/B") as demo:
    gr.Markdown(
        "# VideoMAE 运镜 A/B + LLM 评委\n"
        "默认是内置样本快速跑;切到自定义模式可上传你的视频/镜头边界/GT。"
    )
    # Run mode, built-in case filter and per-case shot limit.
    with gr.Row():
        mode = gr.Radio(
            choices=[("内置样本", "builtin"), ("自定义上传", "custom")],
            value="builtin",
            label="运行模式",
        )
        builtin_cases = gr.Textbox(
            value="baseus,runner,vertical",
            label="内置 case 过滤",
            info="留空=全部内置样本;逗号分隔",
        )
        max_shots = gr.Slider(0, 20, value=3, step=1, label="每个 case 最大镜头数")
    # Credentials and LLM-judge settings.
    with gr.Row():
        hf_token = gr.Textbox(label="HF Token", type="password", placeholder="hf_xxx")
        enable_llm_judge = gr.Checkbox(value=True, label="启用 LLM 评委")
        judge_model = gr.Textbox(value=DEFAULT_JUDGE_MODEL, label="Judge Model")
        judge_token = gr.Textbox(label="Judge Token(可选)", type="password")
    # File uploads; type="filepath" hands the handler a path string.
    with gr.Row():
        video_path = gr.File(label="视频文件", type="filepath")
        shots_jsonl_path = gr.File(label="镜头边界 JSONL(可选)", type="filepath")
        gt_json_path = gr.File(label="GT JSON", type="filepath")
    # API-only string paths to bypass File preprocessor in queued remote calls.
    video_remote_path = gr.Textbox(visible=False)
    shots_jsonl_remote_path = gr.Textbox(visible=False)
    gt_json_remote_path = gr.Textbox(visible=False)
    # Hidden button whose click event backs the `run_eval_api` endpoint below.
    api_run_btn = gr.Button("API_RUN", visible=False)
    sample_ids = gr.Textbox(label="sample ids(可选)", placeholder="如: 1,2,3")
    run_btn = gr.Button("开始评测", variant="primary")
    # Output widgets, in the same order as the 4-tuple the handlers return.
    summary_md = gr.Markdown()
    out_json = gr.File(label="输出 JSON")
    out_csv = gr.File(label="输出 CSV")
    logs = gr.Textbox(label="运行日志", lines=16)
    # Interactive path: inputs are listed in run_eval's parameter order and
    # use the File upload components.
    run_btn.click(
        fn=run_eval,
        inputs=[
            mode,
            hf_token,
            enable_llm_judge,
            judge_model,
            judge_token,
            builtin_cases,
            max_shots,
            video_path,
            shots_jsonl_path,
            gt_json_path,
            sample_ids,
        ],
        outputs=[summary_md, out_json, out_csv, logs],
    )
    # API path: same inputs, except the three File components are replaced by
    # the hidden string Textboxes.
    api_run_btn.click(
        fn=run_eval_api,
        inputs=[
            mode,
            hf_token,
            enable_llm_judge,
            judge_model,
            judge_token,
            builtin_cases,
            max_shots,
            video_remote_path,
            shots_jsonl_remote_path,
            gt_json_remote_path,
            sample_ids,
        ],
        outputs=[summary_md, out_json, out_csv, logs],
        api_name="run_eval_api",
    )
if __name__ == "__main__":
    # Listen on all interfaces; use the PORT environment variable when it is
    # set, otherwise port 7860.
    listen_port = int(os.environ.get("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=listen_port, show_error=True)