from __future__ import annotations import argparse import base64 import html import os import re import shutil import subprocess import warnings from pathlib import Path from typing import Any, Tuple import gradio as gr from gradio import processing_utils as gr_processing_utils try: import imageio_ffmpeg except ImportError: # pragma: no cover - optional runtime dependency imageio_ffmpeg = None from study_utils import ( CHOICE_OPTIONS, build_completion_markdown, build_question_payload, create_or_resume_participant, ensure_runtime_dirs, ensure_video_thumbnail, generate_participant_id, get_instruction_case, get_results_dir, load_study_config, move_question_pointer, prepare_reference_videos_for_web, sanitize_participant_id, save_current_answer, ensure_synchronized_study_videos, upgrade_existing_results_schema, ) PROJECT_ROOT = Path(__file__).resolve().parent def default_server_name() -> str: return "0.0.0.0" if os.environ.get("SPACE_ID") else "127.0.0.1" def ensure_local_ffmpeg() -> None: if imageio_ffmpeg is None: return ffmpeg_source = Path(imageio_ffmpeg.get_ffmpeg_exe()).resolve() runtime_bin = get_results_dir(PROJECT_ROOT) / "runtime_bin" runtime_bin.mkdir(parents=True, exist_ok=True) ffmpeg_target = runtime_bin / "ffmpeg.exe" if not ffmpeg_target.exists(): shutil.copy2(ffmpeg_source, ffmpeg_target) os.environ["IMAGEIO_FFMPEG_EXE"] = str(ffmpeg_target) current_path = os.environ.get("PATH", "") runtime_bin_str = str(runtime_bin) if runtime_bin_str.lower() not in current_path.lower(): os.environ["PATH"] = runtime_bin_str + os.pathsep + current_path def _probe_video_codec_with_ffmpeg(video_path: str | Path) -> tuple[str, str]: if imageio_ffmpeg is None: return "", "" path = Path(video_path) ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe() result = subprocess.run( [ffmpeg_exe, "-i", str(path)], capture_output=True, text=True, encoding="utf-8", errors="ignore", ) probe_text = result.stderr or "" codec_match = re.search(r"Video:\s*([^\s,(]+)", probe_text) codec_name = (codec_match.group(1) if codec_match else "").strip().lower() return path.suffix.lower(), codec_name def patched_video_is_playable(video_filepath: str) -> bool: """ Avoid Gradio's hard dependency on ffprobe by checking playability with the bundled imageio-ffmpeg binary instead. """ try: container, video_codec = _probe_video_codec_with_ffmpeg(video_filepath) return (container, video_codec) in { (".mp4", "h264"), (".mp4", "av1"), (".ogg", "theora"), (".webm", "vp9"), (".webm", "vp8"), (".webm", "av1"), } except Exception: return True def patch_gradio_video_probe() -> None: gr_processing_utils.video_is_playable = patched_video_is_playable warnings.filterwarnings( "ignore", message=r"The 'css' parameter in the Blocks constructor will be removed in Gradio 6\.0\..*", category=DeprecationWarning, ) warnings.filterwarnings( "ignore", message=r"The 'theme' parameter in the Blocks constructor will be removed in Gradio 6\.0\..*", category=DeprecationWarning, ) warnings.filterwarnings( "ignore", message=r"The 'head' parameter in the Blocks constructor will be removed in Gradio 6\.0\..*", category=DeprecationWarning, ) CUSTOM_HEAD = """ """ CUSTOM_CSS = """ :root, html, body { color-scheme: light !important; background: #eef2f7 !important; color: #0f172a !important; } .gradio-container { max-width: 1380px !important; margin: 0 auto !important; padding-bottom: 32px !important; font-family: "Segoe UI", "Helvetica Neue", sans-serif !important; background: radial-gradient(circle at top left, rgba(203, 213, 225, 0.26), transparent 28%), linear-gradient(180deg, #f7f8fb 0%, #eef2f7 100%); } .gradio-container, .gradio-container *, .gradio-container .dark, .gradio-container .dark * { --body-background-fill: #f7f8fb !important; --body-background-fill-subdued: #eef2f7 !important; --background-fill-primary: #ffffff !important; --background-fill-secondary: #f8fafc !important; --block-background-fill: #ffffff !important; --block-border-color: #d8dee8 !important; --panel-background-fill: #ffffff !important; --panel-border-color: #d8dee8 !important; --input-background-fill: #ffffff !important; --input-border-color: #cbd5e1 !important; --checkbox-background-color: #ffffff !important; --checkbox-border-color: #94a3b8 !important; --checkbox-label-text-color: #0f172a !important; --body-text-color: #0f172a !important; --block-label-text-color: #0f172a !important; --block-title-text-color: #0f172a !important; --button-secondary-background-fill: #ffffff !important; --button-secondary-text-color: #1d4ed8 !important; } .gradio-container div, .gradio-container section, .gradio-container article, .gradio-container form, .gradio-container fieldset, .gradio-container label, .gradio-container [data-testid="block"], .gradio-container [data-testid="textbox"], .gradio-container [data-testid="checkbox"], .gradio-container [data-testid="radio"], .gradio-container [data-testid="markdown"], .gradio-container [data-testid="group"] { color: #0f172a !important; } .gradio-container, .gradio-container .prose, .gradio-container .prose p, .gradio-container .prose li, .gradio-container .prose strong, .gradio-container .prose h1, .gradio-container .prose h2, .gradio-container .prose h3, .gradio-container .prose h4, .gradio-container label, .gradio-container p, .gradio-container li, .gradio-container h1, .gradio-container h2, .gradio-container h3, .gradio-container h4 { color: #0f172a !important; } .block-title h1, .block-title h2, .section-heading { font-family: "Libre Baskerville", Georgia, serif !important; } .hero-card, .panel-card, .form-card { background: rgba(255, 255, 255, 0.96); border: 1px solid #d8dee8; border-radius: 20px; box-shadow: 0 14px 40px rgba(15, 23, 42, 0.06); } .hero-card { padding: 28px 30px; } .panel-card { padding: 22px 24px; } .form-card { padding: 18px; } .form-card textarea, .form-card input { background: #ffffff !important; color: #0f172a !important; } .form-card, .question-card, .form-card > div, .question-card > div, .form-card [data-testid], .question-card [data-testid], .form-card .prose, .question-card .prose, .hero-card [data-testid], .panel-card [data-testid] { background: #ffffff !important; color: #0f172a !important; } .gradio-container input, .gradio-container textarea, .gradio-container select { background: #ffffff !important; color: #0f172a !important; } .gradio-container [data-testid="checkbox"], .gradio-container [data-testid="checkbox"] * { background: transparent !important; color: #0f172a !important; } .gradio-container [data-testid="textbox"], .gradio-container [data-testid="textbox"] > *, .gradio-container [data-testid="textbox"] textarea, .gradio-container [data-testid="textbox"] input { background: #ffffff !important; color: #0f172a !important; } .gradio-container [data-testid="markdown"], .gradio-container [data-testid="markdown"] > *, .gradio-container [data-testid="group"], .gradio-container [data-testid="group"] > * { background: transparent !important; color: #0f172a !important; } .instruction-shell { display: flex; flex-direction: column; gap: 20px; } .instruction-top { display: grid; grid-template-columns: 1.02fr 0.98fr; gap: 20px; align-items: start; } .instruction-copy { display: flex; flex-direction: column; gap: 16px; } .instruction-top-right { display: flex; flex-direction: column; gap: 16px; } .lead-text { color: #334155; font-size: 17px; line-height: 1.6; margin: 0; } .instruction-list { margin: 0; padding-left: 20px; color: #334155; line-height: 1.6; } .example-caption-note { margin: 0 0 10px 0; padding: 10px 14px; border-radius: 14px; border: 1px solid #d9e2ec; background: #f8fbff; color: #334155; line-height: 1.6; } .instruction-bottom { display: grid; grid-template-columns: 1.08fr 0.92fr; gap: 20px; align-items: start; } .instruction-bottom-left, .instruction-bottom-right { display: flex; flex-direction: column; gap: 14px; } .metric-card { border: 1px solid #d9e2ec; border-radius: 16px; padding: 16px; background: #fbfcfe; } .metric-card h4 { margin: 0 0 8px 0; font-size: 17px; color: #0f172a; } .metric-card p { margin: 0; color: #475569; line-height: 1.55; } .metric-stack { display: flex; flex-direction: column; gap: 12px; margin-top: 14px; } .diagram-card { border: 1px solid #d9e2ec; border-radius: 18px; padding: 18px; background: linear-gradient(180deg, #ffffff 0%, #f8fafc 100%); } .case-walkthrough { display: flex; flex-direction: column; gap: 14px; } .case-walkthrough-head h3 { margin: 0; font-size: 24px; color: #0f172a; font-family: "Libre Baskerville", Georgia, serif !important; } .case-walkthrough-head p { margin: 6px 0 0 0; color: #475569; line-height: 1.55; } .walkthrough-grid { display: grid; grid-template-columns: 1.15fr 1fr 1fr; gap: 12px; } .thumb-card { background: #ffffff; border: 1px solid #d8e2ef; border-radius: 16px; overflow: hidden; box-shadow: 0 10px 24px rgba(15, 23, 42, 0.05); } .thumb-card.ref-card { border-color: #93c5fd; } .thumb-image { aspect-ratio: 1.2 / 1; background: #eef2f7; overflow: hidden; } .thumb-image img { width: 100%; height: 100%; object-fit: cover; display: block; } .thumb-body { padding: 12px 13px 14px; } .thumb-title { display: inline-block; font-size: 12px; font-weight: 700; letter-spacing: 0.06em; text-transform: uppercase; color: #1d4ed8; background: #dbeafe; border-radius: 999px; padding: 5px 9px; margin-bottom: 8px; } .thumb-card.candidate-card .thumb-title { color: #0f172a; background: #eaf1fb; } .thumb-body h4 { margin: 0 0 6px 0; font-size: 17px; color: #0f172a; } .thumb-body p { margin: 0; font-size: 14px; color: #475569; line-height: 1.5; } .walkthrough-note { border: 1px solid #d9e2ec; border-radius: 16px; background: #ffffff; padding: 14px 15px; } .walkthrough-note > strong { display: block; color: #0f172a; margin-bottom: 6px; font-size: 15px; } .walkthrough-note p { margin: 0; color: #475569; line-height: 1.6; } .walkthrough-note p + p { margin-top: 10px; } .walkthrough-note p strong { display: inline; margin: 0; font-size: inherit; color: #0f172a; } .walkthrough-note.accent-note { background: linear-gradient(180deg, #eff6ff 0%, #ffffff 100%); border-color: #bfdbfe; } .progress-chip { display: inline-block; padding: 8px 14px; border-radius: 999px; background: #0f172a; color: white; font-weight: 700; letter-spacing: 0.01em; } .meta-line { margin-top: 8px; color: #475569; font-size: 15px; } .study-shell .gr-video { border-radius: 16px !important; overflow: hidden !important; border: 1px solid #dbe2ea !important; background: #f8fafc !important; } .gradio-container video { background: #0f172a !important; } .video-panel { background: #ffffff; } .reference-panel { border: 1px solid #bfdbfe !important; box-shadow: 0 12px 28px rgba(37, 99, 235, 0.08); } .candidate-panel { border: 1px solid #dbe4ee !important; } .video-caption { color: #475569; font-size: 14px; margin-top: -4px; margin-bottom: 10px; } .question-card { padding: 18px 20px; border-radius: 18px; border: 1px solid #d9e2ec; background: rgba(255, 255, 255, 0.9); } .choice-input { margin-top: 8px; } .choice-input fieldset { border: 1px solid #d7e3f4 !important; border-radius: 16px !important; background: #f8fbff !important; padding: 12px 14px !important; } .choice-input label { border: 1px solid #cbdcf5 !important; border-radius: 12px !important; background: #ffffff !important; padding: 10px 12px !important; color: #1e293b !important; font-weight: 600 !important; transition: all 0.2s ease !important; } .choice-input label:hover { border-color: #60a5fa !important; background: #eff6ff !important; } .choice-input label:has(input:checked) { border-color: #3b82f6 !important; background: linear-gradient(180deg, #dbeafe 0%, #eff6ff 100%) !important; color: #1d4ed8 !important; box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.12) !important; } .choice-input input[type="radio"] { accent-color: #2563eb !important; } .gradio-container button { transition: transform 0.15s ease, box-shadow 0.15s ease !important; } .gradio-container button.primary { background: linear-gradient(180deg, #2563eb 0%, #1d4ed8 100%) !important; color: #ffffff !important; border: none !important; box-shadow: 0 8px 20px rgba(37, 99, 235, 0.18) !important; } .gradio-container button.primary:hover { transform: translateY(-1px); box-shadow: 0 12px 24px rgba(37, 99, 235, 0.22) !important; } .gradio-container button.secondary { background: #ffffff !important; color: #1d4ed8 !important; border: 1px solid #bfdbfe !important; } .language-switch-row { justify-content: flex-end; margin-bottom: 8px; } .language-switch { min-width: 210px; } .language-switch fieldset { border: 1px solid #cfe0fb !important; border-radius: 999px !important; background: rgba(255, 255, 255, 0.92) !important; padding: 6px !important; box-shadow: 0 8px 20px rgba(37, 99, 235, 0.08) !important; } .language-switch label { border: none !important; border-radius: 999px !important; background: transparent !important; color: #475569 !important; min-height: 38px !important; padding: 8px 16px !important; font-weight: 700 !important; transition: all 0.18s ease !important; } .language-switch label:hover { background: #eff6ff !important; color: #1d4ed8 !important; } .language-switch label:has(input:checked) { background: linear-gradient(180deg, #2563eb 0%, #1d4ed8 100%) !important; color: #ffffff !important; box-shadow: 0 8px 16px rgba(37, 99, 235, 0.18) !important; } .language-switch input[type="radio"] { display: none !important; } .thank-card { max-width: 760px; margin: 0 auto; padding: 28px 30px; text-align: left; } .muted-caption { color: #64748b; font-size: 14px; } @media (max-width: 900px) { .instruction-top, .instruction-bottom, .walkthrough-grid { grid-template-columns: 1fr; } } """ LANGUAGE_CHOICES = [("English", "en"), ("中文", "zh")] LANGUAGE_SWITCH_LABEL = "Language / 语言" TEXT = { "en": { "language_label": "Language", "study_title": "Human Motion Reenactment User Study", "example_comparison": "Example Comparison", "reference_video": "Reference Video", "candidate_a": "Result A", "candidate_b": "Result B", "reference_caption": "Reference", "result_a_caption": "Result A", "result_b_caption": "Result B", "result_a_left": "Result A", "result_b_right": "Result B", "participant_id_label": "Participant ID (read-only)", "participant_setup_title": "Participant Setup", "generate_fresh_id": "Generate Fresh Participant ID", "start_continue": "Start / Continue Study", "previous": "Previous", "next": "Next", "submit_study": "Submit Study", "question_similarity": "Which result better matches the reference motion?", "question_quality": "Which result has better motion quality?", "question_preference": "Which result do you overall prefer?", "left_choice": "Result A", "right_choice": "Result B", "saved_progress_restored": "Saved progress restored.", "answer_all_required": "Please answer all three questions before continuing.", "session_empty": "Session state was empty. Please return to the start page and continue this browser session.", "browser_saved_id_notice": ( "This browser already has a saved participant ID. Click **Start / Continue Study** " "to resume the same session safely." ), "fresh_id_notice": ( "A fresh participant ID has been created for this browser. " "Use it only if you are starting a brand-new session." ), "completed_id_notice": ( "This participant ID has already completed the study. " "Generate a fresh ID if you need a brand-new session on this browser." ), "study_instruction": "Watch the reference clip and both anonymous results before answering the three questions on Motion Similarity, Motion Quality, and Overall Preference.", "question_word": "Question", "saved_responses": "Saved responses", "participant_id_meta": "Participant ID", "thank_you_title": "Thank you for completing the study.", "thank_you_saved": "Your responses have been saved successfully.", "thank_you_completed_at": "Completed at", "thank_you_close": "You may now close this page.", }, "zh": { "language_label": "语言", "study_title": "人体动作重演用户研究", "example_comparison": "示例对比", "reference_video": "参考视频", "candidate_a": "结果 A", "candidate_b": "结果 B", "reference_caption": "参考视频", "result_a_caption": "结果 A", "result_b_caption": "结果 B", "result_a_left": "结果 A", "result_b_right": "结果 B", "participant_id_label": "参与者编号（只读）", "participant_setup_title": "参与者设置", "generate_fresh_id": "生成新的参与者编号", "start_continue": "开始 / 继续问卷", "previous": "上一题", "next": "下一题", "submit_study": "提交问卷", "question_similarity": "哪个结果与参考动作更匹配？", "question_quality": "哪个结果的动作质量更好？", "question_preference": "你整体更偏好哪个结果？", "left_choice": "结果 A", "right_choice": "结果 B", "saved_progress_restored": "已恢复先前保存的进度。", "answer_all_required": "请先回答完这三个问题，再继续下一题。", "session_empty": "当前会话为空。请返回起始页后继续此浏览器中的问卷会话。", "browser_saved_id_notice": ( "当前浏览器已保存参与者编号。点击 **开始 / 继续问卷** 可安全地恢复同一会话。" ), "fresh_id_notice": ( "当前浏览器已生成一个新的参与者编号。仅当你需要开始一个全新的问卷会话时再使用它。" ), "completed_id_notice": ( "该参与者编号已经完成本次问卷。若你需要在此浏览器中开始全新会话，请生成新的编号。" ), "study_instruction": "请先观看参考视频和两个匿名结果，再回答动作相似性、动作质量和整体偏好这三个问题。", "question_word": "题目", "saved_responses": "已保存回答", "participant_id_meta": "参与者编号", "thank_you_title": "感谢你完成本次问卷。", "thank_you_saved": "你的回答已成功保存。", "thank_you_completed_at": "完成时间", "thank_you_close": "现在可以关闭此页面。", }, } def normalize_language(language: str | None) -> str: return language if language in TEXT else "en" def tr(language: str | None, key: str, **kwargs: Any) -> str: return TEXT[normalize_language(language)][key].format(**kwargs) def choice_options_for_language(language: str | None) -> list[tuple[str, str]]: language = normalize_language(language) return [ (tr(language, "left_choice"), "ResultA"), (tr(language, "right_choice"), "ResultB"), ] def video_caption_html(text: str) -> str: return f"

{html.escape(text)}

" def build_participant_setup_markdown(language: str | None) -> str: language = normalize_language(language) if language == "zh": return """ ### 参与者设置当前公开问卷链接会为本浏览器自动创建并保存参与者编号。之后如果你仍使用同一浏览器再次访问，就可以自动恢复同一份作答进度，而无需手动输入编号。只有在你希望于当前浏览器中开始一个全新的作答会话时，才需要点击 **生成新的参与者编号**。请不要将下方显示的编号分享给其他参与者。 """.strip() return """ ### Participant Setup This public study link now creates and stores a participant ID automatically for the current browser. Returning on the same browser will safely continue the same session without asking you to type an ID manually. Use **Generate Fresh Participant ID** only when starting a completely new participation on this browser. Please do not share the ID shown below with other participants. """.strip() def build_progress_markdown(state: dict[str, Any], language: str | None) -> str: question = state["questions"][state["current_index"]] answered_count = len(state.get("answers", {})) language = normalize_language(language) if language == "zh": return ( f"

{tr(language, 'participant_id_meta')}: {state['participant_id']}

" f"

{tr(language, 'saved_responses')}: {answered_count} / {question['total_questions']}

" ) return ( f"

{tr(language, 'participant_id_meta')}: {state['participant_id']}

" f"

{tr(language, 'saved_responses')}: {answered_count} / {question['total_questions']}

" ) def build_completion_markdown_local(state: dict[str, Any], language: str | None) -> str: completed_at = state.get("completed_at") or "" total_questions = len(state.get("questions", [])) answered_count = len(state.get("answers", {})) language = normalize_language(language) if language == "zh": return f""" ## {tr(language, "thank_you_title")} {tr(language, "thank_you_saved")} - {tr(language, "participant_id_meta")}: `{state["participant_id"]}` - {tr(language, "saved_responses")}: `{answered_count} / {total_questions}` - {tr(language, "thank_you_completed_at")}: `{completed_at}` {tr(language, "thank_you_close")} """.strip() return f""" ## {tr(language, "thank_you_title")} {tr(language, "thank_you_saved")} - {tr(language, "participant_id_meta")}: `{state["participant_id"]}` - {tr(language, "saved_responses")}: `{answered_count} / {total_questions}` - {tr(language, "thank_you_completed_at")}: `{completed_at}` {tr(language, "thank_you_close")} """.strip() def image_to_data_uri(image_path: str | Path) -> str: path = Path(image_path) if not path.exists(): return "" suffix = path.suffix.lower() mime_type = "image/jpeg" if suffix in {".jpg", ".jpeg"} else "image/png" encoded = base64.b64encode(path.read_bytes()).decode("ascii") return f"data:{mime_type};base64,{encoded}" def build_instruction_case_html(intro_case: dict[str, Any], language: str | None) -> str: reference_thumb = image_to_data_uri(ensure_video_thumbnail(intro_case["reference_video"], PROJECT_ROOT)) result_a_thumb = image_to_data_uri( ensure_video_thumbnail(intro_case["method_videos"]["anyact"], PROJECT_ROOT) ) result_b_thumb = image_to_data_uri( ensure_video_thumbnail(intro_case["method_videos"]["vlm_hy_motion"], PROJECT_ROOT) ) language = normalize_language(language) if language == "zh": return f"""

示例对比

下图说明问卷如何进行比较。参与者需要将一个源角色的运动视频与两个人体重演结果进行比较，页面上它们显示为 结果 A 和 结果 B。灰色区域表示地板。

参考视频

源角色运动视频

该视频提供需要被模仿的角色运动，主要体现姿态和动作动态。

结果 A

模仿参考角色运动的人体重演

这是结果 A 对参考角色运动进行人体重演后的表现。

结果 B

模仿参考角色运动的人体重演

这是结果 B 对参考角色运动进行人体重演后的表现。

""".strip() return f"""

Example Comparison

The figure below shows how the questionnaire works. Participants compare one video of the source character's motion against two human reenactment results that imitate that motion, displayed as Result A and Result B. The gray area indicates the floor.

Reference

Source character motion video

This video provides the character motion to be imitated, mainly in terms of pose and action dynamics.

Result A

Human reenactment imitating the reference character motion

This is the human-motion reenactment shown as Result A.

Result B

Human reenactment imitating the reference character motion

This is the human-motion reenactment shown as Result B.

""".strip() def build_judging_instruction_html(language: str | None) -> str: language = normalize_language(language) if language == "zh": return """

参与者需要判断什么

对于每一组对比，你都需要回答三个问题，分别对应 动作相似性、动作质量 和 整体偏好。请直接依据页面上显示的结果 A 和结果 B 进行判断。

动作相似性

哪个结果与参考动作更匹配，尤其是在姿态对齐和时间动态方面？

动作质量

哪个结果看起来更自然、更平滑，并且更符合人体动作的物理合理性？请主要关注全身动作、身体协调、平衡性和时间连续性，并可适当忽略面部表情与手指细节。

整体偏好

综合考虑动作相似性和动作质量后，你整体更偏好哪个结果？

""".strip() return """

What participants are asked to judge

For each comparison, please answer three questions covering Motion Similarity, Motion Quality, and Overall Preference. Please base your choice directly on the page labels, namely Result A and Result B.

Motion Similarity

Which result better matches the reference motion, especially in terms of pose alignment and temporal dynamics?

Motion Quality

Which result appears more natural, smooth, and physically plausible as a human motion sequence (mainly focusing on whole-body motion, body coordination, balance, and temporal continuity, while reasonably ignoring facial expressions and fine finger motion)?

Overall Preference

Considering both similarity and quality together, which result do you prefer overall?

""".strip() def build_instruction_html( config: dict[str, Any], case_walkthrough_html: str, judging_instruction_html: str, language: str | None, ) -> str: total_questions = config["participant_question_total"] language = normalize_language(language) if language == "zh": return f"""

{tr(language, "study_title")}

本研究用于评估人体动作重演结果的主观感知质量。每一道题中，你将观看一个参考视频和两个匿名结果，它们在页面上显示为 结果 A 和 结果 B。你需要围绕 动作相似性、动作质量 和 整体偏好 三个指标完成判断。

每位参与者需要完成 {total_questions} 个成对对比样本。
每次比较都会同时展示一个参考视频，以及并排显示的结果 A 和结果 B。
结果 A 和结果 B 的时长可能不完全相同，但它们都对应对整段参考视频中角色运动的重演；请忽略这种长度差异。
请在进入下一页之前回答完当前题目的三个问题。

{judging_instruction_html}

{case_walkthrough_html}

如何理解本任务

本研究关注的是人去模仿参考视频中角色的运动。一个优秀的结果应当既忠实保留参考运动，又能呈现自然、平稳、符合人体运动规律的动作。

当原始角色和人体构造不完全一致时，可以接受使用人体其他肢体去模拟缺失的功能部位，例如用手臂对应翅膀，只要整体动作意图和动态仍然合理并接近参考动作。

""".strip() return f"""

Human Motion Reenactment User Study

This study evaluates perceptual quality in human motion reenactment. In each question, you will watch one reference video and two anonymous results, displayed on the page as Result A and Result B. You will answer three evaluation questions covering Motion Similarity, Motion Quality, and Overall Preference.

Each participant completes {total_questions} pairwise comparison samples.
Each comparison presents one reference clip together with Result A and Result B shown side by side.
Result A and Result B may have different durations, but both are reenactments of the character motion over the full reference clip; please ignore this length difference.
Please answer all three questions before moving to the next page.

{judging_instruction_html}

{case_walkthrough_html}

How to interpret the task

The goal is to assess how well a human reenactment imitates the motion of the character in the reference video. A strong result should preserve the reference motion while still looking smooth, stable, and physically natural as human movement.

When the source character does not map directly to a human body, it is acceptable to use other human limbs to simulate the missing functional parts, such as using arms to mimic wings, as long as the motion intent and dynamics remain plausible and close to the reference.

""".strip() def build_example_caption(language: str | None) -> str: language = normalize_language(language) if language == "zh": return """

下方展示的是同一个示例案例在正式问卷中的实际观看布局。参与者需要将参考视频与结果 A、结果 B 进行比较，并回答三个评价问题。

""".strip() return """

Below is the same example case shown in the actual questionnaire layout. Participants compare the reference clip against Result A and Result B and answer the three evaluation questions.

""".strip() def resolve_browser_participant_id(browser_participant_id: str | None) -> str: sanitized = sanitize_participant_id(browser_participant_id) return sanitized or generate_participant_id() def build_intro_component_updates( config: dict[str, Any], intro_case: dict[str, Any], language: str | None, ) -> tuple[Any, ...]: language = normalize_language(language) case_walkthrough_html = build_instruction_case_html(intro_case, language) judging_instruction_html = build_judging_instruction_html(language) return ( gr.update(value=build_instruction_html(config, case_walkthrough_html, judging_instruction_html, language)), gr.update(value=build_example_caption(language), visible=False), gr.update(label=tr(language, "reference_video")), gr.update(value=video_caption_html(tr(language, "reference_caption"))), gr.update(label=tr(language, "candidate_a")), gr.update(value=video_caption_html(tr(language, "result_a_caption"))), gr.update(label=tr(language, "candidate_b")), gr.update(value=video_caption_html(tr(language, "result_b_caption"))), gr.update(value=build_participant_setup_markdown(language)), gr.update(value=tr(language, "generate_fresh_id")), gr.update(value=tr(language, "start_continue")), ) def build_study_component_updates( language: str | None, similarity_value: str | None, quality_value: str | None, preference_value: str | None, show_previous: bool, show_next: bool, show_submit: bool, ) -> tuple[Any, ...]: language = normalize_language(language) return ( gr.update(label=tr(language, "reference_video")), gr.update(label=tr(language, "result_a_left")), gr.update(label=tr(language, "result_b_right")), gr.update( choices=choice_options_for_language(language), label=tr(language, "question_similarity"), value=similarity_value, ), gr.update( choices=choice_options_for_language(language), label=tr(language, "question_quality"), value=quality_value, ), gr.update( choices=choice_options_for_language(language), label=tr(language, "question_preference"), value=preference_value, ), gr.update(value=tr(language, "previous"), visible=show_previous), gr.update(value=tr(language, "next"), visible=show_next), gr.update(value=tr(language, "submit_study"), visible=show_submit), gr.update(value=video_caption_html(tr(language, "reference_caption"))), gr.update(value=video_caption_html(tr(language, "result_a_caption"))), gr.update(value=video_caption_html(tr(language, "result_b_caption"))), ) def render_intro_view( config: dict[str, Any], intro_case: dict[str, Any], language: str | None, participant_id: str | None = None, browser_participant_id: str | None = None, start_message: str = "", ) -> Tuple[Any, ...]: language = normalize_language(language) resolved_participant_id = resolve_browser_participant_id(browser_participant_id or participant_id) intro_updates = build_intro_component_updates(config, intro_case, language) study_updates = build_study_component_updates( language, similarity_value=None, quality_value=None, preference_value=None, show_previous=False, show_next=True, show_submit=False, ) return ( gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), {}, "", "", tr(language, "study_instruction"), None, None, None, study_updates[3], study_updates[4], study_updates[5], "", study_updates[6], study_updates[7], study_updates[8], start_message, "", gr.update(value=resolved_participant_id, label=tr(language, "participant_id_label")), resolved_participant_id, language, gr.update(value=language, label=LANGUAGE_SWITCH_LABEL), *intro_updates, study_updates[9], study_updates[10], study_updates[11], ) def render_question_view( config: dict[str, Any], intro_case: dict[str, Any], language: str | None, state: dict[str, Any], study_message: str = "", draft_answers: tuple[str | None, str | None, str | None] | None = None, ) -> Tuple[Any, ...]: language = normalize_language(language) payload = build_question_payload(state) synced_videos = ensure_synchronized_study_videos( reference_video=payload["reference_video"], left_video=payload["left_video"], right_video=payload["right_video"], project_root=PROJECT_ROOT, ) similarity_value = draft_answers[0] if draft_answers and draft_answers[0] is not None else payload["answer_similarity"] quality_value = draft_answers[1] if draft_answers and draft_answers[1] is not None else payload["answer_quality"] preference_value = draft_answers[2] if draft_answers and draft_answers[2] is not None else payload["answer_preference"] intro_updates = build_intro_component_updates(config, intro_case, language) study_updates = build_study_component_updates( language, similarity_value=similarity_value, quality_value=quality_value, preference_value=preference_value, show_previous=payload["show_previous"], show_next=payload["show_next"], show_submit=payload["show_submit"], ) return ( gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), state, payload["question_token"], build_progress_markdown(state, language), tr(language, "study_instruction"), gr.update(value=synced_videos["reference_video"], label=tr(language, "reference_video")), gr.update(value=synced_videos["left_video"], label=tr(language, "result_a_left")), gr.update(value=synced_videos["right_video"], label=tr(language, "result_b_right")), study_updates[3], study_updates[4], study_updates[5], study_message, study_updates[6], study_updates[7], study_updates[8], "", "", gr.update(value=state["participant_id"], label=tr(language, "participant_id_label")), state["participant_id"], language, gr.update(value=language, label=LANGUAGE_SWITCH_LABEL), *intro_updates, study_updates[9], study_updates[10], study_updates[11], ) def render_thank_you_view( config: dict[str, Any], intro_case: dict[str, Any], language: str | None, state: dict[str, Any], ) -> Tuple[Any, ...]: language = normalize_language(language) intro_updates = build_intro_component_updates(config, intro_case, language) study_updates = build_study_component_updates( language, similarity_value=None, quality_value=None, preference_value=None, show_previous=False, show_next=False, show_submit=False, ) return ( gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), state, "", "", tr(language, "study_instruction"), gr.update(value=None), gr.update(value=None), gr.update(value=None), study_updates[3], study_updates[4], study_updates[5], "", study_updates[6], study_updates[7], study_updates[8], "", build_completion_markdown_local(state, language), gr.update(value=state["participant_id"], label=tr(language, "participant_id_label")), state["participant_id"], language, gr.update(value=language, label=LANGUAGE_SWITCH_LABEL), *intro_updates, study_updates[9], study_updates[10], study_updates[11], ) def _drop_language_selector_update(payload: Tuple[Any, ...]) -> Tuple[Any, ...]: language_selector_index = 22 return payload[:language_selector_index] + payload[language_selector_index + 1 :] def build_demo(config_path: Path) -> gr.Blocks: ensure_local_ffmpeg() patch_gradio_video_probe() config = load_study_config(config_path) ensure_runtime_dirs(PROJECT_ROOT) config = prepare_reference_videos_for_web(config, PROJECT_ROOT) upgrade_existing_results_schema(PROJECT_ROOT, config) default_language = "en" choice_options = choice_options_for_language(default_language) intro_case = get_instruction_case(config) example_left_path = intro_case["method_videos"]["anyact"] example_right_path = intro_case["method_videos"]["vlm_hy_motion"] with gr.Blocks( title=config["study_title"], css=CUSTOM_CSS, head=CUSTOM_HEAD, theme=gr.themes.Soft( primary_hue="blue", secondary_hue="sky", neutral_hue="slate", ), ) as demo: participant_state = gr.State({}) question_token = gr.State("") browser_participant_id = gr.BrowserState( "", storage_key=f"{config['study_id']}_participant_id", ) browser_language = gr.BrowserState( default_language, storage_key=f"{config['study_id']}_language", ) with gr.Row(elem_classes=["language-switch-row"]): language_selector = gr.Radio( choices=LANGUAGE_CHOICES, value=default_language, label=LANGUAGE_SWITCH_LABEL, interactive=True, show_label=False, elem_classes=["language-switch"], ) with gr.Column(visible=True) as intro_panel: intro_instruction_html = gr.HTML( build_instruction_html( config=config, case_walkthrough_html=build_instruction_case_html(intro_case, default_language), judging_instruction_html=build_judging_instruction_html(default_language), language=default_language, ) ) example_caption_md = gr.Markdown(build_example_caption(default_language), visible=False) with gr.Row(visible=False): with gr.Column(scale=5): intro_reference_video = gr.Video( value=intro_case["reference_video"], label=tr(default_language, "reference_video"), autoplay=True, loop=True, elem_classes=["panel-card", "video-panel", "reference-panel"], ) intro_reference_caption = gr.Markdown(video_caption_html(tr(default_language, "reference_caption"))) with gr.Column(scale=4): intro_left_video = gr.Video( value=example_left_path, label=tr(default_language, "candidate_a"), autoplay=True, loop=True, elem_classes=["panel-card", "video-panel", "candidate-panel"], ) intro_left_caption = gr.Markdown(video_caption_html(tr(default_language, "result_a_caption"))) with gr.Column(scale=4): intro_right_video = gr.Video( value=example_right_path, label=tr(default_language, "candidate_b"), autoplay=True, loop=True, elem_classes=["panel-card", "video-panel", "candidate-panel"], ) intro_right_caption = gr.Markdown(video_caption_html(tr(default_language, "result_b_caption"))) with gr.Group(elem_classes=["form-card"]): participant_setup_md = gr.Markdown(build_participant_setup_markdown(default_language)) participant_id_box = gr.Textbox( label=tr(default_language, "participant_id_label"), interactive=False, ) regenerate_button = gr.Button(tr(default_language, "generate_fresh_id")) start_message = gr.Markdown() start_button = gr.Button(tr(default_language, "start_continue"), variant="primary") with gr.Column(visible=False, elem_classes=["study-shell"]) as study_panel: progress_html = gr.HTML() study_notice = gr.Markdown(tr(default_language, "study_instruction")) with gr.Row(): with gr.Column(scale=5): reference_video = gr.Video( label=tr(default_language, "reference_video"), autoplay=True, loop=True, elem_id="study-reference-video", elem_classes=["panel-card", "video-panel", "reference-panel"], ) study_reference_caption = gr.Markdown(video_caption_html(tr(default_language, "reference_caption"))) with gr.Column(scale=4): left_video = gr.Video( label=tr(default_language, "result_a_left"), autoplay=True, loop=True, elem_id="study-left-video", elem_classes=["panel-card", "video-panel", "candidate-panel"], ) study_left_caption = gr.Markdown(video_caption_html(tr(default_language, "result_a_caption"))) with gr.Column(scale=4): right_video = gr.Video( label=tr(default_language, "result_b_right"), autoplay=True, loop=True, elem_id="study-right-video", elem_classes=["panel-card", "video-panel", "candidate-panel"], ) study_right_caption = gr.Markdown(video_caption_html(tr(default_language, "result_b_caption"))) with gr.Group(elem_classes=["question-card"]): similarity_radio = gr.Radio( choices=choice_options, label=tr(default_language, "question_similarity"), elem_classes=["choice-input"], ) quality_radio = gr.Radio( choices=choice_options, label=tr(default_language, "question_quality"), elem_classes=["choice-input"], ) preference_radio = gr.Radio( choices=choice_options, label=tr(default_language, "question_preference"), elem_classes=["choice-input"], ) study_message = gr.Markdown() with gr.Row(): previous_button = gr.Button(tr(default_language, "previous")) next_button = gr.Button(tr(default_language, "next"), variant="primary") submit_button = gr.Button(tr(default_language, "submit_study"), variant="primary", visible=False) with gr.Column(visible=False) as thank_panel: with gr.Group(elem_classes=["hero-card", "thank-card"]): thank_you_markdown = gr.Markdown() outputs = [ intro_panel, study_panel, thank_panel, participant_state, question_token, progress_html, study_notice, reference_video, left_video, right_video, similarity_radio, quality_radio, preference_radio, study_message, previous_button, next_button, submit_button, start_message, thank_you_markdown, participant_id_box, browser_participant_id, browser_language, language_selector, intro_instruction_html, example_caption_md, intro_reference_video, intro_reference_caption, intro_left_video, intro_left_caption, intro_right_video, intro_right_caption, participant_setup_md, regenerate_button, start_button, study_reference_caption, study_left_caption, study_right_caption, ] outputs_without_language_selector = outputs[:22] + outputs[23:] def initialize_page(saved_participant_id: str, saved_language: str) -> Tuple[Any, ...]: language = normalize_language(saved_language) resolved_participant_id = resolve_browser_participant_id(saved_participant_id) start_message = "" if sanitize_participant_id(saved_participant_id): start_message = tr(language, "browser_saved_id_notice") return render_intro_view( config=config, intro_case=intro_case, language=language, participant_id=resolved_participant_id, browser_participant_id=resolved_participant_id, start_message=start_message, ) def handle_generate_new_id(current_language: str) -> Tuple[Any, str, str]: language = normalize_language(current_language) new_participant_id = generate_participant_id() return ( gr.update(value=new_participant_id, label=tr(language, "participant_id_label")), new_participant_id, tr(language, "fresh_id_notice"), ) def handle_start( participant_id: str, current_language: str, request: gr.Request, ) -> Tuple[Any, ...]: language = normalize_language(current_language) state, status = create_or_resume_participant( project_root=PROJECT_ROOT, config=config, participant_id=participant_id, request=request, ) if status == "completed": return render_intro_view( config=config, intro_case=intro_case, language=language, participant_id=state["participant_id"], browser_participant_id=state["participant_id"], start_message=tr(language, "completed_id_notice"), ) study_message_text = tr(language, "saved_progress_restored") if status == "resumed" else "" return render_question_view( config=config, intro_case=intro_case, language=language, state=state, study_message=study_message_text, ) def handle_previous( state: dict[str, Any], current_token: str, current_browser_participant_id: str, current_language: str, ) -> Tuple[Any, ...]: language = normalize_language(current_language) if not state: resolved_participant_id = resolve_browser_participant_id(current_browser_participant_id) return render_intro_view( config=config, intro_case=intro_case, language=language, participant_id=resolved_participant_id, browser_participant_id=resolved_participant_id, start_message=tr(language, "session_empty"), ) updated_state, message = move_question_pointer( project_root=PROJECT_ROOT, participant_id=state["participant_id"], question_token=current_token, direction="previous", ) return render_question_view( config=config, intro_case=intro_case, language=language, state=updated_state, study_message=message, ) def handle_next_or_submit( state: dict[str, Any], current_token: str, answer_similarity: str, answer_quality: str, answer_preference: str, action: str, current_browser_participant_id: str, current_language: str, ) -> Tuple[Any, ...]: language = normalize_language(current_language) if not state: resolved_participant_id = resolve_browser_participant_id(current_browser_participant_id) return render_intro_view( config=config, intro_case=intro_case, language=language, participant_id=resolved_participant_id, browser_participant_id=resolved_participant_id, start_message=tr(language, "session_empty"), ) if not answer_similarity or not answer_quality or not answer_preference: return render_question_view( config=config, intro_case=intro_case, language=language, state=state, study_message=tr(language, "answer_all_required"), draft_answers=(answer_similarity, answer_quality, answer_preference), ) updated_state, message, status = save_current_answer( project_root=PROJECT_ROOT, participant_id=state["participant_id"], question_token=current_token, answer_similarity=answer_similarity, answer_quality=answer_quality, answer_preference=answer_preference, action=action, ) if status == "completed": return render_thank_you_view( config=config, intro_case=intro_case, language=language, state=updated_state, ) return render_question_view( config=config, intro_case=intro_case, language=language, state=updated_state, study_message=message, ) def handle_language_change( selected_language: str, state: dict[str, Any], current_browser_participant_id: str, current_token: str, answer_similarity: str | None, answer_quality: str | None, answer_preference: str | None, current_start_message: str, current_study_message: str, ) -> Tuple[Any, ...]: language = normalize_language(selected_language) if state: if state.get("completed_at"): return _drop_language_selector_update( render_thank_you_view( config=config, intro_case=intro_case, language=language, state=state, ) ) return _drop_language_selector_update( render_question_view( config=config, intro_case=intro_case, language=language, state=state, study_message=current_study_message, draft_answers=(answer_similarity, answer_quality, answer_preference), ) ) resolved_participant_id = resolve_browser_participant_id(current_browser_participant_id) return _drop_language_selector_update( render_intro_view( config=config, intro_case=intro_case, language=language, participant_id=resolved_participant_id, browser_participant_id=resolved_participant_id, start_message=current_start_message, ) ) demo.load(initialize_page, inputs=[browser_participant_id, browser_language], outputs=outputs) regenerate_button.click( handle_generate_new_id, inputs=[browser_language], outputs=[participant_id_box, browser_participant_id, start_message], ) start_button.click(handle_start, inputs=[browser_participant_id, browser_language], outputs=outputs) previous_button.click( handle_previous, inputs=[participant_state, question_token, browser_participant_id, browser_language], outputs=outputs, ) next_button.click( lambda state, token, similarity, quality, preference, browser_pid, current_language: handle_next_or_submit( state, token, similarity, quality, preference, "next", browser_pid, current_language ), inputs=[ participant_state, question_token, similarity_radio, quality_radio, preference_radio, browser_participant_id, browser_language, ], outputs=outputs, ) submit_button.click( lambda state, token, similarity, quality, preference, browser_pid, current_language: handle_next_or_submit( state, token, similarity, quality, preference, "submit", browser_pid, current_language ), inputs=[ participant_state, question_token, similarity_radio, quality_radio, preference_radio, browser_participant_id, browser_language, ], outputs=outputs, ) language_selector.change( handle_language_change, inputs=[ language_selector, participant_state, browser_participant_id, question_token, similarity_radio, quality_radio, preference_radio, start_message, study_message, ], outputs=outputs_without_language_selector, ) return demo def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Launch the Gradio user study application.") parser.add_argument( "--config", type=Path, default=PROJECT_ROOT / "data" / "study_config.json", help="Path to the study configuration JSON file.", ) parser.add_argument( "--port", type=int, default=int(os.environ.get("PORT", "7860")), help="Server port for Gradio.", ) parser.add_argument( "--server-name", type=str, default=os.environ.get("GRADIO_SERVER_NAME", default_server_name()), help="Server bind address for Gradio.", ) parser.add_argument( "--share", action="store_true", help="Enable Gradio's temporary public share link.", ) return parser.parse_args() def main() -> None: args = parse_args() demo = build_demo(args.config) demo.queue() demo.launch( server_name=args.server_name, server_port=args.port, share=args.share, allowed_paths=[str(PROJECT_ROOT)], ) if __name__ == "__main__": main()