Spaces:
Sleeping
Sleeping
# app.py — Human Eval UI (audio-stripped delivery)
| import os, io, csv, json, random, subprocess, hashlib, urllib.request, shutil | |
| from datetime import datetime | |
| import gradio as gr | |
| from huggingface_hub import HfApi, hf_hub_download | |
# -------------------- Config --------------------
# Dataset repo that holds results.csv and the example clips; the RESULTS_REPO
# environment variable overrides the default.
REPO_ID = os.environ.get("RESULTS_REPO", "sgtlim/videoeval_results")
# Token handed to every Hub download/upload call (None when the variable is unset).
HF_TOKEN = os.environ.get("HF_TOKEN")
RESULTS_FILE = "results.csv"
# Number of videos scheduled for one participant session.
TOTAL_PER_PARTICIPANT = 30

# Audio stripping (runtime mute) config
MUTE_AUDIO = True
# True only when an ffmpeg executable can be found on PATH.
HAS_FFMPEG = shutil.which("ffmpeg") is not None
RAW_DIR = "/tmp/raw_videos"      # downloaded originals
MUTED_DIR = "/tmp/muted_videos"  # audio-free copies handed to the UI
for _cache_dir in (RAW_DIR, MUTED_DIR):
    os.makedirs(_cache_dir, exist_ok=True)
# -------------------- Data --------------------
# Example videos.json entry:
#   {"url": "...mp4", "id": "BodyWeightSquats__XXXX.mp4", "action": "BodyWeightSquats"}
with open("videos.json", encoding="utf-8") as videos_file:
    V = json.loads(videos_file.read())

# Shared Hub client used for uploading results.
api = HfApi()
# Participant instructions (Markdown) rendered on both the intro page and the
# evaluation page; the bold markers are intentional.
# The apostrophe in "person’s" was restored from a mis-decoded character.
INSTRUCTION_MD = """
**Task:** You will watch a series of **AI-generated videos**. For each video, your job is to rate how well the person’s action in the AI-generated video matches the action specified as "**expected action**". Some things to keep in mind:
- The generated video should **capture** the expected action **throughout the video**.
- Try to **focus only** on the expected action and do **not** judge **video quality**, **attractiveness**, **background**, **camera motion**, or **objects**.
- You will be **paid** once **all the videos are viewed and rated**.
"""
| # -------------------- Audio-strip helpers -------------------- | |
| def _safe_name(s: str) -> str: | |
| return hashlib.sha1(s.encode("utf-8", errors="ignore")).hexdigest() | |
| def _get_video_id(v: dict) -> str: | |
| if "id" in v and v["id"]: | |
| return v["id"] | |
| return os.path.basename(v.get("url", "")) | |
def _download_to_tmp(url: str) -> str:
    """Download a remote video into RAW_DIR and return its local path.

    The destination name is the SHA-1 of the URL plus ".mp4", so repeated calls
    for the same URL reuse the cached file.

    The download is written to a temporary file and renamed into place only
    after it completes. Otherwise an interrupted transfer would leave a
    truncated file at the cache path, and every later call would return it.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises on network/HTTP failure.
    """
    import tempfile

    dst = os.path.join(RAW_DIR, _safe_name(url) + ".mp4")
    if os.path.exists(dst):
        return dst

    fd, partial = tempfile.mkstemp(suffix=".part", dir=RAW_DIR)
    os.close(fd)  # urlretrieve reopens the path itself
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, dst)
    finally:
        # Still present only when the download or the rename failed.
        if os.path.exists(partial):
            os.remove(partial)
    return dst
def _muted_copy_fast(local_in: str, out_path: str):
    """Fast path: stream-copy the video track and drop audio (``-an``).

    Raises ``subprocess.CalledProcessError`` when ffmpeg exits non-zero.
    """
    silence = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
    ffmpeg_args = ["ffmpeg", "-y", "-i", local_in, "-c:v", "copy", "-an", out_path]
    subprocess.run(ffmpeg_args, check=True, **silence)
def _muted_copy_reencode(local_in: str, out_path: str):
    """Compatibility fallback: re-encode the video as H.264 and drop audio.

    Raises ``subprocess.CalledProcessError`` when ffmpeg exits non-zero.
    """
    source_args = ["ffmpeg", "-y", "-i", local_in]
    filter_args = ["-vf", "format=yuv420p", "-movflags", "+faststart"]
    codec_args = ["-c:v", "libx264", "-crf", "18", "-preset", "veryfast"]
    output_args = ["-an", out_path]
    subprocess.run(
        source_args + filter_args + codec_args + output_args,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def _src_for_gradio(v: dict) -> str:
    """Return the source Gradio should play for video entry *v*.

    When MUTE_AUDIO is on, ffmpeg is available and the entry has a "url", this
    is the local path of an audio-free copy cached in MUTED_DIR. Otherwise, or
    when the copy cannot be produced, it is the original url/path.

    Changes from the earlier version:
    - The muted cache is checked before downloading, so a cache hit costs no
      network access.
    - ffmpeg writes to a temporary file that is renamed into the cache only on
      success. A failed run used to leave a partial file at the cache path that
      later calls returned as if it were valid.
    - A download failure now falls back to the original source, like an ffmpeg
      failure, instead of raising into the UI.
    """
    import tempfile

    src = v.get("url", "")
    if not (MUTE_AUDIO and HAS_FFMPEG and src):
        return src  # fall back to original (no mute possible)

    # Stable cache filename derived from the video id.
    vid_id = _get_video_id(v) or _safe_name(src)
    muted_out = os.path.join(MUTED_DIR, f"{_safe_name(vid_id)}.mp4")
    if os.path.exists(muted_out):
        return muted_out

    # Resolve a local input path (handles http(s) URLs and local paths).
    if src.startswith(("http://", "https://")):
        try:
            local_in = _download_to_tmp(src)
        except Exception as e:
            print(f"[WARN] failed to download {vid_id}: {type(e).__name__}: {e}")
            return src
    else:
        local_in = src

    # The ".mp4" suffix lets ffmpeg infer the container for the temp output.
    fd, tmp_out = tempfile.mkstemp(suffix=".mp4", dir=MUTED_DIR)
    os.close(fd)
    try:
        try:
            _muted_copy_fast(local_in, tmp_out)
        except Exception:
            # Stream copy failed; retry with a full re-encode.
            _muted_copy_reencode(local_in, tmp_out)
        os.replace(tmp_out, muted_out)
    except Exception as e2:
        # As a last resort, return the original source.
        print(f"[WARN] failed to produce muted copy for {vid_id}: {type(e2).__name__}: {e2}")
        return src
    finally:
        # Still present only when muting or the rename failed.
        if os.path.exists(tmp_out):
            try:
                os.remove(tmp_out)
            except OSError:
                pass
    return muted_out
| def _extract_action(v): | |
| if "action" in v and v["action"]: | |
| return v["action"] | |
| raw = v.get("id", "") | |
| return raw.split("__")[0].split(".")[0] | |
| # -------------------- HF CSV helpers -------------------- | |
def _read_csv_bytes():
    """Download RESULTS_FILE from the dataset repo and return its raw bytes.

    Returns None on any failure (missing file, missing repo, network or auth
    error). Callers cannot tell these apart, so the reason is logged.
    """
    try:
        p = hf_hub_download(
            repo_id=REPO_ID, filename=RESULTS_FILE, repo_type="dataset",
            token=HF_TOKEN, local_dir="/tmp", local_dir_use_symlinks=False
        )
        # Context manager so the handle is closed deterministically.
        with open(p, "rb") as fh:
            return fh.read()
    except Exception as e:
        print(f"[WARN] could not read {RESULTS_FILE} from {REPO_ID}: {type(e).__name__}: {e}")
        return None
| def _append(old_bytes, row): | |
| s = io.StringIO() | |
| w = csv.writer(s) | |
| if not old_bytes: | |
| w.writerow(["ts_iso", "participant_id", "video_id", "overall", "notes"]) | |
| else: | |
| s.write(old_bytes.decode("utf-8", errors="ignore")) | |
| w.writerow(row) | |
| return s.getvalue().encode("utf-8") | |
def _load_eval_counts():
    """Count existing ratings per video id from the results CSV on the Hub.

    Returns a dict mapping every video id in V to its number of recorded
    rows. All counts are 0 when the CSV is missing or empty. Rows for unknown
    video ids, and rows too short to hold the video-id column, are skipped.
    """
    counts = dict.fromkeys((_get_video_id(v) for v in V), 0)

    raw = _read_csv_bytes()
    if not raw:
        return counts

    rows = list(csv.reader(io.StringIO(raw.decode("utf-8", errors="ignore"))))
    if not rows:
        return counts

    first = rows[0]
    # The first row counts as a header only if it names a known column.
    has_header = bool(first) and ("video_id" in first or "overall" in first)
    records = rows[1:] if has_header else rows
    # Without a "video_id" header, assume the layout ts, pid, video_id, overall, notes.
    col = first.index("video_id") if "video_id" in first else 2

    for record in records:
        if len(record) <= col:
            continue
        vid = record[col]
        if vid in counts:
            counts[vid] += 1
    return counts
def push(participant_id, video_id, score, notes=""):
    """Append one rating row to RESULTS_FILE in the dataset repo.

    Args:
        participant_id: Free-text participant identifier (required).
        video_id: Identifier of the rated video (required).
        score: Rating; stored as float in the "overall" column.
        notes: Optional free-text note.

    Returns:
        A ``gr.update`` for the status component. On success its text contains
        "Saved for <video_id>."; on any failure it carries an error message.

    Changes from the earlier version:
    - RESULTS_REPO / HF_TOKEN are checked before any network access.
    - If the existing CSV cannot be downloaded but does exist on the Hub, the
      save is refused. It used to upload a header-only file plus this row,
      overwriting every earlier result.
    - The timestamp avoids the deprecated ``datetime.utcnow()`` and keeps the
      same naive-UTC ISO format.

    NOTE(review): read-modify-write is still not atomic; two participants
    saving at the same moment can overwrite each other's row.
    NOTE(review): the leading "โ" in the messages looks like a mis-decoded
    emoji; restore the intended glyph from the original file.
    """
    from datetime import timezone

    if not participant_id or not participant_id.strip():
        return gr.update(visible=True, value="โ Please enter your Participant ID before proceeding.")
    if not video_id or score is None:
        return gr.update(visible=True, value="โ Fill out all fields.")
    if not REPO_ID:
        return gr.update(visible=True, value="โ RESULTS_REPO is not set.")
    if not HF_TOKEN:
        return gr.update(visible=True, value="โ HF_TOKEN is missing. Set a write token for the dataset repo.")
    try:
        old = _read_csv_bytes()
        if old is None and api.file_exists(
            repo_id=REPO_ID, filename=RESULTS_FILE, repo_type="dataset", token=HF_TOKEN
        ):
            # The file is there but could not be read: do not replace it.
            raise RuntimeError(
                f"existing {RESULTS_FILE} could not be downloaded; refusing to overwrite it"
            )
        row = [
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            participant_id.strip(),
            video_id,       # the video id is stored, not the action label
            float(score),   # overall
            notes or ""
        ]
        newb = _append(old, row)
        api.upload_file(
            path_or_fileobj=io.BytesIO(newb),
            path_in_repo=RESULTS_FILE,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message="append"
        )
        return gr.update(visible=True, value=f"โ Saved for {video_id}.")
    except Exception as e:
        return gr.update(
            visible=True,
            value=f"โ Save failed: {type(e).__name__}: {e}\n"
                  f"- Check HF_TOKEN permission\n- Check REPO_ID\n- Create dataset repo if missing"
        )
| def _progress_html(done, total): | |
| pct = int(100 * done / max(1, total)) | |
| return f""" | |
| <div style="border:1px solid #ddd; height:20px; border-radius:6px; overflow:hidden; margin-top:6px;"> | |
| <div style="height:100%; width:{pct}%; background:#3b82f6;"></div> | |
| </div> | |
| <div style="font-size:12px; margin-top:4px;">{done} / {total}</div> | |
| """ | |
| # -------------------- Scheduling (least-first + anchor) -------------------- | |
def _build_order_least_first_with_anchor(total: int, anchor_idx: int, repeats: int, min_gap: int = 1):
    """Build one participant's presentation order as a list of indices into V.

    - Per-video rating counts are read from the results CSV.
    - The anchor video (``anchor_idx``) appears exactly ``repeats`` times, with
      at least ``min_gap`` other videos between any two occurrences.
    - The remaining ``total - repeats`` slots hold distinct non-anchor videos
      with the fewest ratings (ties broken randomly), in random order.

    Args:
        total: Length of the schedule.
        anchor_idx: Index into V of the repeated anchor video.
        repeats: Number of anchor occurrences (0 <= repeats <= total).
        min_gap: Minimum number of slots between two anchors.

    Raises:
        ValueError: invalid arguments, no videos, or too few distinct
            non-anchor videos to fill the schedule.
        RuntimeError: the anchors cannot be placed with the requested gap.

    Input validation raises instead of using ``assert``, which is stripped
    under ``python -O``.
    """
    n_videos = len(V)
    if n_videos < 1:
        raise ValueError("No videos available to schedule.")
    if not 0 <= repeats <= total:
        raise ValueError(f"repeats must be between 0 and total ({total}); got {repeats}.")
    if not 0 <= anchor_idx < n_videos:
        raise ValueError(f"anchor_idx {anchor_idx} is out of range for {n_videos} videos.")

    counts = _load_eval_counts()

    candidates = [i for i in range(n_videos) if i != anchor_idx]
    random.shuffle(candidates)  # random tie-break: the sort below is stable
    candidates.sort(key=lambda i: counts.get(_get_video_id(V[i]), 0))

    others_needed = total - repeats
    if len(candidates) < others_needed:
        raise ValueError("Not enough unique non-anchor videos to fill the schedule without duplication.")
    others = candidates[:others_needed]
    random.shuffle(others)

    # One anchor per equal-width segment; the last segment absorbs the remainder.
    segment = total // repeats if repeats > 0 else total
    anchor_positions = []

    def far_enough(pos):
        """True when *pos* keeps the required gap to every anchor placed so far."""
        return all(abs(pos - p) >= (min_gap + 1) for p in anchor_positions)

    for k in range(repeats):
        lo = k * segment
        hi = (k + 1) * segment if k < repeats - 1 else total
        start = random.randrange(lo, hi)

        # Search outward from the random start for a slot that respects the gap.
        found = None
        for d in range(max(1, segment)):
            for pos in (start + d, start - d):
                if 0 <= pos < total and far_enough(pos):
                    found = pos
                    break
            if found is not None:
                break
        if found is None:
            # Nothing near the start: take the first valid slot anywhere.
            found = next((pos for pos in range(total) if far_enough(pos)), None)
        if found is None:
            raise RuntimeError("Failed to place anchor without adjacency.")
        anchor_positions.append(found)

    seq = [None] * total
    for pos in anchor_positions:
        seq[pos] = anchor_idx
    filler = iter(others)
    seq = [slot if slot is not None else next(filler) for slot in seq]

    # Internal sanity checks on the result (not input validation).
    assert sum(1 for x in seq if x == anchor_idx) == repeats
    for i in range(1, total):
        assert not (seq[i] == anchor_idx and seq[i - 1] == anchor_idx), "Adjacent anchors found."
    return seq
# -------------------- Example videos (download to local cache) --------------------
# Repo-relative paths of the "real" and "bad" sample clip for each action class.
EXAMPLES = {
    "BodyWeightSquats": {
        "real": "examples/BodyWeightSquats_real.mp4",
        "bad": "examples/BodyWeightSquats_bad.mp4",
    },
    "WallPushUps": {
        "real": "examples/WallPushUps_real.mp4",
        "bad": "examples/WallPushUps_bad.mp4",
    },
}

# EX_CACHE[cls][kind] is the downloaded local path, or None when the clip
# could not be fetched (a warning is printed and startup continues).
EX_CACHE = {}
for cls, files in EXAMPLES.items():
    cached = dict.fromkeys(("real", "bad"))
    EX_CACHE[cls] = cached
    for kind, fname in files.items():
        try:
            local_path = hf_hub_download(
                repo_id=REPO_ID,
                filename=fname,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir="/tmp",
                local_dir_use_symlinks=False,
            )
        except Exception as e:
            print(f"[WARN] example missing: {cls} {kind} -> {fname}: {e}")
        else:
            cached[kind] = local_path
def _example_src(cls: str, kind: str):
    """Return a playable source for the cached example clip, or None if it is missing.

    The clip is wrapped in a videos.json-style dict so it goes through the same
    _src_for_gradio pipeline as the evaluation videos.
    """
    local_path = EX_CACHE[cls][kind]
    if local_path:
        return _src_for_gradio({"url": local_path, "id": f"example::{cls}::{kind}"})
    return None
# -------------------- CSS --------------------
# Stylesheet passed to gr.Blocks(css=...). It sets the theme's background,
# border and shadow variables to transparent, then removes backgrounds, borders
# and shadows from Gradio container classes. The "#eval" rules apply the same
# to the evaluation page (the group created with elem_id="eval").
# NOTE(review): the first line inside the string is a CSS comment whose text
# was mis-decoded; it is kept as found because it is part of the runtime string.
GLOBAL_CSS = """
/* ===== ๊ณตํต ๋ณ์ ํฌ๋ช ํ (v3/v4 ๋๋ค) ===== */
:root, .gradio-container {
--body-background-fill: transparent !important;
--background-fill-primary: transparent !important;
--background-fill-secondary: transparent !important;
--block-background-fill: transparent !important;
--block-border-color: transparent !important;
--panel-background-fill: transparent !important;
--panel-border-color: transparent !important;
--section-header-background-fill: transparent !important;
--shadow-drop: 0 0 0 rgba(0,0,0,0) !important;
--shadow-spread: 0 0 0 rgba(0,0,0,0) !important;
}
.gradio-container .bg-white,
.gradio-container .bg-gray-50,
.gradio-container .bg-gray-100,
.gradio-container .bg-slate-50,
.gradio-container .bg-neutral-50,
.gradio-container .bg-secondary,
.gradio-container .border,
.gradio-container .shadow,
.gradio-container .shadow-sm,
.gradio-container .shadow-md,
.gradio-container .ring-1,
.gradio-container .ring,
.gradio-container .gr-card,
.gradio-container .prose > *:where(hr) {
background: transparent !important;
box-shadow: none !important;
border-color: transparent !important;
}
.gradio-container .gr-panel,
.gradio-container .gr-group,
.gradio-container .gr-box,
.gradio-container .gr-row,
.gradio-container .gr-column,
.gradio-container .gr-accordion,
.gradio-container .gr-block,
.gradio-container .gr-form,
.gradio-container .gr-tabs,
.gradio-container .gr-tabitem,
.gradio-container .gr-section-header {
background: transparent !important;
box-shadow: none !important;
border: none !important;
}
.gradio-container hr,
.gradio-container .gr-divider,
.gradio-container .gr-accordion .label {
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
html, body, .gradio-container { background: transparent !important; }
#eval [class*="bg-"],
#eval [class*="border"],
#eval [class*="shadow"],
#eval .gr-panel, #eval .gr-group, #eval .gr-box, #eval .gr-row, #eval .gr-column,
#eval .gr-block, #eval .gr-form, #eval .gr-section-header, #eval .gr-accordion {
background: transparent !important;
border-color: transparent !important;
box-shadow: none !important;
}
#eval .gr-form, #eval .gr-panel { background: transparent !important; box-shadow:none !important; border:none !important; }
"""
# -------------------- UI --------------------
# Two-page app: an intro page with instructions and example clips, then the
# evaluation page where one video at a time is rated and saved.
#
# NOTE(review): the listing this was recovered from had lost its indentation,
# so the nesting of the layout containers is a reconstruction — confirm it
# against the deployed layout.
# NOTE(review): several labels start with mis-decoded emoji ("๐ฏ", "๐พ",
# "โ"); they are kept as found — restore the glyphs from the original file.
#
# Changes to save_and_next from the earlier version:
# - A failed save no longer advances to the next video or counts as progress;
#   the participant stays on the current video with the error shown.
# - Once the session is complete, further clicks do not append duplicate rows.
# - Branches that stay on the current video leave the slider untouched
#   instead of resetting it to 5.0.
with gr.Blocks(fill_height=True, css=GLOBAL_CSS) as demo:
    order_state = gr.State(value=[])    # scheduled indices into V for this session
    ptr_state = gr.State(value=0)       # position in order_state of the next video to load
    cur_video_id = gr.State(value="")   # id of the video currently on screen

    # ------------------ PAGE 1: Intro + Examples ------------------
    page_intro = gr.Group(visible=True)
    with page_intro:
        gr.Markdown("## ๐ฏ Action Consistency Human Evaluation")
        gr.Markdown(INSTRUCTION_MD)

        # Examples: BodyWeightSquats
        with gr.Group():
            gr.Markdown("### Examples: BodyWeightSquats")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("BodyWeightSquats", "real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("BodyWeightSquats", "bad"), height=240, autoplay=False)
            if not (EX_CACHE["BodyWeightSquats"]["real"] and EX_CACHE["BodyWeightSquats"]["bad"]):
                gr.Markdown("> โ ๏ธ Upload `examples/BodyWeightSquats_real.mp4` and `_bad.mp4` to show both samples.")

        # Examples: WallPushUps
        with gr.Group():
            gr.Markdown("### Examples: WallPushUps")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("WallPushUps", "real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("WallPushUps", "bad"), height=240, autoplay=False)
            if not (EX_CACHE["WallPushUps"]["real"] and EX_CACHE["WallPushUps"]["bad"]):
                gr.Markdown("> โ ๏ธ Upload `examples/WallPushUps_real.mp4` and `_bad.mp4` to show both samples.")

        understood = gr.Checkbox(label="I have read and understand the task.", value=False)
        start_btn = gr.Button("Yes, start", variant="secondary", interactive=False)

        def _toggle_start(checked: bool):
            """Enable and highlight the start button only while the box is ticked."""
            return gr.update(interactive=checked, variant=("primary" if checked else "secondary"))

        understood.change(_toggle_start, inputs=understood, outputs=start_btn)

    # ------------------ PAGE 2: Evaluation ------------------
    page_eval = gr.Group(visible=False, elem_id="eval")
    with page_eval:
        # Participant ID input
        with gr.Row():
            pid = gr.Textbox(label="Participant ID (required)", placeholder="e.g., Youngsun-2025/10/01")

        # Left: instructions + video + progress bar. Right: slider + Save & Next.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                gr.Markdown(INSTRUCTION_MD)
                video = gr.Video(label="Video", height=360)
                progress = gr.HTML(_progress_html(0, TOTAL_PER_PARTICIPANT))
            with gr.Column(scale=1):
                action_tb = gr.Textbox(label="Expected action", interactive=False)
                score = gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=5.0,
                                  label="Action Consistency (0.0 (Worst) - 10.0 (Best))")
                save_next = gr.Button("๐พ Save & Next โถ", variant="secondary", interactive=False)
                status = gr.Markdown(visible=False)
        done_state = gr.State(0)  # number of videos saved so far in this session

        def _toggle_by_pid(pid_text: str):
            """Enable Save & Next only while a non-blank participant ID is entered."""
            enabled = bool(pid_text and pid_text.strip())
            return gr.update(interactive=enabled, variant=("primary" if enabled else "secondary"))

        pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)

    # -------- Page switch & first load --------
    ANCHOR_IDX = 0       # the first entry of videos.json is the anchor video
    ANCHOR_REPEATS = 5   # the anchor is shown 5 times per session
    MIN_GAP = 1          # anchors are never adjacent

    def _start_and_load_first():
        """Build this session's schedule, switch to the eval page and load video 1."""
        order = _build_order_least_first_with_anchor(
            total=TOTAL_PER_PARTICIPANT,
            anchor_idx=ANCHOR_IDX,
            repeats=ANCHOR_REPEATS,
            min_gap=MIN_GAP
        )
        v0 = V[order[0]]
        return (
            gr.update(visible=False),            # page_intro off
            gr.update(visible=True),             # page_eval on
            _src_for_gradio(v0),                 # muted source
            _extract_action(v0),                 # expected action label
            5.0,                                 # score reset
            gr.update(visible=False, value=""),  # status hidden
            0,                                   # done count
            _progress_html(0, TOTAL_PER_PARTICIPANT),
            order,                               # order_state
            1,                                   # ptr_state: next index to load
            _get_video_id(v0)                    # cur_video_id
        )

    start_btn.click(
        _start_and_load_first,
        inputs=[],
        outputs=[page_intro, page_eval, video, action_tb, score, status, done_state,
                 progress, order_state, ptr_state, cur_video_id]
    )

    # -------- Save & Next --------
    def _push_succeeded(update) -> bool:
        """Return True when a push() status update reports a successful save.

        push() reports its outcome only through the status text, so success is
        recognised by the "Saved for" wording of its success message.
        """
        value = update.get("value", "") if isinstance(update, dict) else getattr(update, "value", "")
        return "Saved for" in str(value)

    def save_and_next(participant_id, current_video_id, score_val, done_cnt, order, ptr):
        """Save the current rating; on success load the next video or finish.

        Returns values for: status, video, action_tb, done_state, progress,
        score, ptr_state, cur_video_id.
        """
        done_cnt = int(done_cnt)

        def stay(status_update):
            """Keep the current video, score and counters; only change the status."""
            return (
                status_update,
                gr.update(), gr.update(),  # video, action_tb unchanged
                done_cnt,
                _progress_html(done_cnt, TOTAL_PER_PARTICIPANT),
                gr.update(),               # keep the participant's slider value
                ptr,
                current_video_id
            )

        if not participant_id or not participant_id.strip():
            return stay(gr.update(visible=True, value="โ Please enter your Participant ID."))
        if done_cnt >= TOTAL_PER_PARTICIPANT:
            # Session already complete: do not append another row.
            return stay(gr.update(visible=True, value="All videos have been rated. Thank you!"))

        status_msg = push(participant_id, current_video_id, score_val, "")
        if not _push_succeeded(status_msg):
            # Nothing was stored: keep this video so the rating can be retried.
            return stay(status_msg)

        new_done = done_cnt + 1
        # Finished: target count reached or schedule exhausted.
        if new_done >= TOTAL_PER_PARTICIPANT or ptr >= len(order):
            return (
                status_msg,
                None,                      # clear the video
                "",                        # clear the action label
                TOTAL_PER_PARTICIPANT,     # final done_state
                _progress_html(TOTAL_PER_PARTICIPANT, TOTAL_PER_PARTICIPANT),
                5.0,                       # score reset
                ptr,
                current_video_id
            )

        # Load the next video in the schedule.
        v = V[order[ptr]]
        return (
            status_msg,
            _src_for_gradio(v),            # muted source
            _extract_action(v),
            new_done,
            _progress_html(new_done, TOTAL_PER_PARTICIPANT),
            5.0,
            ptr + 1,
            _get_video_id(v)
        )

    save_next.click(
        save_and_next,
        # cur_video_id is passed as the second argument ...
        inputs=[pid, cur_video_id, score, done_state, order_state, ptr_state],
        # ... and returned last so the state follows the video on screen.
        outputs=[status, video, action_tb, done_state, progress, score, ptr_state, cur_video_id]
    )
if __name__ == "__main__":
    # Warn once at startup when muting was requested but cannot be done.
    mute_unavailable = MUTE_AUDIO and not HAS_FFMPEG
    if mute_unavailable:
        print("[WARN] MUTE_AUDIO=True but ffmpeg not found. Videos will be served with original audio.")
    demo.launch()