# app.py — Human Eval UI (audio-stripped delivery)
#
# Gradio app for human evaluation of AI-generated action videos.
# Participants rate action consistency on a 0–10 slider; results are appended
# to a CSV file stored in a Hugging Face dataset repo. Videos are served with
# their audio stripped (via ffmpeg) when possible.
import os
import io
import csv
import json
import random
import subprocess
import hashlib
import urllib.request
import shutil
from datetime import datetime, timezone

import gradio as gr
from huggingface_hub import HfApi, hf_hub_download

# -------------------- Config --------------------
REPO_ID = os.getenv("RESULTS_REPO", "sgtlim/videoeval_results")  # must match the uploaded repo
HF_TOKEN = os.getenv("HF_TOKEN")
RESULTS_FILE = "results.csv"
TOTAL_PER_PARTICIPANT = 30  # target number of evaluations per session

# 🔇 Audio stripping (runtime mute) config
MUTE_AUDIO = True
HAS_FFMPEG = shutil.which("ffmpeg") is not None
RAW_DIR = "/tmp/raw_videos"
MUTED_DIR = "/tmp/muted_videos"
os.makedirs(RAW_DIR, exist_ok=True)
os.makedirs(MUTED_DIR, exist_ok=True)

# -------------------- Data --------------------
# videos.json entry example:
#   {"url": "...mp4", "id": "BodyWeightSquats__XXXX.mp4", "action": "BodyWeightSquats"}
with open("videos.json", "r", encoding="utf-8") as f:
    V = json.load(f)

api = HfApi()

# Professor's instructions (verbatim, including bold markup)
INSTRUCTION_MD = """
**Task:** You will watch a series of **AI-generated videos**. For each video, your job is to rate how well the person’s action in the AI-generated video matches the action specified as "**expected action**".

Some things to keep in mind:

- The generated video should **capture** the expected action **throughout the video**.
- Try to **focus only** on the expected action and do **not** judge **video quality**, **attractiveness**, **background**, **camera motion**, or **objects**.
- You will be **paid** once **all the videos are viewed and rated**.
"""


# -------------------- Audio-strip helpers --------------------
def _safe_name(s: str) -> str:
    """Return a filesystem-safe, stable name for *s* (SHA-1 hex digest)."""
    return hashlib.sha1(s.encode("utf-8", errors="ignore")).hexdigest()


def _get_video_id(v: dict) -> str:
    """Return the video's id, falling back to the basename of its url."""
    if "id" in v and v["id"]:
        return v["id"]
    return os.path.basename(v.get("url", ""))


def _download_to_tmp(url: str) -> str:
    """Download remote video to RAW_DIR (cached by url hash); return local path."""
    basename = _safe_name(url) + ".mp4"
    dst = os.path.join(RAW_DIR, basename)
    if not os.path.exists(dst):
        urllib.request.urlretrieve(url, dst)
    return dst


def _muted_copy_fast(local_in: str, out_path: str):
    """Fast path: copy video stream, drop audio (-an). Raises on ffmpeg failure."""
    cmd = ["ffmpeg", "-y", "-i", local_in, "-c:v", "copy", "-an", out_path]
    subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)


def _muted_copy_reencode(local_in: str, out_path: str):
    """Compatibility fallback: re-encode video (H.264), drop audio."""
    cmd = [
        "ffmpeg", "-y", "-i", local_in,
        "-vf", "format=yuv420p", "-movflags", "+faststart",
        "-c:v", "libx264", "-crf", "18", "-preset", "veryfast",
        "-an", out_path,
    ]
    subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)


def _src_for_gradio(v: dict) -> str:
    """
    Return a local path to a muted copy of the video if MUTE_AUDIO & ffmpeg available;
    otherwise return original url/path.
    """
    src = v.get("url", "")
    vid_id = _get_video_id(v) or _safe_name(src)
    if not MUTE_AUDIO or not HAS_FFMPEG or not src:
        return src  # fall back to original (no mute possible)

    # Resolve local input path (handles http(s) and local paths)
    if src.startswith("http://") or src.startswith("https://"):
        local_in = _download_to_tmp(src)
    else:
        local_in = src

    # Build a stable muted cache filename by video_id
    muted_out = os.path.join(MUTED_DIR, f"{_safe_name(vid_id)}.mp4")
    if os.path.exists(muted_out):
        return muted_out
    try:
        _muted_copy_fast(local_in, muted_out)
    except Exception:
        try:
            _muted_copy_reencode(local_in, muted_out)
        except Exception as e2:
            # As a last resort, return original source (with audio)
            print(f"[WARN] failed to produce muted copy for {vid_id}: {type(e2).__name__}: {e2}")
            return src
    return muted_out


def _extract_action(v):
    """Return the expected action label, parsing it from the id when absent."""
    if "action" in v and v["action"]:
        return v["action"]
    raw = v.get("id", "")
    # id convention: "<Action>__<suffix>.mp4" -> "<Action>"
    return raw.split("__")[0].split(".")[0]


# -------------------- HF CSV helpers --------------------
def _read_csv_bytes():
    """Download results.csv from the dataset repo; return raw bytes or None if unavailable."""
    try:
        p = hf_hub_download(
            repo_id=REPO_ID,
            filename=RESULTS_FILE,
            repo_type="dataset",
            token=HF_TOKEN,
            local_dir="/tmp",
            local_dir_use_symlinks=False,
        )
        # FIX: close the file handle deterministically (was a bare open().read()).
        with open(p, "rb") as fh:
            return fh.read()
    except Exception:
        # Missing file / repo / network — treated as "no results yet".
        return None


def _append(old_bytes, row):
    """Append *row* to the existing CSV bytes (adding a header when starting fresh)."""
    s = io.StringIO()
    w = csv.writer(s)
    if not old_bytes:
        w.writerow(["ts_iso", "participant_id", "video_id", "overall", "notes"])
    else:
        text = old_bytes.decode("utf-8", errors="ignore")
        s.write(text)
        # FIX: guard against a missing trailing newline, which would fuse the
        # new row onto the last existing row.
        if text and not text.endswith("\n"):
            s.write("\n")
    w.writerow(row)
    return s.getvalue().encode("utf-8")


def _load_eval_counts():
    """
    Read results.csv from the Hugging Face dataset and return a dict mapping
    video_id -> number of evaluations so far. Every known video starts at 0.
    """
    counts = {_get_video_id(v): 0 for v in V}
    b = _read_csv_bytes()
    if not b:
        return counts
    s = io.StringIO(b.decode("utf-8", errors="ignore"))
    r = csv.reader(s)
    rows = list(r)
    if not rows:
        return counts
    header = rows[0]
    # Tolerate header-less files: only skip row 0 when it looks like a header.
    body = rows[1:] if header and ("video_id" in header or "overall" in header) else rows
    vid_col = header.index("video_id") if header and "video_id" in header else None
    for row in body:
        try:
            # Fallback column order: ts, pid, video_id, overall, notes
            vid = row[vid_col] if vid_col is not None else row[2]
            if vid in counts:
                counts[vid] += 1
        except Exception:
            # Skip malformed rows rather than aborting the count.
            continue
    return counts


def push(participant_id, video_id, score, notes=""):
    """Append one rating row to the dataset CSV and upload it; return a status gr.update."""
    if not participant_id or not participant_id.strip():
        return gr.update(visible=True, value="❗ Please enter your Participant ID before proceeding.")
    if not video_id or score is None:
        return gr.update(visible=True, value="❗ Fill out all fields.")
    try:
        old = _read_csv_bytes()
        row = [
            # FIX: datetime.utcnow() is deprecated; use an aware UTC timestamp.
            datetime.now(timezone.utc).isoformat(),
            participant_id.strip(),
            video_id,  # store video_id (not the action label)
            float(score),  # overall
            notes or "",
        ]
        newb = _append(old, row)
        if not REPO_ID:
            return gr.update(visible=True, value="❗ RESULTS_REPO is not set.")
        if not HF_TOKEN:
            return gr.update(visible=True, value="❗ HF_TOKEN is missing. Set a write token for the dataset repo.")
        # NOTE(review): read-modify-write with no locking — concurrent raters
        # can race and drop rows; acceptable for low traffic, confirm if scaled.
        api.upload_file(
            path_or_fileobj=io.BytesIO(newb),
            path_in_repo=RESULTS_FILE,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message="append",
        )
        return gr.update(visible=True, value=f"✅ Saved for {video_id}.")
    except Exception as e:
        return gr.update(
            visible=True,
            value=f"❌ Save failed: {type(e).__name__}: {e}\n"
                  f"- Check HF_TOKEN permission\n- Check REPO_ID\n- Create dataset repo if missing",
        )


def _progress_html(done, total):
    """Render a small HTML progress bar showing done/total."""
    pct = int(100 * done / max(1, total))
    # NOTE(review): the original inline markup was lost in extraction; this is a
    # minimal reconstruction showing "done / total" with a percentage bar.
    return f"""
<div style="width:100%;">
  <div style="font-size:14px;margin-bottom:4px;">{done} / {total}</div>
  <div style="background:#e5e7eb;border-radius:6px;height:10px;overflow:hidden;">
    <div style="width:{pct}%;background:#3b82f6;height:10px;"></div>
  </div>
</div>
"""


# -------------------- Scheduling (least-first + anchor) --------------------
def _build_order_least_first_with_anchor(total: int, anchor_idx: int, repeats: int, min_gap: int = 1):
    """
    Build a per-session viewing order of length *total*:

    - Reads results.csv to compute per-video_id evaluation counts.
    - Includes the anchor video *repeats* times, never adjacent
      (at least ``min_gap`` non-anchor items between anchor occurrences).
    - Fills the remaining slots with the least-evaluated videos, no duplicates.

    Raises ValueError when there are not enough unique non-anchor videos,
    and RuntimeError when anchors cannot be placed without adjacency.
    """
    assert repeats <= total
    N = len(V)
    assert N >= 1

    def vid_of(i):
        return _get_video_id(V[i])

    counts = _load_eval_counts()
    candidates = [i for i in range(N) if i != anchor_idx]
    random.shuffle(candidates)  # randomize ties before the stable sort
    candidates.sort(key=lambda i: counts.get(vid_of(i), 0))
    others_needed = total - repeats
    if len(candidates) < others_needed:
        raise ValueError("Not enough unique non-anchor videos to fill the schedule without duplication.")
    others = candidates[:others_needed]
    random.shuffle(others)

    seq = [None] * total
    # Place one anchor per segment of the sequence, then nudge to satisfy the gap.
    segment = total // repeats if repeats > 0 else total
    anchor_positions = []
    for k in range(repeats):
        lo = k * segment
        hi = (k + 1) * segment if k < repeats - 1 else total
        cand = random.randrange(lo, hi)

        def ok(pos):
            return all(abs(pos - p) >= (min_gap + 1) for p in anchor_positions)

        found = None
        # Search outward from the candidate position for a legal slot.
        for d in range(0, max(1, segment)):
            for sgn in (+1, -1):
                pos = cand + sgn * d
                if 0 <= pos < total and ok(pos):
                    found = pos
                    break
            if found is not None:
                break
        if found is None:
            # Last resort: linear scan over all positions.
            for pos in range(total):
                if ok(pos):
                    found = pos
                    break
        if found is None:
            raise RuntimeError("Failed to place anchor without adjacency.")
        anchor_positions.append(found)

    for pos in anchor_positions:
        seq[pos] = anchor_idx
    j = 0
    for i in range(total):
        if seq[i] is None:
            seq[i] = others[j]
            j += 1

    # Sanity checks: exact anchor count and no adjacent anchors.
    assert sum(1 for x in seq if x == anchor_idx) == repeats
    for i in range(1, total):
        assert not (seq[i] == anchor_idx and seq[i - 1] == anchor_idx), "Adjacent anchors found."
    return seq


# -------------------- Example videos (download to local cache) --------------------
EXAMPLES = {
    "BodyWeightSquats": {
        "real": "examples/BodyWeightSquats_real.mp4",
        "bad": "examples/BodyWeightSquats_bad.mp4",
    },
    "WallPushUps": {
        "real": "examples/WallPushUps_real.mp4",
        "bad": "examples/WallPushUps_bad.mp4",
    },
}
EX_CACHE = {}
for cls, files in EXAMPLES.items():
    EX_CACHE[cls] = {"real": None, "bad": None}
    for kind, fname in files.items():
        try:
            EX_CACHE[cls][kind] = hf_hub_download(
                repo_id=REPO_ID,
                filename=fname,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir="/tmp",
                local_dir_use_symlinks=False,
            )
        except Exception as e:
            # Examples are optional; the UI shows an upload hint when missing.
            print(f"[WARN] example missing: {cls} {kind} -> {fname}: {e}")


def _example_src(cls: str, kind: str):
    """Return a (muted) playable source for a cached example video, or None."""
    p = EX_CACHE[cls][kind]
    if not p:
        return None
    # Wrap as a dict so the same pipeline (_src_for_gradio) applies.
    return _src_for_gradio({"url": p, "id": f"example::{cls}::{kind}"})


# -------------------- CSS --------------------
GLOBAL_CSS = """
/* ===== Make shared theme variables transparent (works for Gradio v3/v4) ===== */
:root, .gradio-container {
  --body-background-fill: transparent !important;
  --background-fill-primary: transparent !important;
  --background-fill-secondary: transparent !important;
  --block-background-fill: transparent !important;
  --block-border-color: transparent !important;
  --panel-background-fill: transparent !important;
  --panel-border-color: transparent !important;
  --section-header-background-fill: transparent !important;
  --shadow-drop: 0 0 0 rgba(0,0,0,0) !important;
  --shadow-spread: 0 0 0 rgba(0,0,0,0) !important;
}
.gradio-container .bg-white, .gradio-container .bg-gray-50, .gradio-container .bg-gray-100,
.gradio-container .bg-slate-50, .gradio-container .bg-neutral-50, .gradio-container .bg-secondary,
.gradio-container .border, .gradio-container .shadow, .gradio-container .shadow-sm,
.gradio-container .shadow-md, .gradio-container .ring-1, .gradio-container .ring,
.gradio-container .gr-card, .gradio-container .prose > *:where(hr) {
  background: transparent !important;
  box-shadow: none !important;
  border-color: transparent !important;
}
.gradio-container .gr-panel, .gradio-container .gr-group, .gradio-container .gr-box,
.gradio-container .gr-row, .gradio-container .gr-column, .gradio-container .gr-accordion,
.gradio-container .gr-block, .gradio-container .gr-form, .gradio-container .gr-tabs,
.gradio-container .gr-tabitem, .gradio-container .gr-section-header {
  background: transparent !important;
  box-shadow: none !important;
  border: none !important;
}
.gradio-container hr, .gradio-container .gr-divider, .gradio-container .gr-accordion .label {
  background: transparent !important;
  border: none !important;
  box-shadow: none !important;
}
html, body, .gradio-container { background: transparent !important; }
#eval [class*="bg-"], #eval [class*="border"], #eval [class*="shadow"],
#eval .gr-panel, #eval .gr-group, #eval .gr-box, #eval .gr-row, #eval .gr-column,
#eval .gr-block, #eval .gr-form, #eval .gr-section-header, #eval .gr-accordion {
  background: transparent !important;
  border-color: transparent !important;
  box-shadow: none !important;
}
#eval .gr-form, #eval .gr-panel { background: transparent !important; box-shadow:none !important; border:none !important; }
"""

# -------------------- UI --------------------
with gr.Blocks(fill_height=True, css=GLOBAL_CSS) as demo:
    # Per-session state: viewing order, pointer into it, and the current video id.
    order_state = gr.State(value=[])
    ptr_state = gr.State(value=0)
    cur_video_id = gr.State(value="")

    # ------------------ PAGE 1: Intro + Examples ------------------
    page_intro = gr.Group(visible=True)
    with page_intro:
        gr.Markdown("## 🎯 Action Consistency Human Evaluation")
        gr.Markdown(INSTRUCTION_MD)

        # Examples: BodyWeightSquats
        with gr.Group():
            gr.Markdown("### Examples: BodyWeightSquats")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("BodyWeightSquats", "real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("BodyWeightSquats", "bad"), height=240, autoplay=False)
            if not (EX_CACHE["BodyWeightSquats"]["real"] and EX_CACHE["BodyWeightSquats"]["bad"]):
                gr.Markdown("> ⚠️ Upload `examples/BodyWeightSquats_real.mp4` and `_bad.mp4` to show both samples.")

        # Examples: WallPushUps
        with gr.Group():
            gr.Markdown("### Examples: WallPushUps")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("WallPushUps", "real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("WallPushUps", "bad"), height=240, autoplay=False)
            if not (EX_CACHE["WallPushUps"]["real"] and EX_CACHE["WallPushUps"]["bad"]):
                gr.Markdown("> ⚠️ Upload `examples/WallPushUps_real.mp4` and `_bad.mp4` to show both samples.")

        understood = gr.Checkbox(label="I have read and understand the task.", value=False)
        start_btn = gr.Button("Yes, start", variant="secondary", interactive=False)

    def _toggle_start(checked: bool):
        # Enable the start button only after the consent checkbox is ticked.
        return gr.update(interactive=checked, variant=("primary" if checked else "secondary"))

    understood.change(_toggle_start, inputs=understood, outputs=start_btn)

    # ------------------ PAGE 2: Evaluation ------------------
    page_eval = gr.Group(visible=False, elem_id="eval")
    with page_eval:
        # Participant ID input
        with gr.Row():
            pid = gr.Textbox(label="Participant ID (required)", placeholder="e.g., Youngsun-2025/10/01")

        # Left: instructions + video + progress bar. Right: slider + Save & Next.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                gr.Markdown(INSTRUCTION_MD)  # professor's wording, verbatim
                video = gr.Video(label="Video", height=360)
                progress = gr.HTML(_progress_html(0, TOTAL_PER_PARTICIPANT))
            with gr.Column(scale=1):
                action_tb = gr.Textbox(label="Expected action", interactive=False)
                score = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.1, value=5.0,
                    label="Action Consistency (0.0 (Worst) - 10.0 (Best))",
                )
                save_next = gr.Button("💾 Save & Next ▶", variant="secondary", interactive=False)
        status = gr.Markdown(visible=False)
        done_state = gr.State(0)

    def _toggle_by_pid(pid_text: str):
        # Enable Save & Next only when a non-blank participant ID is entered.
        enabled = bool(pid_text and pid_text.strip())
        return gr.update(interactive=enabled, variant=("primary" if enabled else "secondary"))

    pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)

    # -------- Page switch & first load --------
    ANCHOR_IDX = 0       # the very first video in videos.json
    ANCHOR_REPEATS = 5   # anchor shown 5 times
    MIN_GAP = 1          # no adjacent anchor occurrences

    def _start_and_load_first():
        """Build the session order, switch to the eval page, and load the first video."""
        total = TOTAL_PER_PARTICIPANT
        order = _build_order_least_first_with_anchor(
            total=total, anchor_idx=ANCHOR_IDX, repeats=ANCHOR_REPEATS, min_gap=MIN_GAP
        )
        first_idx = order[0]
        v0 = V[first_idx]
        return (
            gr.update(visible=False),               # page_intro off
            gr.update(visible=True),                # page_eval on
            _src_for_gradio(v0),                    # 🔇 muted source
            _extract_action(v0),                    # expected action label
            5.0,                                    # score reset
            gr.update(visible=False, value=""),     # status hide
            0,                                      # done count
            _progress_html(0, TOTAL_PER_PARTICIPANT),
            order,                                  # order_state
            1,                                      # ptr_state
            _get_video_id(v0),                      # cur_video_id
        )

    start_btn.click(
        _start_and_load_first,
        inputs=[],
        outputs=[page_intro, page_eval, video, action_tb, score, status,
                 done_state, progress, order_state, ptr_state, cur_video_id],
    )

    # -------- Save & Next --------
    def save_and_next(participant_id, current_video_id, score_val, done_cnt, order, ptr):
        """Persist the current rating, then advance to the next scheduled video."""
        if not participant_id or not participant_id.strip():
            # No PID: keep the current screen unchanged.
            return (
                gr.update(visible=True, value="❗ Please enter your Participant ID."),
                gr.update(), gr.update(),  # video, action_tb unchanged
                done_cnt,
                _progress_html(done_cnt, TOTAL_PER_PARTICIPANT),
                5.0,
                ptr,
                current_video_id,
            )

        status_msg = push(participant_id, current_video_id, score_val, "")
        new_done = int(done_cnt) + 1

        # Finish: target count reached or schedule exhausted.
        if new_done >= TOTAL_PER_PARTICIPANT or ptr >= len(order):
            return (
                status_msg,                 # status
                None,                       # clear video
                "",                         # clear action_tb
                TOTAL_PER_PARTICIPANT,      # final done_state
                _progress_html(TOTAL_PER_PARTICIPANT, TOTAL_PER_PARTICIPANT),
                5.0,                        # score reset
                ptr,
                current_video_id,
            )

        # Load the next video.
        next_idx = order[ptr]
        v = V[next_idx]
        next_vid = _get_video_id(v)
        return (
            status_msg,
            _src_for_gradio(v),  # 🔇 muted source
            _extract_action(v),
            new_done,
            _progress_html(new_done, TOTAL_PER_PARTICIPANT),
            5.0,
            ptr + 1,
            next_vid,
        )

    save_next.click(
        save_and_next,
        # cur_video_id is passed as the second argument…
        inputs=[pid, cur_video_id, score, done_state, order_state, ptr_state],
        # …and received back as the last output (state refresh).
        outputs=[status, video, action_tb, done_state, progress, score, ptr_state, cur_video_id],
    )

if __name__ == "__main__":
    if MUTE_AUDIO and not HAS_FFMPEG:
        print("[WARN] MUTE_AUDIO=True but ffmpeg not found. Videos will be served with original audio.")
    demo.launch()