# videoeval_humaneval / app_older.py
# Author: Youngsun Lim
# Commit d521b9d: "no sound videos"
# app.py โ€” Human Eval UI (audio-stripped delivery)
import os, io, csv, json, random, subprocess, hashlib, urllib.request, shutil
from datetime import datetime
import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
# -------------------- Config --------------------
REPO_ID = os.getenv("RESULTS_REPO", "sgtlim/videoeval_results")  # must match the repo the results were uploaded to
HF_TOKEN = os.getenv("HF_TOKEN")
RESULTS_FILE = "results.csv"
TOTAL_PER_PARTICIPANT = 30  # target number of ratings per participant session
# Audio stripping (runtime mute) config
MUTE_AUDIO = True
HAS_FFMPEG = shutil.which("ffmpeg") is not None
RAW_DIR = "/tmp/raw_videos"      # cache of downloaded original videos
MUTED_DIR = "/tmp/muted_videos"  # cache of audio-stripped copies
os.makedirs(RAW_DIR, exist_ok=True)
os.makedirs(MUTED_DIR, exist_ok=True)
# -------------------- Data --------------------
# videos.json entry example: {"url": "...mp4", "id": "BodyWeightSquats__XXXX.mp4", "action": "BodyWeightSquats"}
with open("videos.json", "r", encoding="utf-8") as f:
    V = json.load(f)
api = HfApi()
# Task instructions (wording kept verbatim, bold emphasis included)
INSTRUCTION_MD = """
**Task:** You will watch a series of **AI-generated videos**. For each video, your job is to rate how well the personโ€™s action in the AI-generated video matches the action specified as "**expected action**". Some things to keep in mind:
- The generated video should **capture** the expected action **throughout the video**.
- Try to **focus only** on the expected action and do **not** judge **video quality**, **attractiveness**, **background**, **camera motion**, or **objects**.
- You will be **paid** once **all the videos are viewed and rated**.
"""
# -------------------- Audio-strip helpers --------------------
def _safe_name(s: str) -> str:
return hashlib.sha1(s.encode("utf-8", errors="ignore")).hexdigest()
def _get_video_id(v: dict) -> str:
if "id" in v and v["id"]:
return v["id"]
return os.path.basename(v.get("url", ""))
def _download_to_tmp(url: str) -> str:
    """Fetch *url* into RAW_DIR (cached by URL hash) and return the local path."""
    dst = os.path.join(RAW_DIR, _safe_name(url) + ".mp4")
    if os.path.exists(dst):
        # Already downloaded in this session; reuse the cached file.
        return dst
    urllib.request.urlretrieve(url, dst)
    return dst
def _muted_copy_fast(local_in: str, out_path: str):
    """Fast path: stream-copy the video track and drop the audio track (-an)."""
    subprocess.run(
        ["ffmpeg", "-y", "-i", local_in, "-c:v", "copy", "-an", out_path],
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def _muted_copy_reencode(local_in: str, out_path: str):
    """Compatibility fallback: re-encode the video as H.264 and drop audio."""
    args = [
        "ffmpeg", "-y", "-i", local_in,
        # yuv420p + faststart for broad browser compatibility
        "-vf", "format=yuv420p", "-movflags", "+faststart",
        "-c:v", "libx264", "-crf", "18", "-preset", "veryfast",
        "-an", out_path,
    ]
    subprocess.run(args, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def _src_for_gradio(v: dict) -> str:
    """
    Return a local path to an audio-stripped copy of the video when MUTE_AUDIO
    is set and ffmpeg is available; otherwise return the original url/path.

    Muted copies are cached in MUTED_DIR keyed by video id. The cache is
    consulted BEFORE resolving/downloading the raw input, so a cache hit never
    re-downloads the remote file (the original code downloaded first).
    """
    src = v.get("url", "")
    vid_id = _get_video_id(v) or _safe_name(src)
    if not MUTE_AUDIO or not HAS_FFMPEG or not src:
        return src  # muting not possible; fall back to the original source

    # Stable muted cache filename derived from the video id.
    muted_out = os.path.join(MUTED_DIR, f"{_safe_name(vid_id)}.mp4")
    if os.path.exists(muted_out):
        return muted_out

    # Resolve a local input path (handles both http(s) URLs and local files).
    if src.startswith(("http://", "https://")):
        local_in = _download_to_tmp(src)
    else:
        local_in = src

    try:
        _muted_copy_fast(local_in, muted_out)
    except Exception:
        try:
            _muted_copy_reencode(local_in, muted_out)
        except Exception as e2:
            # Last resort: serve the original (audio intact) rather than fail.
            print(f"[WARN] failed to produce muted copy for {vid_id}: {type(e2).__name__}: {e2}")
            return src
    return muted_out
def _extract_action(v):
if "action" in v and v["action"]:
return v["action"]
raw = v.get("id", "")
return raw.split("__")[0].split(".")[0]
# -------------------- HF CSV helpers --------------------
def _read_csv_bytes():
    """
    Download results.csv from the HF dataset repo and return its raw bytes,
    or None when the file/repo is unavailable (treated as "no results yet").
    """
    try:
        p = hf_hub_download(
            repo_id=REPO_ID, filename=RESULTS_FILE, repo_type="dataset",
            token=HF_TOKEN, local_dir="/tmp", local_dir_use_symlinks=False
        )
        # Use a context manager so the file handle is closed deterministically
        # (the original leaked the handle until garbage collection).
        with open(p, "rb") as fh:
            return fh.read()
    except Exception:
        # Best-effort read: any failure is reported as "no data".
        return None
def _append(old_bytes, row):
s = io.StringIO()
w = csv.writer(s)
if not old_bytes:
w.writerow(["ts_iso", "participant_id", "video_id", "overall", "notes"])
else:
s.write(old_bytes.decode("utf-8", errors="ignore"))
w.writerow(row)
return s.getvalue().encode("utf-8")
def _load_eval_counts():
    """
    Read results.csv from the Hugging Face dataset and return a dict mapping
    video_id -> number of evaluations recorded so far (0 for unseen videos).
    """
    counts = {_get_video_id(v): 0 for v in V}
    b = _read_csv_bytes()
    if not b:
        # No results file yet: everything starts at zero.
        return counts
    s = io.StringIO(b.decode("utf-8", errors="ignore"))
    r = csv.reader(s)
    rows = list(r)
    if not rows:
        return counts
    header = rows[0]
    # Tolerate files written with or without a header row.
    body = rows[1:] if header and ("video_id" in header or "overall" in header) else rows
    vid_col = header.index("video_id") if header and "video_id" in header else None
    for row in body:
        try:
            # Positional fallback assumes columns: ts, pid, video_id, overall, notes.
            vid = row[vid_col] if vid_col is not None else row[2]
            if vid in counts:
                counts[vid] += 1
        except Exception:
            # Skip malformed rows rather than fail the whole count.
            continue
    return counts
def push(participant_id, video_id, score, notes=""):
    """
    Append one rating row to results.csv in the HF dataset repo.

    Returns a gr.update for the status Markdown describing success or the
    reason for failure; never raises (errors are reported in the UI).
    """
    if not participant_id or not participant_id.strip():
        return gr.update(visible=True, value="โ— Please enter your Participant ID before proceeding.")
    if not video_id or score is None:
        return gr.update(visible=True, value="โ— Fill out all fields.")
    try:
        old = _read_csv_bytes()
        # NOTE(review): datetime.utcnow() is naive (and deprecated in 3.12+);
        # presumably UTC timestamps are intended — confirm before changing.
        row = [
            datetime.utcnow().isoformat(),
            participant_id.strip(),
            video_id,       # store the video_id (not the action label)
            float(score),   # overall rating
            notes or ""
        ]
        newb = _append(old, row)
        if not REPO_ID:
            return gr.update(visible=True, value="โ— RESULTS_REPO is not set.")
        if not HF_TOKEN:
            return gr.update(visible=True, value="โ— HF_TOKEN is missing. Set a write token for the dataset repo.")
        # Read-modify-write of the whole CSV; one commit per saved rating.
        api.upload_file(
            path_or_fileobj=io.BytesIO(newb),
            path_in_repo=RESULTS_FILE,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message="append"
        )
        return gr.update(visible=True, value=f"โœ… Saved for {video_id}.")
    except Exception as e:
        return gr.update(
            visible=True,
            value=f"โŒ Save failed: {type(e).__name__}: {e}\n"
                  f"- Check HF_TOKEN permission\n- Check REPO_ID\n- Create dataset repo if missing"
        )
def _progress_html(done, total):
pct = int(100 * done / max(1, total))
return f"""
<div style="border:1px solid #ddd; height:20px; border-radius:6px; overflow:hidden; margin-top:6px;">
<div style="height:100%; width:{pct}%; background:#3b82f6;"></div>
</div>
<div style="font-size:12px; margin-top:4px;">{done} / {total}</div>
"""
# -------------------- Scheduling (least-first + anchor) --------------------
def _build_order_least_first_with_anchor(total:int, anchor_idx:int, repeats:int, min_gap:int=1):
    """
    Build a per-participant presentation order (list of indices into V):
    - read results.csv and count prior evaluations per video_id
    - include the anchor video *repeats* times, never in adjacent slots
    - fill the remaining slots, without duplicates, with the least-evaluated videos
    """
    assert repeats <= total
    N = len(V)
    assert N >= 1
    def vid_of(i): return _get_video_id(V[i])
    counts = _load_eval_counts()
    candidates = [i for i in range(N) if i != anchor_idx]
    random.shuffle(candidates)  # randomize ties before the stable sort below
    candidates.sort(key=lambda i: counts.get(vid_of(i), 0))
    others_needed = total - repeats
    if len(candidates) < others_needed:
        raise ValueError("Not enough unique non-anchor videos to fill the schedule without duplication.")
    others = candidates[:others_needed]
    random.shuffle(others)
    seq = [None] * total
    # Place one anchor inside each of *repeats* roughly-equal segments.
    segment = total // repeats if repeats > 0 else total
    anchor_positions = []
    for k in range(repeats):
        lo = k * segment
        hi = (k + 1) * segment if k < repeats - 1 else total
        cand = random.randrange(lo, hi)
        def ok(pos):
            # Require at least min_gap non-anchor slots between anchors.
            return all(abs(pos - p) >= (min_gap + 1) for p in anchor_positions)
        found = None
        # Search outward from the random candidate for a legal position.
        for d in range(0, max(1, segment)):
            for sgn in (+1, -1):
                pos = cand + sgn * d
                if 0 <= pos < total and ok(pos):
                    found = pos
                    break
            if found is not None:
                break
        if found is None:
            # Fallback: linear scan over every slot.
            for pos in range(total):
                if ok(pos):
                    found = pos
                    break
        if found is None:
            raise RuntimeError("Failed to place anchor without adjacency.")
        anchor_positions.append(found)
    for pos in anchor_positions:
        seq[pos] = anchor_idx
    # Fill the remaining slots, in order, with the selected non-anchor videos.
    j = 0
    for i in range(total):
        if seq[i] is None:
            seq[i] = others[j]
            j += 1
    # Sanity checks: exact anchor count, no adjacent anchors.
    assert sum(1 for x in seq if x == anchor_idx) == repeats
    for i in range(1, total):
        assert not (seq[i] == anchor_idx and seq[i-1] == anchor_idx), "Adjacent anchors found."
    return seq
# -------------------- Example videos (download to local cache) --------------------
# Example clips (one well-generated, one poorly-generated per action class),
# stored alongside the results in the dataset repo.
EXAMPLES = {
    "BodyWeightSquats": {
        "real": "examples/BodyWeightSquats_real.mp4",
        "bad": "examples/BodyWeightSquats_bad.mp4",
    },
    "WallPushUps": {
        "real": "examples/WallPushUps_real.mp4",
        "bad": "examples/WallPushUps_bad.mp4",
    },
}
# Local cache of downloaded example files; entries stay None on failure.
EX_CACHE = {}
for cls, files in EXAMPLES.items():
    EX_CACHE[cls] = {"real": None, "bad": None}
    for kind, fname in files.items():
        try:
            EX_CACHE[cls][kind] = hf_hub_download(
                repo_id=REPO_ID,
                filename=fname,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir="/tmp",
                local_dir_use_symlinks=False,
            )
        except Exception as e:
            # Missing examples are tolerated; the UI shows a warning instead.
            print(f"[WARN] example missing: {cls} {kind} -> {fname}: {e}")
def _example_src(cls: str, kind: str):
    """Return a (muted) source for a cached example clip, or None if absent."""
    cached = EX_CACHE[cls][kind]
    if cached:
        # Wrap in a dict so the example reuses the same mute pipeline.
        return _src_for_gradio({"url": cached, "id": f"example::{cls}::{kind}"})
    return None
# -------------------- CSS --------------------
# Blanket CSS overrides that make every Gradio panel/card/divider transparent
# so the app renders flat on the host page (targets both Gradio v3 and v4).
GLOBAL_CSS = """
/* ===== ๊ณตํ†ต ๋ณ€์ˆ˜ ํˆฌ๋ช…ํ™” (v3/v4 ๋‘˜๋‹ค) ===== */
:root, .gradio-container {
--body-background-fill: transparent !important;
--background-fill-primary: transparent !important;
--background-fill-secondary: transparent !important;
--block-background-fill: transparent !important;
--block-border-color: transparent !important;
--panel-background-fill: transparent !important;
--panel-border-color: transparent !important;
--section-header-background-fill: transparent !important;
--shadow-drop: 0 0 0 rgba(0,0,0,0) !important;
--shadow-spread: 0 0 0 rgba(0,0,0,0) !important;
}
.gradio-container .bg-white,
.gradio-container .bg-gray-50,
.gradio-container .bg-gray-100,
.gradio-container .bg-slate-50,
.gradio-container .bg-neutral-50,
.gradio-container .bg-secondary,
.gradio-container .border,
.gradio-container .shadow,
.gradio-container .shadow-sm,
.gradio-container .shadow-md,
.gradio-container .ring-1,
.gradio-container .ring,
.gradio-container .gr-card,
.gradio-container .prose > *:where(hr) {
background: transparent !important;
box-shadow: none !important;
border-color: transparent !important;
}
.gradio-container .gr-panel,
.gradio-container .gr-group,
.gradio-container .gr-box,
.gradio-container .gr-row,
.gradio-container .gr-column,
.gradio-container .gr-accordion,
.gradio-container .gr-block,
.gradio-container .gr-form,
.gradio-container .gr-tabs,
.gradio-container .gr-tabitem,
.gradio-container .gr-section-header {
background: transparent !important;
box-shadow: none !important;
border: none !important;
}
.gradio-container hr,
.gradio-container .gr-divider,
.gradio-container .gr-accordion .label {
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
html, body, .gradio-container { background: transparent !important; }
#eval [class*="bg-"],
#eval [class*="border"],
#eval [class*="shadow"],
#eval .gr-panel, #eval .gr-group, #eval .gr-box, #eval .gr-row, #eval .gr-column,
#eval .gr-block, #eval .gr-form, #eval .gr-section-header, #eval .gr-accordion {
background: transparent !important;
border-color: transparent !important;
box-shadow: none !important;
}
#eval .gr-form, #eval .gr-panel { background: transparent !important; box-shadow:none !important; border:none !important; }
"""
# -------------------- UI --------------------
# Two-page Gradio UI: an intro/examples page, then the evaluation page.
with gr.Blocks(fill_height=True, css=GLOBAL_CSS) as demo:
    # Per-session state: scheduled index order, pointer into it, current video id.
    order_state = gr.State(value=[])
    ptr_state = gr.State(value=0)
    cur_video_id = gr.State(value="")
    # ------------------ PAGE 1: Intro + Examples ------------------
    page_intro = gr.Group(visible=True)
    with page_intro:
        gr.Markdown("## ๐ŸŽฏ Action Consistency Human Evaluation")
        gr.Markdown(INSTRUCTION_MD)
        # Examples: BodyWeightSquats
        with gr.Group():
            gr.Markdown("### Examples: BodyWeightSquats")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("BodyWeightSquats","real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("BodyWeightSquats","bad"), height=240, autoplay=False)
            if not (EX_CACHE["BodyWeightSquats"]["real"] and EX_CACHE["BodyWeightSquats"]["bad"]):
                gr.Markdown("> โš ๏ธ Upload `examples/BodyWeightSquats_real.mp4` and `_bad.mp4` to show both samples.")
        # Examples: WallPushUps
        with gr.Group():
            gr.Markdown("### Examples: WallPushUps")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("WallPushUps","real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("WallPushUps","bad"), height=240, autoplay=False)
            if not (EX_CACHE["WallPushUps"]["real"] and EX_CACHE["WallPushUps"]["bad"]):
                gr.Markdown("> โš ๏ธ Upload `examples/WallPushUps_real.mp4` and `_bad.mp4` to show both samples.")
        understood = gr.Checkbox(label="I have read and understand the task.", value=False)
        start_btn = gr.Button("Yes, start", variant="secondary", interactive=False)
    def _toggle_start(checked: bool):
        # Enable the start button only once the consent checkbox is ticked.
        return gr.update(interactive=checked, variant=("primary" if checked else "secondary"))
    understood.change(_toggle_start, inputs=understood, outputs=start_btn)
    # ------------------ PAGE 2: Evaluation ------------------
    page_eval = gr.Group(visible=False, elem_id="eval")
    with page_eval:
        # Participant ID input
        with gr.Row():
            pid = gr.Textbox(label="Participant ID (required)", placeholder="e.g., Youngsun-2025/10/01")
        # Left: instructions + video + progress bar; right: slider + Save&Next.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                gr.Markdown(INSTRUCTION_MD)  # task wording shown verbatim
                video = gr.Video(label="Video", height=360)
                progress = gr.HTML(_progress_html(0, TOTAL_PER_PARTICIPANT))
            with gr.Column(scale=1):
                action_tb = gr.Textbox(label="Expected action", interactive=False)
                score = gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=5.0,
                                  label="Action Consistency (0.0 (Worst) - 10.0 (Best))")
                save_next = gr.Button("๐Ÿ’พ Save & Next โ–ถ", variant="secondary", interactive=False)
                status = gr.Markdown(visible=False)
        done_state = gr.State(0)
        # Enable Save&Next only when a Participant ID has been entered.
        def _toggle_by_pid(pid_text: str):
            enabled = bool(pid_text and pid_text.strip())
            return gr.update(interactive=enabled, variant=("primary" if enabled else "secondary"))
        pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)
    # -------- Page switch & first load --------
    ANCHOR_IDX = 0       # the first video in videos.json acts as the anchor
    ANCHOR_REPEATS = 5   # show the anchor 5 times per session
    MIN_GAP = 1          # forbid anchors in adjacent slots
    def _start_and_load_first():
        """Hide the intro page, build the schedule, and load the first video."""
        total = TOTAL_PER_PARTICIPANT
        order = _build_order_least_first_with_anchor(
            total=total,
            anchor_idx=ANCHOR_IDX,
            repeats=ANCHOR_REPEATS,
            min_gap=MIN_GAP
        )
        first_idx = order[0]
        v0 = V[first_idx]
        return (
            gr.update(visible=False),            # page_intro off
            gr.update(visible=True),             # page_eval on
            _src_for_gradio(v0),                 # muted source
            _extract_action(v0),                 # expected action label
            5.0,                                 # score reset
            gr.update(visible=False, value=""),  # status hide
            0,                                   # done count
            _progress_html(0, TOTAL_PER_PARTICIPANT),
            order,                               # order_state
            1,                                   # ptr_state
            _get_video_id(v0)                    # cur_video_id
        )
    start_btn.click(
        _start_and_load_first,
        inputs=[],
        outputs=[page_intro, page_eval, video, action_tb, score, status, done_state, progress, order_state, ptr_state, cur_video_id]
    )
    # -------- Save & Next --------
    def save_and_next(participant_id, current_video_id, score_val, done_cnt, order, ptr):
        """Persist the current rating, then advance to the next scheduled video."""
        if not participant_id or not participant_id.strip():
            # No PID: keep the current screen unchanged and show a warning.
            return (
                gr.update(visible=True, value="โ— Please enter your Participant ID."),
                gr.update(), gr.update(),  # video, action_tb unchanged
                done_cnt,
                _progress_html(done_cnt, TOTAL_PER_PARTICIPANT),
                5.0,
                ptr,
                current_video_id
            )
        status_msg = push(participant_id, current_video_id, score_val, "")
        new_done = int(done_cnt) + 1
        # End condition: target count reached or schedule exhausted.
        if new_done >= TOTAL_PER_PARTICIPANT or ptr >= len(order):
            return (
                status_msg,             # status
                None,                   # clear video
                "",                     # clear action_tb
                TOTAL_PER_PARTICIPANT,  # final done_state
                _progress_html(TOTAL_PER_PARTICIPANT, TOTAL_PER_PARTICIPANT),
                5.0,                    # score reset
                ptr,
                current_video_id
            )
        # Load the next scheduled video.
        next_idx = order[ptr]
        v = V[next_idx]
        next_vid = _get_video_id(v)
        return (
            status_msg,
            _src_for_gradio(v),  # muted source
            _extract_action(v),
            new_done,
            _progress_html(new_done, TOTAL_PER_PARTICIPANT),
            5.0,
            ptr + 1,
            next_vid
        )
    save_next.click(
        save_and_next,
        # cur_video_id is passed as the second positional input
        inputs=[pid, cur_video_id, score, done_state, order_state, ptr_state],
        # cur_video_id is also an output so the state gets refreshed
        outputs=[status, video, action_tb, done_state, progress, score, ptr_state, cur_video_id]
    )
if __name__ == "__main__":
    # Warn (rather than fail) when muting was requested but ffmpeg is absent.
    if MUTE_AUDIO and not HAS_FFMPEG:
        print("[WARN] MUTE_AUDIO=True but ffmpeg not found. Videos will be served with original audio.")
    demo.launch()