# videoeval_humaneval / app_older.py
# Author: Youngsun Lim
# Commit d521b9d: "no sound videos"
# app.py โ€” Human Eval UI (audio-stripped delivery)
import os, io, csv, json, random, subprocess, hashlib, urllib.request, shutil
from datetime import datetime
import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
# -------------------- Config --------------------
REPO_ID = os.getenv("RESULTS_REPO", "sgtlim/videoeval_results")  # must match the repo the results were uploaded to
HF_TOKEN = os.getenv("HF_TOKEN")
RESULTS_FILE = "results.csv"
TOTAL_PER_PARTICIPANT = 30  # target number of ratings per participant session
# Audio stripping (runtime mute) config
MUTE_AUDIO = True
HAS_FFMPEG = shutil.which("ffmpeg") is not None
RAW_DIR = "/tmp/raw_videos"      # cache of downloaded original videos
MUTED_DIR = "/tmp/muted_videos"  # cache of audio-stripped copies
os.makedirs(RAW_DIR, exist_ok=True)
os.makedirs(MUTED_DIR, exist_ok=True)
# -------------------- Data --------------------
# videos.json entry example: {"url": "...mp4", "id": "BodyWeightSquats__XXXX.mp4", "action": "BodyWeightSquats"}
with open("videos.json", "r", encoding="utf-8") as f:
    V = json.load(f)
api = HfApi()
# Task instructions (wording kept verbatim, bold emphasis included)
INSTRUCTION_MD = """
**Task:** You will watch a series of **AI-generated videos**. For each video, your job is to rate how well the personโ€™s action in the AI-generated video matches the action specified as "**expected action**". Some things to keep in mind:
- The generated video should **capture** the expected action **throughout the video**.
- Try to **focus only** on the expected action and do **not** judge **video quality**, **attractiveness**, **background**, **camera motion**, or **objects**.
- You will be **paid** once **all the videos are viewed and rated**.
"""
# -------------------- Audio-strip helpers --------------------
def _safe_name(s: str) -> str:
return hashlib.sha1(s.encode("utf-8", errors="ignore")).hexdigest()
def _get_video_id(v: dict) -> str:
if "id" in v and v["id"]:
return v["id"]
return os.path.basename(v.get("url", ""))
def _download_to_tmp(url: str) -> str:
    """Fetch *url* into RAW_DIR (cached by URL hash) and return the local path."""
    dst = os.path.join(RAW_DIR, _safe_name(url) + ".mp4")
    if os.path.exists(dst):
        # Already downloaded in this session; reuse the cached file.
        return dst
    urllib.request.urlretrieve(url, dst)
    return dst
def _muted_copy_fast(local_in: str, out_path: str):
    """Fast path: stream-copy the video track and drop the audio track (-an)."""
    subprocess.run(
        ["ffmpeg", "-y", "-i", local_in, "-c:v", "copy", "-an", out_path],
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def _muted_copy_reencode(local_in: str, out_path: str):
    """Compatibility fallback: re-encode the video as H.264 and drop audio."""
    args = [
        "ffmpeg", "-y", "-i", local_in,
        # yuv420p + faststart for broad browser compatibility
        "-vf", "format=yuv420p", "-movflags", "+faststart",
        "-c:v", "libx264", "-crf", "18", "-preset", "veryfast",
        "-an", out_path,
    ]
    subprocess.run(args, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def _src_for_gradio(v: dict) -> str:
    """
    Return a local path to an audio-stripped copy of the video when MUTE_AUDIO
    is set and ffmpeg is available; otherwise return the original url/path.

    Muted copies are cached in MUTED_DIR keyed by video id. The cache is
    consulted BEFORE resolving/downloading the raw input, so a cache hit never
    re-downloads the remote file (the original code downloaded first).
    """
    src = v.get("url", "")
    vid_id = _get_video_id(v) or _safe_name(src)
    if not MUTE_AUDIO or not HAS_FFMPEG or not src:
        return src  # muting not possible; fall back to the original source

    # Stable muted cache filename derived from the video id.
    muted_out = os.path.join(MUTED_DIR, f"{_safe_name(vid_id)}.mp4")
    if os.path.exists(muted_out):
        return muted_out

    # Resolve a local input path (handles both http(s) URLs and local files).
    if src.startswith(("http://", "https://")):
        local_in = _download_to_tmp(src)
    else:
        local_in = src

    try:
        _muted_copy_fast(local_in, muted_out)
    except Exception:
        try:
            _muted_copy_reencode(local_in, muted_out)
        except Exception as e2:
            # Last resort: serve the original (audio intact) rather than fail.
            print(f"[WARN] failed to produce muted copy for {vid_id}: {type(e2).__name__}: {e2}")
            return src
    return muted_out
def _extract_action(v):
if "action" in v and v["action"]:
return v["action"]
raw = v.get("id", "")
return raw.split("__")[0].split(".")[0]
# -------------------- HF CSV helpers --------------------
def _read_csv_bytes():
    """
    Download results.csv from the HF dataset repo and return its raw bytes,
    or None when the file/repo is unavailable (treated as "no results yet").
    """
    try:
        p = hf_hub_download(
            repo_id=REPO_ID, filename=RESULTS_FILE, repo_type="dataset",
            token=HF_TOKEN, local_dir="/tmp", local_dir_use_symlinks=False
        )
        # Use a context manager so the file handle is closed deterministically
        # (the original leaked the handle until garbage collection).
        with open(p, "rb") as fh:
            return fh.read()
    except Exception:
        # Best-effort read: any failure is reported as "no data".
        return None
def _append(old_bytes, row):
s = io.StringIO()
w = csv.writer(s)
if not old_bytes:
w.writerow(["ts_iso", "participant_id", "video_id", "overall", "notes"])
else:
s.write(old_bytes.decode("utf-8", errors="ignore"))
w.writerow(row)
return s.getvalue().encode("utf-8")
def _load_eval_counts():
    """
    Read results.csv from the Hugging Face dataset and return a dict mapping
    video_id -> number of evaluations recorded so far (0 for unseen videos).
    """
    counts = {_get_video_id(v): 0 for v in V}
    b = _read_csv_bytes()
    if not b:
        # No results file yet: everything starts at zero.
        return counts
    s = io.StringIO(b.decode("utf-8", errors="ignore"))
    r = csv.reader(s)
    rows = list(r)
    if not rows:
        return counts
    header = rows[0]
    # Tolerate files written with or without a header row.
    body = rows[1:] if header and ("video_id" in header or "overall" in header) else rows
    vid_col = header.index("video_id") if header and "video_id" in header else None
    for row in body:
        try:
            # Positional fallback assumes columns: ts, pid, video_id, overall, notes.
            vid = row[vid_col] if vid_col is not None else row[2]
            if vid in counts:
                counts[vid] += 1
        except Exception:
            # Skip malformed rows rather than fail the whole count.
            continue
    return counts
def push(participant_id, video_id, score, notes=""):
    """
    Append one rating row to results.csv in the HF dataset repo.

    Returns a gr.update for the status Markdown describing success or the
    reason for failure; never raises (errors are reported in the UI).
    """
    if not participant_id or not participant_id.strip():
        return gr.update(visible=True, value="โ— Please enter your Participant ID before proceeding.")
    if not video_id or score is None:
        return gr.update(visible=True, value="โ— Fill out all fields.")
    try:
        old = _read_csv_bytes()
        # NOTE(review): datetime.utcnow() is naive (and deprecated in 3.12+);
        # presumably UTC timestamps are intended — confirm before changing.
        row = [
            datetime.utcnow().isoformat(),
            participant_id.strip(),
            video_id,       # store the video_id (not the action label)
            float(score),   # overall rating
            notes or ""
        ]
        newb = _append(old, row)
        if not REPO_ID:
            return gr.update(visible=True, value="โ— RESULTS_REPO is not set.")
        if not HF_TOKEN:
            return gr.update(visible=True, value="โ— HF_TOKEN is missing. Set a write token for the dataset repo.")
        # Read-modify-write of the whole CSV; one commit per saved rating.
        api.upload_file(
            path_or_fileobj=io.BytesIO(newb),
            path_in_repo=RESULTS_FILE,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message="append"
        )
        return gr.update(visible=True, value=f"โœ… Saved for {video_id}.")
    except Exception as e:
        return gr.update(
            visible=True,
            value=f"โŒ Save failed: {type(e).__name__}: {e}\n"
                  f"- Check HF_TOKEN permission\n- Check REPO_ID\n- Create dataset repo if missing"
        )
def _progress_html(done, total):
pct = int(100 * done / max(1, total))
return f"""
<div style="border:1px solid #ddd; height:20px; border-radius:6px; overflow:hidden; margin-top:6px;">
<div style="height:100%; width:{pct}%; background:#3b82f6;"></div>
</div>
<div style="font-size:12px; margin-top:4px;">{done} / {total}</div>
"""
# -------------------- Scheduling (least-first + anchor) --------------------
def _build_order_least_first_with_anchor(total:int, anchor_idx:int, repeats:int, min_gap:int=1):
    """
    Build a per-participant presentation order (list of indices into V):
    - read results.csv and count prior evaluations per video_id
    - include the anchor video *repeats* times, never in adjacent slots
    - fill the remaining slots, without duplicates, with the least-evaluated videos
    """
    assert repeats <= total
    N = len(V)
    assert N >= 1
    def vid_of(i): return _get_video_id(V[i])
    counts = _load_eval_counts()
    candidates = [i for i in range(N) if i != anchor_idx]
    random.shuffle(candidates)  # randomize ties before the stable sort below
    candidates.sort(key=lambda i: counts.get(vid_of(i), 0))
    others_needed = total - repeats
    if len(candidates) < others_needed:
        raise ValueError("Not enough unique non-anchor videos to fill the schedule without duplication.")
    others = candidates[:others_needed]
    random.shuffle(others)
    seq = [None] * total
    # Place one anchor inside each of *repeats* roughly-equal segments.
    segment = total // repeats if repeats > 0 else total
    anchor_positions = []
    for k in range(repeats):
        lo = k * segment
        hi = (k + 1) * segment if k < repeats - 1 else total
        cand = random.randrange(lo, hi)
        def ok(pos):
            # Require at least min_gap non-anchor slots between anchors.
            return all(abs(pos - p) >= (min_gap + 1) for p in anchor_positions)
        found = None
        # Search outward from the random candidate for a legal position.
        for d in range(0, max(1, segment)):
            for sgn in (+1, -1):
                pos = cand + sgn * d
                if 0 <= pos < total and ok(pos):
                    found = pos
                    break
            if found is not None:
                break
        if found is None:
            # Fallback: linear scan over every slot.
            for pos in range(total):
                if ok(pos):
                    found = pos
                    break
        if found is None:
            raise RuntimeError("Failed to place anchor without adjacency.")
        anchor_positions.append(found)
    for pos in anchor_positions:
        seq[pos] = anchor_idx
    # Fill the remaining slots, in order, with the selected non-anchor videos.
    j = 0
    for i in range(total):
        if seq[i] is None:
            seq[i] = others[j]
            j += 1
    # Sanity checks: exact anchor count, no adjacent anchors.
    assert sum(1 for x in seq if x == anchor_idx) == repeats
    for i in range(1, total):
        assert not (seq[i] == anchor_idx and seq[i-1] == anchor_idx), "Adjacent anchors found."
    return seq
# -------------------- Example videos (download to local cache) --------------------
# Example clips (one well-generated, one poorly-generated per action class),
# stored alongside the results in the dataset repo.
EXAMPLES = {
    "BodyWeightSquats": {
        "real": "examples/BodyWeightSquats_real.mp4",
        "bad": "examples/BodyWeightSquats_bad.mp4",
    },
    "WallPushUps": {
        "real": "examples/WallPushUps_real.mp4",
        "bad": "examples/WallPushUps_bad.mp4",
    },
}
# Local cache of downloaded example files; entries stay None on failure.
EX_CACHE = {}
for cls, files in EXAMPLES.items():
    EX_CACHE[cls] = {"real": None, "bad": None}
    for kind, fname in files.items():
        try:
            EX_CACHE[cls][kind] = hf_hub_download(
                repo_id=REPO_ID,
                filename=fname,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir="/tmp",
                local_dir_use_symlinks=False,
            )
        except Exception as e:
            # Missing examples are tolerated; the UI shows a warning instead.
            print(f"[WARN] example missing: {cls} {kind} -> {fname}: {e}")
def _example_src(cls: str, kind: str):
    """Return a (muted) source for a cached example clip, or None if absent."""
    cached = EX_CACHE[cls][kind]
    if cached:
        # Wrap in a dict so the example reuses the same mute pipeline.
        return _src_for_gradio({"url": cached, "id": f"example::{cls}::{kind}"})
    return None
# -------------------- CSS --------------------
# Blanket CSS overrides that make every Gradio panel/card/divider transparent
# so the app renders flat on the host page (targets both Gradio v3 and v4).
GLOBAL_CSS = """
/* ===== ๊ณตํ†ต ๋ณ€์ˆ˜ ํˆฌ๋ช…ํ™” (v3/v4 ๋‘˜๋‹ค) ===== */
:root, .gradio-container {
--body-background-fill: transparent !important;
--background-fill-primary: transparent !important;
--background-fill-secondary: transparent !important;
--block-background-fill: transparent !important;
--block-border-color: transparent !important;
--panel-background-fill: transparent !important;
--panel-border-color: transparent !important;
--section-header-background-fill: transparent !important;
--shadow-drop: 0 0 0 rgba(0,0,0,0) !important;
--shadow-spread: 0 0 0 rgba(0,0,0,0) !important;
}
.gradio-container .bg-white,
.gradio-container .bg-gray-50,
.gradio-container .bg-gray-100,
.gradio-container .bg-slate-50,
.gradio-container .bg-neutral-50,
.gradio-container .bg-secondary,
.gradio-container .border,
.gradio-container .shadow,
.gradio-container .shadow-sm,
.gradio-container .shadow-md,
.gradio-container .ring-1,
.gradio-container .ring,
.gradio-container .gr-card,
.gradio-container .prose > *:where(hr) {
background: transparent !important;
box-shadow: none !important;
border-color: transparent !important;
}
.gradio-container .gr-panel,
.gradio-container .gr-group,
.gradio-container .gr-box,
.gradio-container .gr-row,
.gradio-container .gr-column,
.gradio-container .gr-accordion,
.gradio-container .gr-block,
.gradio-container .gr-form,
.gradio-container .gr-tabs,
.gradio-container .gr-tabitem,
.gradio-container .gr-section-header {
background: transparent !important;
box-shadow: none !important;
border: none !important;
}
.gradio-container hr,
.gradio-container .gr-divider,
.gradio-container .gr-accordion .label {
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
html, body, .gradio-container { background: transparent !important; }
#eval [class*="bg-"],
#eval [class*="border"],
#eval [class*="shadow"],
#eval .gr-panel, #eval .gr-group, #eval .gr-box, #eval .gr-row, #eval .gr-column,
#eval .gr-block, #eval .gr-form, #eval .gr-section-header, #eval .gr-accordion {
background: transparent !important;
border-color: transparent !important;
box-shadow: none !important;
}
#eval .gr-form, #eval .gr-panel { background: transparent !important; box-shadow:none !important; border:none !important; }
"""
# -------------------- UI --------------------
# Two-page Gradio UI: an intro/examples page, then the evaluation page.
with gr.Blocks(fill_height=True, css=GLOBAL_CSS) as demo:
    # Per-session state: scheduled index order, pointer into it, current video id.
    order_state = gr.State(value=[])
    ptr_state = gr.State(value=0)
    cur_video_id = gr.State(value="")
    # ------------------ PAGE 1: Intro + Examples ------------------
    page_intro = gr.Group(visible=True)
    with page_intro:
        gr.Markdown("## ๐ŸŽฏ Action Consistency Human Evaluation")
        gr.Markdown(INSTRUCTION_MD)
        # Examples: BodyWeightSquats
        with gr.Group():
            gr.Markdown("### Examples: BodyWeightSquats")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("BodyWeightSquats","real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("BodyWeightSquats","bad"), height=240, autoplay=False)
            if not (EX_CACHE["BodyWeightSquats"]["real"] and EX_CACHE["BodyWeightSquats"]["bad"]):
                gr.Markdown("> โš ๏ธ Upload `examples/BodyWeightSquats_real.mp4` and `_bad.mp4` to show both samples.")
        # Examples: WallPushUps
        with gr.Group():
            gr.Markdown("### Examples: WallPushUps")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("WallPushUps","real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("WallPushUps","bad"), height=240, autoplay=False)
            if not (EX_CACHE["WallPushUps"]["real"] and EX_CACHE["WallPushUps"]["bad"]):
                gr.Markdown("> โš ๏ธ Upload `examples/WallPushUps_real.mp4` and `_bad.mp4` to show both samples.")
        understood = gr.Checkbox(label="I have read and understand the task.", value=False)
        start_btn = gr.Button("Yes, start", variant="secondary", interactive=False)
    def _toggle_start(checked: bool):
        # Enable the start button only once the consent checkbox is ticked.
        return gr.update(interactive=checked, variant=("primary" if checked else "secondary"))
    understood.change(_toggle_start, inputs=understood, outputs=start_btn)
    # ------------------ PAGE 2: Evaluation ------------------
    page_eval = gr.Group(visible=False, elem_id="eval")
    with page_eval:
        # Participant ID input
        with gr.Row():
            pid = gr.Textbox(label="Participant ID (required)", placeholder="e.g., Youngsun-2025/10/01")
        # Left: instructions + video + progress bar; right: slider + Save&Next.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                gr.Markdown(INSTRUCTION_MD)  # task wording shown verbatim
                video = gr.Video(label="Video", height=360)
                progress = gr.HTML(_progress_html(0, TOTAL_PER_PARTICIPANT))
            with gr.Column(scale=1):
                action_tb = gr.Textbox(label="Expected action", interactive=False)
                score = gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=5.0,
                                  label="Action Consistency (0.0 (Worst) - 10.0 (Best))")
                save_next = gr.Button("๐Ÿ’พ Save & Next โ–ถ", variant="secondary", interactive=False)
                status = gr.Markdown(visible=False)
        done_state = gr.State(0)
        # Enable Save&Next only when a Participant ID has been entered.
        def _toggle_by_pid(pid_text: str):
            enabled = bool(pid_text and pid_text.strip())
            return gr.update(interactive=enabled, variant=("primary" if enabled else "secondary"))
        pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)
    # -------- Page switch & first load --------
    ANCHOR_IDX = 0       # the first video in videos.json acts as the anchor
    ANCHOR_REPEATS = 5   # show the anchor 5 times per session
    MIN_GAP = 1          # forbid anchors in adjacent slots
    def _start_and_load_first():
        """Hide the intro page, build the schedule, and load the first video."""
        total = TOTAL_PER_PARTICIPANT
        order = _build_order_least_first_with_anchor(
            total=total,
            anchor_idx=ANCHOR_IDX,
            repeats=ANCHOR_REPEATS,
            min_gap=MIN_GAP
        )
        first_idx = order[0]
        v0 = V[first_idx]
        return (
            gr.update(visible=False),            # page_intro off
            gr.update(visible=True),             # page_eval on
            _src_for_gradio(v0),                 # muted source
            _extract_action(v0),                 # expected action label
            5.0,                                 # score reset
            gr.update(visible=False, value=""),  # status hide
            0,                                   # done count
            _progress_html(0, TOTAL_PER_PARTICIPANT),
            order,                               # order_state
            1,                                   # ptr_state
            _get_video_id(v0)                    # cur_video_id
        )
    start_btn.click(
        _start_and_load_first,
        inputs=[],
        outputs=[page_intro, page_eval, video, action_tb, score, status, done_state, progress, order_state, ptr_state, cur_video_id]
    )
    # -------- Save & Next --------
    def save_and_next(participant_id, current_video_id, score_val, done_cnt, order, ptr):
        """Persist the current rating, then advance to the next scheduled video."""
        if not participant_id or not participant_id.strip():
            # No PID: keep the current screen unchanged and show a warning.
            return (
                gr.update(visible=True, value="โ— Please enter your Participant ID."),
                gr.update(), gr.update(),  # video, action_tb unchanged
                done_cnt,
                _progress_html(done_cnt, TOTAL_PER_PARTICIPANT),
                5.0,
                ptr,
                current_video_id
            )
        status_msg = push(participant_id, current_video_id, score_val, "")
        new_done = int(done_cnt) + 1
        # End condition: target count reached or schedule exhausted.
        if new_done >= TOTAL_PER_PARTICIPANT or ptr >= len(order):
            return (
                status_msg,             # status
                None,                   # clear video
                "",                     # clear action_tb
                TOTAL_PER_PARTICIPANT,  # final done_state
                _progress_html(TOTAL_PER_PARTICIPANT, TOTAL_PER_PARTICIPANT),
                5.0,                    # score reset
                ptr,
                current_video_id
            )
        # Load the next scheduled video.
        next_idx = order[ptr]
        v = V[next_idx]
        next_vid = _get_video_id(v)
        return (
            status_msg,
            _src_for_gradio(v),  # muted source
            _extract_action(v),
            new_done,
            _progress_html(new_done, TOTAL_PER_PARTICIPANT),
            5.0,
            ptr + 1,
            next_vid
        )
    save_next.click(
        save_and_next,
        # cur_video_id is passed as the second positional input
        inputs=[pid, cur_video_id, score, done_state, order_state, ptr_state],
        # cur_video_id is also an output so the state gets refreshed
        outputs=[status, video, action_tb, done_state, progress, score, ptr_state, cur_video_id]
    )
if __name__ == "__main__":
    # Warn (rather than fail) when muting was requested but ffmpeg is absent.
    if MUTE_AUDIO and not HAS_FFMPEG:
        print("[WARN] MUTE_AUDIO=True but ffmpeg not found. Videos will be served with original audio.")
    demo.launch()