# app.py — Human Eval UI (audio-stripped delivery)
#
# Gradio app for human evaluation of AI-generated action videos.
# Participants rate action consistency on a 0–10 slider; results are appended
# to a CSV file stored in a Hugging Face dataset repo. Videos are served with
# their audio stripped (via ffmpeg) when possible.
import os
import io
import csv
import json
import random
import subprocess
import hashlib
import urllib.request
import shutil
from datetime import datetime, timezone

import gradio as gr
from huggingface_hub import HfApi, hf_hub_download

# -------------------- Config --------------------
REPO_ID = os.getenv("RESULTS_REPO", "sgtlim/videoeval_results")  # must match the uploaded repo
HF_TOKEN = os.getenv("HF_TOKEN")
RESULTS_FILE = "results.csv"
TOTAL_PER_PARTICIPANT = 30  # target number of evaluations per session

# 🔇 Audio stripping (runtime mute) config
MUTE_AUDIO = True
HAS_FFMPEG = shutil.which("ffmpeg") is not None
RAW_DIR = "/tmp/raw_videos"
MUTED_DIR = "/tmp/muted_videos"
os.makedirs(RAW_DIR, exist_ok=True)
os.makedirs(MUTED_DIR, exist_ok=True)

# -------------------- Data --------------------
# videos.json entry example:
#   {"url": "...mp4", "id": "BodyWeightSquats__XXXX.mp4", "action": "BodyWeightSquats"}
with open("videos.json", "r", encoding="utf-8") as f:
    V = json.load(f)

api = HfApi()

# Professor's instructions (verbatim, including bold markup)
INSTRUCTION_MD = """
**Task:** You will watch a series of **AI-generated videos**. For each video, your job is to rate how well the person’s action in the AI-generated video matches the action specified as "**expected action**".

Some things to keep in mind:

- The generated video should **capture** the expected action **throughout the video**.
- Try to **focus only** on the expected action and do **not** judge **video quality**, **attractiveness**, **background**, **camera motion**, or **objects**.
- You will be **paid** once **all the videos are viewed and rated**.
"""


# -------------------- Audio-strip helpers --------------------
def _safe_name(s: str) -> str:
    """Return a filesystem-safe, stable name for *s* (SHA-1 hex digest)."""
    return hashlib.sha1(s.encode("utf-8", errors="ignore")).hexdigest()


def _get_video_id(v: dict) -> str:
    """Return the video's id, falling back to the basename of its url."""
    if "id" in v and v["id"]:
        return v["id"]
    return os.path.basename(v.get("url", ""))


def _download_to_tmp(url: str) -> str:
    """Download remote video to RAW_DIR (cached by url hash); return local path."""
    basename = _safe_name(url) + ".mp4"
    dst = os.path.join(RAW_DIR, basename)
    if not os.path.exists(dst):
        urllib.request.urlretrieve(url, dst)
    return dst


def _muted_copy_fast(local_in: str, out_path: str):
    """Fast path: copy video stream, drop audio (-an). Raises on ffmpeg failure."""
    cmd = ["ffmpeg", "-y", "-i", local_in, "-c:v", "copy", "-an", out_path]
    subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)


def _muted_copy_reencode(local_in: str, out_path: str):
    """Compatibility fallback: re-encode video (H.264), drop audio."""
    cmd = [
        "ffmpeg", "-y", "-i", local_in,
        "-vf", "format=yuv420p", "-movflags", "+faststart",
        "-c:v", "libx264", "-crf", "18", "-preset", "veryfast",
        "-an", out_path,
    ]
    subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)


def _src_for_gradio(v: dict) -> str:
    """
    Return a local path to a muted copy of the video if MUTE_AUDIO & ffmpeg available;
    otherwise return original url/path.
    """
    src = v.get("url", "")
    vid_id = _get_video_id(v) or _safe_name(src)
    if not MUTE_AUDIO or not HAS_FFMPEG or not src:
        return src  # fall back to original (no mute possible)

    # Resolve local input path (handles http(s) and local paths)
    if src.startswith("http://") or src.startswith("https://"):
        local_in = _download_to_tmp(src)
    else:
        local_in = src

    # Build a stable muted cache filename by video_id
    muted_out = os.path.join(MUTED_DIR, f"{_safe_name(vid_id)}.mp4")
    if os.path.exists(muted_out):
        return muted_out
    try:
        _muted_copy_fast(local_in, muted_out)
    except Exception:
        try:
            _muted_copy_reencode(local_in, muted_out)
        except Exception as e2:
            # As a last resort, return original source (with audio)
            print(f"[WARN] failed to produce muted copy for {vid_id}: {type(e2).__name__}: {e2}")
            return src
    return muted_out


def _extract_action(v):
    """Return the expected action label, parsing it from the id when absent."""
    if "action" in v and v["action"]:
        return v["action"]
    raw = v.get("id", "")
    # id convention: "<Action>__<suffix>.mp4" -> "<Action>"
    return raw.split("__")[0].split(".")[0]


# -------------------- HF CSV helpers --------------------
def _read_csv_bytes():
    """Download results.csv from the dataset repo; return raw bytes or None if unavailable."""
    try:
        p = hf_hub_download(
            repo_id=REPO_ID,
            filename=RESULTS_FILE,
            repo_type="dataset",
            token=HF_TOKEN,
            local_dir="/tmp",
            local_dir_use_symlinks=False,
        )
        # FIX: close the file handle deterministically (was a bare open().read()).
        with open(p, "rb") as fh:
            return fh.read()
    except Exception:
        # Missing file / repo / network — treated as "no results yet".
        return None


def _append(old_bytes, row):
    """Append *row* to the existing CSV bytes (adding a header when starting fresh)."""
    s = io.StringIO()
    w = csv.writer(s)
    if not old_bytes:
        w.writerow(["ts_iso", "participant_id", "video_id", "overall", "notes"])
    else:
        text = old_bytes.decode("utf-8", errors="ignore")
        s.write(text)
        # FIX: guard against a missing trailing newline, which would fuse the
        # new row onto the last existing row.
        if text and not text.endswith("\n"):
            s.write("\n")
    w.writerow(row)
    return s.getvalue().encode("utf-8")


def _load_eval_counts():
    """
    Read results.csv from the Hugging Face dataset and return a dict mapping
    video_id -> number of evaluations so far. Every known video starts at 0.
    """
    counts = {_get_video_id(v): 0 for v in V}
    b = _read_csv_bytes()
    if not b:
        return counts
    s = io.StringIO(b.decode("utf-8", errors="ignore"))
    r = csv.reader(s)
    rows = list(r)
    if not rows:
        return counts
    header = rows[0]
    # Tolerate header-less files: only skip row 0 when it looks like a header.
    body = rows[1:] if header and ("video_id" in header or "overall" in header) else rows
    vid_col = header.index("video_id") if header and "video_id" in header else None
    for row in body:
        try:
            # Fallback column order: ts, pid, video_id, overall, notes
            vid = row[vid_col] if vid_col is not None else row[2]
            if vid in counts:
                counts[vid] += 1
        except Exception:
            # Skip malformed rows rather than aborting the count.
            continue
    return counts


def push(participant_id, video_id, score, notes=""):
    """Append one rating row to the dataset CSV and upload it; return a status gr.update."""
    if not participant_id or not participant_id.strip():
        return gr.update(visible=True, value="❗ Please enter your Participant ID before proceeding.")
    if not video_id or score is None:
        return gr.update(visible=True, value="❗ Fill out all fields.")
    try:
        old = _read_csv_bytes()
        row = [
            # FIX: datetime.utcnow() is deprecated; use an aware UTC timestamp.
            datetime.now(timezone.utc).isoformat(),
            participant_id.strip(),
            video_id,  # store video_id (not the action label)
            float(score),  # overall
            notes or "",
        ]
        newb = _append(old, row)
        if not REPO_ID:
            return gr.update(visible=True, value="❗ RESULTS_REPO is not set.")
        if not HF_TOKEN:
            return gr.update(visible=True, value="❗ HF_TOKEN is missing. Set a write token for the dataset repo.")
        # NOTE(review): read-modify-write with no locking — concurrent raters
        # can race and drop rows; acceptable for low traffic, confirm if scaled.
        api.upload_file(
            path_or_fileobj=io.BytesIO(newb),
            path_in_repo=RESULTS_FILE,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message="append",
        )
        return gr.update(visible=True, value=f"✅ Saved for {video_id}.")
    except Exception as e:
        return gr.update(
            visible=True,
            value=f"❌ Save failed: {type(e).__name__}: {e}\n"
                  f"- Check HF_TOKEN permission\n- Check REPO_ID\n- Create dataset repo if missing",
        )


def _progress_html(done, total):
    """Render a small HTML progress bar showing done/total."""
    pct = int(100 * done / max(1, total))
    # NOTE(review): the original inline markup was lost in extraction; this is a
    # minimal reconstruction showing "done / total" with a percentage bar.
    return f"""
<div style="width:100%;">
  <div style="font-size:14px;margin-bottom:4px;">{done} / {total}</div>
  <div style="background:#e5e7eb;border-radius:6px;height:10px;overflow:hidden;">
    <div style="width:{pct}%;background:#3b82f6;height:10px;"></div>
  </div>
</div>
"""


# -------------------- Scheduling (least-first + anchor) --------------------
def _build_order_least_first_with_anchor(total: int, anchor_idx: int, repeats: int, min_gap: int = 1):
    """
    Build a per-session viewing order of length *total*:

    - Reads results.csv to compute per-video_id evaluation counts.
    - Includes the anchor video *repeats* times, never adjacent
      (at least ``min_gap`` non-anchor items between anchor occurrences).
    - Fills the remaining slots with the least-evaluated videos, no duplicates.

    Raises ValueError when there are not enough unique non-anchor videos,
    and RuntimeError when anchors cannot be placed without adjacency.
    """
    assert repeats <= total
    N = len(V)
    assert N >= 1

    def vid_of(i):
        return _get_video_id(V[i])

    counts = _load_eval_counts()
    candidates = [i for i in range(N) if i != anchor_idx]
    random.shuffle(candidates)  # randomize ties before the stable sort
    candidates.sort(key=lambda i: counts.get(vid_of(i), 0))
    others_needed = total - repeats
    if len(candidates) < others_needed:
        raise ValueError("Not enough unique non-anchor videos to fill the schedule without duplication.")
    others = candidates[:others_needed]
    random.shuffle(others)

    seq = [None] * total
    # Place one anchor per segment of the sequence, then nudge to satisfy the gap.
    segment = total // repeats if repeats > 0 else total
    anchor_positions = []
    for k in range(repeats):
        lo = k * segment
        hi = (k + 1) * segment if k < repeats - 1 else total
        cand = random.randrange(lo, hi)

        def ok(pos):
            return all(abs(pos - p) >= (min_gap + 1) for p in anchor_positions)

        found = None
        # Search outward from the candidate position for a legal slot.
        for d in range(0, max(1, segment)):
            for sgn in (+1, -1):
                pos = cand + sgn * d
                if 0 <= pos < total and ok(pos):
                    found = pos
                    break
            if found is not None:
                break
        if found is None:
            # Last resort: linear scan over all positions.
            for pos in range(total):
                if ok(pos):
                    found = pos
                    break
        if found is None:
            raise RuntimeError("Failed to place anchor without adjacency.")
        anchor_positions.append(found)

    for pos in anchor_positions:
        seq[pos] = anchor_idx
    j = 0
    for i in range(total):
        if seq[i] is None:
            seq[i] = others[j]
            j += 1

    # Sanity checks: exact anchor count and no adjacent anchors.
    assert sum(1 for x in seq if x == anchor_idx) == repeats
    for i in range(1, total):
        assert not (seq[i] == anchor_idx and seq[i - 1] == anchor_idx), "Adjacent anchors found."
    return seq


# -------------------- Example videos (download to local cache) --------------------
EXAMPLES = {
    "BodyWeightSquats": {
        "real": "examples/BodyWeightSquats_real.mp4",
        "bad": "examples/BodyWeightSquats_bad.mp4",
    },
    "WallPushUps": {
        "real": "examples/WallPushUps_real.mp4",
        "bad": "examples/WallPushUps_bad.mp4",
    },
}
EX_CACHE = {}
for cls, files in EXAMPLES.items():
    EX_CACHE[cls] = {"real": None, "bad": None}
    for kind, fname in files.items():
        try:
            EX_CACHE[cls][kind] = hf_hub_download(
                repo_id=REPO_ID,
                filename=fname,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir="/tmp",
                local_dir_use_symlinks=False,
            )
        except Exception as e:
            # Examples are optional; the UI shows an upload hint when missing.
            print(f"[WARN] example missing: {cls} {kind} -> {fname}: {e}")


def _example_src(cls: str, kind: str):
    """Return a (muted) playable source for a cached example video, or None."""
    p = EX_CACHE[cls][kind]
    if not p:
        return None
    # Wrap as a dict so the same pipeline (_src_for_gradio) applies.
    return _src_for_gradio({"url": p, "id": f"example::{cls}::{kind}"})


# -------------------- CSS --------------------
GLOBAL_CSS = """
/* ===== Make shared theme variables transparent (works for Gradio v3/v4) ===== */
:root, .gradio-container {
  --body-background-fill: transparent !important;
  --background-fill-primary: transparent !important;
  --background-fill-secondary: transparent !important;
  --block-background-fill: transparent !important;
  --block-border-color: transparent !important;
  --panel-background-fill: transparent !important;
  --panel-border-color: transparent !important;
  --section-header-background-fill: transparent !important;
  --shadow-drop: 0 0 0 rgba(0,0,0,0) !important;
  --shadow-spread: 0 0 0 rgba(0,0,0,0) !important;
}
.gradio-container .bg-white, .gradio-container .bg-gray-50, .gradio-container .bg-gray-100,
.gradio-container .bg-slate-50, .gradio-container .bg-neutral-50, .gradio-container .bg-secondary,
.gradio-container .border, .gradio-container .shadow, .gradio-container .shadow-sm,
.gradio-container .shadow-md, .gradio-container .ring-1, .gradio-container .ring,
.gradio-container .gr-card, .gradio-container .prose > *:where(hr) {
  background: transparent !important;
  box-shadow: none !important;
  border-color: transparent !important;
}
.gradio-container .gr-panel, .gradio-container .gr-group, .gradio-container .gr-box,
.gradio-container .gr-row, .gradio-container .gr-column, .gradio-container .gr-accordion,
.gradio-container .gr-block, .gradio-container .gr-form, .gradio-container .gr-tabs,
.gradio-container .gr-tabitem, .gradio-container .gr-section-header {
  background: transparent !important;
  box-shadow: none !important;
  border: none !important;
}
.gradio-container hr, .gradio-container .gr-divider, .gradio-container .gr-accordion .label {
  background: transparent !important;
  border: none !important;
  box-shadow: none !important;
}
html, body, .gradio-container { background: transparent !important; }
#eval [class*="bg-"], #eval [class*="border"], #eval [class*="shadow"],
#eval .gr-panel, #eval .gr-group, #eval .gr-box, #eval .gr-row, #eval .gr-column,
#eval .gr-block, #eval .gr-form, #eval .gr-section-header, #eval .gr-accordion {
  background: transparent !important;
  border-color: transparent !important;
  box-shadow: none !important;
}
#eval .gr-form, #eval .gr-panel { background: transparent !important; box-shadow:none !important; border:none !important; }
"""

# -------------------- UI --------------------
with gr.Blocks(fill_height=True, css=GLOBAL_CSS) as demo:
    # Per-session state: viewing order, pointer into it, and the current video id.
    order_state = gr.State(value=[])
    ptr_state = gr.State(value=0)
    cur_video_id = gr.State(value="")

    # ------------------ PAGE 1: Intro + Examples ------------------
    page_intro = gr.Group(visible=True)
    with page_intro:
        gr.Markdown("## 🎯 Action Consistency Human Evaluation")
        gr.Markdown(INSTRUCTION_MD)

        # Examples: BodyWeightSquats
        with gr.Group():
            gr.Markdown("### Examples: BodyWeightSquats")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("BodyWeightSquats", "real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("BodyWeightSquats", "bad"), height=240, autoplay=False)
            if not (EX_CACHE["BodyWeightSquats"]["real"] and EX_CACHE["BodyWeightSquats"]["bad"]):
                gr.Markdown("> ⚠️ Upload `examples/BodyWeightSquats_real.mp4` and `_bad.mp4` to show both samples.")

        # Examples: WallPushUps
        with gr.Group():
            gr.Markdown("### Examples: WallPushUps")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("**Expected depiction of action**")
                    gr.Video(value=_example_src("WallPushUps", "real"), height=240, autoplay=False)
                with gr.Column():
                    gr.Markdown("**Poorly generated action**")
                    gr.Video(value=_example_src("WallPushUps", "bad"), height=240, autoplay=False)
            if not (EX_CACHE["WallPushUps"]["real"] and EX_CACHE["WallPushUps"]["bad"]):
                gr.Markdown("> ⚠️ Upload `examples/WallPushUps_real.mp4` and `_bad.mp4` to show both samples.")

        understood = gr.Checkbox(label="I have read and understand the task.", value=False)
        start_btn = gr.Button("Yes, start", variant="secondary", interactive=False)

    def _toggle_start(checked: bool):
        # Enable the start button only after the consent checkbox is ticked.
        return gr.update(interactive=checked, variant=("primary" if checked else "secondary"))

    understood.change(_toggle_start, inputs=understood, outputs=start_btn)

    # ------------------ PAGE 2: Evaluation ------------------
    page_eval = gr.Group(visible=False, elem_id="eval")
    with page_eval:
        # Participant ID input
        with gr.Row():
            pid = gr.Textbox(label="Participant ID (required)", placeholder="e.g., Youngsun-2025/10/01")

        # Left: instructions + video + progress bar. Right: slider + Save & Next.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                gr.Markdown(INSTRUCTION_MD)  # professor's wording, verbatim
                video = gr.Video(label="Video", height=360)
                progress = gr.HTML(_progress_html(0, TOTAL_PER_PARTICIPANT))
            with gr.Column(scale=1):
                action_tb = gr.Textbox(label="Expected action", interactive=False)
                score = gr.Slider(
                    minimum=0.0, maximum=10.0, step=0.1, value=5.0,
                    label="Action Consistency (0.0 (Worst) - 10.0 (Best))",
                )
                save_next = gr.Button("💾 Save & Next ▶", variant="secondary", interactive=False)
        status = gr.Markdown(visible=False)
        done_state = gr.State(0)

    def _toggle_by_pid(pid_text: str):
        # Enable Save & Next only when a non-blank participant ID is entered.
        enabled = bool(pid_text and pid_text.strip())
        return gr.update(interactive=enabled, variant=("primary" if enabled else "secondary"))

    pid.change(_toggle_by_pid, inputs=pid, outputs=save_next)

    # -------- Page switch & first load --------
    ANCHOR_IDX = 0       # the very first video in videos.json
    ANCHOR_REPEATS = 5   # anchor shown 5 times
    MIN_GAP = 1          # no adjacent anchor occurrences

    def _start_and_load_first():
        """Build the session order, switch to the eval page, and load the first video."""
        total = TOTAL_PER_PARTICIPANT
        order = _build_order_least_first_with_anchor(
            total=total, anchor_idx=ANCHOR_IDX, repeats=ANCHOR_REPEATS, min_gap=MIN_GAP
        )
        first_idx = order[0]
        v0 = V[first_idx]
        return (
            gr.update(visible=False),               # page_intro off
            gr.update(visible=True),                # page_eval on
            _src_for_gradio(v0),                    # 🔇 muted source
            _extract_action(v0),                    # expected action label
            5.0,                                    # score reset
            gr.update(visible=False, value=""),     # status hide
            0,                                      # done count
            _progress_html(0, TOTAL_PER_PARTICIPANT),
            order,                                  # order_state
            1,                                      # ptr_state
            _get_video_id(v0),                      # cur_video_id
        )

    start_btn.click(
        _start_and_load_first,
        inputs=[],
        outputs=[page_intro, page_eval, video, action_tb, score, status,
                 done_state, progress, order_state, ptr_state, cur_video_id],
    )

    # -------- Save & Next --------
    def save_and_next(participant_id, current_video_id, score_val, done_cnt, order, ptr):
        """Persist the current rating, then advance to the next scheduled video."""
        if not participant_id or not participant_id.strip():
            # No PID: keep the current screen unchanged.
            return (
                gr.update(visible=True, value="❗ Please enter your Participant ID."),
                gr.update(), gr.update(),  # video, action_tb unchanged
                done_cnt,
                _progress_html(done_cnt, TOTAL_PER_PARTICIPANT),
                5.0,
                ptr,
                current_video_id,
            )

        status_msg = push(participant_id, current_video_id, score_val, "")
        new_done = int(done_cnt) + 1

        # Finish: target count reached or schedule exhausted.
        if new_done >= TOTAL_PER_PARTICIPANT or ptr >= len(order):
            return (
                status_msg,                 # status
                None,                       # clear video
                "",                         # clear action_tb
                TOTAL_PER_PARTICIPANT,      # final done_state
                _progress_html(TOTAL_PER_PARTICIPANT, TOTAL_PER_PARTICIPANT),
                5.0,                        # score reset
                ptr,
                current_video_id,
            )

        # Load the next video.
        next_idx = order[ptr]
        v = V[next_idx]
        next_vid = _get_video_id(v)
        return (
            status_msg,
            _src_for_gradio(v),  # 🔇 muted source
            _extract_action(v),
            new_done,
            _progress_html(new_done, TOTAL_PER_PARTICIPANT),
            5.0,
            ptr + 1,
            next_vid,
        )

    save_next.click(
        save_and_next,
        # cur_video_id is passed as the second argument…
        inputs=[pid, cur_video_id, score, done_state, order_state, ptr_state],
        # …and received back as the last output (state refresh).
        outputs=[status, video, action_tb, done_state, progress, score, ptr_state, cur_video_id],
    )

if __name__ == "__main__":
    if MUTE_AUDIO and not HAS_FFMPEG:
        print("[WARN] MUTE_AUDIO=True but ffmpeg not found. Videos will be served with original audio.")
    demo.launch()