Spaces:

MovieBench
/

MovieBench2

Running

evanzyfan Cursor committed 10 days ago

Commit

27d0b7a

1 Parent(s): b339a0d

Add preference ranking user study app

- app.py: Gradio app for collecting preference rankings of AI-generated movies
- requirements.txt: Python dependencies

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (2) hide show

app.py +621 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,621 @@

+"""
+MovieBench Preference Ranking User Study Application (Gradio Version)
+A simplified Gradio web application for collecting human preference rankings
+of AI-generated movies. For each story, presents results from different methods
+side-by-side with shuffled anonymous labels, and collects preference ordering.
+"""
+import json
+import os
+import random
+import threading
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+import gradio as gr
+from huggingface_hub import CommitScheduler, snapshot_download
+# ============================================================================
+# Configuration
+# ============================================================================
+# Every setting comes from an environment variable, with a default for local runs.
+DATA_DIR = os.getenv("DATA_DIR", "./data")
+OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./results_pref")
+NUM_GROUPS = int(os.getenv("NUM_GROUPS", "5"))
+RESULTS_REPO_ID = os.getenv("RESULTS_REPO_ID", "")
+DATA_REPO_ID = os.getenv("DATA_REPO_ID", "")
+HF_TOKEN = os.getenv("HF_TOKEN")
+# Number of method columns the evaluation tab pre-builds.
+MAX_METHODS = 8
+# Pull the dataset snapshot only when a repo is configured and DATA_DIR is absent.
+if DATA_REPO_ID:
+    if not Path(DATA_DIR).exists():
+        print(f"Downloading data from {DATA_REPO_ID} ...")
+        downloaded = snapshot_download(
+            repo_id=DATA_REPO_ID,
+            repo_type="dataset",
+            token=HF_TOKEN,
+            local_dir=DATA_DIR,
+        )
+        print(f"Data downloaded to {downloaded}")
+Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
+# Background uploader for OUTPUT_DIR; stays None when no results repo is set.
+# NOTE(review): `every` is the commit interval (minutes per the CommitScheduler docs) - confirm 3 is intended.
+scheduler: Optional[CommitScheduler] = (
+    CommitScheduler(
+        repo_id=RESULTS_REPO_ID,
+        repo_type="dataset",
+        folder_path=OUTPUT_DIR,
+        path_in_repo="result_new/preference",
+        every=3,
+        token=HF_TOKEN,
+    )
+    if RESULTS_REPO_ID
+    else None
+)
+# ============================================================================
+# Data Loading Functions
+# ============================================================================
+def _load_story_scripts() -> Dict[str, str]:
+    """Read vistory_test_lite.json from DATA_DIR and map each story id to its script.
+    Returns an empty dict when the file does not exist.
+    """
+    source = Path(DATA_DIR) / "vistory_test_lite.json"
+    if not source.exists():
+        return {}
+    # utf-8-sig tolerates a leading byte-order mark in the JSON file.
+    with open(source, "r", encoding="utf-8-sig") as handle:
+        records = json.load(handle)
+    scripts: Dict[str, str] = {}
+    for record in records:
+        scripts[record["id"]] = record["script"]
+    return scripts
+# Loaded once at import time; looked up by story id when a story is displayed.
+STORY_SCRIPTS = _load_story_scripts()
+def load_summary() -> List[Dict[str, str]]:
+    """Return the parsed contents of DATA_DIR/summary.json, or [] if the file is missing.
+    The entries are consumed elsewhere via their "shuffled_id", "story_id" and "agent" keys.
+    """
+    source = Path(DATA_DIR) / "summary.json"
+    if not source.exists():
+        return []
+    # utf-8-sig tolerates a leading byte-order mark in the JSON file.
+    with open(source, "r", encoding="utf-8-sig") as handle:
+        parsed = json.load(handle)
+    return parsed
+def get_available_samples() -> List[str]:
+    """Return the sorted names of the sub-directories directly under DATA_DIR.
+    Returns an empty list when DATA_DIR does not exist.
+    """
+    root = Path(DATA_DIR)
+    names: List[str] = []
+    if root.exists():
+        for child in root.iterdir():
+            if child.is_dir():
+                names.append(child.name)
+        names.sort()
+    return names
+def get_stories_with_agents() -> Dict[str, List[Dict[str, str]]]:
+    """Group the summary entries by story.
+    Returns story_id -> [{"agent": ..., "shuffled_id": ...}, ...], keeping only
+    entries whose sample directory exists under DATA_DIR; each list is sorted
+    by agent name.
+    """
+    summary = load_summary()
+    on_disk = set(get_available_samples())
+    grouped: Dict[str, List[Dict[str, str]]] = {}
+    for record in summary:
+        sample_id = record["shuffled_id"]
+        if sample_id in on_disk:
+            bucket = grouped.setdefault(record["story_id"], [])
+            bucket.append({"agent": record["agent"], "shuffled_id": sample_id})
+    for bucket in grouped.values():
+        bucket.sort(key=lambda item: item["agent"])
+    return grouped
+def get_movie_video_path(shuffled_id: str) -> str:
+    """Return DATA_DIR/<shuffled_id>/final_video.mp4 as a string, or "" if it does not exist."""
+    candidate = Path(DATA_DIR) / shuffled_id / "final_video.mp4"
+    if candidate.exists():
+        return str(candidate)
+    return ""
+# Held while writing group mapping files and ranking result files under OUTPUT_DIR.
+_save_lock = threading.Lock()
+# ============================================================================
+# Group Management
+# ============================================================================
+def _partition_list(items: List, num_chunks: int) -> List[List]:
+    """Cut items into num_chunks consecutive slices whose lengths differ by at most one.
+    The first len(items) % num_chunks slices receive the extra element.
+    """
+    base, extra = divmod(len(items), num_chunks)
+    sizes = [base + 1 if idx < extra else base for idx in range(num_chunks)]
+    pieces: List[List] = []
+    cursor = 0
+    for size in sizes:
+        pieces.append(items[cursor:cursor + size])
+        cursor += size
+    return pieces
+def get_or_create_group_config(group_id: str) -> Dict[str, Any]:
+    """Return the saved config for group_id, creating and saving one if none exists.
+    The saved config lives at OUTPUT_DIR/group_<group_id>/mapping.json.
+    """
+    saved_config = Path(OUTPUT_DIR) / f"group_{group_id}" / "mapping.json"
+    if not saved_config.exists():
+        return create_group_config(group_id)
+    with open(saved_config, "r", encoding="utf-8-sig") as handle:
+        return json.load(handle)
+def create_group_config(group_id: str) -> Dict[str, Any]:
+    """Create, save and return the config for one evaluator group.
+    Stories are shuffled with a fixed global seed and split into NUM_GROUPS
+    non-overlapping chunks; the group is assigned one chunk.  The agent display
+    order is shuffled with a per-group seed so that anonymous labels
+    (Method A, B, ...) are consistent within a group but may differ across groups.
+    Args:
+        group_id: Group identifier as entered by the user.  A numeric id maps to
+            partition (int(group_id) - 1) % NUM_GROUPS; any other string is
+            mapped through a CRC32 checksum of its UTF-8 bytes.
+    Returns:
+        The config dict, which is also written to
+        OUTPUT_DIR/group_<group_id>/mapping.json.
+    """
+    import zlib  # local import: only the non-numeric fallback below needs it
+    # NOTE(review): group_id is user input and is used verbatim in the directory name.
+    group_dir = Path(OUTPUT_DIR) / f"group_{group_id}"
+    group_dir.mkdir(parents=True, exist_ok=True)
+    stories_map = get_stories_with_agents()
+    try:
+        group_index = (int(group_id) - 1) % NUM_GROUPS
+    except ValueError:
+        # Built-in hash() of a str is salted per interpreter process, so it would
+        # send the same id to different partitions after a restart; CRC32 is stable.
+        group_index = zlib.crc32(group_id.encode("utf-8")) % NUM_GROUPS
+    # Same seed for every group, so all groups see the same partitioning.
+    unique_stories = sorted(stories_map.keys())
+    story_rng = random.Random("moviebench_pref_story_partition")
+    story_rng.shuffle(unique_stories)
+    story_chunks = _partition_list(unique_stories, NUM_GROUPS)
+    selected_stories = story_chunks[group_index]
+    # Agents that appear in at least one of this group's stories.
+    all_agents_sorted = sorted({
+        entry["agent"]
+        for story_id in selected_stories
+        for entry in stories_map.get(story_id, [])
+    })
+    method_rng = random.Random(f"moviebench_pref_group_{group_id}")
+    shuffled_agents = list(all_agents_sorted)
+    method_rng.shuffle(shuffled_agents)
+    # Position i in the shuffled order becomes "Method <i-th capital letter>".
+    method_display_map = {
+        f"Method {chr(ord('A') + i)}": agent
+        for i, agent in enumerate(shuffled_agents)
+    }
+    presentation_rng = random.Random(f"moviebench_pref_order_{group_id}")
+    story_order = list(selected_stories)
+    presentation_rng.shuffle(story_order)
+    config = {
+        "group_id": group_id,
+        "group_index": group_index,
+        "num_groups": NUM_GROUPS,
+        "created_at": datetime.now().isoformat(),
+        "stories": story_order,
+        "total_stories": len(unique_stories),
+        "stories_in_group": len(story_order),
+        "agents": all_agents_sorted,
+        "method_order": shuffled_agents,
+        "method_display_map": method_display_map,
+    }
+    with _save_lock:
+        with open(group_dir / "mapping.json", "w", encoding="utf-8") as f:
+            json.dump(config, f, indent=2, ensure_ascii=False)
+    return config
+def save_ranking_result(
+    group_id: str,
+    story_id: str,
+    evaluator_id: str,
+    method_display_map: Dict[str, str],
+    ranking: Dict[str, int],
+    comment: str,
+) -> str:
+    """Write one evaluator's ranking for one story to a JSON file.
+    The file is OUTPUT_DIR/group_<group_id>/<story_id>/<story_id>_<evaluator>.json;
+    an existing file for the same story and evaluator is overwritten.
+    Args:
+        group_id: Group the evaluator belongs to.
+        story_id: Story that was ranked.
+        evaluator_id: Free-text evaluator name; stored verbatim in the JSON and
+            used, with filename-unsafe characters replaced by "_", in the filename.
+        method_display_map: Display label -> agent mapping shown to the evaluator.
+        ranking: Display label -> assigned rank.
+        comment: Optional free-text note.
+    Returns:
+        A status message of the form "Saved to <path>".
+    """
+    # evaluator_id comes straight from a textbox: replace path separators,
+    # characters some filesystems reject, and control characters so the name
+    # cannot leave story_dir or make open() fail.
+    unsafe_chars = '\\/:*?"<>|' + "".join(map(chr, range(32)))
+    safe_evaluator = evaluator_id.translate({ord(ch): "_" for ch in unsafe_chars})
+    group_dir = Path(OUTPUT_DIR) / f"group_{group_id}"
+    story_dir = group_dir / story_id
+    filename = f"{story_id}_{safe_evaluator}.json"
+    result_data = {
+        "evaluator_id": evaluator_id,
+        "group_id": group_id,
+        "timestamp": datetime.now().isoformat(),
+        "story_id": story_id,
+        "method_order": method_display_map,
+        "ranking": ranking,
+        "comment": comment,
+    }
+    filepath = story_dir / filename
+    # When results are being uploaded, write under the scheduler's own lock so a
+    # scheduled commit cannot pick up a half-written file.
+    write_lock = scheduler.lock if scheduler is not None else _save_lock
+    with write_lock:
+        story_dir.mkdir(parents=True, exist_ok=True)
+        with open(filepath, "w", encoding="utf-8") as f:
+            json.dump(result_data, f, indent=4, ensure_ascii=False)
+    return f"Saved to {filepath}"
+# ============================================================================
+# Gradio Interface
+# ============================================================================
+# Stylesheet passed to gr.Blocks(css=...): caps and centres the page width,
+# styles the gradient page title (.title-text) and the per-method captions (.method-label).
+CUSTOM_CSS = """
+.gradio-container {
+    max-width: 1600px !important;
+    margin-left: auto !important;
+    margin-right: auto !important;
+}
+.title-text {
+    text-align: center;
+    background: linear-gradient(135deg, #7c5cff 0%, #ff6b9d 100%);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    font-size: 2rem;
+    font-weight: 700;
+    margin-bottom: 1rem;
+}
+.method-label {
+    text-align: center;
+    font-size: 1.1rem;
+    font-weight: 600;
+    padding: 6px 0;
+}
+"""
+def create_app():
+    """Build the Gradio Blocks UI for the preference-ranking study.
+    The app has two tabs: "Setup" (enter evaluator and group IDs, load or create
+    the group config) and "Preference Evaluation" (view each method's video for
+    a story, give every method a unique rank, submit).
+    Returns:
+        The constructed gr.Blocks app; launching it is left to the caller.
+    """
+    with gr.Blocks(
+        title="MovieBench: Preference Ranking",
+        css=CUSTOM_CSS,
+        theme=gr.themes.Soft(
+            primary_hue="purple",
+            secondary_hue="pink",
+            neutral_hue="slate",
+        ),
+    ) as app:
+        # State holders passed between the event handlers of both tabs.
+        current_evaluator = gr.State("anonymous")
+        current_group = gr.State("")
+        group_config_state = gr.State({})
+        current_story_idx = gr.State(0)
+        gr.Markdown(
+            "# MovieBench: Preference Ranking",
+            elem_classes=["title-text"],
+        )
+        # ================================================================
+        # Tab 1: Setup
+        # ================================================================
+        with gr.Tab("Setup", id="tab_setup"):
+            gr.Markdown("### Enter your evaluator ID and group ID to begin")
+            with gr.Row():
+                evaluator_input = gr.Textbox(
+                    label="Evaluator ID",
+                    placeholder="Enter your name or ID",
+                    value="anonymous",
+                    scale=2,
+                )
+                group_input = gr.Textbox(
+                    label="Group ID",
+                    placeholder=f"Enter group ID (1-{NUM_GROUPS})",
+                    value="",
+                    scale=2,
+                )
+            load_group_btn = gr.Button("Load / Create Group", variant="primary")
+            group_info = gr.Markdown("*Enter a Group ID and click 'Load / Create Group'*")
+            def load_group(group_id: str, evaluator_id: str):
+                """Load or create the group's config and describe it.
+                Returns (info markdown, evaluator id, group id, config), in the
+                order of the outputs wired to the button below.
+                """
+                # Trim stray whitespace so " 1" and "1" resolve to the same group
+                # directory, and never store results under a blank evaluator name.
+                group_id = (group_id or "").strip()
+                evaluator_id = (evaluator_id or "").strip() or "anonymous"
+                if not group_id:
+                    return (
+                        "*Please enter a Group ID*",
+                        evaluator_id,
+                        group_id,
+                        {},
+                    )
+                config = get_or_create_group_config(group_id)
+                stories = config.get("stories", [])
+                agents = config.get("agents", [])
+                method_map = config.get("method_display_map", {})
+                display_lines = ", ".join(sorted(method_map.keys()))
+                info_md = (
+                    f"### Group `{group_id}` loaded "
+                    f"(partition {config.get('group_index', 0) + 1}/{config.get('num_groups', NUM_GROUPS)})\n\n"
+                    f"**Stories in group:** {len(stories)}/{config.get('total_stories', '?')}\n\n"
+                    f"**Agents:** {len(agents)} ({', '.join(agents)})\n\n"
+                    f"**Display labels:** {display_lines}\n\n"
+                    f"**Story order:** {', '.join(stories)}\n\n"
+                    f"**Created:** {config.get('created_at', 'N/A')}\n\n"
+                    f"Go to the **Preference Evaluation** tab to start ranking."
+                )
+                return info_md, evaluator_id, group_id, config
+            load_group_btn.click(
+                load_group,
+                inputs=[group_input, evaluator_input],
+                outputs=[group_info, current_evaluator, current_group, group_config_state],
+            )
+        # ================================================================
+        # Tab 2: Preference Evaluation
+        # ================================================================
+        with gr.Tab("Preference Evaluation", id="tab_eval"):
+            gr.Markdown("### Rank the methods by preference for each story")
+            with gr.Row():
+                story_progress = gr.Markdown("**Progress:** Load a group first")
+                story_nav_prev = gr.Button("Previous Story", size="sm")
+                story_nav_next = gr.Button("Next Story", size="sm")
+            with gr.Accordion("Story Script", open=True):
+                story_script_display = gr.Markdown(
+                    "*Load a group and go to this tab to see stories*"
+                )
+            gr.Markdown("---")
+            gr.Markdown("### Method Videos")
+            # MAX_METHODS columns are built up front and hidden; the display
+            # helper reveals as many as the loaded group has methods.
+            method_cols: List[gr.Column] = []
+            method_videos: List[gr.Video] = []
+            method_labels: List[gr.Markdown] = []
+            method_ranks: List[gr.Dropdown] = []
+            with gr.Row():
+                for i in range(MAX_METHODS):
+                    with gr.Column(visible=False) as col:
+                        lbl = gr.Markdown(
+                            f"**Method {chr(ord('A') + i)}**",
+                            elem_classes=["method-label"],
+                        )
+                        vid = gr.Video(
+                            label=f"Method {chr(ord('A') + i)}",
+                            height=300,
+                        )
+                        rank = gr.Dropdown(
+                            label="Rank",
+                            choices=[],
+                            value=None,
+                            interactive=True,
+                        )
+                    method_cols.append(col)
+                    method_videos.append(vid)
+                    method_labels.append(lbl)
+                    method_ranks.append(rank)
+            gr.Markdown("---")
+            rank_comment = gr.Textbox(
+                label="Comment (optional)",
+                placeholder="Any additional notes about your ranking decision...",
+                lines=2,
+            )
+            with gr.Row():
+                submit_btn = gr.Button("Submit & Next Story", variant="primary")
+                eval_status = gr.Markdown("")
+            # ============================================================
+            # Helper functions
+            # ============================================================
+            def _hidden_slot() -> list:
+                """Return the four outputs of an unused method slot: hidden column, no video, blank label, cleared rank."""
+                return [
+                    gr.update(visible=False),
+                    None,
+                    "",
+                    gr.update(choices=[], value=None),
+                ]
+            def _build_story_display(story_idx: int, config: Dict[str, Any]):
+                """Build all output values for displaying a given story.
+                Returns a flat list matching the outputs wired to the UI:
+                  [progress_md, script_md,
+                   col_0_update, vid_0, lbl_0, rank_0_update,
+                   col_1_update, vid_1, lbl_1, rank_1_update,
+                   ... (MAX_METHODS times)]
+                """
+                stories = config.get("stories", [])
+                method_display_map: Dict[str, str] = config.get("method_display_map", {})
+                if not stories or story_idx >= len(stories):
+                    outputs: list = [
+                        "**Progress:** No stories loaded",
+                        "*Load a group first*",
+                    ]
+                    for _ in range(MAX_METHODS):
+                        outputs.extend(_hidden_slot())
+                    return outputs
+                # Only MAX_METHODS columns exist, so at most that many labels can be
+                # shown; the rank choices cover exactly the methods shown.
+                shown_labels = sorted(method_display_map)[:MAX_METHODS]
+                rank_choices = [str(r) for r in range(1, len(shown_labels) + 1)]
+                story_id = stories[story_idx]
+                script_text = STORY_SCRIPTS.get(story_id, "(Script not available)")
+                progress_md = f"**Progress:** Story {story_idx + 1}/{len(stories)} (`{story_id}`)"
+                script_md = f"**Story ID:** `{story_id}`\n\n{script_text}"
+                # agent -> sample directory for this story.
+                agent_to_sid: Dict[str, str] = {
+                    entry["agent"]: entry["shuffled_id"]
+                    for entry in get_stories_with_agents().get(story_id, [])
+                }
+                outputs = [progress_md, script_md]
+                for label in shown_labels:
+                    agent = method_display_map[label]
+                    sid = agent_to_sid.get(agent, "")
+                    video_path = get_movie_video_path(sid) if sid else ""
+                    outputs.extend([
+                        gr.update(visible=True),
+                        video_path if video_path else None,
+                        f"**{label}**",
+                        gr.update(choices=rank_choices, value=None),
+                    ])
+                for _ in range(MAX_METHODS - len(shown_labels)):
+                    outputs.extend(_hidden_slot())
+                return outputs
+            def update_story_display(story_idx: int, config: Dict[str, Any]):
+                """Event handler: rebuild the display for the given story index."""
+                return _build_story_display(story_idx, config)
+            def go_prev_story(story_idx: int):
+                """Step back one story, stopping at the first."""
+                return max(0, story_idx - 1)
+            def go_next_story(story_idx: int, config: Dict[str, Any]):
+                """Step forward one story, stopping at the last (0 when no stories are loaded)."""
+                stories = config.get("stories", [])
+                return min(len(stories) - 1, story_idx + 1) if stories else 0
+            def submit_ranking(
+                story_idx: int,
+                evaluator_id: str,
+                group_id: str,
+                config: Dict[str, Any],
+                comment: str,
+                *rank_values,
+            ):
+                """Validate and save the ranking, then advance to the next story.
+                Returns (status markdown, story index, comment box value); a
+                failed validation keeps the index and leaves the comment untouched.
+                """
+                if not group_id or not config:
+                    return "Please load a group first", story_idx, gr.update()
+                stories = config.get("stories", [])
+                if not stories or story_idx >= len(stories):
+                    return "No stories available", story_idx, gr.update()
+                method_display_map = config.get("method_display_map", {})
+                # Require ranks only for the labels that are actually displayed.
+                sorted_labels = sorted(method_display_map.keys())[:MAX_METHODS]
+                ranking: Dict[str, int] = {}
+                used_ranks = set()
+                for i, label in enumerate(sorted_labels):
+                    val = rank_values[i] if i < len(rank_values) else None
+                    if val is None or val == "":
+                        return (
+                            f"Please assign a rank to **{label}**",
+                            story_idx,
+                            gr.update(),
+                        )
+                    r = int(val)
+                    if r in used_ranks:
+                        return (
+                            f"Duplicate rank {r} — each method must have a unique rank",
+                            story_idx,
+                            gr.update(),
+                        )
+                    used_ranks.add(r)
+                    ranking[label] = r
+                story_id = stories[story_idx]
+                status = save_ranking_result(
+                    group_id=group_id,
+                    story_id=story_id,
+                    evaluator_id=evaluator_id,
+                    method_display_map=method_display_map,
+                    ranking=ranking,
+                    comment=comment or "",
+                )
+                next_idx = min(len(stories) - 1, story_idx + 1)
+                # An unchanged index means the story just saved was the last one.
+                if next_idx == story_idx:
+                    return (
+                        f"{status}\n\nAll stories evaluated! Thank you!",
+                        next_idx,
+                        "",
+                    )
+                return (
+                    f"{status}  |  Moving to next story...",
+                    next_idx,
+                    "",
+                )
+            # ============================================================
+            # Wire up events
+            # ============================================================
+            # Flat component list in the same order as _build_story_display's return value.
+            display_outputs = [story_progress, story_script_display]
+            for i in range(MAX_METHODS):
+                display_outputs.extend([
+                    method_cols[i],
+                    method_videos[i],
+                    method_labels[i],
+                    method_ranks[i],
+                ])
+            # When group config changes, reset to story 0
+            group_config_state.change(
+                lambda cfg: [0] + _build_story_display(0, cfg),
+                inputs=[group_config_state],
+                outputs=[current_story_idx] + display_outputs,
+            )
+            # When story idx changes, update display
+            current_story_idx.change(
+                update_story_display,
+                inputs=[current_story_idx, group_config_state],
+                outputs=display_outputs,
+            )
+            story_nav_prev.click(
+                go_prev_story,
+                inputs=[current_story_idx],
+                outputs=[current_story_idx],
+            )
+            story_nav_next.click(
+                go_next_story,
+                inputs=[current_story_idx, group_config_state],
+                outputs=[current_story_idx],
+            )
+            # The rank dropdowns arrive in submit_ranking as *rank_values.
+            submit_inputs = [
+                current_story_idx,
+                current_evaluator,
+                current_group,
+                group_config_state,
+                rank_comment,
+            ] + method_ranks
+            submit_btn.click(
+                submit_ranking,
+                inputs=submit_inputs,
+                outputs=[eval_status, current_story_idx, rank_comment],
+            )
+    return app
+# ============================================================================
+# Main Entry Point
+# ============================================================================
+# Built at import time so the app object exists as a module-level name.
+demo = create_app()
+if __name__ == "__main__":
+    data_dir_abs = str(Path(DATA_DIR).resolve())
+    # NOTE(review): allowed_paths presumably lets Gradio serve the videos under DATA_DIR - confirm.
+    launch_options = {
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+        "share": False,
+        "show_error": True,
+        "allowed_paths": [data_dir_abs],
+    }
+    demo.launch(**launch_options)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ gradio
2	+ huggingface_hub