Spaces:

studyOverflow
/

MBenchAnnotation

Running

App Files Files Community

studyOverflow committed 8 days ago

Commit

e4df8aa

verified ·

1 Parent(s): 135e7b6

feat: add skyreels+longcat (pool 4672); per-annotator dedup; global progress bar

Browse files

Files changed (1) hide show

app.py +136 -15

app.py CHANGED Viewed

@@ -25,8 +25,8 @@ from huggingface_hub import CommitScheduler, hf_hub_download, hf_hub_url
 DATASET_REPO = "studyOverflow/TempMemoryData"
 MERGED_JSON_PATH = "MBench-V/merged.json"
-# 6 fully-reorganized models (584 videos each). `skyreels` and `longcat`
-# are temporarily excluded until their 0422 runs finish.
 MODELS: list[str] = [
     "causal_forcing",
     "self_forcing",
@@ -34,6 +34,8 @@ MODELS: list[str] = [
     "helios",
     "longlive",
     "memflow",
 ]
 HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -118,6 +120,85 @@ def _append_annotation(record: dict[str, Any]) -> None:
     else:
         with ANN_FILE.open("a", encoding="utf-8") as f:
             f.write(line + "\n")
 # ---------------------------------------------------------------------------
@@ -165,23 +246,50 @@ def _load_item(pool_order: list[int], idx: int) -> tuple[str, str, str]:
 def start_session(annotator: str, state: dict):
     annotator = (annotator or "").strip()
     if not annotator:
-        return state, "", "⚠️ Please enter a name first.", "", "⚠️ Please enter a name first."
-    order = list(range(len(POOL)))
     rng = random.Random(f"{annotator}-{int(time.time())}")
     rng.shuffle(order)
     state = {"annotator": annotator, "order": order, "idx": 0}
     video_html, meta, prompt = _load_item(order, 0)
-    status = f"✅ Logged in as `{annotator}` — {len(order)} items to annotate."
-    return state, video_html, meta, prompt, status
 def submit_and_next(state: dict, score: float, note: str):
     if not state or "order" not in state:
-        return state, "", "⚠️ Please log in first.", "", 3, "", "⚠️ Not logged in."
     order = state["order"]
     idx = state["idx"]
     if idx >= len(order):
-        return state, "", "**All done!**", "", 3, "", "No more items."
     model, task_id = POOL[order[idx]]
     record = {
         "timestamp": time.time(),
@@ -196,15 +304,24 @@ def submit_and_next(state: dict, score: float, note: str):
     _append_annotation(record)
     state["idx"] = idx + 1
     video_html, meta, prompt = _load_item(state["order"], state["idx"])
-    return state, video_html, meta, prompt, 3, "", f"✅ Submitted ({state['idx']}). Next →"
 def skip_and_next(state: dict):
     if not state or "order" not in state:
-        return state, "", "⚠️ Please log in first.", "", 3, "", "⚠️ Not logged in."
     state["idx"] = state["idx"] + 1
     video_html, meta, prompt = _load_item(state["order"], state["idx"])
-    return state, video_html, meta, prompt, 3, "", f"⏭️ Skipped. Position: {state['idx']}"
 # ---------------------------------------------------------------------------
@@ -220,9 +337,13 @@ with gr.Blocks(title="MBench-V Annotation", theme=gr.themes.Soft()) as demo:
         2. Click **Start** — a video will appear below.
         3. Give a score (1–5, 5 = best) and optional note; click **Submit & Next**.
         4. Submissions auto-sync to the dataset repo every 5 minutes.
         """
     )
     state = gr.State()
     with gr.Row():
@@ -260,25 +381,25 @@ with gr.Blocks(title="MBench-V Annotation", theme=gr.themes.Soft()) as demo:
     login_btn.click(
         start_session,
         inputs=[annotator_in, state],
-        outputs=[state, video, meta_md, prompt_tb, status_md],
         api_name=False,
     )
     annotator_in.submit(
         start_session,
         inputs=[annotator_in, state],
-        outputs=[state, video, meta_md, prompt_tb, status_md],
         api_name=False,
     )
     submit_btn.click(
         submit_and_next,
         inputs=[state, score, note],
-        outputs=[state, video, meta_md, prompt_tb, score, note, status_md],
         api_name=False,
     )
     skip_btn.click(
         skip_and_next,
         inputs=[state],
-        outputs=[state, video, meta_md, prompt_tb, score, note, status_md],
         api_name=False,
     )

 DATASET_REPO = "studyOverflow/TempMemoryData"
 MERGED_JSON_PATH = "MBench-V/merged.json"
+# 8 fully-reorganized models (584 videos each). All 8 models have complete
+# data as of 2026-05-01.
 MODELS: list[str] = [
     "causal_forcing",
     "self_forcing",
     "helios",
     "longlive",
     "memflow",
+    "longcat",
+    "skyreels",
 ]
 HF_TOKEN = os.environ.get("HF_TOKEN")
     else:
         with ANN_FILE.open("a", encoding="utf-8") as f:
             f.write(line + "\n")
+    # Update in-memory mirror so progress stats react immediately (the
+    # committed file only arrives on the dataset every COMMIT_INTERVAL_MIN).
+    HISTORICAL_ANNOTATIONS.append(record)
+# ---------------------------------------------------------------------------
+# Load historical annotations (for dedup + progress stats)
+# ---------------------------------------------------------------------------
+def _fetch_remote_annotations() -> list[dict[str, Any]]:
+    """Download and parse every .jsonl file under `annotations/` on the dataset repo.
+    Returns a list of records. Silently returns [] if the folder does not exist
+    or any download fails — annotation UX should never be blocked by this.
+    """
+    from huggingface_hub import HfApi
+    records: list[dict[str, Any]] = []
+    try:
+        api = HfApi(token=HF_TOKEN)
+        files = api.list_repo_files(
+            repo_id=DATASET_REPO, repo_type="dataset",
+        )
+    except Exception as e:
+        print(f"[mbench-ann] list_repo_files failed: {e}")
+        return records
+    jsonls = [p for p in files if p.startswith("annotations/") and p.endswith(".jsonl")]
+    print(f"[mbench-ann] found {len(jsonls)} historical annotation files")
+    for path in jsonls:
+        try:
+            local = hf_hub_download(
+                repo_id=DATASET_REPO, filename=path,
+                repo_type="dataset", token=HF_TOKEN,
+            )
+            with open(local, encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        records.append(json.loads(line))
+                    except Exception:
+                        pass
+        except Exception as e:
+            print(f"[mbench-ann]   skip {path}: {e}")
+    return records
+# In-memory list of annotation records, fetched from the dataset repo once
+# when this module is imported. The dedup and progress helpers read it.
+HISTORICAL_ANNOTATIONS: list[dict[str, Any]] = _fetch_remote_annotations()
+print(f"[mbench-ann] loaded {len(HISTORICAL_ANNOTATIONS)} historical annotation records")
+def _global_stats() -> tuple[int, int]:
+    """(total_records, unique_(model,task_id)_pairs_covered)."""
+    seen: set[tuple[str, str]] = set()
+    for r in HISTORICAL_ANNOTATIONS:
+        if "model" in r and "task_id" in r:
+            seen.add((r["model"], r["task_id"]))
+    return len(HISTORICAL_ANNOTATIONS), len(seen)
+def _annotator_seen(annotator: str) -> set[tuple[str, str]]:
+    """`(model, task_id)` pairs already annotated by this annotator."""
+    annotator_l = annotator.strip().lower()
+    seen: set[tuple[str, str]] = set()
+    for r in HISTORICAL_ANNOTATIONS:
+        if (r.get("annotator") or "").strip().lower() == annotator_l:
+            seen.add((r.get("model", ""), r.get("task_id", "")))
+    return seen
+def _global_stats_md() -> str:
+    total, unique = _global_stats()
+    pool_sz = len(POOL)
+    pct = (unique / pool_sz * 100) if pool_sz else 0
+    return (
+        f"**Dataset progress**: {total} total annotations submitted • "
+        f"{unique} / {pool_sz} unique (model, task_id) pairs covered ({pct:.1f}%)"
+    )
 # ---------------------------------------------------------------------------
 def start_session(annotator: str, state: dict):
+    """Start an annotation session for `annotator`.
+
+    Builds a shuffled list of POOL indices that excludes every
+    (model, task_id) pair this annotator has already rated, and stores it
+    in a fresh session state.
+
+    Returns a 6-tuple:
+    `(state, video_html, meta, prompt, status, global_stats_markdown)`.
+    With an empty name the incoming `state` is returned unchanged alongside
+    a warning. With nothing left to annotate, an "All done" placeholder is
+    returned instead of a loaded item.
+    """
     annotator = (annotator or "").strip()
     if not annotator:
+        return (
+            state, "", "⚠️ Please enter a name first.", "",
+            "⚠️ Please enter a name first.", _global_stats_md(),
+        )
+    # Filter out (model, task_id) already annotated by this annotator
+    seen = _annotator_seen(annotator)
+    order: list[int] = [i for i, (m, t) in enumerate(POOL) if (m, t) not in seen]
+    # The seed includes the current second, so the same annotator gets a
+    # different order on each login.
     rng = random.Random(f"{annotator}-{int(time.time())}")
     rng.shuffle(order)
     state = {"annotator": annotator, "order": order, "idx": 0}
+    if not order:
+        status = (
+            f"🎉 Welcome back `{annotator}` — you have already annotated every item. Nothing left to do!"
+        )
+        return (
+            state,
+            "<div style='padding:24px;color:#888;text-align:center'>All done!</div>",
+            "**All done.**",
+            "",
+            status,
+            _global_stats_md(),
+        )
     video_html, meta, prompt = _load_item(order, 0)
+    # Number of pool items filtered out because this annotator already did them.
+    skipped = len(POOL) - len(order)
+    status = (
+        f"✅ Logged in as `{annotator}` — {len(order)} items to annotate"
+        + (f" (skipped {skipped} already done)." if skipped else ".")
+    )
+    return state, video_html, meta, prompt, status, _global_stats_md()
 def submit_and_next(state: dict, score: float, note: str):
     if not state or "order" not in state:
+        return (
+            state, "", "⚠️ Please log in first.", "", 3, "",
+            "⚠️ Not logged in.", _global_stats_md(),
+        )
     order = state["order"]
     idx = state["idx"]
     if idx >= len(order):
+        return (
+            state, "", "**All done!**", "", 3, "",
+            "No more items.", _global_stats_md(),
+        )
     model, task_id = POOL[order[idx]]
     record = {
         "timestamp": time.time(),
     _append_annotation(record)
     state["idx"] = idx + 1
     video_html, meta, prompt = _load_item(state["order"], state["idx"])
+    return (
+        state, video_html, meta, prompt, 3, "",
+        f"✅ Submitted ({state['idx']}). Next →", _global_stats_md(),
+    )
 def skip_and_next(state: dict):
+    """Advance to the next item without recording an annotation.
+
+    Returns an 8-tuple:
+    `(state, video_html, meta, prompt, 3, "", status, global_stats_markdown)`.
+    The literal `3` and `""` are returned in the score and note positions.
+    """
     if not state or "order" not in state:
+        return (
+            state, "", "⚠️ Please log in first.", "", 3, "",
+            "⚠️ Not logged in.", _global_stats_md(),
+        )
+    # NOTE(review): unlike submit_and_next, there is no `idx >= len(order)`
+    # guard here, so repeated skips keep incrementing `idx` past the end.
+    # This presumably relies on _load_item tolerating an out-of-range index
+    # — confirm.
     state["idx"] = state["idx"] + 1
     video_html, meta, prompt = _load_item(state["order"], state["idx"])
+    return (
+        state, video_html, meta, prompt, 3, "",
+        f"⏭️ Skipped. Position: {state['idx']}", _global_stats_md(),
+    )
 # ---------------------------------------------------------------------------
         2. Click **Start** — a video will appear below.
         3. Give a score (1–5, 5 = best) and optional note; click **Submit & Next**.
         4. Submissions auto-sync to the dataset repo every 5 minutes.
+        _Tip_: items you've already annotated are automatically skipped.
         """
     )
+    stats_md = gr.Markdown(_global_stats_md())
     state = gr.State()
     with gr.Row():
     login_btn.click(
         start_session,
         inputs=[annotator_in, state],
+        outputs=[state, video, meta_md, prompt_tb, status_md, stats_md],
         api_name=False,
     )
     annotator_in.submit(
         start_session,
         inputs=[annotator_in, state],
+        outputs=[state, video, meta_md, prompt_tb, status_md, stats_md],
         api_name=False,
     )
     submit_btn.click(
         submit_and_next,
         inputs=[state, score, note],
+        outputs=[state, video, meta_md, prompt_tb, score, note, status_md, stats_md],
         api_name=False,
     )
     skip_btn.click(
         skip_and_next,
         inputs=[state],
+        outputs=[state, video, meta_md, prompt_tb, score, note, status_md, stats_md],
         api_name=False,
     )