studyOverflow committed on
Commit
99cdf4e
·
verified ·
1 Parent(s): c5efdae

fix: simplify UI to single page; no visibility toggling; plain-value returns

Browse files
Files changed (1) hide show
  1. app.py +66 -123
app.py CHANGED
@@ -1,19 +1,8 @@
1
  """MBench-V annotation UI (Gradio Space).
2
 
3
- Reads videos streaming from the `studyOverflow/TempMemoryData` dataset repo,
4
- writes annotations back to the same repo under `annotations/`, batched via
5
  `CommitScheduler`.
6
-
7
- Design notes
8
- ------------
9
- - Videos are NOT copied into this Space. We build CDN URLs with
10
- `hf_hub_url(..., repo_type="dataset")` and let the browser stream them.
11
- - Submissions are appended to a per-process JSONL file under `annotations/`;
12
- `CommitScheduler` pushes the directory to the dataset repo every 5 min.
13
- - Allocation is intentionally simple in this template: at start-up we build
14
- a single shuffled pool of `(model, task_id)` pairs, and each user session
15
- maintains its own index into that pool. Multi-annotator deduplication is
16
- out of scope for the first iteration.
17
  """
18
 
19
  from __future__ import annotations
@@ -36,8 +25,8 @@ from huggingface_hub import CommitScheduler, hf_hub_download, hf_hub_url
36
  DATASET_REPO = "studyOverflow/TempMemoryData"
37
  MERGED_JSON_PATH = "MBench-V/merged.json"
38
 
39
- # 6 models that are already fully reorganized on HF (584 videos each).
40
- # `skyreels` and `longcat` are excluded until their 0422 runs finish.
41
  MODELS: list[str] = [
42
  "causal_forcing",
43
  "self_forcing",
@@ -47,22 +36,17 @@ MODELS: list[str] = [
47
  "memflow",
48
  ]
49
 
50
- HF_TOKEN = os.environ.get("HF_TOKEN") # must be set in Space secrets for writes
51
 
52
- # Local staging directory that CommitScheduler will sync to the dataset repo.
53
  ANN_DIR = Path("annotations_local")
54
  ANN_DIR.mkdir(exist_ok=True)
55
-
56
- # Each Space process writes to its own JSONL so concurrent replicas don't
57
- # clobber each other's writes. `CommitScheduler` pushes the whole directory.
58
  PROCESS_ID = uuid.uuid4().hex[:8]
59
  ANN_FILE = ANN_DIR / f"ann_{PROCESS_ID}.jsonl"
60
-
61
  COMMIT_INTERVAL_MIN = 5
62
 
63
 
64
  # ---------------------------------------------------------------------------
65
- # Load merged.json (584 task records) once at startup
66
  # ---------------------------------------------------------------------------
67
 
68
  def _load_merged() -> list[dict[str, Any]]:
@@ -81,7 +65,6 @@ TASK_BY_ID: dict[str, dict[str, Any]] = {t["task_id"]: t for t in TASKS}
81
 
82
 
83
  def _extract_prompt(task: dict[str, Any]) -> str:
84
- """Return the first non-empty prompt string found in the task record."""
85
  gp = task.get("generation_prompts") or {}
86
  prompts = gp.get("prompts") or {}
87
  for level in ("level_1", "level_2", "level_3"):
@@ -93,19 +76,7 @@ def _extract_prompt(task: dict[str, Any]) -> str:
93
  return "(no prompt found)"
94
 
95
 
96
- # ---------------------------------------------------------------------------
97
- # Build the (model, task_id) pool
98
- # ---------------------------------------------------------------------------
99
-
100
- def _build_pool() -> list[tuple[str, str]]:
101
- pool: list[tuple[str, str]] = []
102
- for m in MODELS:
103
- for t in TASKS:
104
- pool.append((m, t["task_id"]))
105
- return pool
106
-
107
-
108
- POOL: list[tuple[str, str]] = _build_pool()
109
  print(f"[mbench-ann] loaded {len(TASKS)} tasks × {len(MODELS)} models = {len(POOL)} items")
110
 
111
 
@@ -118,7 +89,7 @@ def _video_url(model: str, task_id: str) -> str:
118
 
119
 
120
  # ---------------------------------------------------------------------------
121
- # CommitScheduler — pushes annotations_local/ to DATASET_REPO every 5 min
122
  # ---------------------------------------------------------------------------
123
 
124
  scheduler: CommitScheduler | None = None
@@ -135,7 +106,7 @@ if HF_TOKEN:
135
  )
136
  print(f"[mbench-ann] CommitScheduler started (every {COMMIT_INTERVAL_MIN} min)")
137
  else:
138
- print("[mbench-ann] WARNING: HF_TOKEN not set — annotations will stay local only")
139
 
140
 
141
  def _append_annotation(record: dict[str, Any]) -> None:
@@ -167,7 +138,6 @@ def _format_meta(model: str, task: dict[str, Any], idx: int, total: int) -> str:
167
 
168
 
169
  def _load_item(pool_order: list[int], idx: int) -> tuple[str, str, str]:
170
- """Return (video_url, meta_markdown, prompt_text) for position `idx`."""
171
  if idx < 0 or idx >= len(pool_order):
172
  return "", "**All done!** No more items.", ""
173
  model, task_id = POOL[pool_order[idx]]
@@ -180,53 +150,25 @@ def _load_item(pool_order: list[int], idx: int) -> tuple[str, str, str]:
180
 
181
 
182
  # ---------------------------------------------------------------------------
183
- # Gradio callbacks
184
  # ---------------------------------------------------------------------------
185
 
186
- def start_session(annotator: str, state: dict | None):
187
  annotator = (annotator or "").strip()
188
  if not annotator:
189
- return (
190
- state,
191
- gr.update(visible=True), # login panel stays
192
- gr.update(visible=False), # annotation panel hidden
193
- "",
194
- "",
195
- "",
196
- gr.update(value="Please enter a name first."),
197
- )
198
- # Build this user's shuffled order
199
  order = list(range(len(POOL)))
200
  rng = random.Random(f"{annotator}-{int(time.time())}")
201
  rng.shuffle(order)
202
  state = {"annotator": annotator, "order": order, "idx": 0}
203
  video, meta, prompt = _load_item(order, 0)
204
- return (
205
- state,
206
- gr.update(visible=False),
207
- gr.update(visible=True),
208
- video,
209
- meta,
210
- prompt,
211
- gr.update(value=f"Logged in as `{annotator}`"),
212
- )
213
 
214
 
215
- def _advance(state: dict, record_submitted: bool):
216
- state["idx"] += 1
217
- video, meta, prompt = _load_item(state["order"], state["idx"])
218
- status = (
219
- f"Submitted ({state['idx']} done). Next →"
220
- if record_submitted
221
- else f"Skipped. Next →"
222
- )
223
- # Reset score + note controls
224
- return state, video, meta, prompt, 3, "", status
225
-
226
-
227
- def submit_and_next(state: dict, score: int, note: str):
228
- if state is None or state.get("idx") is None:
229
- return state, "", "", "", 3, "", "Not logged in."
230
  order = state["order"]
231
  idx = state["idx"]
232
  if idx >= len(order):
@@ -243,82 +185,83 @@ def submit_and_next(state: dict, score: int, note: str):
243
  "note": (note or "").strip(),
244
  }
245
  _append_annotation(record)
246
- return _advance(state, record_submitted=True)
 
 
247
 
248
 
249
  def skip_and_next(state: dict):
250
- if state is None or state.get("idx") is None:
251
- return state, "", "", "", 3, "", "Not logged in."
252
- return _advance(state, record_submitted=False)
 
 
253
 
254
 
255
  # ---------------------------------------------------------------------------
256
- # Gradio UI
257
  # ---------------------------------------------------------------------------
258
 
259
- THEME = gr.themes.Soft(primary_hue="indigo")
260
-
261
-
262
- with gr.Blocks(theme=THEME, title="MBench-V Annotation") as demo:
263
  gr.Markdown(
264
  """
265
  # 🎬 MBench-V Annotation
266
 
267
- Watch each generated video and rate it **1–5** (5 = best). Click **Submit & Next** to save.
268
- Your submissions are auto-committed to the dataset repo every 5 minutes.
 
 
269
  """
270
  )
271
 
272
- session_state = gr.State(value=None)
273
 
274
- # ---- Login panel ----
275
- with gr.Group(visible=True) as login_panel:
276
- with gr.Row():
277
- annotator_in = gr.Textbox(
278
- label="Annotator name", placeholder="e.g. alice",
279
- scale=4, autofocus=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  )
281
- login_btn = gr.Button("Start annotating", variant="primary", scale=1)
282
-
283
- # ---- Annotation panel ----
284
- with gr.Group(visible=False) as ann_panel:
285
- with gr.Row():
286
- with gr.Column(scale=3):
287
- video = gr.Video(label="Generated video", autoplay=True, loop=True)
288
- with gr.Column(scale=2):
289
- meta_md = gr.Markdown()
290
- prompt_tb = gr.Textbox(
291
- label="Generation prompt",
292
- lines=10, max_lines=20, interactive=False,
293
- )
294
- with gr.Column(scale=1):
295
- score = gr.Slider(1, 5, value=3, step=1, label="Score (1 worst – 5 best)")
296
- note = gr.Textbox(label="Note (optional)", lines=4)
297
- submit_btn = gr.Button("✅ Submit & Next", variant="primary")
298
- skip_btn = gr.Button("⏭️ Skip")
299
-
300
- status = gr.Markdown("")
301
-
302
- # ---- Wiring ----
303
  login_btn.click(
304
  start_session,
305
- inputs=[annotator_in, session_state],
306
- outputs=[session_state, login_panel, ann_panel, video, meta_md, prompt_tb, status],
307
  )
308
  annotator_in.submit(
309
  start_session,
310
- inputs=[annotator_in, session_state],
311
- outputs=[session_state, login_panel, ann_panel, video, meta_md, prompt_tb, status],
312
  )
313
  submit_btn.click(
314
  submit_and_next,
315
- inputs=[session_state, score, note],
316
- outputs=[session_state, video, meta_md, prompt_tb, score, note, status],
317
  )
318
  skip_btn.click(
319
  skip_and_next,
320
- inputs=[session_state],
321
- outputs=[session_state, video, meta_md, prompt_tb, score, note, status],
322
  )
323
 
324
 
 
1
  """MBench-V annotation UI (Gradio Space).
2
 
3
+ Streams videos from `studyOverflow/TempMemoryData` (no local copy); writes
4
+ annotations back to the same dataset repo under `annotations/`, batched via
5
  `CommitScheduler`.
 
 
 
 
 
 
 
 
 
 
 
6
  """
7
 
8
  from __future__ import annotations
 
25
  DATASET_REPO = "studyOverflow/TempMemoryData"
26
  MERGED_JSON_PATH = "MBench-V/merged.json"
27
 
28
+ # 6 fully-reorganized models (584 videos each). `skyreels` and `longcat`
29
+ # are temporarily excluded until their 0422 runs finish.
30
  MODELS: list[str] = [
31
  "causal_forcing",
32
  "self_forcing",
 
36
  "memflow",
37
  ]
38
 
39
+ HF_TOKEN = os.environ.get("HF_TOKEN")
40
 
 
41
  ANN_DIR = Path("annotations_local")
42
  ANN_DIR.mkdir(exist_ok=True)
 
 
 
43
  PROCESS_ID = uuid.uuid4().hex[:8]
44
  ANN_FILE = ANN_DIR / f"ann_{PROCESS_ID}.jsonl"
 
45
  COMMIT_INTERVAL_MIN = 5
46
 
47
 
48
  # ---------------------------------------------------------------------------
49
+ # Load merged.json once at startup
50
  # ---------------------------------------------------------------------------
51
 
52
  def _load_merged() -> list[dict[str, Any]]:
 
65
 
66
 
67
  def _extract_prompt(task: dict[str, Any]) -> str:
 
68
  gp = task.get("generation_prompts") or {}
69
  prompts = gp.get("prompts") or {}
70
  for level in ("level_1", "level_2", "level_3"):
 
76
  return "(no prompt found)"
77
 
78
 
79
+ POOL: list[tuple[str, str]] = [(m, t["task_id"]) for m in MODELS for t in TASKS]
 
 
 
 
 
 
 
 
 
 
 
 
80
  print(f"[mbench-ann] loaded {len(TASKS)} tasks × {len(MODELS)} models = {len(POOL)} items")
81
 
82
 
 
89
 
90
 
91
  # ---------------------------------------------------------------------------
92
+ # CommitScheduler
93
  # ---------------------------------------------------------------------------
94
 
95
  scheduler: CommitScheduler | None = None
 
106
  )
107
  print(f"[mbench-ann] CommitScheduler started (every {COMMIT_INTERVAL_MIN} min)")
108
  else:
109
+ print("[mbench-ann] WARNING: HF_TOKEN not set — annotations stay local only")
110
 
111
 
112
  def _append_annotation(record: dict[str, Any]) -> None:
 
138
 
139
 
140
  def _load_item(pool_order: list[int], idx: int) -> tuple[str, str, str]:
 
141
  if idx < 0 or idx >= len(pool_order):
142
  return "", "**All done!** No more items.", ""
143
  model, task_id = POOL[pool_order[idx]]
 
150
 
151
 
152
  # ---------------------------------------------------------------------------
153
+ # Gradio callbacks — all return plain Python values (no gr.update mix)
154
  # ---------------------------------------------------------------------------
155
 
156
def start_session(annotator: str, state: dict):
    """Begin an annotation session for the given annotator name.

    Returns a 5-tuple of plain values, in the order the UI wiring expects:
    ``(state, video, meta, prompt, status)``.

    * Blank / whitespace-only name: ``state`` is passed back untouched, the
      video and prompt slots are emptied, and the same warning text is placed
      in both the meta and status slots.
    * Otherwise: a fresh session dict is built with a shuffled index order
      over ``POOL`` and the first item is loaded via ``_load_item``.
    """
    name = (annotator or "").strip()
    warning = "⚠️ Please enter a name first."
    if name == "":
        return state, "", warning, "", warning

    # The seed combines the name with the current epoch second, so the order
    # differs between annotators and between sessions started at other times.
    shuffler = random.Random(f"{name}-{int(time.time())}")
    order = list(range(len(POOL)))
    shuffler.shuffle(order)

    session = {"annotator": name, "order": order, "idx": 0}
    first_video, first_meta, first_prompt = _load_item(order, 0)
    banner = f"✅ Logged in as `{name}` — {len(order)} items to annotate."
    return session, first_video, first_meta, first_prompt, banner
 
 
 
 
 
 
 
167
 
168
 
169
+ def submit_and_next(state: dict, score: float, note: str):
170
+ if not state or "order" not in state:
171
+ return state, "", "⚠️ Please log in first.", "", 3, "", "⚠️ Not logged in."
 
 
 
 
 
 
 
 
 
 
 
 
172
  order = state["order"]
173
  idx = state["idx"]
174
  if idx >= len(order):
 
185
  "note": (note or "").strip(),
186
  }
187
  _append_annotation(record)
188
+ state["idx"] = idx + 1
189
+ video, meta, prompt = _load_item(state["order"], state["idx"])
190
+ return state, video, meta, prompt, 3, "", f"✅ Submitted ({state['idx']}). Next →"
191
 
192
 
193
  def skip_and_next(state: dict):
194
+ if not state or "order" not in state:
195
+ return state, "", "⚠️ Please log in first.", "", 3, "", "⚠️ Not logged in."
196
+ state["idx"] = state["idx"] + 1
197
+ video, meta, prompt = _load_item(state["order"], state["idx"])
198
+ return state, video, meta, prompt, 3, "", f"⏭️ Skipped. Position: {state['idx']}"
199
 
200
 
201
  # ---------------------------------------------------------------------------
202
+ # Gradio UI — single page (no visibility toggling)
203
  # ---------------------------------------------------------------------------
204
 
205
+ with gr.Blocks(title="MBench-V Annotation", theme=gr.themes.Soft()) as demo:
 
 
 
206
  gr.Markdown(
207
  """
208
  # 🎬 MBench-V Annotation
209
 
210
+ 1. Enter a short name (any string — it tags your submissions).
211
+ 2. Click **Start** — a video will appear below.
212
+ 3. Give a score (1–5, 5 = best) and optional note; click **Submit & Next**.
213
+ 4. Submissions auto-sync to the dataset repo every 5 minutes.
214
  """
215
  )
216
 
217
+ state = gr.State(value={})
218
 
219
+ with gr.Row():
220
+ annotator_in = gr.Textbox(
221
+ label="Annotator name",
222
+ placeholder="e.g. alice",
223
+ scale=4,
224
+ )
225
+ login_btn = gr.Button("Start", variant="primary", scale=1)
226
+
227
+ status_md = gr.Markdown("_Not started yet._")
228
+
229
+ with gr.Row():
230
+ with gr.Column(scale=3):
231
+ video = gr.Video(label="Generated video", autoplay=True, loop=True)
232
+ with gr.Column(scale=2):
233
+ meta_md = gr.Markdown()
234
+ prompt_tb = gr.Textbox(
235
+ label="Generation prompt",
236
+ lines=10,
237
+ max_lines=20,
238
+ interactive=False,
239
  )
240
+ with gr.Column(scale=1):
241
+ score = gr.Slider(1, 5, value=3, step=1, label="Score")
242
+ note = gr.Textbox(label="Note (optional)", lines=4)
243
+ submit_btn = gr.Button("✅ Submit & Next", variant="primary")
244
+ skip_btn = gr.Button("⏭️ Skip")
245
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  login_btn.click(
247
  start_session,
248
+ inputs=[annotator_in, state],
249
+ outputs=[state, video, meta_md, prompt_tb, status_md],
250
  )
251
  annotator_in.submit(
252
  start_session,
253
+ inputs=[annotator_in, state],
254
+ outputs=[state, video, meta_md, prompt_tb, status_md],
255
  )
256
  submit_btn.click(
257
  submit_and_next,
258
+ inputs=[state, score, note],
259
+ outputs=[state, video, meta_md, prompt_tb, score, note, status_md],
260
  )
261
  skip_btn.click(
262
  skip_and_next,
263
+ inputs=[state],
264
+ outputs=[state, video, meta_md, prompt_tb, score, note, status_md],
265
  )
266
 
267