jmcinern commited on
Commit
6acd3d0
·
verified ·
1 Parent(s): e922c51

added back push to hub feature

Browse files
Files changed (1) hide show
  1. app.py +139 -157
app.py CHANGED
@@ -1,11 +1,8 @@
1
- # ab_app_k4_two_page_resume.py
2
  # Two-page Gradio app for open-sourced annotation (Master’s thesis)
3
- # Adds: resume from where you left off by cross-referencing completed items on HF (single canonical file).
4
- # - Canonical comparison key (A/B-order agnostic), includes run_ids when available
5
- # - ALWAYS reads progress from HF file: annotations_Wiki_Native.csv
6
- # - Never uses local storage to determine resume point (local file is only for local logging if desired)
7
- # - Skips already-completed items; shows overall counter (e.g., 31/60)
8
- # - Supports new role "Tester"
9
 
10
  import gradio as gr
11
  import pandas as pd
@@ -15,16 +12,19 @@ from pathlib import Path
15
  import hashlib
16
  import io
17
  import requests
18
- import shutil
19
  import os
 
 
20
 
21
  PAIRS_CSV = "./pairs.csv" # columns: run_id, model, source_type, instruction, response, text
22
 
23
  # --- Config ---
24
  K = 4
25
- OUT_FILE = "./annotations.csv" # local log (NOT used for resume)
 
 
26
  HF_ANNOTATIONS_URL = (
27
- "https://huggingface.co/datasets/jmcinern/Irish_Prompt_Response_Human_Feedback/resolve/main/annotations_Wiki_Native.csv"
28
  )
29
  SCHEMA = [
30
  "annotator_type", # Learner | Native | Tester
@@ -38,69 +38,31 @@ SCHEMA = [
38
  "instruction_B",
39
  "response_B",
40
  "timestamp",
41
- "run_id_A", # NEW: for key stability
42
- "run_id_B", # NEW: for key stability
43
- "comp_key", # NEW: canonical key for the comparison
44
  ]
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # ---------- Utilities ----------
47
 
48
  def _stable_hash(s: str) -> int:
49
  return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
50
 
51
 
52
- def _comp_key(source_type: str, text: str, model_a: str, model_b: str, run_id_a: str | None = None, run_id_b: str | None = None) -> str:
53
- """Backward-compatible, order-agnostic key that **does not rely on run_ids**.
54
- Reason: historical HF annotations may not have run_ids, so resume must match without them.
55
- Key = sha256(f"{source}|{text}|{min(model)}|{max(model)}")."""
56
- a_model, b_model = str(model_a), str(model_b)
57
- m1, m2 = sorted([a_model, b_model])
58
  raw = f"{source_type}|{text}|{m1}|{m2}"
59
  return hashlib.sha256(raw.encode("utf-8")).hexdigest()
60
 
61
 
62
- def ensure_outfile_schema():
63
- """Ensure OUT_FILE exists with SCHEMA; upgrade older files by adding columns as needed.
64
- This file is NOT used for resume, only optional local logging."""
65
- if not Path(OUT_FILE).exists():
66
- pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
67
- return
68
- try:
69
- existing = pd.read_csv(OUT_FILE)
70
- except Exception:
71
- pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
72
- return
73
- # Add missing columns
74
- for c in SCHEMA:
75
- if c not in existing.columns:
76
- existing[c] = ""
77
- # Try to backfill run_id/comp_key when possible
78
- if "comp_key" in existing.columns:
79
- missing = existing[existing["comp_key"].isna()].index
80
- for idx in missing:
81
- r = existing.loc[idx]
82
- existing.at[idx, "comp_key"] = _comp_key(r.get("source_type", ""), r.get("text", ""), r.get("model_A", ""), r.get("model_B", ""), r.get("run_id_A", ""), r.get("run_id_B", ""))
83
- # Reorder
84
- existing = existing[SCHEMA]
85
- # Backup and overwrite
86
- try:
87
- shutil.copyfile(OUT_FILE, OUT_FILE + ".bak")
88
- except Exception:
89
- pass
90
- existing.to_csv(OUT_FILE, index=False)
91
-
92
- ensure_outfile_schema()
93
-
94
- pairs_all = pd.read_csv(PAIRS_CSV)
95
-
96
- # --- Helpers for deterministic schedule ---
97
-
98
- def _shared_texts(df, m1, m2):
99
- t1 = set(df[df["model"] == m1]["text"])
100
- t2 = set(df[df["model"] == m2]["text"])
101
- return list(t1 & t2)
102
-
103
-
104
  def build_comparisons_k(source_type: str, k: int):
105
  df = pairs_all[pairs_all["source_type"] == source_type].copy()
106
  if df.empty:
@@ -114,13 +76,13 @@ def build_comparisons_k(source_type: str, k: int):
114
  shared = _shared_texts(df, m1, m2)
115
  if not shared:
116
  continue
117
- keyed = [( _stable_hash(f"{source_type}|{m1}|{m2}|{t}"), t) for t in shared]
118
  keyed.sort(key=lambda x: x[0])
119
  ordered_texts = [t for _, t in keyed]
120
 
121
  chosen = []
122
  idx = 0
123
- while len(chosen) < k and len(ordered_texts) > 0:
124
  chosen.append(ordered_texts[idx % len(ordered_texts)])
125
  idx += 1
126
 
@@ -131,27 +93,88 @@ def build_comparisons_k(source_type: str, k: int):
131
  A, B = (m1, r1), (m2, r2)
132
  else:
133
  A, B = (m2, r2), (m1, r1)
134
- run_id_a = str(A[1].get("run_id", ""))
135
- run_id_b = str(B[1].get("run_id", ""))
136
- item = {
137
- "source_type": source_type,
138
- "text": t,
139
- "model_A": A[0],
140
- "instruction_A": A[1]["instruction"],
141
- "response_A": A[1]["response"],
142
- "run_id_A": run_id_a,
143
- "model_B": B[0],
144
- "instruction_B": B[1]["instruction"],
145
- "response_B": B[1]["response"],
146
- "run_id_B": run_id_b,
147
- }
148
- item["comp_key"] = _comp_key(source_type, t, item["model_A"], item["model_B"])
149
- comps.append(item)
150
 
151
  comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
152
  return comps
153
 
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  def save_row(annotator_type, item, choice):
156
  row = {
157
  "annotator_type": annotator_type,
@@ -165,60 +188,21 @@ def save_row(annotator_type, item, choice):
165
  "instruction_B": item["instruction_B"],
166
  "response_B": item["response_B"],
167
  "timestamp": time.time(),
168
- "run_id_A": item.get("run_id_A", ""),
169
- "run_id_B": item.get("run_id_B", ""),
170
- "comp_key": item.get("comp_key", _comp_key(item["source_type"], item["text"], item["model_A"], item["model_B"], item.get("run_id_A"), item.get("run_id_B")))
171
  }
172
- # Local log only; pushing to HF handled elsewhere in your pipeline
173
- df = pd.DataFrame([row])[SCHEMA]
174
- df.to_csv(OUT_FILE, mode="a", header=False, index=False)
175
-
176
-
177
- # ---------- Load completed keys from HF (single canonical file) ----------
178
 
179
- def _read_csv_from_hf(url: str) -> pd.DataFrame:
180
- headers = {}
181
- # Optional: HF token if the dataset is private
182
- token = os.getenv("HF_TOKEN")
183
- if token:
184
- headers["Authorization"] = f"Bearer {token}"
185
- resp = requests.get(url, headers=headers, timeout=20)
186
- resp.raise_for_status()
187
- # Hugging Face may return a redirect; requests follows by default
188
- return pd.read_csv(io.StringIO(resp.text))
189
-
190
-
191
- def load_done_keys_from_hf(annotator_type: str, source_type: str) -> set[str]:
192
- """Fetch annotations_Wiki_Native.csv and return a set of comp_key for this role+source.
193
- If comp_key missing, reconstruct using our canonical function; if run_ids missing, fallback to model+text only."""
194
  try:
195
- df = _read_csv_from_hf(HF_ANNOTATIONS_URL)
196
- except Exception as e:
197
- raise RuntimeError(f"Could not read resume file from HF: {e}")
198
-
199
- # Filter by role+source if those columns exist; otherwise treat all rows as potential
200
- if "annotator_type" in df.columns:
201
- df = df[df["annotator_type"].astype(str).str.strip() == annotator_type]
202
- if "source_type" in df.columns:
203
- df = df[df["source_type"].astype(str).str.strip() == source_type]
204
 
205
- keys = set()
206
- has_key = "comp_key" in df.columns
207
- for _, r in df.iterrows():
208
- if has_key and pd.notna(r.get("comp_key")) and str(r.get("comp_key")).strip() != "":
209
- keys.add(str(r.get("comp_key")).strip())
210
- else:
211
- # Reconstruct; try to use run_ids if present
212
- k = _comp_key(
213
- str(r.get("source_type", "")),
214
- str(r.get("text", "")),
215
- str(r.get("model_A", "")),
216
- str(r.get("model_B", "")),
217
- str(r.get("run_id_A", "")) if "run_id_A" in df.columns else None,
218
- str(r.get("run_id_B", "")) if "run_id_B" in df.columns else None,
219
- )
220
- keys.add(k)
221
- return keys
222
 
223
 
224
  QUESTION_MD = (
@@ -275,59 +259,58 @@ with gr.Blocks() as demo:
275
 
276
  # ---------- State ----------
277
  annotator_type = gr.State("") # Learner | Native | Tester
278
- source_state = gr.State(None) # Wiki | Oireachtas
279
- comps_state = gr.State([]) # list of dicts (FULL list, not filtered)
280
- idx_state = gr.State(0) # index into FULL list (resume point)
281
 
282
  # ---------- Handlers ----------
283
  def begin(consent, role, source):
284
  if not consent:
285
  return ("**Please tick the consent checkbox to proceed.**",
286
- gr.update(visible=True), gr.update(visible(False)),
287
  "", "", "", "", "", "", "", "", "", "", "")
288
  if role not in ["Learner", "Native", "Tester"]:
289
  return ("**Please select your annotator type.**",
290
- gr.update(visible=True), gr.update(visible(False)),
291
  "", "", "", "", "", "", "", "", "", "", "")
292
  if source not in ["Wiki", "Oireachtas"]:
293
  return ("**Please select a source (Wikipedia/Oireachtas).**",
294
- gr.update(visible=True), gr.update(visible(False)),
295
  "", "", "", "", "", "", "", "", "", "", "")
296
 
297
- full_list = build_comparisons_k(source, K)
298
- if not full_list:
299
  return ("**No items found for the selected source.**",
300
- gr.update(visible=True), gr.update(visible(False)),
301
  "", "", "", "", "", "", "", "", "", "", "")
302
 
303
- # Single resume check from HF
304
  try:
305
  done_keys = load_done_keys_from_hf(role, source)
306
  except Exception as e:
307
  return (f"**Error reading progress from HF:** {e}",
308
- gr.update(visible=True), gr.update(visible(False)),
309
  "", "", "", "", "", "", "", "", role, source, [], 0, gr.update(interactive=False), gr.update(interactive=False))
310
 
311
- total = len(full_list)
312
  resume_idx = 0
313
- for i, it in enumerate(full_list):
314
- if it.get("comp_key") not in done_keys:
 
315
  resume_idx = i
316
  break
317
  else:
318
- # all done
319
  return (f"**All done for {role} / {source}.**",
320
- gr.update(visible=True), gr.update(visible(False)),
321
- "", "", "", "", "", "", "", "", role, source, full_list, total, gr.update(interactive=False), gr.update(interactive=False))
322
 
323
- item = full_list[resume_idx]
324
- note = f"Resuming from {len(done_keys)} completed; {total - len(done_keys)} remaining."
325
- return (note,
326
- gr.update(visible=False), gr.update(visible=True),
327
  f"{resume_idx+1} / {total}",
328
  item["text"], item["instruction_A"], item["response_A"],
329
  item["instruction_B"], item["response_B"],
330
- role, source, full_list, resume_idx,
331
  gr.update(interactive=True), gr.update(interactive=True))
332
 
333
  begin_btn.click(
@@ -347,20 +330,19 @@ with gr.Blocks() as demo:
347
  return ("**No comparisons loaded.**", gr.skip(), gr.skip(), gr.skip(), gr.skip(),
348
  gr.update(interactive=False), gr.update(interactive=False), i)
349
 
350
- total = len(comp_list)
351
- # Save current item
352
  item = comp_list[i]
353
  save_row(role, item, choice)
354
 
355
  i += 1
356
- if i >= total:
 
357
  return ("**Done — thank you!**",
358
- f"{total} / {total}", "", "", "", "",
359
  gr.update(interactive=False), gr.update(interactive=False), i)
360
 
361
  nxt = comp_list[i]
362
  return (f"Saved: {choice}",
363
- f"{i+1} / {total}",
364
  nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"],
365
  gr.update(interactive=True), gr.update(interactive=True), i)
366
 
 
1
+ # ab_app_k4_two_page.py (with HF resume + push on save)
2
  # Two-page Gradio app for open-sourced annotation (Master’s thesis)
3
+ # Page 1: consent + annotator type (Learner/Native/Tester) + source (Wiki/Oireachtas)
4
+ # Page 2: task only (QUESTION_MD + A/B), deterministic K=4 per model pair per source
5
+ # Saves locally AND pushes each row to a single HF CSV; resume checks that HF CSV once at Begin
 
 
 
6
 
7
  import gradio as gr
8
  import pandas as pd
 
12
  import hashlib
13
  import io
14
  import requests
 
15
  import os
16
+ import tempfile
17
+ from huggingface_hub import HfApi, hf_hub_download, create_commit, CommitOperationAdd
18
 
19
  PAIRS_CSV = "./pairs.csv" # columns: run_id, model, source_type, instruction, response, text
20
 
21
  # --- Config ---
22
  K = 4
23
+ OUT_FILE = "./annotations.csv"
24
+ HF_REPO_ID = "jmcinern/Irish_Prompt_Response_Human_Feedback" # dataset repo
25
+ HF_FILE_PATH = "annotations_Wiki_Native.csv" # single canonical file for all roles/sources
26
  HF_ANNOTATIONS_URL = (
27
+ f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{HF_FILE_PATH}"
28
  )
29
  SCHEMA = [
30
  "annotator_type", # Learner | Native | Tester
 
38
  "instruction_B",
39
  "response_B",
40
  "timestamp",
 
 
 
41
  ]
42
+ if not Path(OUT_FILE).exists():
43
+ pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
44
+
45
+ pairs_all = pd.read_csv(PAIRS_CSV)
46
+
47
+ # --- Helpers for deterministic schedule ---
48
+
49
+ def _shared_texts(df, m1, m2):
50
+ t1 = set(df[df["model"] == m1]["text"])
51
+ t2 = set(df[df["model"] == m2]["text"])
52
+ return list(t1 & t2)
53
 
 
54
 
55
  def _stable_hash(s: str) -> int:
56
  return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
57
 
58
 
59
+ def _comp_key(source_type: str, text: str, model_a: str, model_b: str) -> str:
60
+ """Order-agnostic, backward-compatible key (ignores run_ids)."""
61
+ m1, m2 = sorted([str(model_a), str(model_b)])
 
 
 
62
  raw = f"{source_type}|{text}|{m1}|{m2}"
63
  return hashlib.sha256(raw.encode("utf-8")).hexdigest()
64
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def build_comparisons_k(source_type: str, k: int):
67
  df = pairs_all[pairs_all["source_type"] == source_type].copy()
68
  if df.empty:
 
76
  shared = _shared_texts(df, m1, m2)
77
  if not shared:
78
  continue
79
+ keyed = [(_stable_hash(f"{source_type}|{m1}|{m2}|{t}"), t) for t in shared]
80
  keyed.sort(key=lambda x: x[0])
81
  ordered_texts = [t for _, t in keyed]
82
 
83
  chosen = []
84
  idx = 0
85
+ while len(chosen) < k:
86
  chosen.append(ordered_texts[idx % len(ordered_texts)])
87
  idx += 1
88
 
 
93
  A, B = (m1, r1), (m2, r2)
94
  else:
95
  A, B = (m2, r2), (m1, r1)
96
+ comps.append(
97
+ {
98
+ "source_type": source_type,
99
+ "text": t,
100
+ "model_A": A[0],
101
+ "instruction_A": A[1]["instruction"],
102
+ "response_A": A[1]["response"],
103
+ "model_B": B[0],
104
+ "instruction_B": B[1]["instruction"],
105
+ "response_B": B[1]["response"],
106
+ }
107
+ )
 
 
 
 
108
 
109
  comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
110
  return comps
111
 
112
 
113
+ # ---------- HF helpers ----------
114
+
115
def _read_csv_from_hf(url: str) -> pd.DataFrame:
    """Fetch a CSV from the Hugging Face Hub and parse it into a DataFrame.

    A Bearer token from the HF_TOKEN environment variable is attached when
    set (required for private datasets); otherwise the request is anonymous.
    Raises requests.HTTPError on a non-2xx response.
    """
    headers = {}
    token = os.getenv("HF_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    resp = requests.get(url, headers=headers, timeout=20)
    resp.raise_for_status()
    return pd.read_csv(io.StringIO(resp.text))
121
+
122
+
123
def load_done_keys_from_hf(annotator_type: str, source_type: str) -> set:
    """Read the canonical HF CSV and return comp_keys already done for this role+source.

    Fix vs. previous version: a NaN cell stringified to the truthy literal
    "nan", so incomplete rows slipped past the guard and produced bogus keys.
    Rows with any missing/empty key field are now skipped.
    """
    df = _read_csv_from_hf(HF_ANNOTATIONS_URL)
    # Narrow to this annotator role / source when the columns exist; older
    # files may lack them, in which case every row is considered.
    if "annotator_type" in df.columns:
        df = df[df["annotator_type"].astype(str).str.strip() == annotator_type]
    if "source_type" in df.columns:
        df = df[df["source_type"].astype(str).str.strip() == source_type]

    keys = set()
    key_fields = ("source_type", "text", "model_A", "model_B")
    for _, r in df.iterrows():
        vals = [r.get(f) for f in key_fields]
        # Skip rows with missing/empty key fields; pd.isna catches NaN before
        # str() can turn it into the truthy string "nan".
        if any(pd.isna(v) or str(v).strip() == "" for v in vals):
            continue
        st, tx, ma, mb = (str(v) for v in vals)
        keys.add(_comp_key(st, tx, ma, mb))
    return keys
141
+
142
+
143
def append_rows_to_hf(rows_df: pd.DataFrame):
    """Append new annotation rows to the single canonical HF CSV.

    Downloads the current file (starting a fresh frame if it does not exist),
    aligns both frames to SCHEMA, concatenates, and commits the merged CSV
    back to the dataset repo. Requires HF_TOKEN with write access.

    NOTE(review): this is a read-modify-write with no locking; two annotators
    committing at the same moment can race and one row may be lost — confirm
    whether concurrent annotation sessions are expected.
    """
    # 1) Download the current CSV; on any failure (e.g. file missing on the
    #    first run) start from an empty frame with the canonical schema.
    try:
        local_path = hf_hub_download(repo_id=HF_REPO_ID, filename=HF_FILE_PATH, repo_type="dataset")
        current = pd.read_csv(local_path)
    except Exception:
        current = pd.DataFrame(columns=SCHEMA)

    # 2) Align both frames to the canonical schema and column order.
    for c in SCHEMA:
        if c not in current.columns:
            current[c] = ""
    current = current[SCHEMA]
    rows_df = rows_df[SCHEMA]

    # 3) Merge and commit back. The temp file is always removed afterwards —
    #    the previous version used delete=False with no unlink, leaking one
    #    file per saved row. (The unused HfApi() instance was dropped too.)
    merged = pd.concat([current, rows_df], ignore_index=True)
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
            tmp_path = tmp.name
        merged.to_csv(tmp_path, index=False)
        op = CommitOperationAdd(path_in_repo=HF_FILE_PATH, path_or_fileobj=tmp_path)
        create_commit(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            operations=[op],
            commit_message="Append annotation row",
        )
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
174
+
175
+
176
+ # --- Save row (local + push to HF) ---
177
+
178
  def save_row(annotator_type, item, choice):
179
  row = {
180
  "annotator_type": annotator_type,
 
188
  "instruction_B": item["instruction_B"],
189
  "response_B": item["response_B"],
190
  "timestamp": time.time(),
 
 
 
191
  }
192
+ df_row = pd.DataFrame([row])[SCHEMA]
 
 
 
 
 
193
 
194
+ # Local redundancy
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  try:
196
+ df_row.to_csv(OUT_FILE, mode="a", header=False, index=False)
197
+ except Exception:
198
+ pass
 
 
 
 
 
 
199
 
200
+ # Push to HF (single canonical file)
201
+ try:
202
+ append_rows_to_hf(df_row)
203
+ except Exception:
204
+ # Fail-open: allow annotator to continue, progress still in local file
205
+ pass
 
 
 
 
 
 
 
 
 
 
 
206
 
207
 
208
  QUESTION_MD = (
 
259
 
260
  # ---------- State ----------
261
  annotator_type = gr.State("") # Learner | Native | Tester
262
+ source_state = gr.State(None) # Wiki | Oireachtas
263
+ comps_state = gr.State([]) # list of dicts (full list)
264
+ idx_state = gr.State(0) # current index into full list
265
 
266
  # ---------- Handlers ----------
267
  def begin(consent, role, source):
268
  if not consent:
269
  return ("**Please tick the consent checkbox to proceed.**",
270
+ gr.update(visible=True), gr.update(visible=False),
271
  "", "", "", "", "", "", "", "", "", "", "")
272
  if role not in ["Learner", "Native", "Tester"]:
273
  return ("**Please select your annotator type.**",
274
+ gr.update(visible=True), gr.update(visible=False),
275
  "", "", "", "", "", "", "", "", "", "", "")
276
  if source not in ["Wiki", "Oireachtas"]:
277
  return ("**Please select a source (Wikipedia/Oireachtas).**",
278
+ gr.update(visible=True), gr.update(visible=False),
279
  "", "", "", "", "", "", "", "", "", "", "")
280
 
281
+ comp_list = build_comparisons_k(source, K)
282
+ if not comp_list:
283
  return ("**No items found for the selected source.**",
284
+ gr.update(visible=True), gr.update(visible=False),
285
  "", "", "", "", "", "", "", "", "", "", "")
286
 
287
+ # Resume point from HF (single check)
288
  try:
289
  done_keys = load_done_keys_from_hf(role, source)
290
  except Exception as e:
291
  return (f"**Error reading progress from HF:** {e}",
292
+ gr.update(visible=True), gr.update(visible=False),
293
  "", "", "", "", "", "", "", "", role, source, [], 0, gr.update(interactive=False), gr.update(interactive=False))
294
 
295
+ total = len(comp_list)
296
  resume_idx = 0
297
+ for i, it in enumerate(comp_list):
298
+ key = _comp_key(source, it["text"], it["model_A"], it["model_B"]) # A/B order-agnostic
299
+ if key not in done_keys:
300
  resume_idx = i
301
  break
302
  else:
 
303
  return (f"**All done for {role} / {source}.**",
304
+ gr.update(visible=True), gr.update(visible=False),
305
+ "", "", "", "", "", "", "", "", role, source, comp_list, total, gr.update(interactive=False), gr.update(interactive=False))
306
 
307
+ item = comp_list[resume_idx]
308
+ return ("", # clear gate msg
309
+ gr.update(visible=False), gr.update(visible=True), # show page2
 
310
  f"{resume_idx+1} / {total}",
311
  item["text"], item["instruction_A"], item["response_A"],
312
  item["instruction_B"], item["response_B"],
313
+ role, source, comp_list, resume_idx,
314
  gr.update(interactive=True), gr.update(interactive=True))
315
 
316
  begin_btn.click(
 
330
  return ("**No comparisons loaded.**", gr.skip(), gr.skip(), gr.skip(), gr.skip(),
331
  gr.update(interactive=False), gr.update(interactive=False), i)
332
 
 
 
333
  item = comp_list[i]
334
  save_row(role, item, choice)
335
 
336
  i += 1
337
+ if i >= len(comp_list):
338
+ # Done: disable buttons, clear fields, lock progress at max
339
  return ("**Done — thank you!**",
340
+ f"{len(comp_list)} / {len(comp_list)}", "", "", "", "",
341
  gr.update(interactive=False), gr.update(interactive=False), i)
342
 
343
  nxt = comp_list[i]
344
  return (f"Saved: {choice}",
345
+ f"{i+1} / {len(comp_list)}",
346
  nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"],
347
  gr.update(interactive=True), gr.update(interactive=True), i)
348