Spaces:
Sleeping
Sleeping
Update app.py
Browse filesupdate resume progress
app.py
CHANGED
|
@@ -1,26 +1,31 @@
|
|
| 1 |
# ab_app_k4_two_page_resume.py
|
| 2 |
# Two-page Gradio app for open-sourced annotation (Master’s thesis)
|
| 3 |
-
# Adds: resume from where you left off by cross-referencing completed items on HF
|
| 4 |
-
# - Canonical comparison key (A/B-order agnostic)
|
| 5 |
-
# -
|
| 6 |
-
# -
|
|
|
|
|
|
|
| 7 |
|
| 8 |
import gradio as gr
|
| 9 |
import pandas as pd
|
| 10 |
import time
|
| 11 |
from itertools import combinations
|
| 12 |
from pathlib import Path
|
| 13 |
-
import json
|
| 14 |
import hashlib
|
| 15 |
import io
|
| 16 |
import requests
|
| 17 |
import shutil
|
|
|
|
| 18 |
|
| 19 |
PAIRS_CSV = "./pairs.csv" # columns: run_id, model, source_type, instruction, response, text
|
| 20 |
|
| 21 |
# --- Config ---
|
| 22 |
K = 4
|
| 23 |
-
OUT_FILE = "./annotations.csv"
|
|
|
|
|
|
|
|
|
|
| 24 |
SCHEMA = [
|
| 25 |
"annotator_type", # Learner | Native | Tester
|
| 26 |
"source_type", # Wiki | Oireachtas
|
|
@@ -33,6 +38,8 @@ SCHEMA = [
|
|
| 33 |
"instruction_B",
|
| 34 |
"response_B",
|
| 35 |
"timestamp",
|
|
|
|
|
|
|
| 36 |
"comp_key", # NEW: canonical key for the comparison
|
| 37 |
]
|
| 38 |
|
|
@@ -41,45 +48,44 @@ SCHEMA = [
|
|
| 41 |
def _stable_hash(s: str) -> int:
|
| 42 |
return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
| 49 |
|
|
|
|
| 50 |
def ensure_outfile_schema():
|
| 51 |
-
"""Ensure OUT_FILE exists with SCHEMA;
|
|
|
|
| 52 |
if not Path(OUT_FILE).exists():
|
| 53 |
pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
|
| 54 |
return
|
| 55 |
-
# If exists, check columns
|
| 56 |
try:
|
| 57 |
existing = pd.read_csv(OUT_FILE)
|
| 58 |
except Exception:
|
| 59 |
-
# Corrupt or empty -> recreate
|
| 60 |
pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
|
| 61 |
return
|
| 62 |
-
|
| 63 |
-
if cols == SCHEMA:
|
| 64 |
-
return
|
| 65 |
-
# Upgrade: compute comp_key where missing, reorder columns
|
| 66 |
-
# Try to infer comp_key from rows
|
| 67 |
-
if "comp_key" not in existing.columns:
|
| 68 |
-
def infer_key(r):
|
| 69 |
-
try:
|
| 70 |
-
return _comp_key(r.get("source_type", ""), r.get("text", ""), r.get("model_A", ""), r.get("model_B", ""))
|
| 71 |
-
except Exception:
|
| 72 |
-
return ""
|
| 73 |
-
existing["comp_key"] = existing.apply(infer_key, axis=1)
|
| 74 |
-
# Add any missing columns with defaults
|
| 75 |
for c in SCHEMA:
|
| 76 |
if c not in existing.columns:
|
| 77 |
existing[c] = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
existing = existing[SCHEMA]
|
| 79 |
# Backup and overwrite
|
| 80 |
-
backup = OUT_FILE + ".bak"
|
| 81 |
try:
|
| 82 |
-
shutil.copyfile(OUT_FILE,
|
| 83 |
except Exception:
|
| 84 |
pass
|
| 85 |
existing.to_csv(OUT_FILE, index=False)
|
|
@@ -126,17 +132,21 @@ def build_comparisons_k(source_type: str, k: int):
|
|
| 126 |
A, B = (m1, r1), (m2, r2)
|
| 127 |
else:
|
| 128 |
A, B = (m2, r2), (m1, r1)
|
|
|
|
|
|
|
| 129 |
item = {
|
| 130 |
"source_type": source_type,
|
| 131 |
"text": t,
|
| 132 |
"model_A": A[0],
|
| 133 |
"instruction_A": A[1]["instruction"],
|
| 134 |
"response_A": A[1]["response"],
|
|
|
|
| 135 |
"model_B": B[0],
|
| 136 |
"instruction_B": B[1]["instruction"],
|
| 137 |
"response_B": B[1]["response"],
|
|
|
|
| 138 |
}
|
| 139 |
-
item["comp_key"] = _comp_key(source_type, t, item["model_A"], item["model_B"])
|
| 140 |
comps.append(item)
|
| 141 |
|
| 142 |
comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
|
|
@@ -156,57 +166,59 @@ def save_row(annotator_type, item, choice):
|
|
| 156 |
"instruction_B": item["instruction_B"],
|
| 157 |
"response_B": item["response_B"],
|
| 158 |
"timestamp": time.time(),
|
| 159 |
-
"
|
|
|
|
|
|
|
| 160 |
}
|
| 161 |
-
#
|
| 162 |
df = pd.DataFrame([row])[SCHEMA]
|
| 163 |
df.to_csv(OUT_FILE, mode="a", header=False, index=False)
|
| 164 |
|
| 165 |
|
| 166 |
-
# ---------- Load completed keys from HF
|
| 167 |
|
| 168 |
-
def
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
resp.raise_for_status()
|
|
|
|
| 171 |
return pd.read_csv(io.StringIO(resp.text))
|
| 172 |
|
| 173 |
|
| 174 |
-
def
|
| 175 |
-
"""
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
if hf_csv_url:
|
| 182 |
-
try:
|
| 183 |
-
df = _read_csv_from_url(hf_csv_url)
|
| 184 |
-
except Exception:
|
| 185 |
-
df = None
|
| 186 |
-
if df is None:
|
| 187 |
-
try:
|
| 188 |
-
df = pd.read_csv(OUT_FILE)
|
| 189 |
-
except Exception:
|
| 190 |
-
return set()
|
| 191 |
|
| 192 |
-
# Filter by role+source
|
| 193 |
if "annotator_type" in df.columns:
|
| 194 |
df = df[df["annotator_type"].astype(str).str.strip() == annotator_type]
|
| 195 |
if "source_type" in df.columns:
|
| 196 |
df = df[df["source_type"].astype(str).str.strip() == source_type]
|
| 197 |
|
| 198 |
-
# If comp_key exists, use it; else reconstruct
|
| 199 |
keys = set()
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
return keys
|
| 211 |
|
| 212 |
|
|
@@ -242,8 +254,6 @@ with gr.Blocks() as demo:
|
|
| 242 |
consent_chk = gr.Checkbox(label="I consent to take part and for my anonymised annotations to be open-sourced.", value=False)
|
| 243 |
role_dd = gr.Dropdown(["Learner", "Native", "Tester"], label="Annotator Type (required)", value=None)
|
| 244 |
source_dd = gr.Dropdown(["Wiki", "Oireachtas"], label="Source (required)", value=None)
|
| 245 |
-
with gr.Row():
|
| 246 |
-
hf_csv_url_tb = gr.Textbox(label="(Optional) HF annotations.csv URL for resume", value="", placeholder="https://huggingface.co/datasets/<org>/<repo>/resolve/main/annotations.csv")
|
| 247 |
begin_btn = gr.Button("Begin")
|
| 248 |
gate_msg = gr.Markdown()
|
| 249 |
|
|
@@ -267,52 +277,63 @@ with gr.Blocks() as demo:
|
|
| 267 |
# ---------- State ----------
|
| 268 |
annotator_type = gr.State("") # Learner | Native | Tester
|
| 269 |
source_state = gr.State(None) # Wiki | Oireachtas
|
| 270 |
-
comps_state = gr.State([]) # list of dicts (
|
| 271 |
-
idx_state = gr.State(0) # index into
|
| 272 |
|
| 273 |
# ---------- Handlers ----------
|
| 274 |
-
def begin(consent, role, source
|
| 275 |
if not consent:
|
| 276 |
return ("**Please tick the consent checkbox to proceed.**",
|
| 277 |
-
gr.update(visible=True), gr.update(visible
|
| 278 |
-
"", "", "", "", "", "", "", "", "", "", ""
|
| 279 |
if role not in ["Learner", "Native", "Tester"]:
|
| 280 |
return ("**Please select your annotator type.**",
|
| 281 |
-
gr.update(visible=True), gr.update(visible
|
| 282 |
-
"", "", "", "", "", "", "", "", "", "", ""
|
| 283 |
if source not in ["Wiki", "Oireachtas"]:
|
| 284 |
return ("**Please select a source (Wikipedia/Oireachtas).**",
|
| 285 |
-
gr.update(visible=True), gr.update(visible
|
| 286 |
-
"", "", "", "", "", "", "", "", "", "", ""
|
| 287 |
|
| 288 |
full_list = build_comparisons_k(source, K)
|
| 289 |
if not full_list:
|
| 290 |
return ("**No items found for the selected source.**",
|
| 291 |
-
gr.update(visible=True), gr.update(visible
|
| 292 |
-
"", "", "", "", "", "", "", "", "", "", ""
|
| 293 |
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
return (f"**All done for {role} / {source}.**",
|
| 299 |
-
gr.update(visible=True), gr.update(visible
|
| 300 |
-
"", "", "", "", "", "", "", "", role, source,
|
| 301 |
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
return (resume_note,
|
| 306 |
gr.update(visible=False), gr.update(visible=True),
|
| 307 |
-
f"{
|
| 308 |
item["text"], item["instruction_A"], item["response_A"],
|
| 309 |
item["instruction_B"], item["response_B"],
|
| 310 |
-
role, source,
|
| 311 |
gr.update(interactive=True), gr.update(interactive=True))
|
| 312 |
|
| 313 |
begin_btn.click(
|
| 314 |
begin,
|
| 315 |
-
inputs=[consent_chk, role_dd, source_dd
|
| 316 |
outputs=[
|
| 317 |
gate_msg, page1, page2,
|
| 318 |
counter, ref_text, instA, respA, instB, respB,
|
|
@@ -327,18 +348,20 @@ with gr.Blocks() as demo:
|
|
| 327 |
return ("**No comparisons loaded.**", gr.skip(), gr.skip(), gr.skip(), gr.skip(),
|
| 328 |
gr.update(interactive=False), gr.update(interactive=False), i)
|
| 329 |
|
|
|
|
|
|
|
| 330 |
item = comp_list[i]
|
| 331 |
save_row(role, item, choice)
|
| 332 |
|
| 333 |
i += 1
|
| 334 |
-
if i >=
|
| 335 |
return ("**Done — thank you!**",
|
| 336 |
-
f"{
|
| 337 |
gr.update(interactive=False), gr.update(interactive=False), i)
|
| 338 |
|
| 339 |
nxt = comp_list[i]
|
| 340 |
return (f"Saved: {choice}",
|
| 341 |
-
f"{i+1} / {
|
| 342 |
nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"],
|
| 343 |
gr.update(interactive=True), gr.update(interactive=True), i)
|
| 344 |
|
|
@@ -354,4 +377,4 @@ with gr.Blocks() as demo:
|
|
| 354 |
)
|
| 355 |
|
| 356 |
if __name__ == "__main__":
|
| 357 |
-
demo.launch()
|
|
|
|
| 1 |
# ab_app_k4_two_page_resume.py
|
| 2 |
# Two-page Gradio app for open-sourced annotation (Master’s thesis)
|
| 3 |
+
# Adds: resume from where you left off by cross-referencing completed items on HF (single canonical file).
|
| 4 |
+
# - Canonical comparison key (A/B-order agnostic), includes run_ids when available
|
| 5 |
+
# - ALWAYS reads progress from HF file: annotations_Wiki_Native.csv
|
| 6 |
+
# - Never uses local storage to determine resume point (local file is only for local logging if desired)
|
| 7 |
+
# - Skips already-completed items; shows overall counter (e.g., 31/60)
|
| 8 |
+
# - Supports new role "Tester"
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
import pandas as pd
|
| 12 |
import time
|
| 13 |
from itertools import combinations
|
| 14 |
from pathlib import Path
|
|
|
|
| 15 |
import hashlib
|
| 16 |
import io
|
| 17 |
import requests
|
| 18 |
import shutil
|
| 19 |
+
import os
|
| 20 |
|
| 21 |
# Input table of model outputs.
PAIRS_CSV = "./pairs.csv"  # columns: run_id, model, source_type, instruction, response, text

# --- Config ---
# Passed to build_comparisons_k(source, K) when the comparison list is built.
K = 4
# Local annotation log written by save_row(); NOT used to decide the resume point.
OUT_FILE = "./annotations.csv"
# Single canonical progress file on Hugging Face that resume state is read from.
HF_ANNOTATIONS_URL = (
    "https://huggingface.co/datasets/jmcinern/Irish_Prompt_Response_Human_Feedback/resolve/main/annotations_Wiki_Native.csv"
)
|
| 29 |
SCHEMA = [
|
| 30 |
"annotator_type", # Learner | Native | Tester
|
| 31 |
"source_type", # Wiki | Oireachtas
|
|
|
|
| 38 |
"instruction_B",
|
| 39 |
"response_B",
|
| 40 |
"timestamp",
|
| 41 |
+
"run_id_A", # NEW: for key stability
|
| 42 |
+
"run_id_B", # NEW: for key stability
|
| 43 |
"comp_key", # NEW: canonical key for the comparison
|
| 44 |
]
|
| 45 |
|
|
|
|
| 48 |
def _stable_hash(s: str) -> int:
    """Return a deterministic integer hash of ``s``.

    The value is the SHA-256 digest of the UTF-8 encoding of ``s`` read as a
    base-16 integer, so it is identical across processes and machines.
    """
    return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
|
| 50 |
|
| 51 |
+
|
| 52 |
+
def _comp_key(source_type: str, text: str, model_a: str, model_b: str, run_id_a: str | None = None, run_id_b: str | None = None) -> str:
|
| 53 |
+
"""Order-agnostic key. If run_ids provided, include them to disambiguate different runs.
|
| 54 |
+
Canonical order = sort by (model, run_id or '')."""
|
| 55 |
+
a_model, b_model = str(model_a), str(model_b)
|
| 56 |
+
a_rid, b_rid = ("" if run_id_a is None else str(run_id_a)), ("" if run_id_b is None else str(run_id_b))
|
| 57 |
+
pair = sorted([(a_model, a_rid), (b_model, b_rid)])
|
| 58 |
+
(m1, r1), (m2, r2) = pair[0], pair[1]
|
| 59 |
+
raw = f"{source_type}|{text}|{m1}|{r1}|{m2}|{r2}"
|
| 60 |
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
| 61 |
|
| 62 |
+
|
| 63 |
def ensure_outfile_schema():
    """Ensure OUT_FILE exists with SCHEMA; upgrade older files by adding columns as needed.

    This file is NOT used for resume, only optional local logging.

    Behaviour:
      * missing file      -> a header-only CSV with the SCHEMA columns is written;
      * unreadable file   -> it is replaced by a header-only CSV;
      * readable file     -> missing SCHEMA columns are added as "", blank
        ``comp_key`` cells are recomputed with ``_comp_key``, columns are
        reordered to SCHEMA (columns not in SCHEMA are dropped), the previous
        file is copied to ``OUT_FILE + ".bak"`` and the upgraded table is written.
    """

    def _text(value) -> str:
        # read_csv parses empty cells as NaN; hash them as "" instead of the text "nan".
        return "" if pd.isna(value) else str(value)

    if not Path(OUT_FILE).exists():
        pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
        return

    try:
        existing = pd.read_csv(OUT_FILE)
    except Exception:
        # Unreadable (e.g. zero-byte or corrupt) file: start again from a header-only log.
        pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
        return

    # Add missing columns
    for col in SCHEMA:
        if col not in existing.columns:
            existing[col] = ""

    # Backfill comp_key wherever it is blank. A column that was just added holds ""
    # (not NaN), so both NaN and empty strings count as "missing".
    if "comp_key" in existing.columns:
        # Cast to object first: an all-empty column is parsed as float NaN and
        # cannot hold string keys without a dtype change.
        keys = existing["comp_key"].astype(object)
        blank = keys.isna() | (keys.astype(str).str.strip() == "")
        for idx in existing.index[blank]:
            row = existing.loc[idx]
            keys.at[idx] = _comp_key(
                _text(row.get("source_type", "")),
                _text(row.get("text", "")),
                _text(row.get("model_A", "")),
                _text(row.get("model_B", "")),
                _text(row.get("run_id_A", "")),
                _text(row.get("run_id_B", "")),
            )
        existing["comp_key"] = keys

    # Reorder
    existing = existing[SCHEMA]

    # Backup and overwrite. The backup is best-effort: a failed copy must not
    # prevent the upgraded file from being written.
    try:
        shutil.copyfile(OUT_FILE, OUT_FILE + ".bak")
    except OSError:
        pass
    existing.to_csv(OUT_FILE, index=False)
|
|
|
|
| 132 |
A, B = (m1, r1), (m2, r2)
|
| 133 |
else:
|
| 134 |
A, B = (m2, r2), (m1, r1)
|
| 135 |
+
run_id_a = str(A[1].get("run_id", ""))
|
| 136 |
+
run_id_b = str(B[1].get("run_id", ""))
|
| 137 |
item = {
|
| 138 |
"source_type": source_type,
|
| 139 |
"text": t,
|
| 140 |
"model_A": A[0],
|
| 141 |
"instruction_A": A[1]["instruction"],
|
| 142 |
"response_A": A[1]["response"],
|
| 143 |
+
"run_id_A": run_id_a,
|
| 144 |
"model_B": B[0],
|
| 145 |
"instruction_B": B[1]["instruction"],
|
| 146 |
"response_B": B[1]["response"],
|
| 147 |
+
"run_id_B": run_id_b,
|
| 148 |
}
|
| 149 |
+
item["comp_key"] = _comp_key(source_type, t, item["model_A"], item["model_B"], run_id_a, run_id_b)
|
| 150 |
comps.append(item)
|
| 151 |
|
| 152 |
comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
|
|
|
|
| 166 |
"instruction_B": item["instruction_B"],
|
| 167 |
"response_B": item["response_B"],
|
| 168 |
"timestamp": time.time(),
|
| 169 |
+
"run_id_A": item.get("run_id_A", ""),
|
| 170 |
+
"run_id_B": item.get("run_id_B", ""),
|
| 171 |
+
"comp_key": item.get("comp_key", _comp_key(item["source_type"], item["text"], item["model_A"], item["model_B"], item.get("run_id_A"), item.get("run_id_B")))
|
| 172 |
}
|
| 173 |
+
# Local log only; pushing to HF handled elsewhere in your pipeline
|
| 174 |
df = pd.DataFrame([row])[SCHEMA]
|
| 175 |
df.to_csv(OUT_FILE, mode="a", header=False, index=False)
|
| 176 |
|
| 177 |
|
| 178 |
+
# ---------- Load completed keys from HF (single canonical file) ----------
|
| 179 |
|
| 180 |
+
def _read_csv_from_hf(url: str) -> pd.DataFrame:
    """Download a CSV over HTTP and parse it into a DataFrame.

    Args:
        url: Address of the CSV file.

    Returns:
        The parsed table.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (via ``raise_for_status``).
        Other ``requests`` / ``pandas`` exceptions propagate unchanged.
    """
    headers = {}
    # Optional: HF token if the dataset is private
    token = os.getenv("HF_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    # Hugging Face may return a redirect; requests follows by default
    resp = requests.get(url, headers=headers, timeout=20)
    resp.raise_for_status()
    # Parse the raw bytes as UTF-8 instead of resp.text: resp.text is decoded with
    # whatever charset requests infers from the response headers, which can garble
    # accented characters and make text-derived comparison keys stop matching.
    return pd.read_csv(io.BytesIO(resp.content), encoding="utf-8")
|
| 190 |
|
| 191 |
|
| 192 |
+
def load_done_keys_from_hf(annotator_type: str, source_type: str) -> set[str]:
    """Fetch the progress CSV at HF_ANNOTATIONS_URL and return completed comp_keys.

    Rows are filtered to the given role and source when the corresponding
    columns exist. A row's stored ``comp_key`` is used when it is non-blank;
    otherwise the key is rebuilt with ``_comp_key`` from the row's
    source/text/model (and run id, when present) cells.

    Args:
        annotator_type: Role to filter on.
        source_type: Source to filter on.

    Returns:
        Set of comparison keys already annotated for this role and source.
        An empty progress file yields an empty set.

    Raises:
        RuntimeError: if the progress file cannot be downloaded or parsed.
    """

    def _cell(row: dict, column: str) -> str:
        # Empty CSV cells are parsed as NaN; they must hash as "" (what the app
        # uses for a missing run id), not as the text "nan".
        value = row.get(column, "")
        return "" if pd.isna(value) else str(value)

    try:
        df = _read_csv_from_hf(HF_ANNOTATIONS_URL)
    except pd.errors.EmptyDataError:
        # A file with no content means nothing has been annotated yet.
        return set()
    except Exception as e:
        raise RuntimeError(f"Could not read resume file from HF: {e}") from e

    # Filter by role+source if those columns exist; otherwise treat all rows as potential
    if "annotator_type" in df.columns:
        df = df[df["annotator_type"].astype(str).str.strip() == annotator_type]
    if "source_type" in df.columns:
        df = df[df["source_type"].astype(str).str.strip() == source_type]

    keys = set()
    for row in df.to_dict("records"):
        stored = _cell(row, "comp_key").strip()
        if stored:
            keys.add(stored)
            continue
        # Reconstruct; absent or blank run ids fall back to "" (model+text only).
        keys.add(
            _comp_key(
                _cell(row, "source_type"),
                _cell(row, "text"),
                _cell(row, "model_A"),
                _cell(row, "model_B"),
                _cell(row, "run_id_A"),
                _cell(row, "run_id_B"),
            )
        )
    return keys
|
| 223 |
|
| 224 |
|
|
|
|
| 254 |
consent_chk = gr.Checkbox(label="I consent to take part and for my anonymised annotations to be open-sourced.", value=False)
|
| 255 |
role_dd = gr.Dropdown(["Learner", "Native", "Tester"], label="Annotator Type (required)", value=None)
|
| 256 |
source_dd = gr.Dropdown(["Wiki", "Oireachtas"], label="Source (required)", value=None)
|
|
|
|
|
|
|
| 257 |
begin_btn = gr.Button("Begin")
|
| 258 |
gate_msg = gr.Markdown()
|
| 259 |
|
|
|
|
| 277 |
# ---------- State ----------
|
| 278 |
annotator_type = gr.State("") # Learner | Native | Tester
|
| 279 |
source_state = gr.State(None) # Wiki | Oireachtas
|
| 280 |
+
comps_state = gr.State([]) # list of dicts (FULL list, not filtered)
|
| 281 |
+
idx_state = gr.State(0) # index into FULL list (resume point)
|
| 282 |
|
| 283 |
# ---------- Handlers ----------
|
| 284 |
+
def begin(consent, role, source):
    """Validate the gate form, work out the resume point and open the annotation page.

    Args:
        consent: Value of the consent checkbox.
        role: Selected annotator type ("Learner", "Native" or "Tester").
        source: Selected source ("Wiki" or "Oireachtas").

    Returns:
        A 15-tuple with the same layout on every path:
        message, page-1 update, page-2 update, counter text, reference text,
        instruction A, response A, instruction B, response B, role, source,
        comparison list, index, and two button updates.
        NOTE(review): this layout is taken from the successful return path; the
        full ``outputs=[...]`` list is not visible here - confirm it has 15 entries
        in this order.
    """

    def _stay_on_gate(message, role_value="", source_value=None, comps=None, idx=0):
        # Keep page 1 visible, blank the item fields and disable both choice buttons.
        return (message,
                gr.update(visible=True), gr.update(visible=False),
                "", "", "", "", "", "",
                role_value, source_value, [] if comps is None else comps, idx,
                gr.update(interactive=False), gr.update(interactive=False))

    if not consent:
        return _stay_on_gate("**Please tick the consent checkbox to proceed.**")
    if role not in ["Learner", "Native", "Tester"]:
        return _stay_on_gate("**Please select your annotator type.**")
    if source not in ["Wiki", "Oireachtas"]:
        return _stay_on_gate("**Please select a source (Wikipedia/Oireachtas).**")

    full_list = build_comparisons_k(source, K)
    if not full_list:
        return _stay_on_gate("**No items found for the selected source.**")

    # Single resume check from HF
    try:
        done_keys = load_done_keys_from_hf(role, source)
    except Exception as e:
        return _stay_on_gate(f"**Error reading progress from HF:** {e}", role, source)

    total = len(full_list)
    # Resume at the first item whose key has not been recorded as done.
    resume_idx = next(
        (i for i, it in enumerate(full_list) if it.get("comp_key") not in done_keys),
        None,
    )
    if resume_idx is None:
        # all done
        return _stay_on_gate(f"**All done for {role} / {source}.**", role, source, full_list, total)

    # Count only keys that belong to this list, so the figures stay within 0..total
    # even if the progress file holds keys for other items.
    done_count = sum(1 for it in full_list if it.get("comp_key") in done_keys)
    item = full_list[resume_idx]
    note = f"Resuming from {done_count} completed; {total - done_count} remaining."
    return (note,
            gr.update(visible=False), gr.update(visible=True),
            f"{resume_idx+1} / {total}",
            item["text"], item["instruction_A"], item["response_A"],
            item["instruction_B"], item["response_B"],
            role, source, full_list, resume_idx,
            gr.update(interactive=True), gr.update(interactive=True))
|
| 333 |
|
| 334 |
begin_btn.click(
|
| 335 |
begin,
|
| 336 |
+
inputs=[consent_chk, role_dd, source_dd],
|
| 337 |
outputs=[
|
| 338 |
gate_msg, page1, page2,
|
| 339 |
counter, ref_text, instA, respA, instB, respB,
|
|
|
|
| 348 |
return ("**No comparisons loaded.**", gr.skip(), gr.skip(), gr.skip(), gr.skip(),
|
| 349 |
gr.update(interactive=False), gr.update(interactive=False), i)
|
| 350 |
|
| 351 |
+
total = len(comp_list)
|
| 352 |
+
# Save current item
|
| 353 |
item = comp_list[i]
|
| 354 |
save_row(role, item, choice)
|
| 355 |
|
| 356 |
i += 1
|
| 357 |
+
if i >= total:
|
| 358 |
return ("**Done — thank you!**",
|
| 359 |
+
f"{total} / {total}", "", "", "", "",
|
| 360 |
gr.update(interactive=False), gr.update(interactive=False), i)
|
| 361 |
|
| 362 |
nxt = comp_list[i]
|
| 363 |
return (f"Saved: {choice}",
|
| 364 |
+
f"{i+1} / {total}",
|
| 365 |
nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"],
|
| 366 |
gr.update(interactive=True), gr.update(interactive=True), i)
|
| 367 |
|
|
|
|
| 377 |
)
|
| 378 |
|
| 379 |
if __name__ == "__main__":
|
| 380 |
+
demo.launch()
|