Spaces:

jmcinern
/

LLM_Comparirson_Ga

Sleeping

App Files Files Community

jmcinern committed on Aug 21, 2025

Commit

8ab6a2e

verified ·

1 Parent(s): 82be556

allowing resume from left off functionality

Browse files

Files changed (1) hide show

app.py +163 -102

app.py CHANGED Viewed

@@ -1,6 +1,9 @@
-# app.py
 # Two-page Gradio app for open-sourced annotation (Master’s thesis)
-# Adds: demo.queue() for concurrency + FileLock for safe CSV appends + ensures outputs/ exists
 import gradio as gr
 import pandas as pd
@@ -9,23 +12,17 @@ from itertools import combinations
 from pathlib import Path
 import json
 import hashlib
-from filelock import FileLock
-from huggingface_hub import HfApi
-import os
-HF_DATASET_REPO = "jmcinern/Irish_Prompt_Response_Human_Feedback"
-HF_TOKEN = os.environ.get("hf_write")
-api = HfApi()
-PAIRS_CSV = Path("pairs.csv")   # columns: run_id, model, source_type, instruction, response, text
-OUT_FILE  = Path("annotations.csv")
-LOCK_FILE = Path("annotations.csv.lock")
 # --- Config ---
 K = 4
 SCHEMA = [
-    "annotator_type",   # Learner | Native
     "source_type",      # Wiki | Oireachtas
     "text",
     "model_A",
@@ -36,24 +33,68 @@ SCHEMA = [
     "instruction_B",
     "response_B",
     "timestamp",
 ]
-if not OUT_FILE.exists():
-    pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
-# Load pairs (fail clearly if missing)
-if not PAIRS_CSV.exists():
-    raise FileNotFoundError(f"Missing {PAIRS_CSV}. Upload your pairs CSV to outputs/.")
 pairs_all = pd.read_csv(PAIRS_CSV)
 # --- Helpers for deterministic schedule ---
 def _shared_texts(df, m1, m2):
     t1 = set(df[df["model"] == m1]["text"])
     t2 = set(df[df["model"] == m2]["text"])
     return list(t1 & t2)
-def _stable_hash(s: str) -> int:
-    return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
 def build_comparisons_k(source_type: str, k: int):
     df = pairs_all[pairs_all["source_type"] == source_type].copy()
@@ -68,13 +109,13 @@ def build_comparisons_k(source_type: str, k: int):
         shared = _shared_texts(df, m1, m2)
         if not shared:
             continue
-        keyed = [(_stable_hash(f"{source_type}|{m1}|{m2}|{t}"), t) for t in shared]
         keyed.sort(key=lambda x: x[0])
         ordered_texts = [t for _, t in keyed]
         chosen = []
         idx = 0
-        while len(chosen) < k and ordered_texts:
             chosen.append(ordered_texts[idx % len(ordered_texts)])
             idx += 1
@@ -85,22 +126,23 @@ def build_comparisons_k(source_type: str, k: int):
                 A, B = (m1, r1), (m2, r2)
             else:
                 A, B = (m2, r2), (m1, r1)
-            comps.append(
-                {
-                    "source_type": source_type,
-                    "text": t,
-                    "model_A": A[0],
-                    "instruction_A": A[1]["instruction"],
-                    "response_A": A[1]["response"],
-                    "model_B": B[0],
-                    "instruction_B": B[1]["instruction"],
-                    "response_B": B[1]["response"],
-                }
-            )
     comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
     return comps
 def save_row(annotator_type, item, choice):
     row = {
         "annotator_type": annotator_type,
@@ -114,45 +156,59 @@ def save_row(annotator_type, item, choice):
         "instruction_B": item["instruction_B"],
         "response_B": item["response_B"],
         "timestamp": time.time(),
     }
-    # append safely to local /data/annotations.csv
-    with FileLock(str(LOCK_FILE)):
-        df = pd.DataFrame([row])
-        df.to_csv(OUT_FILE, mode="a", header=False, index=False)
-    # prepare filename per role + source
-    filename = f"annotations_{item['source_type']}_{annotator_type}.csv"
-    print("[DEBUG] HF_TOKEN present?", bool(HF_TOKEN))
-    print("[DEBUG] OUT_FILE exists?", OUT_FILE.exists(), OUT_FILE)
-    print("[DEBUG] Uploading as:", filename)
-    # push to HF dataset repo
-    if HF_TOKEN:
         try:
-            api.upload_file(
-                path_or_fileobj=str(OUT_FILE),
-                path_in_repo=filename,
-                repo_id=HF_DATASET_REPO,
-                repo_type="dataset",
-                token=HF_TOKEN,
-            )
-            print(f"[HF] Uploaded {filename} to {HF_DATASET_REPO}")
-            # List files in the repo to confirm
-            files = api.list_repo_files(
-                repo_id=HF_DATASET_REPO,
-                repo_type="dataset",
-                token=HF_TOKEN,
-            )
-            print("[HF] Current repo files:", files)
-        except Exception as e:
-            print(f"[HF] Upload failed: {e}")
 QUESTION_MD = (
     "**Question:** Which Question–Answer pair exhibits a stronger command of Irish grammar and "
@@ -166,7 +222,7 @@ CONSENT_MD = f"""
 You are invited to take part in a study on Large Language Model Irish-language QA quality.
 By continuing, you consent to the following:
-- Your annotations will be **anonymised** (we only record whether you are a **Learner** or **Native speaker**).
 - The dataset (reference text + model outputs + your choices) will be released **open-source** for both research and commercial purposes.
 - No personal data is collected beyond your level of Irish. You may stop at any time before submission.
@@ -184,8 +240,10 @@ with gr.Blocks() as demo:
     with gr.Group(visible=True) as page1:
         gr.Markdown(CONSENT_MD)
         consent_chk = gr.Checkbox(label="I consent to take part and for my anonymised annotations to be open-sourced.", value=False)
-        role_dd = gr.Dropdown(["Learner", "Native"], label="Annotator Type (required)", value=None)
         source_dd = gr.Dropdown(["Wiki", "Oireachtas"], label="Source (required)", value=None)
         begin_btn = gr.Button("Begin")
         gate_msg = gr.Markdown()
@@ -207,45 +265,54 @@ with gr.Blocks() as demo:
         status = gr.Markdown()
     # ---------- State ----------
-    annotator_type = gr.State("")   # Learner | Native
-    source_state = gr.State(None)   # Wiki | Oireachtas
-    comps_state = gr.State([])      # list of dicts
-    idx_state = gr.State(0)
     # ---------- Handlers ----------
-    def begin(consent, role, source):
         if not consent:
             return ("**Please tick the consent checkbox to proceed.**",
                     gr.update(visible=True), gr.update(visible=False),
-                    "", "", "", "", "", "", "", "", "", "", "")
-        if role not in ["Learner", "Native"]:
             return ("**Please select your annotator type.**",
                     gr.update(visible=True), gr.update(visible=False),
-                    "", "", "", "", "", "", "", "", "", "", "")
         if source not in ["Wiki", "Oireachtas"]:
             return ("**Please select a source (Wikipedia/Oireachtas).**",
                     gr.update(visible=True), gr.update(visible=False),
-                    "", "", "", "", "", "", "", "", "", "", "")
-        comp_list = build_comparisons_k(source, K)
-        if not comp_list:
             return ("**No items found for the selected source.**",
                     gr.update(visible=True), gr.update(visible=False),
-                    "", "", "", "", "", "", "", "", "", "", "")
         i = 0
-        item = comp_list[i]
-        return ("",  # clear gate msg
-                gr.update(visible=False), gr.update(visible=True),  # show page2
-                f"{i+1} / {len(comp_list)}",
                 item["text"], item["instruction_A"], item["response_A"],
                 item["instruction_B"], item["response_B"],
-                role, source, comp_list, i,
                 gr.update(interactive=True), gr.update(interactive=True))
     begin_btn.click(
         begin,
-        inputs=[consent_chk, role_dd, source_dd],
         outputs=[
             gate_msg, page1, page2,
             counter, ref_text, instA, respA, instB, respB,
@@ -257,30 +324,24 @@ with gr.Blocks() as demo:
     def choose(choice, role, source, comp_list, i):
         role = (role or "").strip()
         if not role or not comp_list:
-            return ("**No comparisons loaded.**",
-                    gr.skip(), gr.skip(), gr.skip(), gr.skip(),
-                    gr.skip(), gr.skip(),
                     gr.update(interactive=False), gr.update(interactive=False), i)
         item = comp_list[i]
         save_row(role, item, choice)
         i += 1
         if i >= len(comp_list):
-            # Done: still return 10 values
             return ("**Done — thank you!**",
-                    f"{len(comp_list)} / {len(comp_list)}",
-                    "", "", "", "", "",
                     gr.update(interactive=False), gr.update(interactive=False), i)
         nxt = comp_list[i]
         return (f"Saved: {choice}",
                 f"{i+1} / {len(comp_list)}",
-                nxt["text"], nxt["instruction_A"], nxt["response_A"],
-                nxt["instruction_B"], nxt["response_B"],
                 gr.update(interactive=True), gr.update(interactive=True), i)
     btnA.click(
         lambda role, src, comps, i: choose("A", role, src, comps, i),
         inputs=[annotator_type, source_state, comps_state, idx_state],
@@ -292,5 +353,5 @@ with gr.Blocks() as demo:
         outputs=[status, counter, ref_text, instA, respA, instB, respB, btnA, btnB, idx_state],
     )
-# Enable request queueing for concurrent users
-demo.queue(max_size=128).launch()

+# ab_app_k4_two_page_resume.py
 # Two-page Gradio app for open-sourced annotation (Master’s thesis)
+# Adds: resume from where you left off by cross-referencing completed items on HF/local.
+# - Canonical comparison key (A/B-order agnostic)
+# - Loads completed keys from HF annotations.csv (configurable URL) or local OUT_FILE fallback
+# - Skips already-completed items; shows remaining count; supports new role "Tester"
 import gradio as gr
 import pandas as pd
 from pathlib import Path
 import json
 import hashlib
+import io
+import requests
+import shutil
+PAIRS_CSV = "./outputs/pairs.csv"  # columns: run_id, model, source_type, instruction, response, text
 # --- Config ---
 K = 4
+OUT_FILE = "./annotations.csv"
 SCHEMA = [
+    "annotator_type",   # Learner | Native | Tester
     "source_type",      # Wiki | Oireachtas
     "text",
     "model_A",
     "instruction_B",
     "response_B",
     "timestamp",
+    "comp_key",         # NEW: canonical key for the comparison
 ]
+# ---------- Utilities ----------
def _stable_hash(s: str) -> int:
    """Map *s* to a deterministic integer.

    The value is the SHA-256 digest of the UTF-8 encoding of *s*, read as a
    single big-endian number, so it is identical across runs and processes
    (unlike the built-in ``hash``).
    """
    digest = hashlib.sha256(s.encode("utf-8")).digest()
    return int.from_bytes(digest, "big")
def _comp_key(source_type: str, text: str, model_a: str, model_b: str) -> str:
    """Return the canonical key of one comparison as a SHA-256 hex string.

    The two model names are converted to strings and put in sorted order
    before hashing, so swapping which model is shown as A and which as B
    yields the same key. The hashed payload is
    ``source_type|text|<smaller model>|<larger model>``.
    """
    first, second = str(model_a), str(model_b)
    if second < first:
        first, second = second, first
    payload = f"{source_type}|{text}|{first}|{second}"
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()
def ensure_outfile_schema():
    """Make sure OUT_FILE exists and carries exactly the SCHEMA columns, in order.

    * Missing file: create it with just the SCHEMA header row.
    * Unreadable file: copy it to ``<OUT_FILE>.bak``, then recreate it empty.
    * Readable file whose columns differ from SCHEMA: compute ``comp_key``
      for every row when that column is absent, add any other missing
      SCHEMA columns as empty strings, drop non-SCHEMA columns, copy the
      old file to ``<OUT_FILE>.bak`` and rewrite it.

    Backups are best-effort: a failed copy is reported on stdout and the
    rewrite still goes ahead.
    """
    out_path = Path(OUT_FILE)
    backup_path = Path(f"{OUT_FILE}.bak")

    def _backup():
        # Keep a copy of the current file before it is overwritten.
        try:
            shutil.copyfile(out_path, backup_path)
        except OSError as exc:
            print(f"[WARN] Could not back up {out_path} to {backup_path}: {exc}")

    def _write_header_only():
        pd.DataFrame(columns=SCHEMA).to_csv(out_path, index=False)

    if not out_path.exists():
        _write_header_only()
        return

    try:
        # Read every cell as text and keep blanks as "" so the rewrite below
        # stores the existing values unchanged rather than re-parsed floats/NaN.
        existing = pd.read_csv(out_path, dtype=str, keep_default_na=False)
    except Exception as exc:
        # Corrupt or empty file: keep a copy before replacing it, otherwise
        # whatever annotations it still held would be lost for good.
        print(f"[WARN] Could not read {out_path} ({exc}); recreating it empty")
        _backup()
        _write_header_only()
        return

    if existing.columns.tolist() == SCHEMA:
        return

    if "comp_key" not in existing.columns:
        inferred = []
        for _, row in existing.iterrows():
            try:
                key = _comp_key(
                    row.get("source_type", ""),
                    row.get("text", ""),
                    row.get("model_A", ""),
                    row.get("model_B", ""),
                )
            except (TypeError, ValueError):
                # A row that cannot be keyed gets a blank key instead of aborting the upgrade.
                key = ""
            inferred.append(key)
        existing["comp_key"] = inferred

    # Add any other SCHEMA column the old file lacks, then enforce SCHEMA order.
    for column in SCHEMA:
        if column not in existing.columns:
            existing[column] = ""
    existing = existing[SCHEMA]

    _backup()
    existing.to_csv(out_path, index=False)
+ensure_outfile_schema()
 pairs_all = pd.read_csv(PAIRS_CSV)
 # --- Helpers for deterministic schedule ---
 def _shared_texts(df, m1, m2):
     """Return the ``text`` values present for both model *m1* and model *m2*.

     Rows are matched on the ``model`` column; the result is a list in no
     particular order, with each text listed once.
     """
     model_column = df["model"]
     texts_first = set(df.loc[model_column == m1, "text"])
     texts_second = set(df.loc[model_column == m2, "text"])
     return list(texts_first & texts_second)
 def build_comparisons_k(source_type: str, k: int):
     df = pairs_all[pairs_all["source_type"] == source_type].copy()
         shared = _shared_texts(df, m1, m2)
         if not shared:
             continue
+        keyed = [( _stable_hash(f"{source_type}|{m1}|{m2}|{t}"), t) for t in shared]
         keyed.sort(key=lambda x: x[0])
         ordered_texts = [t for _, t in keyed]
         chosen = []
         idx = 0
+        while len(chosen) < k and len(ordered_texts) > 0:
             chosen.append(ordered_texts[idx % len(ordered_texts)])
             idx += 1
                 A, B = (m1, r1), (m2, r2)
             else:
                 A, B = (m2, r2), (m1, r1)
+            item = {
+                "source_type": source_type,
+                "text": t,
+                "model_A": A[0],
+                "instruction_A": A[1]["instruction"],
+                "response_A": A[1]["response"],
+                "model_B": B[0],
+                "instruction_B": B[1]["instruction"],
+                "response_B": B[1]["response"],
+            }
+            item["comp_key"] = _comp_key(source_type, t, item["model_A"], item["model_B"])
+            comps.append(item)
     comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
     return comps
 def save_row(annotator_type, item, choice):
     row = {
         "annotator_type": annotator_type,
         "instruction_B": item["instruction_B"],
         "response_B": item["response_B"],
         "timestamp": time.time(),
+        "comp_key": item.get("comp_key", _comp_key(item["source_type"], item["text"], item["model_A"], item["model_B"]))
     }
+    # Ensure columns order
+    df = pd.DataFrame([row])[SCHEMA]
+    df.to_csv(OUT_FILE, mode="a", header=False, index=False)
+# ---------- Load completed keys from HF or local ----------
def _read_csv_from_url(url: str) -> pd.DataFrame:
    """Download *url* (10-second timeout) and parse the response body as CSV.

    Raises whatever ``requests`` raises for connection problems or, via
    ``raise_for_status``, for HTTP error responses; CSV parsing errors from
    pandas also propagate to the caller.
    """
    # NOTE(review): the URL reaching this function is typed by the annotator
    # on the consent page; consider restricting it to an expected host.
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    # Hand pandas the raw bytes (decoded as UTF-8 by default) instead of
    # resp.text: when a text/* response declares no charset, requests decodes
    # it as ISO-8859-1, which would garble accented Irish characters.
    return pd.read_csv(io.BytesIO(resp.content))
def load_done_keys(annotator_type: str, source_type: str, hf_csv_url: str | None) -> set:
    """Return the comp_key strings already completed for this annotator_type + source_type.

    Source priority: the CSV at *hf_csv_url* when one is given and loads
    successfully, otherwise the local OUT_FILE. If neither can be read the
    result is an empty set (nothing is treated as done).

    Rows are filtered on the ``annotator_type`` and ``source_type`` columns
    when those columns exist. A row's stored ``comp_key`` is used when
    present; rows without one (column absent, or cell blank) have their key
    rebuilt from ``source_type``/``text``/``model_A``/``model_B``.
    """
    df = None
    if hf_csv_url:
        try:
            df = _read_csv_from_url(hf_csv_url)
        except Exception as exc:
            # Resume is best-effort: report the problem and fall back to the local file.
            print(f"[WARN] Could not load annotations from {hf_csv_url}: {exc}")
    if df is None:
        try:
            df = pd.read_csv(OUT_FILE)
        except Exception as exc:
            print(f"[WARN] Could not read {OUT_FILE}: {exc}")
            return set()

    # Keep only the rows for this role and source.
    for column, wanted in (("annotator_type", annotator_type), ("source_type", source_type)):
        if column in df.columns:
            df = df[df[column].astype(str).str.strip() == wanted]

    if "comp_key" in df.columns:
        has_key = df["comp_key"].notna()
        keys = set(df.loc[has_key, "comp_key"].astype(str))
        # Rows written before comp_key existed have a blank cell here; rebuild
        # their keys below so those items are not served again.
        unkeyed = df[~has_key]
    else:
        keys = set()
        unkeyed = df

    for _, row in unkeyed.iterrows():
        try:
            key = _comp_key(
                row.get("source_type", ""),
                row.get("text", ""),
                row.get("model_A", ""),
                row.get("model_B", ""),
            )
        except (TypeError, ValueError):
            # Skip a row that cannot be keyed rather than failing the whole resume.
            continue
        if key:
            keys.add(key)
    return keys
 QUESTION_MD = (
     "**Question:** Which Question–Answer pair exhibits a stronger command of Irish grammar and "
 You are invited to take part in a study on Large Language Model Irish-language QA quality.
 By continuing, you consent to the following:
+- Your annotations will be **anonymised** (we only record whether you are a **Learner**, **Native speaker**, or **Tester**).
 - The dataset (reference text + model outputs + your choices) will be released **open-source** for both research and commercial purposes.
 - No personal data is collected beyond your level of Irish. You may stop at any time before submission.
     with gr.Group(visible=True) as page1:
         gr.Markdown(CONSENT_MD)
         consent_chk = gr.Checkbox(label="I consent to take part and for my anonymised annotations to be open-sourced.", value=False)
+        role_dd = gr.Dropdown(["Learner", "Native", "Tester"], label="Annotator Type (required)", value=None)
         source_dd = gr.Dropdown(["Wiki", "Oireachtas"], label="Source (required)", value=None)
+        with gr.Row():
+            hf_csv_url_tb = gr.Textbox(label="(Optional) HF annotations.csv URL for resume", value="", placeholder="https://huggingface.co/datasets/<org>/<repo>/resolve/main/annotations.csv")
         begin_btn = gr.Button("Begin")
         gate_msg = gr.Markdown()
         status = gr.Markdown()
     # ---------- State ----------
+    annotator_type = gr.State("")   # Learner | Native | Tester
+    source_state = gr.State(None)    # Wiki | Oireachtas
+    comps_state = gr.State([])       # list of dicts (filtered to remaining)
+    idx_state = gr.State(0)          # index into filtered list
     # ---------- Handlers ----------
+    def begin(consent, role, source, hf_csv_url):
         if not consent:
             return ("**Please tick the consent checkbox to proceed.**",
                     gr.update(visible=True), gr.update(visible=False),
+                    "", "", "", "", "", "", "", "", "", "", "", "")
+        if role not in ["Learner", "Native", "Tester"]:
             return ("**Please select your annotator type.**",
                     gr.update(visible=True), gr.update(visible=False),
+                    "", "", "", "", "", "", "", "", "", "", "", "")
         if source not in ["Wiki", "Oireachtas"]:
             return ("**Please select a source (Wikipedia/Oireachtas).**",
                     gr.update(visible=True), gr.update(visible=False),
+                    "", "", "", "", "", "", "", "", "", "", "", "")
+        full_list = build_comparisons_k(source, K)
+        if not full_list:
             return ("**No items found for the selected source.**",
                     gr.update(visible=True), gr.update(visible=False),
+                    "", "", "", "", "", "", "", "", "", "", "", "")
+        done_keys = load_done_keys(role, source, hf_csv_url.strip() or None)
+        remaining = [it for it in full_list if it.get("comp_key") not in done_keys]
+        if not remaining:
+            return (f"**All done for {role} / {source}.**",
+                    gr.update(visible=True), gr.update(visible=False),
+                    "", "", "", "", "", "", "", "", role, source, remaining, 0, gr.update(interactive=False), gr.update(interactive=False))
         i = 0
+        item = remaining[i]
+        resume_note = f"Resuming from {len(done_keys)} completed; {len(remaining)} remaining."
+        return (resume_note,
+                gr.update(visible=False), gr.update(visible=True),
+                f"{i+1} / {len(remaining)}",
                 item["text"], item["instruction_A"], item["response_A"],
                 item["instruction_B"], item["response_B"],
+                role, source, remaining, i,
                 gr.update(interactive=True), gr.update(interactive=True))
     begin_btn.click(
         begin,
+        inputs=[consent_chk, role_dd, source_dd, hf_csv_url_tb],
         outputs=[
             gate_msg, page1, page2,
             counter, ref_text, instA, respA, instB, respB,
     def choose(choice, role, source, comp_list, i):
         """Save the annotator's pick for the current item and move to the next one.

         Every branch returns exactly ten values, one per component in the
         click handlers' ``outputs`` list: status, counter, ref_text, instA,
         respA, instB, respB, btnA, btnB, idx_state.
         """
         role = (role or "").strip()
         if not role or not comp_list:
             # Nothing to annotate: leave the counter and the five text panes
             # untouched (six skips) and lock both buttons.
             return ("**No comparisons loaded.**",
                     gr.skip(), gr.skip(), gr.skip(),
                     gr.skip(), gr.skip(), gr.skip(),
                     gr.update(interactive=False), gr.update(interactive=False), i)

         total = len(comp_list)

         def _finished(idx):
             # Final screen: full counter, five blanked text panes, buttons locked.
             return ("**Done — thank you!**",
                     f"{total} / {total}",
                     "", "", "", "", "",
                     gr.update(interactive=False), gr.update(interactive=False), idx)

         if i >= total:
             # A stale click after the last item must neither index past the
             # list nor save a duplicate row.
             return _finished(i)

         save_row(role, comp_list[i], choice)
         i += 1
         if i >= total:
             return _finished(i)

         nxt = comp_list[i]
         return (f"Saved: {choice}",
                 f"{i+1} / {total}",
                 nxt["text"], nxt["instruction_A"], nxt["response_A"],
                 nxt["instruction_B"], nxt["response_B"],
                 gr.update(interactive=True), gr.update(interactive=True), i)
     btnA.click(
         lambda role, src, comps, i: choose("A", role, src, comps, i),
         inputs=[annotator_type, source_state, comps_state, idx_state],
         outputs=[status, counter, ref_text, instA, respA, instB, respB, btnA, btnB, idx_state],
     )
+if __name__ == "__main__":
+    demo.launch()