jmcinern committed on
Commit
81705e7
·
verified ·
1 Parent(s): 967d609

Update app.py

Browse files

update resume progress

Files changed (1) hide show
  1. app.py +116 -93
app.py CHANGED
@@ -1,26 +1,31 @@
1
  # ab_app_k4_two_page_resume.py
2
  # Two-page Gradio app for open-sourced annotation (Master’s thesis)
3
- # Adds: resume from where you left off by cross-referencing completed items on HF/local.
4
- # - Canonical comparison key (A/B-order agnostic)
5
- # - Loads completed keys from HF annotations.csv (configurable URL) or local OUT_FILE fallback
6
- # - Skips already-completed items; shows remaining count; supports new role "Tester"
 
 
7
 
8
  import gradio as gr
9
  import pandas as pd
10
  import time
11
  from itertools import combinations
12
  from pathlib import Path
13
- import json
14
  import hashlib
15
  import io
16
  import requests
17
  import shutil
 
18
 
19
  PAIRS_CSV = "./pairs.csv" # columns: run_id, model, source_type, instruction, response, text
20
 
21
  # --- Config ---
22
  K = 4
23
- OUT_FILE = "./annotations.csv"
 
 
 
24
  SCHEMA = [
25
  "annotator_type", # Learner | Native | Tester
26
  "source_type", # Wiki | Oireachtas
@@ -33,6 +38,8 @@ SCHEMA = [
33
  "instruction_B",
34
  "response_B",
35
  "timestamp",
 
 
36
  "comp_key", # NEW: canonical key for the comparison
37
  ]
38
 
@@ -41,45 +48,44 @@ SCHEMA = [
41
  def _stable_hash(s: str) -> int:
42
  return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
43
 
44
- def _comp_key(source_type: str, text: str, model_a: str, model_b: str) -> str:
45
- """Order-agnostic key: source|text|min(model)|max(model) -> sha256 hex."""
46
- m1, m2 = sorted([str(model_a), str(model_b)])
47
- raw = f"{source_type}|{text}|{m1}|{m2}"
 
 
 
 
 
48
  return hashlib.sha256(raw.encode("utf-8")).hexdigest()
49
 
 
50
  def ensure_outfile_schema():
51
- """Ensure OUT_FILE exists with SCHEMA; if an older file exists, upgrade it by adding comp_key."""
 
52
  if not Path(OUT_FILE).exists():
53
  pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
54
  return
55
- # If exists, check columns
56
  try:
57
  existing = pd.read_csv(OUT_FILE)
58
  except Exception:
59
- # Corrupt or empty -> recreate
60
  pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
61
  return
62
- cols = existing.columns.tolist()
63
- if cols == SCHEMA:
64
- return
65
- # Upgrade: compute comp_key where missing, reorder columns
66
- # Try to infer comp_key from rows
67
- if "comp_key" not in existing.columns:
68
- def infer_key(r):
69
- try:
70
- return _comp_key(r.get("source_type", ""), r.get("text", ""), r.get("model_A", ""), r.get("model_B", ""))
71
- except Exception:
72
- return ""
73
- existing["comp_key"] = existing.apply(infer_key, axis=1)
74
- # Add any missing columns with defaults
75
  for c in SCHEMA:
76
  if c not in existing.columns:
77
  existing[c] = ""
 
 
 
 
 
 
 
78
  existing = existing[SCHEMA]
79
  # Backup and overwrite
80
- backup = OUT_FILE + ".bak"
81
  try:
82
- shutil.copyfile(OUT_FILE, backup)
83
  except Exception:
84
  pass
85
  existing.to_csv(OUT_FILE, index=False)
@@ -126,17 +132,21 @@ def build_comparisons_k(source_type: str, k: int):
126
  A, B = (m1, r1), (m2, r2)
127
  else:
128
  A, B = (m2, r2), (m1, r1)
 
 
129
  item = {
130
  "source_type": source_type,
131
  "text": t,
132
  "model_A": A[0],
133
  "instruction_A": A[1]["instruction"],
134
  "response_A": A[1]["response"],
 
135
  "model_B": B[0],
136
  "instruction_B": B[1]["instruction"],
137
  "response_B": B[1]["response"],
 
138
  }
139
- item["comp_key"] = _comp_key(source_type, t, item["model_A"], item["model_B"])
140
  comps.append(item)
141
 
142
  comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
@@ -156,57 +166,59 @@ def save_row(annotator_type, item, choice):
156
  "instruction_B": item["instruction_B"],
157
  "response_B": item["response_B"],
158
  "timestamp": time.time(),
159
- "comp_key": item.get("comp_key", _comp_key(item["source_type"], item["text"], item["model_A"], item["model_B"]))
 
 
160
  }
161
- # Ensure columns order
162
  df = pd.DataFrame([row])[SCHEMA]
163
  df.to_csv(OUT_FILE, mode="a", header=False, index=False)
164
 
165
 
166
- # ---------- Load completed keys from HF or local ----------
167
 
168
- def _read_csv_from_url(url: str) -> pd.DataFrame:
169
- resp = requests.get(url, timeout=10)
 
 
 
 
 
170
  resp.raise_for_status()
 
171
  return pd.read_csv(io.StringIO(resp.text))
172
 
173
 
174
- def load_done_keys(annotator_type: str, source_type: str, hf_csv_url: str | None) -> set:
175
- """
176
- Return a set of comp_key strings already completed for this annotator_type + source_type.
177
- Priority: HF CSV URL (if provided) -> local OUT_FILE fallback.
178
- If comp_key column missing on HF, attempt to reconstruct from row fields.
179
- """
180
- df = None
181
- if hf_csv_url:
182
- try:
183
- df = _read_csv_from_url(hf_csv_url)
184
- except Exception:
185
- df = None
186
- if df is None:
187
- try:
188
- df = pd.read_csv(OUT_FILE)
189
- except Exception:
190
- return set()
191
 
192
- # Filter by role+source
193
  if "annotator_type" in df.columns:
194
  df = df[df["annotator_type"].astype(str).str.strip() == annotator_type]
195
  if "source_type" in df.columns:
196
  df = df[df["source_type"].astype(str).str.strip() == source_type]
197
 
198
- # If comp_key exists, use it; else reconstruct
199
  keys = set()
200
- if "comp_key" in df.columns:
201
- keys = set(df["comp_key"].dropna().astype(str).tolist())
202
- else:
203
- for _, r in df.iterrows():
204
- try:
205
- k = _comp_key(r.get("source_type", ""), r.get("text", ""), r.get("model_A", ""), r.get("model_B", ""))
206
- if k:
207
- keys.add(k)
208
- except Exception:
209
- pass
 
 
 
 
 
210
  return keys
211
 
212
 
@@ -242,8 +254,6 @@ with gr.Blocks() as demo:
242
  consent_chk = gr.Checkbox(label="I consent to take part and for my anonymised annotations to be open-sourced.", value=False)
243
  role_dd = gr.Dropdown(["Learner", "Native", "Tester"], label="Annotator Type (required)", value=None)
244
  source_dd = gr.Dropdown(["Wiki", "Oireachtas"], label="Source (required)", value=None)
245
- with gr.Row():
246
- hf_csv_url_tb = gr.Textbox(label="(Optional) HF annotations.csv URL for resume", value="", placeholder="https://huggingface.co/datasets/<org>/<repo>/resolve/main/annotations.csv")
247
  begin_btn = gr.Button("Begin")
248
  gate_msg = gr.Markdown()
249
 
@@ -267,52 +277,63 @@ with gr.Blocks() as demo:
267
  # ---------- State ----------
268
  annotator_type = gr.State("") # Learner | Native | Tester
269
  source_state = gr.State(None) # Wiki | Oireachtas
270
- comps_state = gr.State([]) # list of dicts (filtered to remaining)
271
- idx_state = gr.State(0) # index into filtered list
272
 
273
  # ---------- Handlers ----------
274
- def begin(consent, role, source, hf_csv_url):
275
  if not consent:
276
  return ("**Please tick the consent checkbox to proceed.**",
277
- gr.update(visible=True), gr.update(visible=False),
278
- "", "", "", "", "", "", "", "", "", "", "", "")
279
  if role not in ["Learner", "Native", "Tester"]:
280
  return ("**Please select your annotator type.**",
281
- gr.update(visible=True), gr.update(visible=False),
282
- "", "", "", "", "", "", "", "", "", "", "", "")
283
  if source not in ["Wiki", "Oireachtas"]:
284
  return ("**Please select a source (Wikipedia/Oireachtas).**",
285
- gr.update(visible=True), gr.update(visible=False),
286
- "", "", "", "", "", "", "", "", "", "", "", "")
287
 
288
  full_list = build_comparisons_k(source, K)
289
  if not full_list:
290
  return ("**No items found for the selected source.**",
291
- gr.update(visible=True), gr.update(visible=False),
292
- "", "", "", "", "", "", "", "", "", "", "", "")
293
 
294
- done_keys = load_done_keys(role, source, hf_csv_url.strip() or None)
295
- remaining = [it for it in full_list if it.get("comp_key") not in done_keys]
296
-
297
- if not remaining:
 
 
 
 
 
 
 
 
 
 
 
 
298
  return (f"**All done for {role} / {source}.**",
299
- gr.update(visible=True), gr.update(visible=False),
300
- "", "", "", "", "", "", "", "", role, source, remaining, 0, gr.update(interactive=False), gr.update(interactive=False))
301
 
302
- i = 0
303
- item = remaining[i]
304
- resume_note = f"Resuming from {len(done_keys)} completed; {len(remaining)} remaining."
305
- return (resume_note,
306
  gr.update(visible=False), gr.update(visible=True),
307
- f"{i+1} / {len(remaining)}",
308
  item["text"], item["instruction_A"], item["response_A"],
309
  item["instruction_B"], item["response_B"],
310
- role, source, remaining, i,
311
  gr.update(interactive=True), gr.update(interactive=True))
312
 
313
  begin_btn.click(
314
  begin,
315
- inputs=[consent_chk, role_dd, source_dd, hf_csv_url_tb],
316
  outputs=[
317
  gate_msg, page1, page2,
318
  counter, ref_text, instA, respA, instB, respB,
@@ -327,18 +348,20 @@ with gr.Blocks() as demo:
327
  return ("**No comparisons loaded.**", gr.skip(), gr.skip(), gr.skip(), gr.skip(),
328
  gr.update(interactive=False), gr.update(interactive=False), i)
329
 
 
 
330
  item = comp_list[i]
331
  save_row(role, item, choice)
332
 
333
  i += 1
334
- if i >= len(comp_list):
335
  return ("**Done — thank you!**",
336
- f"{len(comp_list)} / {len(comp_list)}", "", "", "", "",
337
  gr.update(interactive=False), gr.update(interactive=False), i)
338
 
339
  nxt = comp_list[i]
340
  return (f"Saved: {choice}",
341
- f"{i+1} / {len(comp_list)}",
342
  nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"],
343
  gr.update(interactive=True), gr.update(interactive=True), i)
344
 
@@ -354,4 +377,4 @@ with gr.Blocks() as demo:
354
  )
355
 
356
  if __name__ == "__main__":
357
- demo.launch()
 
1
  # ab_app_k4_two_page_resume.py
2
  # Two-page Gradio app for open-sourced annotation (Master’s thesis)
3
+ # Adds: resume from where you left off by cross-referencing completed items on HF (single canonical file).
4
+ # - Canonical comparison key (A/B-order agnostic), includes run_ids when available
5
+ # - ALWAYS reads progress from HF file: annotations_Wiki_Native.csv
6
+ # - Never uses local storage to determine resume point (local file is only for local logging if desired)
7
+ # - Skips already-completed items; shows overall counter (e.g., 31/60)
8
+ # - Supports new role "Tester"
9
 
10
  import gradio as gr
11
  import pandas as pd
12
  import time
13
  from itertools import combinations
14
  from pathlib import Path
 
15
  import hashlib
16
  import io
17
  import requests
18
  import shutil
19
+ import os
20
 
21
  PAIRS_CSV = "./pairs.csv" # columns: run_id, model, source_type, instruction, response, text
22
 
23
  # --- Config ---
24
  K = 4
25
+ OUT_FILE = "./annotations.csv" # local log (NOT used for resume)
26
+ HF_ANNOTATIONS_URL = (
27
+ "https://huggingface.co/datasets/jmcinern/Irish_Prompt_Response_Human_Feedback/resolve/main/annotations_Wiki_Native.csv"
28
+ )
29
  SCHEMA = [
30
  "annotator_type", # Learner | Native | Tester
31
  "source_type", # Wiki | Oireachtas
 
38
  "instruction_B",
39
  "response_B",
40
  "timestamp",
41
+ "run_id_A", # NEW: for key stability
42
+ "run_id_B", # NEW: for key stability
43
  "comp_key", # NEW: canonical key for the comparison
44
  ]
45
 
 
48
  def _stable_hash(s: str) -> int:
49
  return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
50
 
51
+
52
+ def _comp_key(source_type: str, text: str, model_a: str, model_b: str, run_id_a: str | None = None, run_id_b: str | None = None) -> str:
53
+ """Order-agnostic key. If run_ids provided, include them to disambiguate different runs.
54
+ Canonical order = sort by (model, run_id or '')."""
55
+ a_model, b_model = str(model_a), str(model_b)
56
+ a_rid, b_rid = ("" if run_id_a is None else str(run_id_a)), ("" if run_id_b is None else str(run_id_b))
57
+ pair = sorted([(a_model, a_rid), (b_model, b_rid)])
58
+ (m1, r1), (m2, r2) = pair[0], pair[1]
59
+ raw = f"{source_type}|{text}|{m1}|{r1}|{m2}|{r2}"
60
  return hashlib.sha256(raw.encode("utf-8")).hexdigest()
61
 
62
+
63
  def ensure_outfile_schema():
64
+ """Ensure OUT_FILE exists with SCHEMA; upgrade older files by adding columns as needed.
65
+ This file is NOT used for resume, only optional local logging."""
66
  if not Path(OUT_FILE).exists():
67
  pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
68
  return
 
69
  try:
70
  existing = pd.read_csv(OUT_FILE)
71
  except Exception:
 
72
  pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
73
  return
74
+ # Add missing columns
 
 
 
 
 
 
 
 
 
 
 
 
75
  for c in SCHEMA:
76
  if c not in existing.columns:
77
  existing[c] = ""
78
+ # Try to backfill run_id/comp_key when possible
79
+ if "comp_key" in existing.columns:
80
+ missing = existing[existing["comp_key"].isna()].index
81
+ for idx in missing:
82
+ r = existing.loc[idx]
83
+ existing.at[idx, "comp_key"] = _comp_key(r.get("source_type", ""), r.get("text", ""), r.get("model_A", ""), r.get("model_B", ""), r.get("run_id_A", ""), r.get("run_id_B", ""))
84
+ # Reorder
85
  existing = existing[SCHEMA]
86
  # Backup and overwrite
 
87
  try:
88
+ shutil.copyfile(OUT_FILE, OUT_FILE + ".bak")
89
  except Exception:
90
  pass
91
  existing.to_csv(OUT_FILE, index=False)
 
132
  A, B = (m1, r1), (m2, r2)
133
  else:
134
  A, B = (m2, r2), (m1, r1)
135
+ run_id_a = str(A[1].get("run_id", ""))
136
+ run_id_b = str(B[1].get("run_id", ""))
137
  item = {
138
  "source_type": source_type,
139
  "text": t,
140
  "model_A": A[0],
141
  "instruction_A": A[1]["instruction"],
142
  "response_A": A[1]["response"],
143
+ "run_id_A": run_id_a,
144
  "model_B": B[0],
145
  "instruction_B": B[1]["instruction"],
146
  "response_B": B[1]["response"],
147
+ "run_id_B": run_id_b,
148
  }
149
+ item["comp_key"] = _comp_key(source_type, t, item["model_A"], item["model_B"], run_id_a, run_id_b)
150
  comps.append(item)
151
 
152
  comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
 
166
  "instruction_B": item["instruction_B"],
167
  "response_B": item["response_B"],
168
  "timestamp": time.time(),
169
+ "run_id_A": item.get("run_id_A", ""),
170
+ "run_id_B": item.get("run_id_B", ""),
171
+ "comp_key": item.get("comp_key", _comp_key(item["source_type"], item["text"], item["model_A"], item["model_B"], item.get("run_id_A"), item.get("run_id_B")))
172
  }
173
+ # Local log only; pushing to HF handled elsewhere in your pipeline
174
  df = pd.DataFrame([row])[SCHEMA]
175
  df.to_csv(OUT_FILE, mode="a", header=False, index=False)
176
 
177
 
178
+ # ---------- Load completed keys from HF (single canonical file) ----------
179
 
180
+ def _read_csv_from_hf(url: str) -> pd.DataFrame:
181
+ headers = {}
182
+ # Optional: HF token if the dataset is private
183
+ token = os.getenv("HF_TOKEN")
184
+ if token:
185
+ headers["Authorization"] = f"Bearer {token}"
186
+ resp = requests.get(url, headers=headers, timeout=20)
187
  resp.raise_for_status()
188
+ # Hugging Face may return a redirect; requests follows by default
189
  return pd.read_csv(io.StringIO(resp.text))
190
 
191
 
192
+ def load_done_keys_from_hf(annotator_type: str, source_type: str) -> set[str]:
193
+ """Fetch annotations_Wiki_Native.csv and return a set of comp_key for this role+source.
194
+ If comp_key missing, reconstruct using our canonical function; if run_ids missing, fallback to model+text only."""
195
+ try:
196
+ df = _read_csv_from_hf(HF_ANNOTATIONS_URL)
197
+ except Exception as e:
198
+ raise RuntimeError(f"Could not read resume file from HF: {e}")
 
 
 
 
 
 
 
 
 
 
199
 
200
+ # Filter by role+source if those columns exist; otherwise treat all rows as potential
201
  if "annotator_type" in df.columns:
202
  df = df[df["annotator_type"].astype(str).str.strip() == annotator_type]
203
  if "source_type" in df.columns:
204
  df = df[df["source_type"].astype(str).str.strip() == source_type]
205
 
 
206
  keys = set()
207
+ has_key = "comp_key" in df.columns
208
+ for _, r in df.iterrows():
209
+ if has_key and pd.notna(r.get("comp_key")) and str(r.get("comp_key")).strip() != "":
210
+ keys.add(str(r.get("comp_key")).strip())
211
+ else:
212
+ # Reconstruct; try to use run_ids if present
213
+ k = _comp_key(
214
+ str(r.get("source_type", "")),
215
+ str(r.get("text", "")),
216
+ str(r.get("model_A", "")),
217
+ str(r.get("model_B", "")),
218
+ str(r.get("run_id_A", "")) if "run_id_A" in df.columns else None,
219
+ str(r.get("run_id_B", "")) if "run_id_B" in df.columns else None,
220
+ )
221
+ keys.add(k)
222
  return keys
223
 
224
 
 
254
  consent_chk = gr.Checkbox(label="I consent to take part and for my anonymised annotations to be open-sourced.", value=False)
255
  role_dd = gr.Dropdown(["Learner", "Native", "Tester"], label="Annotator Type (required)", value=None)
256
  source_dd = gr.Dropdown(["Wiki", "Oireachtas"], label="Source (required)", value=None)
 
 
257
  begin_btn = gr.Button("Begin")
258
  gate_msg = gr.Markdown()
259
 
 
277
  # ---------- State ----------
278
  annotator_type = gr.State("") # Learner | Native | Tester
279
  source_state = gr.State(None) # Wiki | Oireachtas
280
+ comps_state = gr.State([]) # list of dicts (FULL list, not filtered)
281
+ idx_state = gr.State(0) # index into FULL list (resume point)
282
 
283
  # ---------- Handlers ----------
284
+ def begin(consent, role, source):
285
  if not consent:
286
  return ("**Please tick the consent checkbox to proceed.**",
287
+ gr.update(visible=True), gr.update(visible=False),
288
+ "", "", "", "", "", "", "", "", "", "", "")
289
  if role not in ["Learner", "Native", "Tester"]:
290
  return ("**Please select your annotator type.**",
291
+ gr.update(visible=True), gr.update(visible=False),
292
+ "", "", "", "", "", "", "", "", "", "", "")
293
  if source not in ["Wiki", "Oireachtas"]:
294
  return ("**Please select a source (Wikipedia/Oireachtas).**",
295
+ gr.update(visible=True), gr.update(visible=False),
296
+ "", "", "", "", "", "", "", "", "", "", "")
297
 
298
  full_list = build_comparisons_k(source, K)
299
  if not full_list:
300
  return ("**No items found for the selected source.**",
301
+ gr.update(visible=True), gr.update(visible=False),
302
+ "", "", "", "", "", "", "", "", "", "", "")
303
 
304
+ # Single resume check from HF
305
+ try:
306
+ done_keys = load_done_keys_from_hf(role, source)
307
+ except Exception as e:
308
+ return (f"**Error reading progress from HF:** {e}",
309
+ gr.update(visible=True), gr.update(visible=False),
310
+ "", "", "", "", "", "", "", "", role, source, [], 0, gr.update(interactive=False), gr.update(interactive=False))
311
+
312
+ total = len(full_list)
313
+ resume_idx = 0
314
+ for i, it in enumerate(full_list):
315
+ if it.get("comp_key") not in done_keys:
316
+ resume_idx = i
317
+ break
318
+ else:
319
+ # all done
320
  return (f"**All done for {role} / {source}.**",
321
+ gr.update(visible=True), gr.update(visible=False),
322
+ "", "", "", "", "", "", "", "", role, source, full_list, total, gr.update(interactive=False), gr.update(interactive=False))
323
 
324
+ item = full_list[resume_idx]
325
+ note = f"Resuming from {len(done_keys)} completed; {total - len(done_keys)} remaining."
326
+ return (note,
 
327
  gr.update(visible=False), gr.update(visible=True),
328
+ f"{resume_idx+1} / {total}",
329
  item["text"], item["instruction_A"], item["response_A"],
330
  item["instruction_B"], item["response_B"],
331
+ role, source, full_list, resume_idx,
332
  gr.update(interactive=True), gr.update(interactive=True))
333
 
334
  begin_btn.click(
335
  begin,
336
+ inputs=[consent_chk, role_dd, source_dd],
337
  outputs=[
338
  gate_msg, page1, page2,
339
  counter, ref_text, instA, respA, instB, respB,
 
348
  return ("**No comparisons loaded.**", gr.skip(), gr.skip(), gr.skip(), gr.skip(),
349
  gr.update(interactive=False), gr.update(interactive=False), i)
350
 
351
+ total = len(comp_list)
352
+ # Save current item
353
  item = comp_list[i]
354
  save_row(role, item, choice)
355
 
356
  i += 1
357
+ if i >= total:
358
  return ("**Done — thank you!**",
359
+ f"{total} / {total}", "", "", "", "",
360
  gr.update(interactive=False), gr.update(interactive=False), i)
361
 
362
  nxt = comp_list[i]
363
  return (f"Saved: {choice}",
364
+ f"{i+1} / {total}",
365
  nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"],
366
  gr.update(interactive=True), gr.update(interactive=True), i)
367
 
 
377
  )
378
 
379
  if __name__ == "__main__":
380
+ demo.launch()