Spaces:
Sleeping
Sleeping
added back push to hub feature
Browse files
app.py
CHANGED
|
@@ -1,11 +1,8 @@
|
|
| 1 |
-
#
|
| 2 |
# Two-page Gradio app for open-sourced annotation (Master’s thesis)
|
| 3 |
-
#
|
| 4 |
-
#
|
| 5 |
-
#
|
| 6 |
-
# - Never uses local storage to determine resume point (local file is only for local logging if desired)
|
| 7 |
-
# - Skips already-completed items; shows overall counter (e.g., 31/60)
|
| 8 |
-
# - Supports new role "Tester"
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
import pandas as pd
|
|
@@ -15,16 +12,19 @@ from pathlib import Path
|
|
| 15 |
import hashlib
|
| 16 |
import io
|
| 17 |
import requests
|
| 18 |
-
import shutil
|
| 19 |
import os
|
|
|
|
|
|
|
| 20 |
|
| 21 |
PAIRS_CSV = "./pairs.csv" # columns: run_id, model, source_type, instruction, response, text
|
| 22 |
|
| 23 |
# --- Config ---
|
| 24 |
K = 4
|
| 25 |
-
OUT_FILE = "./annotations.csv"
|
|
|
|
|
|
|
| 26 |
HF_ANNOTATIONS_URL = (
|
| 27 |
-
"https://huggingface.co/datasets/
|
| 28 |
)
|
| 29 |
SCHEMA = [
|
| 30 |
"annotator_type", # Learner | Native | Tester
|
|
@@ -38,69 +38,31 @@ SCHEMA = [
|
|
| 38 |
"instruction_B",
|
| 39 |
"response_B",
|
| 40 |
"timestamp",
|
| 41 |
-
"run_id_A", # NEW: for key stability
|
| 42 |
-
"run_id_B", # NEW: for key stability
|
| 43 |
-
"comp_key", # NEW: canonical key for the comparison
|
| 44 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
# ---------- Utilities ----------
|
| 47 |
|
| 48 |
def _stable_hash(s: str) -> int:
|
| 49 |
return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
|
| 50 |
|
| 51 |
|
| 52 |
-
def _comp_key(source_type: str, text: str, model_a: str, model_b: str
|
| 53 |
-
"""
|
| 54 |
-
|
| 55 |
-
Key = sha256(f"{source}|{text}|{min(model)}|{max(model)}")."""
|
| 56 |
-
a_model, b_model = str(model_a), str(model_b)
|
| 57 |
-
m1, m2 = sorted([a_model, b_model])
|
| 58 |
raw = f"{source_type}|{text}|{m1}|{m2}"
|
| 59 |
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
| 60 |
|
| 61 |
|
| 62 |
-
def ensure_outfile_schema():
|
| 63 |
-
"""Ensure OUT_FILE exists with SCHEMA; upgrade older files by adding columns as needed.
|
| 64 |
-
This file is NOT used for resume, only optional local logging."""
|
| 65 |
-
if not Path(OUT_FILE).exists():
|
| 66 |
-
pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
|
| 67 |
-
return
|
| 68 |
-
try:
|
| 69 |
-
existing = pd.read_csv(OUT_FILE)
|
| 70 |
-
except Exception:
|
| 71 |
-
pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
|
| 72 |
-
return
|
| 73 |
-
# Add missing columns
|
| 74 |
-
for c in SCHEMA:
|
| 75 |
-
if c not in existing.columns:
|
| 76 |
-
existing[c] = ""
|
| 77 |
-
# Try to backfill run_id/comp_key when possible
|
| 78 |
-
if "comp_key" in existing.columns:
|
| 79 |
-
missing = existing[existing["comp_key"].isna()].index
|
| 80 |
-
for idx in missing:
|
| 81 |
-
r = existing.loc[idx]
|
| 82 |
-
existing.at[idx, "comp_key"] = _comp_key(r.get("source_type", ""), r.get("text", ""), r.get("model_A", ""), r.get("model_B", ""), r.get("run_id_A", ""), r.get("run_id_B", ""))
|
| 83 |
-
# Reorder
|
| 84 |
-
existing = existing[SCHEMA]
|
| 85 |
-
# Backup and overwrite
|
| 86 |
-
try:
|
| 87 |
-
shutil.copyfile(OUT_FILE, OUT_FILE + ".bak")
|
| 88 |
-
except Exception:
|
| 89 |
-
pass
|
| 90 |
-
existing.to_csv(OUT_FILE, index=False)
|
| 91 |
-
|
| 92 |
-
ensure_outfile_schema()
|
| 93 |
-
|
| 94 |
-
pairs_all = pd.read_csv(PAIRS_CSV)
|
| 95 |
-
|
| 96 |
-
# --- Helpers for deterministic schedule ---
|
| 97 |
-
|
| 98 |
-
def _shared_texts(df, m1, m2):
|
| 99 |
-
t1 = set(df[df["model"] == m1]["text"])
|
| 100 |
-
t2 = set(df[df["model"] == m2]["text"])
|
| 101 |
-
return list(t1 & t2)
|
| 102 |
-
|
| 103 |
-
|
| 104 |
def build_comparisons_k(source_type: str, k: int):
|
| 105 |
df = pairs_all[pairs_all["source_type"] == source_type].copy()
|
| 106 |
if df.empty:
|
|
@@ -114,13 +76,13 @@ def build_comparisons_k(source_type: str, k: int):
|
|
| 114 |
shared = _shared_texts(df, m1, m2)
|
| 115 |
if not shared:
|
| 116 |
continue
|
| 117 |
-
keyed = [(
|
| 118 |
keyed.sort(key=lambda x: x[0])
|
| 119 |
ordered_texts = [t for _, t in keyed]
|
| 120 |
|
| 121 |
chosen = []
|
| 122 |
idx = 0
|
| 123 |
-
while len(chosen) < k
|
| 124 |
chosen.append(ordered_texts[idx % len(ordered_texts)])
|
| 125 |
idx += 1
|
| 126 |
|
|
@@ -131,27 +93,88 @@ def build_comparisons_k(source_type: str, k: int):
|
|
| 131 |
A, B = (m1, r1), (m2, r2)
|
| 132 |
else:
|
| 133 |
A, B = (m2, r2), (m1, r1)
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
"run_id_B": run_id_b,
|
| 147 |
-
}
|
| 148 |
-
item["comp_key"] = _comp_key(source_type, t, item["model_A"], item["model_B"])
|
| 149 |
-
comps.append(item)
|
| 150 |
|
| 151 |
comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
|
| 152 |
return comps
|
| 153 |
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
def save_row(annotator_type, item, choice):
|
| 156 |
row = {
|
| 157 |
"annotator_type": annotator_type,
|
|
@@ -165,60 +188,21 @@ def save_row(annotator_type, item, choice):
|
|
| 165 |
"instruction_B": item["instruction_B"],
|
| 166 |
"response_B": item["response_B"],
|
| 167 |
"timestamp": time.time(),
|
| 168 |
-
"run_id_A": item.get("run_id_A", ""),
|
| 169 |
-
"run_id_B": item.get("run_id_B", ""),
|
| 170 |
-
"comp_key": item.get("comp_key", _comp_key(item["source_type"], item["text"], item["model_A"], item["model_B"], item.get("run_id_A"), item.get("run_id_B")))
|
| 171 |
}
|
| 172 |
-
|
| 173 |
-
df = pd.DataFrame([row])[SCHEMA]
|
| 174 |
-
df.to_csv(OUT_FILE, mode="a", header=False, index=False)
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
# ---------- Load completed keys from HF (single canonical file) ----------
|
| 178 |
|
| 179 |
-
|
| 180 |
-
headers = {}
|
| 181 |
-
# Optional: HF token if the dataset is private
|
| 182 |
-
token = os.getenv("HF_TOKEN")
|
| 183 |
-
if token:
|
| 184 |
-
headers["Authorization"] = f"Bearer {token}"
|
| 185 |
-
resp = requests.get(url, headers=headers, timeout=20)
|
| 186 |
-
resp.raise_for_status()
|
| 187 |
-
# Hugging Face may return a redirect; requests follows by default
|
| 188 |
-
return pd.read_csv(io.StringIO(resp.text))
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
def load_done_keys_from_hf(annotator_type: str, source_type: str) -> set[str]:
|
| 192 |
-
"""Fetch annotations_Wiki_Native.csv and return a set of comp_key for this role+source.
|
| 193 |
-
If comp_key missing, reconstruct using our canonical function; if run_ids missing, fallback to model+text only."""
|
| 194 |
try:
|
| 195 |
-
|
| 196 |
-
except Exception
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
# Filter by role+source if those columns exist; otherwise treat all rows as potential
|
| 200 |
-
if "annotator_type" in df.columns:
|
| 201 |
-
df = df[df["annotator_type"].astype(str).str.strip() == annotator_type]
|
| 202 |
-
if "source_type" in df.columns:
|
| 203 |
-
df = df[df["source_type"].astype(str).str.strip() == source_type]
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
# Reconstruct; try to use run_ids if present
|
| 212 |
-
k = _comp_key(
|
| 213 |
-
str(r.get("source_type", "")),
|
| 214 |
-
str(r.get("text", "")),
|
| 215 |
-
str(r.get("model_A", "")),
|
| 216 |
-
str(r.get("model_B", "")),
|
| 217 |
-
str(r.get("run_id_A", "")) if "run_id_A" in df.columns else None,
|
| 218 |
-
str(r.get("run_id_B", "")) if "run_id_B" in df.columns else None,
|
| 219 |
-
)
|
| 220 |
-
keys.add(k)
|
| 221 |
-
return keys
|
| 222 |
|
| 223 |
|
| 224 |
QUESTION_MD = (
|
|
@@ -275,59 +259,58 @@ with gr.Blocks() as demo:
|
|
| 275 |
|
| 276 |
# ---------- State ----------
|
| 277 |
annotator_type = gr.State("") # Learner | Native | Tester
|
| 278 |
-
source_state = gr.State(None)
|
| 279 |
-
comps_state = gr.State([])
|
| 280 |
-
idx_state = gr.State(0)
|
| 281 |
|
| 282 |
# ---------- Handlers ----------
|
| 283 |
def begin(consent, role, source):
|
| 284 |
if not consent:
|
| 285 |
return ("**Please tick the consent checkbox to proceed.**",
|
| 286 |
-
gr.update(visible=True), gr.update(visible
|
| 287 |
"", "", "", "", "", "", "", "", "", "", "")
|
| 288 |
if role not in ["Learner", "Native", "Tester"]:
|
| 289 |
return ("**Please select your annotator type.**",
|
| 290 |
-
gr.update(visible=True), gr.update(visible
|
| 291 |
"", "", "", "", "", "", "", "", "", "", "")
|
| 292 |
if source not in ["Wiki", "Oireachtas"]:
|
| 293 |
return ("**Please select a source (Wikipedia/Oireachtas).**",
|
| 294 |
-
gr.update(visible=True), gr.update(visible
|
| 295 |
"", "", "", "", "", "", "", "", "", "", "")
|
| 296 |
|
| 297 |
-
|
| 298 |
-
if not
|
| 299 |
return ("**No items found for the selected source.**",
|
| 300 |
-
gr.update(visible=True), gr.update(visible
|
| 301 |
"", "", "", "", "", "", "", "", "", "", "")
|
| 302 |
|
| 303 |
-
#
|
| 304 |
try:
|
| 305 |
done_keys = load_done_keys_from_hf(role, source)
|
| 306 |
except Exception as e:
|
| 307 |
return (f"**Error reading progress from HF:** {e}",
|
| 308 |
-
gr.update(visible=True), gr.update(visible
|
| 309 |
"", "", "", "", "", "", "", "", role, source, [], 0, gr.update(interactive=False), gr.update(interactive=False))
|
| 310 |
|
| 311 |
-
total = len(
|
| 312 |
resume_idx = 0
|
| 313 |
-
for i, it in enumerate(
|
| 314 |
-
|
|
|
|
| 315 |
resume_idx = i
|
| 316 |
break
|
| 317 |
else:
|
| 318 |
-
# all done
|
| 319 |
return (f"**All done for {role} / {source}.**",
|
| 320 |
-
gr.update(visible=True), gr.update(visible
|
| 321 |
-
"", "", "", "", "", "", "", "", role, source,
|
| 322 |
|
| 323 |
-
item =
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
gr.update(visible=False), gr.update(visible=True),
|
| 327 |
f"{resume_idx+1} / {total}",
|
| 328 |
item["text"], item["instruction_A"], item["response_A"],
|
| 329 |
item["instruction_B"], item["response_B"],
|
| 330 |
-
role, source,
|
| 331 |
gr.update(interactive=True), gr.update(interactive=True))
|
| 332 |
|
| 333 |
begin_btn.click(
|
|
@@ -347,20 +330,19 @@ with gr.Blocks() as demo:
|
|
| 347 |
return ("**No comparisons loaded.**", gr.skip(), gr.skip(), gr.skip(), gr.skip(),
|
| 348 |
gr.update(interactive=False), gr.update(interactive=False), i)
|
| 349 |
|
| 350 |
-
total = len(comp_list)
|
| 351 |
-
# Save current item
|
| 352 |
item = comp_list[i]
|
| 353 |
save_row(role, item, choice)
|
| 354 |
|
| 355 |
i += 1
|
| 356 |
-
if i >=
|
|
|
|
| 357 |
return ("**Done — thank you!**",
|
| 358 |
-
f"{
|
| 359 |
gr.update(interactive=False), gr.update(interactive=False), i)
|
| 360 |
|
| 361 |
nxt = comp_list[i]
|
| 362 |
return (f"Saved: {choice}",
|
| 363 |
-
f"{i+1} / {
|
| 364 |
nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"],
|
| 365 |
gr.update(interactive=True), gr.update(interactive=True), i)
|
| 366 |
|
|
|
|
| 1 |
+
# ab_app_k4_two_page.py (with HF resume + push on save)
|
| 2 |
# Two-page Gradio app for open-sourced annotation (Master’s thesis)
|
| 3 |
+
# Page 1: consent + annotator type (Learner/Native/Tester) + source (Wiki/Oireachtas)
|
| 4 |
+
# Page 2: task only (QUESTION_MD + A/B), deterministic K=4 per model pair per source
|
| 5 |
+
# Saves locally AND pushes each row to a single HF CSV; resume checks that HF CSV once at Begin
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
import pandas as pd
|
|
|
|
| 12 |
import hashlib
|
| 13 |
import io
|
| 14 |
import requests
|
|
|
|
| 15 |
import os
|
| 16 |
+
import tempfile
|
| 17 |
+
from huggingface_hub import HfApi, hf_hub_download, create_commit, CommitOperationAdd
|
| 18 |
|
| 19 |
PAIRS_CSV = "./pairs.csv" # columns: run_id, model, source_type, instruction, response, text
|
| 20 |
|
| 21 |
# --- Config ---
|
| 22 |
K = 4
|
| 23 |
+
OUT_FILE = "./annotations.csv"
|
| 24 |
+
HF_REPO_ID = "jmcinern/Irish_Prompt_Response_Human_Feedback" # dataset repo
|
| 25 |
+
HF_FILE_PATH = "annotations_Wiki_Native.csv" # single canonical file for all roles/sources
|
| 26 |
HF_ANNOTATIONS_URL = (
|
| 27 |
+
f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{HF_FILE_PATH}"
|
| 28 |
)
|
| 29 |
SCHEMA = [
|
| 30 |
"annotator_type", # Learner | Native | Tester
|
|
|
|
| 38 |
"instruction_B",
|
| 39 |
"response_B",
|
| 40 |
"timestamp",
|
|
|
|
|
|
|
|
|
|
| 41 |
]
|
| 42 |
+
if not Path(OUT_FILE).exists():
|
| 43 |
+
pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False)
|
| 44 |
+
|
| 45 |
+
pairs_all = pd.read_csv(PAIRS_CSV)
|
| 46 |
+
|
| 47 |
+
# --- Helpers for deterministic schedule ---
|
| 48 |
+
|
| 49 |
+
def _shared_texts(df, m1, m2):
|
| 50 |
+
t1 = set(df[df["model"] == m1]["text"])
|
| 51 |
+
t2 = set(df[df["model"] == m2]["text"])
|
| 52 |
+
return list(t1 & t2)
|
| 53 |
|
|
|
|
| 54 |
|
| 55 |
def _stable_hash(s: str) -> int:
|
| 56 |
return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16)
|
| 57 |
|
| 58 |
|
| 59 |
+
def _comp_key(source_type: str, text: str, model_a: str, model_b: str) -> str:
|
| 60 |
+
"""Order-agnostic, backward-compatible key (ignores run_ids)."""
|
| 61 |
+
m1, m2 = sorted([str(model_a), str(model_b)])
|
|
|
|
|
|
|
|
|
|
| 62 |
raw = f"{source_type}|{text}|{m1}|{m2}"
|
| 63 |
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def build_comparisons_k(source_type: str, k: int):
|
| 67 |
df = pairs_all[pairs_all["source_type"] == source_type].copy()
|
| 68 |
if df.empty:
|
|
|
|
| 76 |
shared = _shared_texts(df, m1, m2)
|
| 77 |
if not shared:
|
| 78 |
continue
|
| 79 |
+
keyed = [(_stable_hash(f"{source_type}|{m1}|{m2}|{t}"), t) for t in shared]
|
| 80 |
keyed.sort(key=lambda x: x[0])
|
| 81 |
ordered_texts = [t for _, t in keyed]
|
| 82 |
|
| 83 |
chosen = []
|
| 84 |
idx = 0
|
| 85 |
+
while len(chosen) < k:
|
| 86 |
chosen.append(ordered_texts[idx % len(ordered_texts)])
|
| 87 |
idx += 1
|
| 88 |
|
|
|
|
| 93 |
A, B = (m1, r1), (m2, r2)
|
| 94 |
else:
|
| 95 |
A, B = (m2, r2), (m1, r1)
|
| 96 |
+
comps.append(
|
| 97 |
+
{
|
| 98 |
+
"source_type": source_type,
|
| 99 |
+
"text": t,
|
| 100 |
+
"model_A": A[0],
|
| 101 |
+
"instruction_A": A[1]["instruction"],
|
| 102 |
+
"response_A": A[1]["response"],
|
| 103 |
+
"model_B": B[0],
|
| 104 |
+
"instruction_B": B[1]["instruction"],
|
| 105 |
+
"response_B": B[1]["response"],
|
| 106 |
+
}
|
| 107 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"]))
|
| 110 |
return comps
|
| 111 |
|
| 112 |
|
| 113 |
+
# ---------- HF helpers ----------
|
| 114 |
+
|
| 115 |
+
def _read_csv_from_hf(url: str) -> pd.DataFrame:
    """Fetch *url* and parse the response body as CSV into a DataFrame.

    Sends a Bearer token from the HF_TOKEN env var when it is set (needed for
    private datasets). Raises requests.HTTPError on a non-2xx response.
    """
    request_headers = {}
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        request_headers["Authorization"] = f"Bearer {hf_token}"
    response = requests.get(url, headers=request_headers, timeout=20)
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.text))
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def load_done_keys_from_hf(annotator_type: str, source_type: str) -> set:
    """Read the canonical HF CSV and return comp_keys already done for this role+source."""
    done = _read_csv_from_hf(HF_ANNOTATIONS_URL)

    # Narrow to this role/source when those columns exist in the remote file.
    for column, wanted in (("annotator_type", annotator_type), ("source_type", source_type)):
        if column in done.columns:
            done = done[done[column].astype(str).str.strip() == wanted]

    completed = set()
    for _, record in done.iterrows():
        fields = [str(record.get(name, "")) for name in ("source_type", "text", "model_A", "model_B")]
        # Skip rows missing any component of the key.
        if all(fields):
            completed.add(_comp_key(*fields))
    return completed
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def append_rows_to_hf(rows_df: pd.DataFrame):
    """Append new annotations to the single HF CSV with basic schema alignment.

    If the file doesn't exist, create it. Requires HF_TOKEN with write access.

    NOTE(review): this is a read-modify-write of one shared file with no
    locking, so two annotators saving at the same moment can race and drop
    each other's rows — acceptable for a small study, but worth knowing.
    """
    # 1) Download the current CSV; if missing or unreadable, start fresh.
    try:
        local_path = hf_hub_download(repo_id=HF_REPO_ID, filename=HF_FILE_PATH, repo_type="dataset")
        current = pd.read_csv(local_path)
    except Exception:
        current = pd.DataFrame(columns=SCHEMA)

    # 2) Align both frames to SCHEMA so concat cannot misorder columns.
    for col in SCHEMA:
        if col not in current.columns:
            current[col] = ""
    current = current[SCHEMA]
    rows_df = rows_df[SCHEMA]

    # 3) Append the new rows.
    merged = pd.concat([current, rows_df], ignore_index=True)

    # 4) Write to a temp file, commit it back, then always remove the temp
    # file (the previous version used delete=False and leaked one file per
    # saved row). The commit happens after the handle is closed so the path
    # is readable on all platforms.
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
            tmp_path = tmp.name
        merged.to_csv(tmp_path, index=False)
        create_commit(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            operations=[CommitOperationAdd(path_in_repo=HF_FILE_PATH, path_or_fileobj=tmp_path)],
            commit_message="Append annotation row",
        )
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
# --- Save row (local + push to HF) ---
|
| 177 |
+
|
| 178 |
def save_row(annotator_type, item, choice):
|
| 179 |
row = {
|
| 180 |
"annotator_type": annotator_type,
|
|
|
|
| 188 |
"instruction_B": item["instruction_B"],
|
| 189 |
"response_B": item["response_B"],
|
| 190 |
"timestamp": time.time(),
|
|
|
|
|
|
|
|
|
|
| 191 |
}
|
| 192 |
+
df_row = pd.DataFrame([row])[SCHEMA]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
+
# Local redundancy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
try:
|
| 196 |
+
df_row.to_csv(OUT_FILE, mode="a", header=False, index=False)
|
| 197 |
+
except Exception:
|
| 198 |
+
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
+
# Push to HF (single canonical file)
|
| 201 |
+
try:
|
| 202 |
+
append_rows_to_hf(df_row)
|
| 203 |
+
except Exception:
|
| 204 |
+
# Fail-open: allow annotator to continue, progress still in local file
|
| 205 |
+
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
|
| 208 |
QUESTION_MD = (
|
|
|
|
| 259 |
|
| 260 |
# ---------- State ----------
|
| 261 |
annotator_type = gr.State("") # Learner | Native | Tester
|
| 262 |
+
source_state = gr.State(None) # Wiki | Oireachtas
|
| 263 |
+
comps_state = gr.State([]) # list of dicts (full list)
|
| 264 |
+
idx_state = gr.State(0) # current index into full list
|
| 265 |
|
| 266 |
# ---------- Handlers ----------
|
| 267 |
def begin(consent, role, source):
|
| 268 |
if not consent:
|
| 269 |
return ("**Please tick the consent checkbox to proceed.**",
|
| 270 |
+
gr.update(visible=True), gr.update(visible=False),
|
| 271 |
"", "", "", "", "", "", "", "", "", "", "")
|
| 272 |
if role not in ["Learner", "Native", "Tester"]:
|
| 273 |
return ("**Please select your annotator type.**",
|
| 274 |
+
gr.update(visible=True), gr.update(visible=False),
|
| 275 |
"", "", "", "", "", "", "", "", "", "", "")
|
| 276 |
if source not in ["Wiki", "Oireachtas"]:
|
| 277 |
return ("**Please select a source (Wikipedia/Oireachtas).**",
|
| 278 |
+
gr.update(visible=True), gr.update(visible=False),
|
| 279 |
"", "", "", "", "", "", "", "", "", "", "")
|
| 280 |
|
| 281 |
+
comp_list = build_comparisons_k(source, K)
|
| 282 |
+
if not comp_list:
|
| 283 |
return ("**No items found for the selected source.**",
|
| 284 |
+
gr.update(visible=True), gr.update(visible=False),
|
| 285 |
"", "", "", "", "", "", "", "", "", "", "")
|
| 286 |
|
| 287 |
+
# Resume point from HF (single check)
|
| 288 |
try:
|
| 289 |
done_keys = load_done_keys_from_hf(role, source)
|
| 290 |
except Exception as e:
|
| 291 |
return (f"**Error reading progress from HF:** {e}",
|
| 292 |
+
gr.update(visible=True), gr.update(visible=False),
|
| 293 |
"", "", "", "", "", "", "", "", role, source, [], 0, gr.update(interactive=False), gr.update(interactive=False))
|
| 294 |
|
| 295 |
+
total = len(comp_list)
|
| 296 |
resume_idx = 0
|
| 297 |
+
for i, it in enumerate(comp_list):
|
| 298 |
+
key = _comp_key(source, it["text"], it["model_A"], it["model_B"]) # A/B order-agnostic
|
| 299 |
+
if key not in done_keys:
|
| 300 |
resume_idx = i
|
| 301 |
break
|
| 302 |
else:
|
|
|
|
| 303 |
return (f"**All done for {role} / {source}.**",
|
| 304 |
+
gr.update(visible=True), gr.update(visible=False),
|
| 305 |
+
"", "", "", "", "", "", "", "", role, source, comp_list, total, gr.update(interactive=False), gr.update(interactive=False))
|
| 306 |
|
| 307 |
+
item = comp_list[resume_idx]
|
| 308 |
+
return ("", # clear gate msg
|
| 309 |
+
gr.update(visible=False), gr.update(visible=True), # show page2
|
|
|
|
| 310 |
f"{resume_idx+1} / {total}",
|
| 311 |
item["text"], item["instruction_A"], item["response_A"],
|
| 312 |
item["instruction_B"], item["response_B"],
|
| 313 |
+
role, source, comp_list, resume_idx,
|
| 314 |
gr.update(interactive=True), gr.update(interactive=True))
|
| 315 |
|
| 316 |
begin_btn.click(
|
|
|
|
| 330 |
return ("**No comparisons loaded.**", gr.skip(), gr.skip(), gr.skip(), gr.skip(),
|
| 331 |
gr.update(interactive=False), gr.update(interactive=False), i)
|
| 332 |
|
|
|
|
|
|
|
| 333 |
item = comp_list[i]
|
| 334 |
save_row(role, item, choice)
|
| 335 |
|
| 336 |
i += 1
|
| 337 |
+
if i >= len(comp_list):
|
| 338 |
+
# Done: disable buttons, clear fields, lock progress at max
|
| 339 |
return ("**Done — thank you!**",
|
| 340 |
+
f"{len(comp_list)} / {len(comp_list)}", "", "", "", "",
|
| 341 |
gr.update(interactive=False), gr.update(interactive=False), i)
|
| 342 |
|
| 343 |
nxt = comp_list[i]
|
| 344 |
return (f"Saved: {choice}",
|
| 345 |
+
f"{i+1} / {len(comp_list)}",
|
| 346 |
nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"],
|
| 347 |
gr.update(interactive=True), gr.update(interactive=True), i)
|
| 348 |
|