Spaces:
Sleeping
Sleeping
| # ab_app_k4_two_page.py (with HF resume + push on save) | |
| # Two-page Gradio app for open-sourced annotation (Master’s thesis) | |
| # Page 1: consent + annotator type (Learner/Native/Tester) + source (Wiki/Oireachtas) | |
| # Page 2: task only (QUESTION_MD + A/B), deterministic K=4 per model pair per source | |
| # Saves locally AND pushes each row to a single HF CSV; resume checks that HF CSV once at Begin | |
| import gradio as gr | |
| import pandas as pd | |
| import time | |
| from itertools import combinations | |
| from pathlib import Path | |
| import hashlib | |
| import io | |
| import requests | |
| import os | |
| import tempfile | |
| from huggingface_hub import HfApi, hf_hub_download, create_commit, CommitOperationAdd | |
| PAIRS_CSV = "./pairs.csv" # columns: run_id, model, source_type, instruction, response, text | |
| # --- Config --- | |
| K = 4 | |
| OUT_FILE = "./annotations.csv" | |
| HF_REPO_ID = "jmcinern/Irish_Prompt_Response_Human_Feedback" # dataset repo | |
| HF_FILE_PATH = "annotations_Wiki_Native.csv" # single canonical file for all roles/sources | |
| HF_ANNOTATIONS_URL = ( | |
| f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{HF_FILE_PATH}" | |
| ) | |
| SCHEMA = [ | |
| "annotator_type", # Learner | Native | Tester | |
| "source_type", # Wiki | Oireachtas | |
| "text", | |
| "model_A", | |
| "model_B", | |
| "choice", # A | B | |
| "instruction_A", | |
| "response_A", | |
| "instruction_B", | |
| "response_B", | |
| "timestamp", | |
| ] | |
| if not Path(OUT_FILE).exists(): | |
| pd.DataFrame(columns=SCHEMA).to_csv(OUT_FILE, index=False) | |
| pairs_all = pd.read_csv(PAIRS_CSV) | |
| # --- Helpers for deterministic schedule --- | |
| def _shared_texts(df, m1, m2): | |
| t1 = set(df[df["model"] == m1]["text"]) | |
| t2 = set(df[df["model"] == m2]["text"]) | |
| return list(t1 & t2) | |
| def _stable_hash(s: str) -> int: | |
| return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16) | |
| def _comp_key(source_type: str, text: str, model_a: str, model_b: str) -> str: | |
| """Order-agnostic, backward-compatible key (ignores run_ids).""" | |
| m1, m2 = sorted([str(model_a), str(model_b)]) | |
| raw = f"{source_type}|{text}|{m1}|{m2}" | |
| return hashlib.sha256(raw.encode("utf-8")).hexdigest() | |
| def build_comparisons_k(source_type: str, k: int): | |
| df = pairs_all[pairs_all["source_type"] == source_type].copy() | |
| if df.empty: | |
| return [] | |
| models = sorted(df["model"].unique().tolist()) | |
| comps = [] | |
| # For each unordered pair, pick k texts deterministically; A/B flips alternate (2/2 over k=4) | |
| for m1, m2 in combinations(models, 2): | |
| shared = _shared_texts(df, m1, m2) | |
| if not shared: | |
| continue | |
| keyed = [(_stable_hash(f"{source_type}|{m1}|{m2}|{t}"), t) for t in shared] | |
| keyed.sort(key=lambda x: x[0]) | |
| ordered_texts = [t for _, t in keyed] | |
| chosen = [] | |
| idx = 0 | |
| while len(chosen) < k: | |
| chosen.append(ordered_texts[idx % len(ordered_texts)]) | |
| idx += 1 | |
| for j, t in enumerate(chosen): | |
| r1 = df[(df["model"] == m1) & (df["text"] == t)].iloc[0] | |
| r2 = df[(df["model"] == m2) & (df["text"] == t)].iloc[0] | |
| if j % 2 == 0: | |
| A, B = (m1, r1), (m2, r2) | |
| else: | |
| A, B = (m2, r2), (m1, r1) | |
| comps.append( | |
| { | |
| "source_type": source_type, | |
| "text": t, | |
| "model_A": A[0], | |
| "instruction_A": A[1]["instruction"], | |
| "response_A": A[1]["response"], | |
| "model_B": B[0], | |
| "instruction_B": B[1]["instruction"], | |
| "response_B": B[1]["response"], | |
| } | |
| ) | |
| comps.sort(key=lambda d: (d["source_type"], d["model_A"], d["model_B"], d["text"])) | |
| return comps | |
| # ---------- HF helpers ---------- | |
| def _read_csv_from_hf(url: str) -> pd.DataFrame: | |
| token = os.getenv("HF_TOKEN") | |
| headers = {"Authorization": f"Bearer {token}"} if token else {} | |
| resp = requests.get(url, headers=headers, timeout=20) | |
| resp.raise_for_status() | |
| return pd.read_csv(io.StringIO(resp.text)) | |
| def load_done_keys_from_hf(annotator_type: str, source_type: str) -> set: | |
| """Read the canonical HF CSV and return comp_keys already done for this role+source.""" | |
| df = _read_csv_from_hf(HF_ANNOTATIONS_URL) | |
| if "annotator_type" in df.columns: | |
| df = df[df["annotator_type"].astype(str).str.strip() == annotator_type] | |
| if "source_type" in df.columns: | |
| df = df[df["source_type"].astype(str).str.strip() == source_type] | |
| keys = set() | |
| for _, r in df.iterrows(): | |
| st = str(r.get("source_type", "")) | |
| tx = str(r.get("text", "")) | |
| ma = str(r.get("model_A", "")) | |
| mb = str(r.get("model_B", "")) | |
| if not (st and tx and ma and mb): | |
| continue | |
| keys.add(_comp_key(st, tx, ma, mb)) | |
| return keys | |
| def append_rows_to_hf(rows_df: pd.DataFrame): | |
| """Append new annotations to the single HF CSV with basic schema alignment. | |
| If the file doesn't exist, create it. Requires HF_TOKEN with write access. | |
| """ | |
| api = HfApi() | |
| # 1) download current csv (if missing, start new) | |
| try: | |
| local_path = hf_hub_download(repo_id=HF_REPO_ID, filename=HF_FILE_PATH, repo_type="dataset") | |
| current = pd.read_csv(local_path) | |
| except Exception: | |
| current = pd.DataFrame(columns=SCHEMA) | |
| for c in SCHEMA: | |
| if c not in current.columns: | |
| current[c] = "" | |
| current = current[SCHEMA] | |
| rows_df = rows_df[SCHEMA] | |
| merged = pd.concat([current, rows_df], ignore_index=True) | |
| # 4) write to a temp file and commit back | |
| with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: | |
| merged.to_csv(tmp.name, index=False) | |
| tmp.flush() | |
| op = CommitOperationAdd(path_in_repo=HF_FILE_PATH, path_or_fileobj=tmp.name) | |
| create_commit( | |
| repo_id=HF_REPO_ID, | |
| repo_type="dataset", | |
| operations=[op], | |
| commit_message="Append annotation row", | |
| ) | |
| # --- Save row (local + push to HF) --- | |
| def save_row(annotator_type, item, choice): | |
| row = { | |
| "annotator_type": annotator_type, | |
| "source_type": item["source_type"], | |
| "text": item["text"], | |
| "model_A": item["model_A"], | |
| "model_B": item["model_B"], | |
| "choice": choice, | |
| "instruction_A": item["instruction_A"], | |
| "response_A": item["response_A"], | |
| "instruction_B": item["instruction_B"], | |
| "response_B": item["response_B"], | |
| "timestamp": time.time(), | |
| } | |
| df_row = pd.DataFrame([row])[SCHEMA] | |
| # Local redundancy | |
| try: | |
| df_row.to_csv(OUT_FILE, mode="a", header=False, index=False) | |
| except Exception: | |
| pass | |
| # Push to HF (single canonical file) | |
| try: | |
| append_rows_to_hf(df_row) | |
| except Exception: | |
| # Fail-open: allow annotator to continue, progress still in local file | |
| pass | |
| QUESTION_MD = ( | |
| "**Question:** Which Question–Answer pair exhibits a stronger command of Irish grammar and " | |
| "semantic coherence? Take the use of the reference text into account. If unsure, pick the one " | |
| "with a stronger display of Irish grammar. Choose A or B." | |
| ) | |
| CONSENT_MD = f""" | |
| ### Irish QA Pair Comparison (Master’s Thesis) | |
| You are invited to take part in a study on Large Language Model Irish-language QA quality. | |
| By continuing, you consent to the following: | |
| - Your annotations will be **anonymised** (we only record whether you are a **Learner**, **Native speaker**, or **Tester**). | |
| - The dataset (reference text + model outputs + your choices) will be released **open-source** for both research and commercial purposes. | |
| - No personal data is collected beyond your level of Irish. You may stop at any time before submission. | |
| - You will answer the following question: | |
| #### "Which Question–Answer pair exhibits a stronger command of Irish grammar and semantic coherence? Take the use of the reference text into account. If unsure, pick the one with a stronger display of Irish grammar. Choose A or B.". | |
| - Only base your decision on this question and not other factors. | |
| Please confirm consent, select your annotator type and the source to evaluate, then press **Begin**. | |
| """ | |
| with gr.Blocks() as demo: | |
| # ---------- PAGE 1: Consent + Role + Source ---------- | |
| with gr.Group(visible=True) as page1: | |
| gr.Markdown(CONSENT_MD) | |
| consent_chk = gr.Checkbox(label="I consent to take part and for my anonymised annotations to be open-sourced.", value=False) | |
| role_dd = gr.Dropdown(["Learner", "Native", "Tester"], label="Annotator Type (required)", value=None) | |
| source_dd = gr.Dropdown(["Wiki", "Oireachtas"], label="Source (required)", value=None) | |
| begin_btn = gr.Button("Begin") | |
| gate_msg = gr.Markdown() | |
| # ---------- PAGE 2: Task ---------- | |
| with gr.Group(visible=False) as page2: | |
| crit = gr.Markdown(QUESTION_MD) | |
| counter = gr.Markdown() | |
| ref_text = gr.Textbox(label="Reference Text", interactive=False, lines=8) | |
| with gr.Row(): | |
| with gr.Column(): | |
| instA = gr.Textbox(label="Instruction A", interactive=False) | |
| respA = gr.Textbox(label="Response A", interactive=False, lines=8) | |
| with gr.Column(): | |
| instB = gr.Textbox(label="Instruction B", interactive=False) | |
| respB = gr.Textbox(label="Response B", interactive=False, lines=8) | |
| with gr.Row(): | |
| btnA = gr.Button("A is Better") | |
| btnB = gr.Button("B is Better") | |
| status = gr.Markdown() | |
| # ---------- State ---------- | |
| annotator_type = gr.State("") # Learner | Native | Tester | |
| source_state = gr.State(None) # Wiki | Oireachtas | |
| comps_state = gr.State([]) # list of dicts (full list) | |
| idx_state = gr.State(0) # current index into full list | |
| # ---------- Handlers ---------- | |
| def begin(consent, role, source): | |
| if not consent: | |
| return ("**Please tick the consent checkbox to proceed.**", | |
| gr.update(visible=True), gr.update(visible=False), | |
| "", "", "", "", "", "", "", "", "", "", "") | |
| if role not in ["Learner", "Native", "Tester"]: | |
| return ("**Please select your annotator type.**", | |
| gr.update(visible=True), gr.update(visible=False), | |
| "", "", "", "", "", "", "", "", "", "", "") | |
| if source not in ["Wiki", "Oireachtas"]: | |
| return ("**Please select a source (Wikipedia/Oireachtas).**", | |
| gr.update(visible=True), gr.update(visible=False), | |
| "", "", "", "", "", "", "", "", "", "", "") | |
| comp_list = build_comparisons_k(source, K) | |
| if not comp_list: | |
| return ("**No items found for the selected source.**", | |
| gr.update(visible=True), gr.update(visible=False), | |
| "", "", "", "", "", "", "", "", "", "", "") | |
| # Resume point from HF (single check) | |
| try: | |
| done_keys = load_done_keys_from_hf(role, source) | |
| except Exception as e: | |
| return (f"**Error reading progress from HF:** {e}", | |
| gr.update(visible=True), gr.update(visible=False), | |
| "", "", "", "", "", "", "", "", role, source, [], 0, gr.update(interactive=False), gr.update(interactive=False)) | |
| total = len(comp_list) | |
| resume_idx = 0 | |
| for i, it in enumerate(comp_list): | |
| key = _comp_key(source, it["text"], it["model_A"], it["model_B"]) # A/B order-agnostic | |
| if key not in done_keys: | |
| resume_idx = i | |
| break | |
| else: | |
| return (f"**All done for {role} / {source}.**", | |
| gr.update(visible=True), gr.update(visible=False), | |
| "", "", "", "", "", "", "", "", role, source, comp_list, total, gr.update(interactive=False), gr.update(interactive=False)) | |
| item = comp_list[resume_idx] | |
| return ("", # clear gate msg | |
| gr.update(visible=False), gr.update(visible=True), # show page2 | |
| f"{resume_idx+1} / {total}", | |
| item["text"], item["instruction_A"], item["response_A"], | |
| item["instruction_B"], item["response_B"], | |
| role, source, comp_list, resume_idx, | |
| gr.update(interactive=True), gr.update(interactive=True)) | |
| begin_btn.click( | |
| begin, | |
| inputs=[consent_chk, role_dd, source_dd], | |
| outputs=[ | |
| gate_msg, page1, page2, | |
| counter, ref_text, instA, respA, instB, respB, | |
| annotator_type, source_state, comps_state, idx_state, | |
| btnA, btnB | |
| ], | |
| ) | |
| def choose(choice, role, source, comp_list, i): | |
| role = (role or "").strip() | |
| if not role or not comp_list: | |
| return ("**No comparisons loaded.**", gr.skip(), gr.skip(), gr.skip(), gr.skip(), | |
| gr.update(interactive=False), gr.update(interactive=False), i) | |
| item = comp_list[i] | |
| save_row(role, item, choice) | |
| i += 1 | |
| if i >= len(comp_list): | |
| # Done: disable buttons, clear fields, lock progress at max | |
| return ("**Done — thank you!**", | |
| f"{len(comp_list)} / {len(comp_list)}", "", "", "", "", | |
| gr.update(interactive=False), gr.update(interactive=False), i) | |
| nxt = comp_list[i] | |
| return (f"Saved: {choice}", | |
| f"{i+1} / {len(comp_list)}", | |
| nxt["text"], nxt["instruction_A"], nxt["response_A"], nxt["instruction_B"], nxt["response_B"], | |
| gr.update(interactive=True), gr.update(interactive=True), i) | |
| btnA.click( | |
| lambda role, src, comps, i: choose("A", role, src, comps, i), | |
| inputs=[annotator_type, source_state, comps_state, idx_state], | |
| outputs=[status, counter, ref_text, instA, respA, instB, respB, btnA, btnB, idx_state], | |
| ) | |
| btnB.click( | |
| lambda role, src, comps, i: choose("B", role, src, comps, i), | |
| inputs=[annotator_type, source_state, comps_state, idx_state], | |
| outputs=[status, counter, ref_text, instA, respA, instB, respB, btnA, btnB, idx_state], | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |