|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
import json |
|
|
import random |
|
|
from datetime import datetime |
|
|
from pathlib import Path |
|
|
import os |
|
|
|
|
|
from huggingface_hub import HfApi, hf_hub_download, create_repo |
|
|
try:
    from huggingface_hub.utils import HfHubHTTPError
except ImportError:
    # Fallback for huggingface_hub builds that do not expose the exception
    # under ``huggingface_hub.utils``; keeps ``except HfHubHTTPError`` valid.
    class HfHubHTTPError(Exception):
        """Minimal stand-in for ``huggingface_hub.utils.HfHubHTTPError``.

        ``download_annotations`` inspects ``e.response.status_code``, so the
        stand-in always defines a ``response`` attribute (``None`` unless one
        is supplied) instead of leaving it missing.
        """

        def __init__(self, *args, response=None):
            super().__init__(*args)
            self.response = response
|
|
|
|
|
|
|
|
|
|
|
# Local JSON Lines file holding the candidate samples to annotate.
TRANSLATED_FILE = "translated_IRT_ga.jsonl"

# CSV file name used both for the local copy and for the file in the Hub repo.
ANNOTATION_FILE = "DPO_annotations.csv"

# Hugging Face dataset repository that stores the annotation CSV.
HF_REPO_ID = "jmcinern/DPO_ga"

# Access token read from the environment; uploads are skipped when it is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Number of samples kept after the seeded shuffle, and the seed itself.
NUM_SAMPLES = 200
RANDOM_SEED = 42
|
|
|
|
|
|
|
|
# Markdown shown on the consent screen before any annotation task is displayed.
CONSENT_MD = """
### Irish QA Pair Comparison (Master’s Thesis)

You are invited to take part in a study on Large Language Model Irish-language QA quality.
By continuing, you consent to the following:

- Your annotations are anonymised.
- The dataset (reference text + model outputs + your choices) will be released **open-source** for both research and commercial purposes.
- No personal data is collected. You may stop at any time.

- You will answer the following question:

#### Which answer, A or B, is better in terms of grammar, naturalness, and coherence?

- Only base your decision on this question and not other factors.

Please confirm consent, select your role, then press **Begin**.
"""
|
|
|
|
|
|
|
|
|
|
|
def load_master_samples() -> list:
    """Load the source samples and return a deterministic subset of them.

    Reads ``TRANSLATED_FILE`` as JSON Lines (one JSON document per line),
    shuffles the records with a ``random.Random`` seeded by ``RANDOM_SEED``
    and returns the first ``NUM_SAMPLES`` of them.

    Returns:
        A list with at most ``NUM_SAMPLES`` decoded records.

    Raises:
        FileNotFoundError: If ``TRANSLATED_FILE`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    source = Path(TRANSLATED_FILE)
    if not source.exists():
        raise FileNotFoundError(f"Source file not found: {TRANSLATED_FILE}")

    with source.open("r", encoding="utf-8") as f:
        # Blank lines (e.g. a stray empty line at the end of the file) carry
        # no record and would make json.loads raise, so they are skipped.
        data = [json.loads(line) for line in f if line.strip()]

    # A private, seeded generator keeps the selection identical across runs
    # without touching the global ``random`` state.
    rng = random.Random(RANDOM_SEED)
    rng.shuffle(data)
    return data[:NUM_SAMPLES]
|
|
|
|
|
def download_annotations() -> pd.DataFrame:
    """Fetch the annotation CSV from the Hub dataset repo as a DataFrame.

    Returns:
        The contents of ``ANNOTATION_FILE`` in ``HF_REPO_ID``. When the Hub
        answers with HTTP 404, an empty DataFrame with the annotation columns
        is returned instead.

    Raises:
        HfHubHTTPError: For any Hub HTTP error other than a 404.
    """
    try:
        local_path = hf_hub_download(
            repo_id=HF_REPO_ID,
            filename=ANNOTATION_FILE,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except HfHubHTTPError as e:
        # ``response`` can be missing or None on the exception, so read the
        # status defensively rather than raising AttributeError from here.
        status_code = getattr(getattr(e, "response", None), "status_code", None)
        if status_code != 404:
            raise
        print("No remote annotation file found. Creating a new one.")
        return pd.DataFrame(
            columns=["hash", "annotator_type", "choice", "preferred_response", "timestamp"]
        )

    print(f"Downloaded existing annotations from {HF_REPO_ID}")
    return pd.read_csv(local_path)
|
|
|
|
|
def upload_annotations(df: pd.DataFrame) -> None:
    """Write ``df`` to ``ANNOTATION_FILE`` locally and push it to the Hub.

    Does nothing except print a warning when ``HF_TOKEN`` is not set; in that
    case the local CSV is not written either.

    Args:
        df: The complete annotation table to store; it replaces the file in
            the repository rather than being appended to it.
    """
    if not HF_TOKEN:
        print("WARNING: No HF_TOKEN found. Skipping upload.")
        return

    # Local copy that is then uploaded as-is.
    df.to_csv(ANNOTATION_FILE, index=False)

    api = HfApi()
    # exist_ok=True makes this a no-op when the dataset repo already exists.
    create_repo(HF_REPO_ID, repo_type="dataset", exist_ok=True, token=HF_TOKEN)
    api.upload_file(
        path_or_fileobj=ANNOTATION_FILE,
        path_in_repo=ANNOTATION_FILE,
        repo_id=HF_REPO_ID,
        repo_type="dataset",
        token=HF_TOKEN,
        commit_message="Append new DPO annotation",
    )
    print(f"Successfully uploaded updated annotations to {HF_REPO_ID}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_tasks():
    """Build the list of annotation tasks still open for this session.

    Loads the master samples, downloads the existing annotations and keeps
    only samples whose ``hash`` has no annotation yet. For each remaining
    sample the two responses are shown in random order.

    Returns:
        A list of dicts with the keys ``hash``, ``instruction``,
        ``response_A``, ``response_B`` and ``shuffle_map``. ``shuffle_map``
        maps the displayed letters ``'A'``/``'B'`` back to the source keys
        ``'response1'``/``'response2'``.
    """
    master_samples = load_master_samples()
    annotations_df = download_annotations()
    completed_hashes = set(annotations_df['hash'].unique())

    to_do_samples = [s for s in master_samples if s['hash'] not in completed_hashes]

    tasks = []
    for sample in to_do_samples:
        # Randomise which response is displayed as A and which as B; this
        # uses the global ``random`` state, so the order differs per session.
        options = [('response1', sample['response1']), ('response2', sample['response2'])]
        random.shuffle(options)

        tasks.append({
            "hash": sample['hash'],
            "instruction": sample['instruction'],
            "response_A": options[0][1],
            "response_B": options[1][1],
            # Remember the original key behind each displayed letter.
            "shuffle_map": {'A': options[0][0], 'B': options[1][0]},
        })
    return tasks
|
|
|
|
|
def start_session(annotator_type):
    """Handle the 'Begin' button: prepare tasks and show the first one.

    Args:
        annotator_type: The role chosen in the role dropdown.

    Returns:
        A dict mapping Gradio components to their new values. When no task is
        left, only the "done" group is made visible and the session state is
        reset; otherwise the task group is shown with the first task loaded.
    """
    tasks = prepare_tasks()

    if not tasks:
        # Nothing left to annotate: go straight to the thank-you screen.
        return {
            consent_group: gr.update(visible=False),
            task_group: gr.update(visible=False),
            done_group: gr.update(visible=True),
            state_tasks: [],
            state_task_index: 0,
            state_annotator_type: "",
        }

    first_task = tasks[0]
    progress_str = f"Progress: 1 / {len(tasks)}"

    return {
        consent_group: gr.update(visible=False),
        task_group: gr.update(visible=True),
        done_group: gr.update(visible=False),
        state_tasks: tasks,
        state_task_index: 0,
        state_annotator_type: annotator_type,
        progress_counter: gr.update(value=progress_str),
        instruction_box: gr.update(value=first_task['instruction']),
        response_a_box: gr.update(value=first_task['response_A']),
        response_b_box: gr.update(value=first_task['response_B']),
    }
|
|
|
|
|
def record_choice(tasks, current_index, annotator_type, choice):
    """Store the annotator's choice for the current task and load the next.

    Args:
        tasks: The session's task list as built by ``prepare_tasks``.
        current_index: Index into ``tasks`` of the task being answered.
        annotator_type: The role selected at the start of the session.
        choice: ``'A'`` or ``'B'``, the displayed answer that was preferred.

    Returns:
        A dict mapping Gradio components to their new values: either the next
        task's content, or the switch to the "done" group once no task is
        left.
    """
    # Guard against an empty task list or an index already past the end.
    # NOTE(review): the intent is that a second click queued behind the final
    # answer lands here instead of raising IndexError or recording the last
    # task twice - this relies on the event reading the updated session
    # state; confirm against Gradio's queueing behaviour.
    if not tasks or not 0 <= current_index < len(tasks):
        return {
            task_group: gr.update(visible=False),
            done_group: gr.update(visible=True),
        }

    current_task = tasks[current_index]
    # Translate the displayed letter back to 'response1' / 'response2'.
    preferred_response_key = current_task['shuffle_map'][choice]

    new_annotation = {
        "hash": current_task['hash'],
        "annotator_type": annotator_type,
        "choice": choice,
        "preferred_response": preferred_response_key,
        # Naive UTC timestamp; kept in this form so new rows match the
        # timestamp format of rows already stored in the CSV.
        "timestamp": datetime.utcnow().isoformat(),
    }

    # Re-download before appending so that rows saved since this session
    # started are kept.
    # NOTE(review): this is still a read-modify-write of the whole file; two
    # sessions saving at the same moment could presumably overwrite each
    # other's row - confirm whether that matters at the expected traffic.
    annotations_df = download_annotations()
    new_df = pd.concat([annotations_df, pd.DataFrame([new_annotation])], ignore_index=True)
    upload_annotations(new_df)

    next_index = current_index + 1
    if next_index >= len(tasks):
        # Finished: also advance the stored index so that any later call for
        # this session hits the guard at the top of this function.
        return {
            state_task_index: next_index,
            task_group: gr.update(visible=False),
            done_group: gr.update(visible=True),
        }

    next_task = tasks[next_index]
    progress_str = f"Progress: {next_index + 1} / {len(tasks)}"

    return {
        state_task_index: next_index,
        progress_counter: gr.update(value=progress_str),
        instruction_box: gr.update(value=next_task['instruction']),
        response_a_box: gr.update(value=next_task['response_A']),
        response_b_box: gr.update(value=next_task['response_B']),
    }
|
|
|
|
|
def update_begin_button_status(consent_given, role_selected):
    """Enable the Begin button only once consent is ticked and a role chosen.

    Args:
        consent_given: Current value of the consent checkbox.
        role_selected: Current value of the role dropdown; ``None`` or an
            empty value counts as "no role selected".

    Returns:
        A Gradio update that sets the button's ``interactive`` flag.
    """
    # Coerce to a real bool so ``interactive`` never receives None or a
    # string, and so an empty dropdown value does not count as a selection.
    can_begin = bool(consent_given) and bool(role_selected)
    return gr.update(interactive=can_begin)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# UI definition: three mutually exclusive groups (consent -> task -> done)
# plus per-session state, followed by the event wiring.
# NOTE(review): the original indentation of this section was lost; the nesting
# of rows/columns below is the most natural reading of the statement order -
# confirm the intended layout.
with gr.Blocks(theme=gr.themes.Soft(), title="DPO Annotation") as demo:
    # Per-session state: the task list, the position in it, and the role.
    state_tasks = gr.State([])
    state_task_index = gr.State(0)
    state_annotator_type = gr.State("")

    # --- Consent screen (visible first) ---
    with gr.Group(visible=True) as consent_group:
        gr.Markdown(CONSENT_MD)
        with gr.Row():
            consent_checkbox = gr.Checkbox(label="I consent to the terms above")
            annotator_type_dropdown = gr.Dropdown(["Tester", "Native"], label="Select Your Role")
        # Stays disabled until update_begin_button_status enables it.
        begin_btn = gr.Button("Begin", interactive=False)

    # --- Annotation screen ---
    with gr.Group(visible=False) as task_group:
        progress_counter = gr.Markdown("Progress: 0 / 0", elem_id="progress_counter")
        with gr.Column():
            instruction_box = gr.Textbox(label="Instruction", interactive=False, lines=3)
            with gr.Row():
                response_a_box = gr.Textbox(label="Answer A", interactive=False, lines=8)
                response_b_box = gr.Textbox(label="Answer B", interactive=False, lines=8)
            with gr.Row():
                choose_a_btn = gr.Button("A is Better", variant="primary")
                choose_b_btn = gr.Button("B is Better", variant="primary")

    # --- Completion screen ---
    with gr.Group(visible=False) as done_group:
        gr.Markdown("## ✅ Thank You!\n\nAll available samples have been annotated. Your contribution is greatly appreciated.")

    # --- Event wiring ---
    # Re-evaluate the Begin button whenever either consent input changes.
    consent_checkbox.change(
        fn=update_begin_button_status,
        inputs=[consent_checkbox, annotator_type_dropdown],
        outputs=begin_btn,
    )
    annotator_type_dropdown.change(
        fn=update_begin_button_status,
        inputs=[consent_checkbox, annotator_type_dropdown],
        outputs=begin_btn,
    )

    # start_session returns a dict keyed by component, so every component it
    # may touch has to be listed as an output here.
    begin_btn.click(
        fn=start_session,
        inputs=[annotator_type_dropdown],
        outputs=[
            consent_group, task_group, done_group,
            state_tasks, state_task_index, state_annotator_type,
            progress_counter, instruction_box, response_a_box, response_b_box,
        ],
    )

    # Both choice buttons call record_choice and update the same components;
    # they differ only in the constant letter passed as the last input.
    choice_outputs = [
        state_task_index, progress_counter,
        instruction_box, response_a_box, response_b_box,
        task_group, done_group,
    ]

    choose_a_btn.click(
        fn=record_choice,
        inputs=[state_tasks, state_task_index, state_annotator_type, gr.State('A')],
        outputs=choice_outputs,
    )

    choose_b_btn.click(
        fn=record_choice,
        inputs=[state_tasks, state_task_index, state_annotator_type, gr.State('B')],
        outputs=choice_outputs,
    )
|
|
|
|
|
if __name__ == "__main__":
    # Refuse to start the UI without the source data: every session needs it
    # to build its task list.
    if not Path(TRANSLATED_FILE).exists():
        print(f"FATAL: Source data file '{TRANSLATED_FILE}' not found.")
        print("Please ensure the file is in the correct directory before running.")
    else:
        demo.launch()