| | import gradio as gr |
| | import json |
| | import os |
| | from datetime import datetime |
| | from huggingface_hub import HfApi |
| | import time |
# Hub dataset repo that stores the shared results file.
HF_DATASET_REPO = "Jazzcharles/audioverse_for_annotation"
# Minimum number of seconds between two non-forced uploads of the results.
SYNC_INTERVAL = 300

# Input data and output locations (relative to the working directory).
DATA_PATH = "data/samples_for_annotation_with_urls.json"
ASSIGN_PATH = "data/assignments.json"
RESULT_PATH = "results/results.jsonl"

# Point Gradio's temp directory at a folder located next to this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TMP_DIR = os.path.join(BASE_DIR, "gradio_tmp")
os.environ["GRADIO_TEMP_DIR"] = TMP_DIR

# Make sure both writable directories exist before anything uses them.
os.makedirs(TMP_DIR, exist_ok=True)
os.makedirs(os.path.dirname(RESULT_PATH), exist_ok=True)
| |
|
| |
|
def pull_results_from_hf():
    """
    Download results.jsonl from the HF dataset repo to local RESULT_PATH.

    Best effort: any failure (missing remote file, network or auth error)
    is printed and swallowed, so the local file is left as it was.

    The download goes through ``hf_hub_download`` rather than the module
    level ``api`` object: this function is called during start-up, before
    ``api`` is assigned, so referencing ``api`` here would always raise a
    NameError that the broad ``except`` below silently hides.
    """
    # Local import keeps this block self-contained; huggingface_hub is
    # already a dependency of this file.
    from huggingface_hub import hf_hub_download

    try:
        target_dir = os.path.dirname(RESULT_PATH)
        os.makedirs(target_dir, exist_ok=True)

        # With local_dir set, the file is written to
        # <local_dir>/<filename>, i.e. RESULT_PATH.
        hf_hub_download(
            repo_id=HF_DATASET_REPO,
            repo_type="dataset",
            filename="results.jsonl",
            local_dir=target_dir,
        )
        print("[INIT] Pulled results.jsonl from HF dataset.")
    except Exception as e:
        # Deliberately broad: start-up must continue without remote results.
        print("[INIT] No remote results.jsonl or pull failed:", e)
| |
|
| |
|
| | |
# Try to restore previously collected results once, at start-up.
pull_results_from_hf()

# JSON files are read explicitly as UTF-8 so that non-ASCII captions load
# the same way regardless of the platform's default encoding.
with open(DATA_PATH, "r", encoding="utf-8") as f:
    # Index the sample list by each sample's "id" for direct lookup.
    SAMPLES = {x["id"]: x for x in json.load(f)}

with open(ASSIGN_PATH, "r", encoding="utf-8") as f:
    # Looked up by annotator name in get_user_samples().
    ASSIGN = json.load(f)
| |
|
| |
|
| | |
| | |
| | |
def get_user_samples(user):
    """Return the entry stored for *user* in ASSIGN, or ``[]`` if absent."""
    try:
        return ASSIGN[user]
    except KeyError:
        return []
| |
|
| |
|
def save_result(record):
    """
    Append *record* to RESULT_PATH as a single JSON line.

    The file is opened as UTF-8 explicitly: ``ensure_ascii=False`` emits
    raw non-ASCII characters, and load_existing_results() reads the file
    back as UTF-8, so relying on the platform default encoding could
    produce a file that cannot be read back.
    """
    line = json.dumps(record, ensure_ascii=False) + "\n"
    with open(RESULT_PATH, "a", encoding="utf-8") as f:
        f.write(line)
| |
|
def load_existing_results():
    """
    Read every record stored in RESULT_PATH.

    Returns:
        A list of dicts, one per valid JSON-object line, in file order.
        An empty list is returned when the file does not exist.

    Blank lines, lines that are not valid JSON and lines that decode to
    something other than a JSON object are skipped: callers index each
    record by key, so only dicts are usable.
    """
    if not os.path.exists(RESULT_PATH):
        return []

    records = []
    with open(RESULT_PATH, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError:
                # Tolerate a corrupt or partially written line instead of
                # losing the whole file; only decode errors are ignored.
                continue
            if isinstance(parsed, dict):
                records.append(parsed)
    return records
| |
|
| |
|
def get_user_done_ids(user):
    """
    Map each sample ID already annotated by *user* to its stored record.

    When the same sample appears several times for the user, the record
    that comes last in the results file is the one kept.
    """
    return {
        rec["sample_id"]: rec
        for rec in load_existing_results()
        if rec["annotator"] == user
    }
| |
|
| | |
| | |
| | |
def init_state(user):
    """
    Build the session state dict for *user*.

    The state holds the user's name, all assigned sample IDs, the subset
    not yet annotated (in assignment order), the map of already stored
    records, and a cursor ``idx`` into the pending list starting at 0.
    """
    assigned = get_user_samples(user)
    completed = get_user_done_ids(user)
    finished_ids = set(completed)

    # Keep assignment order; drop everything that already has a record.
    remaining = []
    for sid in assigned:
        if sid not in finished_ids:
            remaining.append(sid)

    return dict(
        user=user,
        sample_ids=assigned,
        pending_ids=remaining,
        done_map=completed,
        idx=0,
    )
| |
|
| |
|
# Hub client used for uploads, and the time.time() value recorded at the
# last successful upload (0 means "never synced in this process").
api = HfApi()
_last_sync_time = 0


def sync_results_to_hf(force=False):
    """
    Upload RESULT_PATH to the HF dataset repo as ``results.jsonl``.

    Nothing happens when the results file does not exist. Unless *force*
    is true, the upload is also skipped when the previous successful sync
    was less than SYNC_INTERVAL seconds ago. Upload errors are printed
    and swallowed; the last-sync time is only advanced on success.
    """
    global _last_sync_time

    if not os.path.exists(RESULT_PATH):
        return

    started = time.time()
    too_soon = (started - _last_sync_time) < SYNC_INTERVAL
    if too_soon and not force:
        return

    try:
        api.upload_file(
            path_or_fileobj=RESULT_PATH,
            path_in_repo="results.jsonl",
            repo_id=HF_DATASET_REPO,
            repo_type="dataset",
            commit_message=f"Sync results at {datetime.utcnow().isoformat()}",
        )
    except Exception as e:
        print("[SYNC ERROR]", e)
    else:
        _last_sync_time = started
        print("[SYNC] results.jsonl synced to HF dataset.")
| |
|
| |
|
| | |
| | |
| | |
def load_sample(state):
    """
    Return the display values for the sample at the state's cursor.

    The result is a 5-tuple: audio URL, long caption, short caption, tag
    caption and a progress message. Once the cursor is past the end of the
    pending list, the first four values are None and the message reports
    completion.
    """
    pending = state["pending_ids"]
    position = state["idx"]
    total = len(pending)

    if position < total:
        sid = pending[position]
        sample = SAMPLES[sid]
        audio_url = sample["audio_url"]
        captions = sample["captions"]
        progress = f"Pending {position + 1}/{total} (ID={sid})"
        return (
            audio_url,
            captions["long"],
            captions["short"],
            captions["tag"],
            progress,
        )

    return None, None, None, None, "All pending tasks completed."
| |
|
| |
|
| |
|
| |
|
| | |
| | |
| | |
def submit(state, long_score, short_score, tag_score):
    """
    Store the scores for the current pending sample and advance the cursor.

    Args:
        state: Session state dict; reads "user", "pending_ids" and "idx".
        long_score: Score given to the long caption.
        short_score: Score given to the short caption.
        tag_score: Score given to the tags.

    Returns:
        The same state dict, with "idx" incremented when a record was
        saved. If the cursor is already past the last pending sample,
        nothing is saved and the state is returned unchanged.
    """
    pending = state["pending_ids"]
    position = state["idx"]

    # Guard against an extra click after the queue is exhausted, which
    # previously raised IndexError on the lookup below.
    if position >= len(pending):
        return state

    sid = pending[position]

    record = {
        "timestamp": datetime.utcnow().isoformat(),
        "annotator": state["user"],
        "sample_id": sid,
        "scores": {
            "long": long_score,
            "short": short_score,
            "tag": tag_score,
        },
    }
    save_result(record)

    # Throttled upload; a no-op if the last sync was recent.
    sync_results_to_hf()

    state["idx"] += 1
    return state
| |
|
| |
|
| |
|
| |
|
| | |
| | |
| | |
# ---------------------------------------------------------------------------
# Gradio UI: layout, event handlers and launch.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Audio-Caption Matching Annotation") as demo:

    gr.Markdown("# Audio–Caption Matching Annotation")

    with gr.Row():
        user_input = gr.Textbox(label="Annotator ID", placeholder="e.g. annotator_1")
        start_btn = gr.Button("Start")

    sync_btn = gr.Button("Finish & Sync results to HF")
    sync_status = gr.Markdown()

    # Per-session state dict produced by init_state(); None until Start.
    state = gr.State()

    status = gr.Markdown()

    audio = gr.Audio(label="Audio", type="filepath")

    with gr.Column():

        gr.Markdown("## Long Caption")
        gr.Markdown(
            """
            **Criteria**
            1. **Event accuracy**: Are the sound events in the caption actually present in the audio?
            2. **Completeness**: Does the caption miss any major audible events?
            3. **Temporal consistency**: Does the sequence of events match the audio timeline?
            4. **Acoustic detail**: Does the caption correctly reflect loudness, duration, tone, speed, environment?
            """
        )
        long_caption = gr.Textbox(label="Caption (Long)", interactive=False)
        long_score = gr.Radio(
            choices=[str(i) for i in range(1, 11)],
            label="Overall Score (1–10)",
            value=None
        )

        gr.Markdown("## Short Caption")
        gr.Markdown(
            """
            **Criteria**
            1. **Event accuracy**: Are the sound events in the caption actually present in the audio?
            2. **Completeness**: Does the caption miss any major audible events?
            """
        )
        short_caption = gr.Textbox(label="Caption (Short)", interactive=False)
        short_score = gr.Radio(
            choices=[str(i) for i in range(1, 11)],
            label="Overall Score (1–10)",
            value=None
        )

        gr.Markdown("## Tag")
        gr.Markdown(
            """
            **Criteria**
            1. **Event accuracy**: Are the sound events in the tags actually present in the audio?
            2. **Completeness**: Does the tags miss any major audible events?
            """
        )
        tag_caption = gr.Textbox(label="Caption (Tag)", interactive=False)
        tag_score = gr.Radio(
            choices=[str(i) for i in range(1, 11)],
            label="Overall Score (1–10)",
            value=None
        )

    submit_btn = gr.Button("Submit & Next")

    def _unchanged(count):
        """Return *count* fresh no-op updates (leave components as they are)."""
        return tuple(gr.update() for _ in range(count))

    def on_start(user):
        """Create the session state for *user* and show the first pending sample.

        Returns exactly one value per component wired in ``start_btn.click``:
        state, audio, long caption, short caption, tag caption, status.
        """
        # Surrounding whitespace in the typed ID would not match ASSIGN keys.
        st = init_state((user or "").strip())
        audio_url, long_c, short_c, tag_c, msg = load_sample(st)
        return st, audio_url, long_c, short_c, tag_c, msg

    start_btn.click(
        on_start,
        inputs=[user_input],
        outputs=[state, audio, long_caption, short_caption, tag_caption, status]
    )

    def on_submit(st, long_val, short_val, tag_val):
        """Validate the three scores, store them and load the next sample.

        Always returns nine values, matching ``submit_btn.click`` outputs.
        When nothing can be submitted, only the status message changes so
        the sample on screen and the chosen scores stay in place.
        """
        if st is None:
            # Submit pressed before Start: there is no session yet.
            return (
                st,
                *_unchanged(4),
                "Please enter your annotator ID and click Start first.",
                *_unchanged(3),
            )

        if st["idx"] >= len(st["pending_ids"]):
            # Queue exhausted; submit() would have nothing to index.
            return (
                st,
                *_unchanged(4),
                "All pending tasks completed.",
                *_unchanged(3),
            )

        if long_val is None or short_val is None or tag_val is None:
            return (
                st,
                *_unchanged(4),
                "Please score all captions before submitting.",
                *_unchanged(3),
            )

        st = submit(st, long_val, short_val, tag_val)
        audio_url, long_c, short_c, tag_c, msg = load_sample(st)

        # The trailing Nones clear the three radio groups for the next sample.
        return (
            st,
            audio_url,
            long_c,
            short_c,
            tag_c,
            msg,
            None,
            None,
            None
        )

    submit_btn.click(
        on_submit,
        inputs=[state, long_score, short_score, tag_score],
        outputs=[
            state,
            audio,
            long_caption,
            short_caption,
            tag_caption,
            status,
            long_score,
            short_score,
            tag_score
        ]
    )

    def on_sync():
        """Force an upload of the results file and report the outcome.

        sync_results_to_hf() returns nothing and swallows errors, so success
        is detected by checking whether it advanced ``_last_sync_time``.
        """
        before = _last_sync_time
        sync_results_to_hf(force=True)
        if _last_sync_time != before:
            return "Results synced to HF."
        return "Sync did not complete (no results yet or upload failed); see the server log."

    sync_btn.click(on_sync, inputs=[], outputs=[sync_status])


demo.launch()
| |
|