Spaces:

Jazzcharles
/

audioverse-caption-verification

Sleeping

App Files Community

Jazzcharles committed on Jan 14

Commit

dbfa08c

0 Parent(s):

initial deploy

Browse files

Files changed (5) hide show

app.py +323 -0
data/assignments.json +6 -0
data/samples_for_annotation_with_urls.json +0 -0
requirements.txt +2 -0
results/results.jsonl +1 -0

app.py ADDED Viewed

	@@ -0,0 +1,323 @@

+import gradio as gr
+import json
+import os
+from datetime import datetime
+from huggingface_hub import HfApi
+import time
+HF_DATASET_REPO = "Jazzcharles/audioverse_for_annotation"
+SYNC_INTERVAL = 300  # 秒，Space 环境建议 60~300
+# os.environ["GRADIO_TEMP_DIR"] = "/home/jilan_xu/qwen/assets/gradio_temp"
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TMP_DIR = os.path.join(BASE_DIR, "gradio_tmp")
+os.makedirs(TMP_DIR, exist_ok=True)
+os.environ["GRADIO_TEMP_DIR"] = TMP_DIR
+DATA_PATH = "data/samples_for_annotation_with_urls.json"
+ASSIGN_PATH = "data/assignments.json"
+RESULT_PATH = "results/results.jsonl"
+os.makedirs("results", exist_ok=True)
+def pull_results_from_hf():
+    """
+    Download results.jsonl from HF dataset repo to local RESULT_PATH.
+    If download fails, keep local file untouched.
+    """
+    try:
+        os.makedirs(os.path.dirname(RESULT_PATH), exist_ok=True)
+        api.download_file(
+            repo_id=HF_DATASET_REPO,
+            repo_type="dataset",
+            filename="results.jsonl",
+            local_dir=os.path.dirname(RESULT_PATH),
+            local_dir_use_symlinks=False,
+        )
+        print("[INIT] Pulled results.jsonl from HF dataset.")
+    except Exception as e:
+        print("[INIT] No remote results.jsonl or pull failed:", e)
+# ---- pull latest results from HF dataset ----
+pull_results_from_hf()
+with open(DATA_PATH, "r") as f:
+    SAMPLES = {x["id"]: x for x in json.load(f)}
+with open(ASSIGN_PATH, "r") as f:
+    ASSIGN = json.load(f)
+# ------------------------
+# Utilities
+# ------------------------
+def get_user_samples(user):
+    return ASSIGN.get(user, [])
+def save_result(record):
+    with open(RESULT_PATH, "a") as f:
+        f.write(json.dumps(record, ensure_ascii=False) + "\n")
+def load_existing_results():
+    if not os.path.exists(RESULT_PATH):
+        return []
+    records = []
+    with open(RESULT_PATH, "r", encoding="utf-8") as f:
+        for line in f:
+            try:
+                records.append(json.loads(line))
+            except:
+                pass
+    return records
+def get_user_done_ids(user):
+    records = load_existing_results()
+    done = {}
+    for r in records:
+        if r["annotator"] == user:
+            done[r["sample_id"]] = r   # 后写的覆盖前面的
+    return done   # {sample_id: last_record}
+# ------------------------
+# State
+# ------------------------
+def init_state(user):
+    sample_ids = get_user_samples(user)
+    done_map = get_user_done_ids(user)
+    done_ids = set(done_map.keys())
+    # 只保留未完成的 sample
+    pending_ids = [sid for sid in sample_ids if sid not in done_ids]
+    return {
+        "user": user,
+        "sample_ids": sample_ids,
+        "pending_ids": pending_ids,
+        "done_map": done_map,
+        "idx": 0
+    }
+api = HfApi()
+_last_sync_time = 0
+def sync_results_to_hf(force=False):
+    global _last_sync_time
+    if not os.path.exists(RESULT_PATH):
+        return
+    now = time.time()
+    if not force and now - _last_sync_time < SYNC_INTERVAL:
+        return
+    try:
+        api.upload_file(
+            path_or_fileobj=RESULT_PATH,
+            path_in_repo="results.jsonl",
+            repo_id=HF_DATASET_REPO,
+            repo_type="dataset",
+            commit_message=f"Sync results at {datetime.utcnow().isoformat()}",
+        )
+        _last_sync_time = now
+        print("[SYNC] results.jsonl synced to HF dataset.")
+    except Exception as e:
+        print("[SYNC ERROR]", e)
+# ------------------------
+# Load sample
+# ------------------------
+def load_sample(state):
+    if state["idx"] >= len(state["pending_ids"]):
+        return None, None, None, None, "All pending tasks completed."
+    sid = state["pending_ids"][state["idx"]]
+    sample = SAMPLES[sid]
+    return (
+        sample["audio_url"],
+        sample["captions"]["long"],
+        sample["captions"]["short"],
+        sample["captions"]["tag"],
+        f"Pending {state['idx']+1}/{len(state['pending_ids'])} (ID={sid})"
+    )
+# ------------------------
+# Submit
+# ------------------------
+def submit(state, long_score, short_score, tag_score):
+    sid = state["pending_ids"][state["idx"]]
+    record = {
+        "timestamp": datetime.utcnow().isoformat(),
+        "annotator": state["user"],
+        "sample_id": sid,
+        "scores": {
+            "long": long_score,
+            "short": short_score,
+            "tag": tag_score
+        }
+    }
+    save_result(record)
+    # >>> 新增：尝试同步 <<<
+    sync_results_to_hf()
+    state["idx"] += 1
+    return state
+# ------------------------
+# UI
+# ------------------------
+with gr.Blocks(title="Audio-Caption Matching Annotation") as demo:
+    gr.Markdown("# Audio–Caption Matching Annotation")
+    with gr.Row():
+        user_input = gr.Textbox(label="Annotator ID", placeholder="e.g. annotator_1")
+        start_btn = gr.Button("Start")
+        sync_btn = gr.Button("Finish & Sync results to HF")
+        sync_status = gr.Markdown()
+    state = gr.State()
+    status = gr.Markdown()
+    audio = gr.Audio(label="Audio", type="filepath")
+    with gr.Column():
+        # ---------- LONG ----------
+        gr.Markdown("## Long Caption")
+        gr.Markdown(
+            """
+**Criteria**
+1. **Event accuracy**: Are the sound events in the caption actually present in the audio?
+2. **Completeness**: Does the caption miss any major audible events?
+3. **Temporal consistency**: Does the sequence of events match the audio timeline?
+4. **Acoustic detail**: Does the caption correctly reflect loudness, duration, tone, speed, environment?
+            """
+        )
+        long_caption = gr.Textbox(label="Caption (Long)", interactive=False)
+        long_score = gr.Radio(
+            choices=[str(i) for i in range(1, 11)],
+            label="Overall Score (1–10)",
+            value=None
+        )
+        # ---------- SHORT ----------
+        gr.Markdown("## Short Caption")
+        gr.Markdown(
+            """
+**Criteria**
+1. **Event accuracy**: Are the sound events in the caption actually present in the audio?
+2. **Completeness**: Does the caption miss any major audible events?
+            """
+        )
+        short_caption = gr.Textbox(label="Caption (Short)", interactive=False)
+        short_score = gr.Radio(
+            choices=[str(i) for i in range(1, 11)],
+            label="Overall Score (1–10)",
+            value=None
+        )
+        # ---------- TAG ----------
+        gr.Markdown("## Tag")
+        gr.Markdown(
+            """
+**Criteria**
+1. **Event accuracy**: Are the sound events in the tags actually present in the audio?
+2. **Completeness**: Does the tags miss any major audible events?
+            """
+        )
+        tag_caption = gr.Textbox(label="Caption (Tag)", interactive=False)
+        tag_score = gr.Radio(
+            choices=[str(i) for i in range(1, 11)],
+            label="Overall Score (1–10)",
+            value=None
+        )
+    submit_btn = gr.Button("Submit & Next")
+    # ------------------------
+    # Callbacks
+    # ------------------------
+    def on_start(user):
+        st = init_state(user)
+        # pending sample
+        audio_url, long_c, short_c, tag_c, msg = load_sample(st)
+        # 已完成样本列表
+        done_ids = sorted(st["done_map"].keys())
+        dropdown_choices = [str(sid) for sid in done_ids]
+        return (
+            st,
+            audio_url,
+            long_c,
+            short_c,
+            tag_c,
+            msg,
+            dropdown_choices
+        )
+    start_btn.click(
+        on_start,
+        inputs=[user_input],
+        outputs=[state, audio, long_caption, short_caption, tag_caption, status]
+    )
+    def on_submit(st, l, s, t):
+        if l is None or s is None or t is None:
+            return st, None, None, None, "Please score all captions before submitting."
+        st = submit(st, l, s, t)
+        audio_url, long_c, short_c, tag_c, msg = load_sample(st)
+        # 注意：最后三个 None 是清空评分
+        return (
+            st,
+            audio_url,
+            long_c,
+            short_c,
+            tag_c,
+            msg,
+            None,   # long_score reset
+            None,   # short_score reset
+            None    # tag_score reset
+        )
+    submit_btn.click(
+        on_submit,
+        inputs=[state, long_score, short_score, tag_score],
+        outputs=[
+            state,
+            audio,
+            long_caption,
+            short_caption,
+            tag_caption,
+            status,
+            long_score,
+            short_score,
+            tag_score
+        ]
+    )
+demo.launch()

data/assignments.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "annotator_1": [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200],
+    "annotator_2": [201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400],
+    "annotator_3": [401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500]
+}

data/samples_for_annotation_with_urls.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ gradio>=4.0
2	+ huggingface_hub>=0.20

results/results.jsonl ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"timestamp": "2026-01-14T15:03:44.808170", "annotator": "annotator_1", "sample_id": 1, "scores": {"long": "10", "short": "9", "tag": "9"}}