Spaces:

Jazzcharles
/

audioverse-caption-verification

Sleeping

audioverse-caption-verification / app.py

Jazzcharles

initial deploy

dbfa08c about 2 months ago

8.61 kB

	import gradio as gr
	import json
	import os
	from datetime import datetime
	from huggingface_hub import HfApi
	import time
	# Hugging Face dataset repo that results.jsonl is downloaded from and uploaded to.
	HF_DATASET_REPO = "Jazzcharles/audioverse_for_annotation"
	# Minimum number of seconds between two non-forced uploads of the results file.
	SYNC_INTERVAL = 300 # seconds; 60~300 is recommended in a Space environment

	# os.environ["GRADIO_TEMP_DIR"] = "/home/jilan_xu/qwen/assets/gradio_temp"
	# Point Gradio's temp directory at a "gradio_tmp" folder next to this file.
	# NOTE(review): this is assigned after `import gradio` has already run;
	# confirm Gradio reads the variable lazily rather than at import time.
	BASE_DIR = os.path.dirname(os.path.abspath(__file__))
	TMP_DIR = os.path.join(BASE_DIR, "gradio_tmp")
	os.makedirs(TMP_DIR, exist_ok=True)
	os.environ["GRADIO_TEMP_DIR"] = TMP_DIR

	# Input files and the local results log. These are relative paths, so they
	# resolve against the process's working directory, not against BASE_DIR.
	DATA_PATH = "data/samples_for_annotation_with_urls.json"
	ASSIGN_PATH = "data/assignments.json"
	RESULT_PATH = "results/results.jsonl"

	# Make sure the directory that holds RESULT_PATH exists before anything appends to it.
	os.makedirs("results", exist_ok=True)


	def pull_results_from_hf():
	    """Download ``results.jsonl`` from the HF dataset repo next to ``RESULT_PATH``.

	    The file is fetched from ``HF_DATASET_REPO`` (repo type ``dataset``) into
	    the directory that contains ``RESULT_PATH``. This is best effort: any
	    failure (file missing remotely, no network, no credentials) is printed
	    and swallowed so the app can still start with whatever is on disk.

	    Returns:
	        None.
	    """
	    target_dir = os.path.dirname(RESULT_PATH)
	    try:
	        os.makedirs(target_dir, exist_ok=True)

	        # Use a client created here: this function is called during module
	        # start-up, before the module-level ``api`` object is assigned, so
	        # relying on that global made every pull fail with a NameError that
	        # the handler below silently swallowed.
	        # ``hf_hub_download`` is the download entry point on HfApi.
	        # NOTE(review): ``local_dir_use_symlinks`` is intentionally omitted;
	        # recent huggingface_hub releases deprecate it -- confirm against the
	        # version pinned for this Space.
	        HfApi().hf_hub_download(
	            repo_id=HF_DATASET_REPO,
	            repo_type="dataset",
	            filename="results.jsonl",
	            local_dir=target_dir,
	        )
	        print("[INIT] Pulled results.jsonl from HF dataset.")
	    except Exception as e:
	        # Deliberately broad: start-up must not crash when the pull fails.
	        print("[INIT] No remote results.jsonl or pull failed:", e)


	# ---- pull latest results from HF dataset ----
	# Done once at start-up, before any state is built from the local results file.
	pull_results_from_hf()

	# SAMPLES maps each entry's "id" to the full sample record.
	# The encoding is explicit so loading does not depend on the platform default.
	with open(DATA_PATH, "r", encoding="utf-8") as f:
	    SAMPLES = {x["id"]: x for x in json.load(f)}

	# ASSIGN is looked up by annotator ID elsewhere in this file
	# (presumably {annotator_id: [sample_id, ...]} -- verify against the data file).
	with open(ASSIGN_PATH, "r", encoding="utf-8") as f:
	    ASSIGN = json.load(f)


	# ------------------------
	# Utilities
	# ------------------------
	def get_user_samples(user):
	    """Return the entry stored in ``ASSIGN`` for ``user``, or ``[]`` if there is none."""
	    if user in ASSIGN:
	        return ASSIGN[user]
	    return []


	def save_result(record, path=None):
	    """Append ``record`` as a single JSON line to the results file.

	    Args:
	        record: JSON-serialisable object to store.
	        path: File to append to. Defaults to ``RESULT_PATH`` when ``None``.

	    Returns:
	        None.
	    """
	    target = RESULT_PATH if path is None else path
	    # ``ensure_ascii=False`` emits raw non-ASCII characters, so the file must
	    # be written as UTF-8 explicitly; the reader in this file opens it as
	    # UTF-8 and the platform default encoding may differ.
	    with open(target, "a", encoding="utf-8") as f:
	        f.write(json.dumps(record, ensure_ascii=False) + "\n")

	def load_existing_results(path=None):
	    """Read every parseable JSON line from the results file.

	    Args:
	        path: File to read. Defaults to ``RESULT_PATH`` when ``None``.

	    Returns:
	        A list with one decoded object per valid line, in file order.
	        An empty list if the file does not exist.
	    """
	    source = RESULT_PATH if path is None else path
	    if not os.path.exists(source):
	        return []

	    records = []
	    with open(source, "r", encoding="utf-8") as f:
	        for line in f:
	            line = line.strip()
	            if not line:
	                continue
	            try:
	                records.append(json.loads(line))
	            except json.JSONDecodeError:
	                # Best effort: skip a corrupt line instead of losing the
	                # whole file, but only for decode errors -- anything else
	                # should surface.
	                continue
	    return records


	def get_user_done_ids(user):
	    """Return ``{sample_id: last_record}`` for the samples ``user`` already scored.

	    Records are read with ``load_existing_results()``. When the same sample
	    appears more than once for the user, the record that comes later in the
	    file wins.

	    Args:
	        user: Annotator ID to match against each record's ``"annotator"``.

	    Returns:
	        A dict keyed by ``"sample_id"`` holding the latest matching record.
	    """
	    done = {}
	    for r in load_existing_results():
	        # A line can be valid JSON without being a usable record (a bare
	        # number, a dict missing a key). Skip those instead of raising, so
	        # one bad line cannot block every annotator from starting.
	        if not isinstance(r, dict):
	            continue
	        if "annotator" not in r or "sample_id" not in r:
	            continue
	        if r["annotator"] == user:
	            done[r["sample_id"]] = r  # later records overwrite earlier ones
	    return done  # {sample_id: last_record}

	# ------------------------
	# State
	# ------------------------
	def init_state(user):
	    """Build the session state dict for ``user``.

	    The state holds the user, their assigned sample ids, the ids that still
	    need annotation (in assignment order), the map of already-saved records,
	    and a cursor ``idx`` into the pending list, starting at 0.
	    """
	    assigned = get_user_samples(user)
	    finished = get_user_done_ids(user)

	    # Keep only the samples that have no saved record yet.
	    finished_ids = set(finished)
	    remaining = [sid for sid in assigned if sid not in finished_ids]

	    return dict(
	        user=user,
	        sample_ids=assigned,
	        pending_ids=remaining,
	        done_map=finished,
	        idx=0,
	    )


	# Hub client used for uploads.
	api = HfApi()
	# ``time.time()`` value of the last successful upload; 0 until one succeeds.
	_last_sync_time = 0


	def sync_results_to_hf(force=False):
	    """Upload the local results file to the HF dataset repo.

	    Does nothing when the results file does not exist. Unless ``force`` is
	    true, it also does nothing when less than ``SYNC_INTERVAL`` seconds have
	    passed since the last successful upload. Upload errors are printed, not
	    raised, and leave the last-sync time unchanged.
	    """
	    global _last_sync_time

	    if not os.path.exists(RESULT_PATH):
	        return

	    now = time.time()
	    too_soon = now - _last_sync_time < SYNC_INTERVAL
	    if too_soon and not force:
	        return

	    try:
	        stamp = datetime.utcnow().isoformat()
	        api.upload_file(
	            path_or_fileobj=RESULT_PATH,
	            path_in_repo="results.jsonl",
	            repo_id=HF_DATASET_REPO,
	            repo_type="dataset",
	            commit_message=f"Sync results at {stamp}",
	        )
	        # Only a successful upload moves the throttle window.
	        _last_sync_time = now
	        print("[SYNC] results.jsonl synced to HF dataset.")
	    except Exception as e:
	        print("[SYNC ERROR]", e)


	# ------------------------
	# Load sample
	# ------------------------
	def load_sample(state):
	    """Return the display values for the sample at the state's cursor.

	    The result is a 5-tuple ``(audio_url, long, short, tag, status_text)``.
	    When the cursor is at or past the end of the pending list, the first four
	    entries are ``None`` and the status text says everything is completed.
	    """
	    position = state["idx"]
	    pending = state["pending_ids"]
	    total = len(pending)

	    if position < total:
	        sid = pending[position]
	        sample = SAMPLES[sid]
	        audio_url = sample["audio_url"]
	        captions = sample["captions"]
	        progress = f"Pending {position + 1}/{total} (ID={sid})"
	        return (audio_url, captions["long"], captions["short"], captions["tag"], progress)

	    return None, None, None, None, "All pending tasks completed."




	# ------------------------
	# Submit
	# ------------------------
	def submit(state, long_score, short_score, tag_score):
	    """Record the scores for the current pending sample and advance the cursor.

	    Args:
	        state: Session state dict with ``"user"``, ``"pending_ids"`` and ``"idx"``.
	        long_score: Score given to the long caption.
	        short_score: Score given to the short caption.
	        tag_score: Score given to the tags.

	    Returns:
	        The same ``state`` object. ``state["idx"]`` is incremented by one
	        when a record was saved, and left untouched when there was no
	        pending sample at the cursor.
	    """
	    pending = state["pending_ids"]
	    if not 0 <= state["idx"] < len(pending):
	        # Nothing left at the cursor (e.g. Submit clicked after the last
	        # sample). Previously this raised IndexError; now it is a no-op.
	        return state

	    sid = pending[state["idx"]]

	    record = {
	        "timestamp": datetime.utcnow().isoformat(),
	        "annotator": state["user"],
	        "sample_id": sid,
	        "scores": {
	            "long": long_score,
	            "short": short_score,
	            "tag": tag_score,
	        },
	    }
	    save_result(record)

	    # Try to sync; the sync function throttles itself between uploads.
	    sync_results_to_hf()

	    state["idx"] += 1
	    return state




	# ------------------------
	# UI
	# ------------------------
	with gr.Blocks(title="Audio-Caption Matching Annotation") as demo:

	    gr.Markdown("# Audio–Caption Matching Annotation")

	    with gr.Row():
	        user_input = gr.Textbox(label="Annotator ID", placeholder="e.g. annotator_1")
	        start_btn = gr.Button("Start")

	    sync_btn = gr.Button("Finish & Sync results to HF")
	    sync_status = gr.Markdown()

	    # Per-session annotation state (the dict built by init_state); None until Start is clicked.
	    state = gr.State()

	    status = gr.Markdown()

	    audio = gr.Audio(label="Audio", type="filepath")

	    with gr.Column():
	        # ---------- LONG ----------
	        gr.Markdown("## Long Caption")
	        gr.Markdown(
	            """
	            Criteria
	            1. Event accuracy: Are the sound events in the caption actually present in the audio?
	            2. Completeness: Does the caption miss any major audible events?
	            3. Temporal consistency: Does the sequence of events match the audio timeline?
	            4. Acoustic detail: Does the caption correctly reflect loudness, duration, tone, speed, environment?
	            """
	        )
	        long_caption = gr.Textbox(label="Caption (Long)", interactive=False)
	        long_score = gr.Radio(
	            choices=[str(i) for i in range(1, 11)],
	            label="Overall Score (1–10)",
	            value=None
	        )

	        # ---------- SHORT ----------
	        gr.Markdown("## Short Caption")
	        gr.Markdown(
	            """
	            Criteria
	            1. Event accuracy: Are the sound events in the caption actually present in the audio?
	            2. Completeness: Does the caption miss any major audible events?
	            """
	        )
	        short_caption = gr.Textbox(label="Caption (Short)", interactive=False)
	        short_score = gr.Radio(
	            choices=[str(i) for i in range(1, 11)],
	            label="Overall Score (1–10)",
	            value=None
	        )

	        # ---------- TAG ----------
	        gr.Markdown("## Tag")
	        gr.Markdown(
	            """
	            Criteria
	            1. Event accuracy: Are the sound events in the tags actually present in the audio?
	            2. Completeness: Does the tags miss any major audible events?
	            """
	        )
	        tag_caption = gr.Textbox(label="Caption (Tag)", interactive=False)
	        tag_score = gr.Radio(
	            choices=[str(i) for i in range(1, 11)],
	            label="Overall Score (1–10)",
	            value=None
	        )

	    submit_btn = gr.Button("Submit & Next")

	    # ------------------------
	    # Callbacks
	    # ------------------------
	    def _status_only(st, message):
	        """Build the 9-slot submit output that changes nothing but the status text.

	        ``gr.update()`` with no arguments leaves a component as it is, so the
	        audio, captions and already-selected scores stay on screen.
	        """
	        return (
	            st,
	            gr.update(),  # audio
	            gr.update(),  # long_caption
	            gr.update(),  # short_caption
	            gr.update(),  # tag_caption
	            message,      # status
	            gr.update(),  # long_score
	            gr.update(),  # short_score
	            gr.update(),  # tag_score
	        )

	    def on_start(user):
	        """Create the session state for ``user`` and show their first pending sample.

	        Returns exactly one value per component wired to ``start_btn``:
	        state, audio, long caption, short caption, tag caption, status.
	        """
	        # Surrounding whitespace in the typed ID would not match any assignment.
	        user = (user or "").strip()
	        st = init_state(user)

	        # First pending sample (or the "completed" message).
	        audio_url, long_c, short_c, tag_c, msg = load_sample(st)

	        if not st["sample_ids"]:
	            # Without this, an unknown ID would be told that all tasks are done.
	            msg = f"No samples are assigned to annotator ID '{user}'."

	        return (
	            st,
	            audio_url,
	            long_c,
	            short_c,
	            tag_c,
	            msg,
	        )

	    start_btn.click(
	        on_start,
	        inputs=[user_input],
	        outputs=[state, audio, long_caption, short_caption, tag_caption, status]
	    )

	    def on_submit(st, long_val, short_val, tag_val):
	        """Save the three scores for the current sample and load the next one.

	        Always returns nine values, one per component wired to ``submit_btn``.
	        """
	        if st is None:
	            return _status_only(st, "Please enter your annotator ID and click Start first.")

	        if st["idx"] >= len(st["pending_ids"]):
	            # Nothing left to score; do not try to save another record.
	            return _status_only(st, "All pending tasks completed.")

	        if long_val is None or short_val is None or tag_val is None:
	            return _status_only(st, "Please score all captions before submitting.")

	        st = submit(st, long_val, short_val, tag_val)
	        audio_url, long_c, short_c, tag_c, msg = load_sample(st)

	        # The last three None values clear the score radios for the next sample.
	        return (
	            st,
	            audio_url,
	            long_c,
	            short_c,
	            tag_c,
	            msg,
	            None,  # long_score reset
	            None,  # short_score reset
	            None   # tag_score reset
	        )

	    submit_btn.click(
	        on_submit,
	        inputs=[state, long_score, short_score, tag_score],
	        outputs=[
	            state,
	            audio,
	            long_caption,
	            short_caption,
	            tag_caption,
	            status,
	            long_score,
	            short_score,
	            tag_score
	        ]
	    )

	    def on_sync():
	        """Force an upload of the results file and report whether it went through."""
	        # sync_results_to_hf reports nothing back; it only moves
	        # _last_sync_time when an upload succeeds, so compare before/after.
	        before = _last_sync_time
	        sync_results_to_hf(force=True)
	        if _last_sync_time != before:
	            return "Results synced to the HF dataset."
	        return "Sync did not complete: there are no local results yet, or the upload failed (see the Space logs)."

	    # The button existed but had no handler, so a forced sync was unreachable.
	    sync_btn.click(on_sync, inputs=None, outputs=[sync_status])


	demo.launch()