DataLabelingApp / annotation.py
sunnyzjx's picture
Update annotation.py
6264532 verified
import json
import logging
import os
import re

from huggingface_hub import HfApi, hf_hub_download

import config
# Hub API client; save_annotations uses it to upload result files.
api = HfApi()
# Target repository for the uploaded annotation files, taken from config.
REPO_ID = config.SAVE_REPO_ID
# Access token read from the HF_TOKEN environment variable (provided as a
# Space secret); None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
def get_user_annotation_filename(username: str) -> str:
    """Build the name of the annotation file that belongs to ``username``.

    Every backslash, slash, ``*``, ``?``, ``:``, double quote, ``<``, ``>``
    and ``|`` in the username is replaced by an underscore, and the result is
    embedded in ``annotation_results_<name>.json``.
    """
    unsafe_chars = re.compile(r'[\\/*?:"<>|]')
    cleaned = unsafe_chars.sub("_", username)
    return f"annotation_results_{cleaned}.json"
def save_annotations(username_state, annotation_results_state, tasks):
    """Serialize the user's annotation choices and upload them to the Hub.

    Args:
        username_state: Name of the annotating user; stored in the payload and
            used to derive the destination file name.
        annotation_results_state: Mapping of task id -> chosen option.
        tasks: Container indexed by task id whose entries provide a "text"
            item.

    Returns:
        A status message: a success line naming the destination and the
        progress count, or a failure line carrying the exception text. Any
        exception raised while building or uploading the payload is reported
        through that message instead of being propagated.
    """
    try:
        total_count = len(tasks)
        done_count = len(annotation_results_state)

        # One record per annotated task, in the mapping's iteration order.
        records = [
            {
                "task_id": task_id,
                "text": tasks[task_id]["text"],
                "choice": choice,
                "audioA_id": f"audioA_{task_id}",
                "audioB_id": f"audioB_{task_id}",
                "username": username_state,
            }
            for task_id, choice in annotation_results_state.items()
        ]
        payload = {
            "total_tasks": total_count,
            "completed_tasks": done_count,
            "username": username_state,
            "annotations": records,
        }

        # Keep the document in memory; it is uploaded as bytes, not via a file.
        serialized = json.dumps(payload, ensure_ascii=False, indent=2)
        filename = get_user_annotation_filename(username_state)

        api.upload_file(
            path_or_fileobj=serialized.encode("utf-8"),
            path_in_repo=filename,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        return f"✅ 标注结果已上传到 {REPO_ID}/{filename}\n完成进度: {done_count}/{total_count}"
    except Exception as exc:
        return f"❌ 上传失败: {exc!s}"
def load_annotations(username):
    """Load a user's previously saved annotation choices from the Hub.

    Downloads the user's annotation file from the ``REPO_ID`` dataset
    repository and rebuilds the ``task_id -> choice`` mapping from its
    "annotations" list.

    Args:
        username: User whose annotation file should be loaded.

    Returns:
        Dict mapping each saved task id to the recorded choice. An empty dict
        is returned when the file cannot be downloaded or parsed, typically
        because the user has not saved any annotations yet. This function
        never raises; failures are logged as warnings instead.
    """
    try:
        filename = get_user_annotation_filename(username)
        local_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=filename,
            repo_type="dataset",
            token=HF_TOKEN,
            force_download=True,  # always re-download to get the latest version
        )
        with open(local_path, "r", encoding="utf-8") as f:
            save_data = json.load(f)
        return {
            ann["task_id"]: ann["choice"]
            for ann in save_data.get("annotations", [])
        }
    except Exception as exc:
        # A missing file is the expected case for a new user, but network,
        # auth and parse errors land here too. Record the cause so they are
        # not silently mistaken for "nothing saved yet".
        logging.getLogger(__name__).warning(
            "Could not load annotations for %r (%s: %s); returning an empty result",
            username,
            type(exc).__name__,
            exc,
        )
        return {}