"""Persist per-user annotation results in a Hugging Face Hub dataset repo."""

import json
import logging
import os
import re

from huggingface_hub import HfApi, hf_hub_download

import config

logger = logging.getLogger(__name__)

# Access token, read from the HF_TOKEN environment variable (Space secret).
HF_TOKEN = os.getenv("HF_TOKEN")
REPO_ID = config.SAVE_REPO_ID

api = HfApi()


def get_user_annotation_filename(username: str) -> str:
    """Return the annotation file name used for *username*.

    Characters that are unsafe in file names (``\\ / * ? : " < > |``) are
    each replaced with an underscore.
    """
    safe_username = re.sub(r'[\\/*?:"<>|]', "_", username)
    return f"annotation_results_{safe_username}.json"


def save_annotations(username_state, annotation_results_state, tasks):
    """Upload the user's annotation results to the Hub dataset repo.

    Args:
        username_state: Name of the annotating user; stored in the payload
            and used to derive the target file name.
        annotation_results_state: Mapping of task id -> chosen option.
        tasks: Container indexable by task id whose entries expose a
            ``"text"`` item.

    Returns:
        A human-readable status message. Failures never raise: they are
        logged and reported through the returned message instead.
    """
    try:
        annotations = [
            {
                "task_id": task_id,
                "text": tasks[task_id]["text"],
                "choice": choice,
                "audioA_id": f"audioA_{task_id}",
                "audioB_id": f"audioB_{task_id}",
                "username": username_state,
            }
            for task_id, choice in annotation_results_state.items()
        ]
        save_data = {
            "total_tasks": len(tasks),
            "completed_tasks": len(annotation_results_state),
            "username": username_state,
            "annotations": annotations,
        }

        # Serialize in memory; the payload is uploaded as raw bytes, so no
        # temporary file is needed.
        save_str = json.dumps(save_data, ensure_ascii=False, indent=2)
        filename = get_user_annotation_filename(username_state)

        api.upload_file(
            path_or_fileobj=save_str.encode("utf-8"),
            path_in_repo=filename,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )

        return f"✅ 标注结果已上传到 {REPO_ID}/{filename}\n完成进度: {len(annotation_results_state)}/{len(tasks)}"
    except Exception as e:
        # Top-level boundary: keep returning a message to the caller, but
        # record the traceback so the failure can be diagnosed from logs.
        logger.exception("Failed to upload annotations for user %r", username_state)
        return f"❌ 上传失败: {str(e)}"


def load_annotations(username):
    """Load the user's previously saved annotation results from the Hub.

    Args:
        username: Name of the user whose annotation file should be fetched.

    Returns:
        A dict mapping task id -> choice. An empty dict is returned when the
        file cannot be downloaded or parsed; the usual reason is that the
        user has not saved any annotations yet.
    """
    try:
        filename = get_user_annotation_filename(username)

        local_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=filename,
            repo_type="dataset",
            token=HF_TOKEN,
            force_download=True,  # bypass the local cache to get the latest version
        )

        with open(local_path, "r", encoding="utf-8") as f:
            save_data = json.load(f)

        return {
            ann["task_id"]: ann["choice"]
            for ann in save_data.get("annotations", [])
        }
    except Exception as e:
        # Best-effort: a missing file is the expected case for a new user.
        # Log the cause so that network/auth/parse failures are not
        # indistinguishable from "no annotations yet".
        logger.warning(
            "Could not load annotations for user %r (%s: %s); returning empty results",
            username,
            type(e).__name__,
            e,
        )
        return {}