File size: 2,543 Bytes
6bf2582
 
 
2dfaa0b
63cf32e
6bf2582
2dfaa0b
63cf32e
6264532
6bf2582
2dfaa0b
 
 
 
 
6bf2582
2dfaa0b
 
6bf2582
 
2dfaa0b
6bf2582
2dfaa0b
6bf2582
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2dfaa0b
 
 
 
 
6bf2582
2dfaa0b
 
6bf2582
 
 
 
 
2dfaa0b
6bf2582
 
2dfaa0b
 
6bf2582
 
2dfaa0b
6bf2582
2dfaa0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bf2582
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import re
import os
import json
from huggingface_hub import HfApi, hf_hub_download
import config

# Read the access token from the HF_TOKEN environment variable (per the
# original note, supplied as a Space secret); os.getenv yields None when unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Repository id that annotation files are written to / read from, as
# configured in the project's config module.
REPO_ID = config.SAVE_REPO_ID

# Module-wide Hub client; used below to upload annotation files.
api = HfApi()


def get_user_annotation_filename(username: str) -> str:
    """Return the annotation file name used for ``username``.

    Each backslash, slash, ``*``, ``?``, ``:``, double quote, ``<``, ``>``
    or ``|`` in the username is replaced by an underscore before the name
    is embedded in ``annotation_results_<name>.json``.
    """
    unsafe_chars = re.compile(r'[\\/*?:"<>|]')
    safe_username = unsafe_chars.sub("_", username)
    return f"annotation_results_{safe_username}.json"


def save_annotations(username_state, annotation_results_state, tasks):
    """Serialize the annotation results and push them to the Hugging Face Hub.

    Builds a JSON document containing the task totals, the username and one
    record per ``(task_id, choice)`` pair in ``annotation_results_state``,
    then uploads it under the user's annotation file name to the ``REPO_ID``
    dataset repository.

    Returns:
        A status message string: a success message with the upload target
        and progress, or a failure message carrying the exception text.
        Exceptions raised along the way are caught, not propagated.
    """
    try:
        # Assemble the whole document in one literal; each record pairs the
        # user's choice with the task text looked up via tasks[task_id].
        payload = {
            "total_tasks": len(tasks),
            "completed_tasks": len(annotation_results_state),
            "username": username_state,
            "annotations": [
                {
                    "task_id": task_id,
                    "text": tasks[task_id]["text"],
                    "choice": choice,
                    "audioA_id": f"audioA_{task_id}",
                    "audioB_id": f"audioB_{task_id}",
                    "username": username_state
                }
                for task_id, choice in annotation_results_state.items()
            ]
        }

        # Keep the document in memory; it is uploaded as UTF-8 bytes.
        serialized = json.dumps(payload, ensure_ascii=False, indent=2)
        filename = get_user_annotation_filename(username_state)

        # Push the bytes straight to the Hub.
        api.upload_file(
            path_or_fileobj=serialized.encode("utf-8"),
            path_in_repo=filename,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN
        )

        return f"✅ 标注结果已上传到 {REPO_ID}/{filename}\n完成进度: {len(annotation_results_state)}/{len(tasks)}"

    except Exception as e:
        # Report any failure (data assembly or upload) as a status string.
        return f"❌ 上传失败: {str(e)}"


def load_annotations(username):
    """Load a user's saved annotation results from the Hugging Face Hub.

    Downloads the user's annotation file from the ``REPO_ID`` dataset
    repository (always forcing a fresh download) and rebuilds the mapping
    of task id to choice from the file's ``annotations`` list.

    Args:
        username: Name used to derive the annotation file name.

    Returns:
        A dict mapping each stored ``task_id`` to its ``choice``. An empty
        dict is returned when the file cannot be downloaded or parsed, which
        includes the case where the user has no annotation file yet.
    """
    # Function-scope import so this block does not depend on a file-level one.
    import logging

    try:
        filename = get_user_annotation_filename(username)
        # Fetch the user's annotation file.
        local_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=filename,
            repo_type="dataset",
            token=HF_TOKEN,
            force_download=True  # make sure the latest version is fetched
        )
        with open(local_path, "r", encoding="utf-8") as f:
            save_data = json.load(f)
        annotation_results = {ann["task_id"]: ann["choice"] for ann in save_data.get("annotations", [])}
        return annotation_results
    except Exception as exc:
        # A missing file (user has not annotated yet) is the expected case,
        # but auth, network and parse failures also end up here. Keep the
        # best-effort empty result and record the cause so those failures
        # can be told apart from "no file yet" instead of vanishing silently.
        logging.getLogger(__name__).warning(
            "Could not load annotations for %r (%s: %s); returning empty results",
            username,
            type(exc).__name__,
            exc,
        )
        return {}