Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files- annotation.py +255 -0
- app.py +198 -0
- config.py +12 -0
- data_processing.py +176 -0
- requirements.txt +4 -0
- task_manager.py +161 -0
- ui_components.py +119 -0
annotation.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from huggingface_hub import HfApi, hf_hub_download
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
import config
|
| 7 |
+
|
| 8 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 9 |
+
REPO_ID = config.SAVE_REPO_ID
|
| 10 |
+
|
| 11 |
+
api = HfApi()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def get_user_annotation_filename(username: str) -> str:
    """Return the name of the JSON file that stores *username*'s annotations.

    Backslash, slash, asterisk, question mark, colon, double quote, angle
    brackets and pipe are each replaced by an underscore before the name is
    embedded in the file name.

    Args:
        username: Name of the annotator.

    Returns:
        A string of the form ``annotation_results_<sanitized name>.json``.
    """
    unsafe_chars = re.compile(r'[\\/*?:"<>|]')
    sanitized = unsafe_chars.sub("_", username)
    return "annotation_results_" + sanitized + ".json"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_aggregated_filename() -> str:
    """Return the fixed file name used for the aggregated annotation results."""
    aggregated_name = "aggregated_annotations.json"
    return aggregated_name
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def save_annotations(username_state, annotation_results_state, tasks):
    """Save one user's annotations, then refresh the aggregated results.

    Args:
        username_state: Name of the annotator whose results are saved.
        annotation_results_state: Mapping of task index to the chosen label.
        tasks: List of task dictionaries the indices refer to.

    Returns:
        The two status messages joined by a newline, or a failure message
        when either step raises.
    """
    try:
        personal_msg = save_individual_annotations(username_state, annotation_results_state, tasks)
        aggregate_msg = update_aggregated_annotations(tasks)
        return "{}\n{}".format(personal_msg, aggregate_msg)
    except Exception as exc:
        # Errors are reported to the caller as text instead of propagating.
        return "❌ 保存失败: " + str(exc)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def save_individual_annotations(username_state, annotation_results_state, tasks):
    """Upload one user's annotations as a JSON file to the results dataset repo.

    Args:
        username_state: Name of the annotator.
        annotation_results_state: Mapping of task index to the chosen label.
        tasks: List of task dictionaries; each annotated index is looked up here.

    Returns:
        A status message containing the file name and the completed/total counts.

    Raises:
        Whatever the task lookup or the upload raises; nothing is caught here.
    """
    payload = {
        "total_tasks": len(tasks),
        "completed_tasks": len(annotation_results_state),
        "username": username_state,
        "annotations": [],
    }

    records = payload["annotations"]
    for task_id, choice in annotation_results_state.items():
        task = tasks[task_id]
        source_a = task["audioA_source"]
        source_b = task["audioB_source"]
        record = {
            "task_id": task_id,
            "text": task["text"],
            "instruction": task["instruction"],
            "comparison": f"{source_a} vs {source_b}",
            "audioA_source": source_a,
            "audioB_source": source_b,
            "original_index": task["original_index"],
            "choice": choice,
            "username": username_state,
        }
        records.append(record)

    filename = get_user_annotation_filename(username_state)
    encoded = json.dumps(payload, ensure_ascii=False, indent=2).encode("utf-8")

    # The JSON document is uploaded from memory; no local file is written.
    api.upload_file(
        path_or_fileobj=encoded,
        path_in_repo=filename,
        repo_id=REPO_ID,
        repo_type="dataset",
        token=HF_TOKEN,
    )

    done = len(annotation_results_state)
    total = len(tasks)
    return f"✅ 个人标注已保存: {filename} ({done}/{total})"
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def update_aggregated_annotations(tasks):
    """Rebuild the aggregated results from all user files and upload them.

    Args:
        tasks: List of task dictionaries, forwarded to build_aggregated_results.

    Returns:
        A success message naming the uploaded file, or a failure message
        when any step raises.
    """
    try:
        # The aggregate is rebuilt from scratch out of the per-user files, so
        # the previously stored aggregate does not need to be downloaded first.
        all_annotations = collect_all_annotations()
        aggregated_data = build_aggregated_results(all_annotations, tasks)

        save_str = json.dumps(aggregated_data, ensure_ascii=False, indent=2)
        filename = get_aggregated_filename()

        api.upload_file(
            path_or_fileobj=save_str.encode("utf-8"),
            path_in_repo=filename,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )

        return f"✅ 聚合结果已更新: {filename}"

    except Exception as e:
        return f"❌ 聚合结果更新失败: {str(e)}"
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def collect_all_annotations():
    """Download every per-user annotation file and merge their annotations.

    Files in the results repo whose name starts with ``annotation_results_``
    and ends with ``.json`` are fetched with a forced download. A file that
    cannot be fetched or parsed is reported and skipped.

    Returns:
        A flat list of annotation records from all users, or an empty list
        when listing the repo (or anything outside the per-file handling) fails.
    """
    try:
        repo_files = api.list_repo_files(repo_id=REPO_ID, repo_type="dataset", token=HF_TOKEN)

        collected = []
        for name in repo_files:
            is_user_file = name.startswith("annotation_results_") and name.endswith(".json")
            if not is_user_file:
                continue

            try:
                # Fetch the user's file and read its annotation list.
                cached_path = hf_hub_download(
                    repo_id=REPO_ID,
                    filename=name,
                    repo_type="dataset",
                    token=HF_TOKEN,
                    force_download=True,
                )
                with open(cached_path, "r", encoding="utf-8") as handle:
                    payload = json.load(handle)
                collected.extend(payload.get("annotations", []))
            except Exception as exc:
                print(f"加载文件 {name} 失败: {exc}")

        return collected

    except Exception as exc:
        print(f"收集标注失败: {exc}")
        return []
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def build_aggregated_results(all_annotations, tasks):
    """Group annotation records by source item and tally the votes.

    A record is counted only when it has an ``original_index``, a non-empty
    ``comparison`` and a ``choice`` of ``"win"``, ``"tie"`` or ``"lose"``.
    Records failing that check are ignored entirely, so they never produce a
    result entry without votes.

    Args:
        all_annotations: Iterable of annotation dictionaries.
        tasks: Unused; kept so existing callers keep working.

    Returns:
        A dictionary with ``total_groups`` (number of result entries),
        ``total_annotations`` (length of *all_annotations*, including ignored
        records) and ``results`` (entries sorted by ``original_index``). Each
        entry maps every comparison label to its ``[win, tie, lose]`` counts
        and the list of distinct annotators.
    """
    valid_choices = ("win", "tie", "lose")
    groups = {}

    for ann in all_annotations:
        original_index = ann.get("original_index")
        comparison = ann.get("comparison")
        choice = ann.get("choice")

        if original_index is None or not comparison or choice not in valid_choices:
            continue

        group = groups.setdefault(
            original_index, {"text": "", "instruction": "", "comparisons": {}}
        )
        # The most recently seen record supplies the group's text and instruction.
        group["text"] = ann.get("text", "")
        group["instruction"] = ann.get("instruction", "")

        votes = group["comparisons"].setdefault(
            comparison, {"win": 0, "tie": 0, "lose": 0, "annotators": []}
        )
        votes[choice] += 1

        username = ann.get("username")
        if username not in votes["annotators"]:
            votes["annotators"].append(username)

    aggregated_results = []
    for original_index in sorted(groups):
        group = groups[original_index]
        aggregated_results.append({
            "original_index": original_index,
            "text": group["text"],
            "instruction": group["instruction"],
            "comparisons": {
                comparison: {
                    "votes(win tie lose)": [votes["win"], votes["tie"], votes["lose"]],
                    "total_annotators": len(votes["annotators"]),
                    "annotators": votes["annotators"],
                }
                for comparison, votes in group["comparisons"].items()
            },
        })

    return {
        "total_groups": len(aggregated_results),
        "total_annotations": len(all_annotations),
        "results": aggregated_results,
    }
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def load_aggregated_annotations():
    """Download and parse the aggregated results file from the results repo.

    Returns:
        The parsed JSON content, or an empty aggregate
        (``total_groups`` 0, ``total_annotations`` 0, no results) when the
        download or parsing fails for any reason.
    """
    try:
        cached_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=get_aggregated_filename(),
            repo_type="dataset",
            token=HF_TOKEN,
            force_download=True,
        )
        with open(cached_path, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
        return parsed
    except Exception:
        # Best effort: any failure yields an empty aggregate.
        return {"total_groups": 0, "total_annotations": 0, "results": []}
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def load_annotations(username):
    """Load one user's saved annotations from the Hugging Face Hub.

    Args:
        username: Name of the annotator whose file is fetched.

    Returns:
        A mapping of ``task_id`` to ``choice`` built from the stored
        annotation list, or an empty dict when the file cannot be downloaded
        or parsed.
    """
    try:
        filename = get_user_annotation_filename(username)
        local_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=filename,
            repo_type="dataset",
            token=HF_TOKEN,
            force_download=True,
        )
        with open(local_path, "r", encoding="utf-8") as f:
            save_data = json.load(f)
        return {ann["task_id"]: ann["choice"] for ann in save_data.get("annotations", [])}
    except Exception as e:
        # Still best-effort, but the reason is reported: an empty result makes
        # the user look new, and a later save would then replace the stored file.
        print(f"加载用户 {username} 的标注失败: {e}")
        return {}
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def get_aggregated_stats():
    """Summarise the stored aggregate into per-comparison vote totals.

    Returns:
        A dictionary with ``total_groups``, ``total_annotations`` and
        ``comparison_summary`` (comparison label -> ``total_votes``, ``win``,
        ``tie``, ``lose``), or ``{"error": <message>}`` when anything raises.
    """
    try:
        aggregated_data = load_aggregated_annotations()

        stats = {
            "total_groups": aggregated_data.get("total_groups", 0),
            "total_annotations": aggregated_data.get("total_annotations", 0),
            "comparison_summary": {},
        }
        summary = stats["comparison_summary"]

        for result in aggregated_data.get("results", []):
            for comparison, data in result.get("comparisons", {}).items():
                entry = summary.setdefault(
                    comparison, {"total_votes": 0, "win": 0, "tie": 0, "lose": 0}
                )

                # build_aggregated_results stores the tallies under
                # "votes(win tie lose)"; plain "votes" is accepted as a fallback.
                votes = data.get("votes(win tie lose)", data.get("votes", [0, 0, 0]))
                entry["win"] += votes[0]
                entry["tie"] += votes[1]
                entry["lose"] += votes[2]
                entry["total_votes"] += sum(votes)

        return stats

    except Exception as e:
        return {"error": str(e)}
|
app.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import config
|
| 3 |
+
from ui_components import create_ui
|
| 4 |
+
from annotation import load_annotations
|
| 5 |
+
from task_manager import get_current_task, get_current_task_with_annotations, get_total_tasks
|
| 6 |
+
|
| 7 |
+
css = """
|
| 8 |
+
.center { text-align: center; }
|
| 9 |
+
.audio-container { margin: 10px; padding: 15px; }
|
| 10 |
+
|
| 11 |
+
/* 胜负选择样式 - 绿色 */
|
| 12 |
+
.selected {
|
| 13 |
+
border: 3px solid #4CAF50 !important;
|
| 14 |
+
background-color: #e8f5e9 !important;
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
/* 平局选择样式 - 橙色 */
|
| 18 |
+
.tie-selected {
|
| 19 |
+
border: 3px solid #FF9800 !important;
|
| 20 |
+
background-color: #fff3e0 !important;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
.user-task-info {
|
| 24 |
+
font-size: 16px;
|
| 25 |
+
color: #333;
|
| 26 |
+
padding: 10px;
|
| 27 |
+
background-color: #f0f0f0;
|
| 28 |
+
border-radius: 5px;
|
| 29 |
+
display: flex;
|
| 30 |
+
justify-content: space-between;
|
| 31 |
+
align-items: center;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
.progress-bar {
|
| 35 |
+
background-color: #e0e0e0;
|
| 36 |
+
border-radius: 10px;
|
| 37 |
+
height: 8px;
|
| 38 |
+
width: 200px;
|
| 39 |
+
margin: 0 10px;
|
| 40 |
+
position: relative;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
.progress-fill {
|
| 44 |
+
background-color: #4CAF50;
|
| 45 |
+
height: 100%;
|
| 46 |
+
border-radius: 10px;
|
| 47 |
+
transition: width 0.3s ease;
|
| 48 |
+
}
|
| 49 |
+
"""
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def calculate_completion_stats(annotation_results):
    """Compute how many tasks the user has completed.

    The total task count is reported even when nothing has been annotated
    yet, so the caller can show "0/<total>" instead of "0/0".

    Args:
        annotation_results: Mapping of task index to choice; may be empty or None.

    Returns:
        A tuple ``(completed_tasks, total_tasks, completion_rate)`` where the
        rate is a percentage, 0.0 when there are no tasks.
    """
    total_tasks = get_total_tasks()
    completed_tasks = len(annotation_results) if annotation_results else 0
    completion_rate = (completed_tasks / total_tasks * 100) if total_tasks > 0 else 0.0

    return completed_tasks, total_tasks, completion_rate
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def get_initial_task_position(annotation_results):
    """Pick the task index a returning user should start on.

    Args:
        annotation_results: Mapping of task index to choice; may be empty or None.

    Returns:
        0 when there are no annotations; otherwise the index after the
        highest annotated one, capped at the last task index.
    """
    if not annotation_results:
        return 0

    # Iterating a dict yields its keys, i.e. the annotated task indices.
    candidate = max(annotation_results) + 1
    last_index = get_total_tasks() - 1
    if candidate <= last_index:
        return candidate
    return last_index
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def create_task_info_html(username, annotation_results, current_task_num):
    """Build the HTML banner with the user name, progress and current task.

    Args:
        username: Name shown as the current user.
        annotation_results: Mapping of task index to choice, used for the
            completion figures.
        current_task_num: Task number displayed in the banner.

    Returns:
        An HTML string containing the user name, the completed/total counts
        with percentage, a progress bar and the current task number.
    """
    done_count, task_count, percent_done = calculate_completion_stats(annotation_results)

    # Progress bar whose fill width is the completion percentage.
    bar_markup = f"""
<div class="progress-bar">
<div class="progress-fill" style="width: {percent_done}%;"></div>
</div>
"""

    return f"""
<div class="user-task-info">
<span>👤 当前用户: {username}</span>
<div style="display: flex; align-items: center;">
<span>完成度: {done_count}/{task_count} ({percent_done:.1f}%)</span>
{bar_markup}
</div>
<span><strong>当前任务: {current_task_num}</strong></span>
</div>
"""
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def set_user_info(request: gr.Request):
    """Initialise the per-session state for the logged-in user.

    Loads the user's saved annotations, chooses the starting task and builds
    the initial values for the task widgets.

    Args:
        request: The Gradio request of the page load.

    Returns:
        A 10-tuple: username, annotation results, starting task index,
        instruction, text, audio A update, audio B update, previous-button
        update, next-button update and the task-info HTML update.
    """
    # The attribute can exist but hold None (or be empty) when nobody is
    # logged in; fall back to "unknown" so the file-name helper gets a string.
    username = getattr(request, "username", None) or "unknown"
    annotation_results = load_annotations(username)
    print(f"加载用户 {username} 的标注结果:{annotation_results}")

    user_current_task = get_initial_task_position(annotation_results)

    inst, text, audioA_update, audioB_update, prev_disabled, next_disabled, task_num = (
        get_current_task_with_annotations(annotation_results, user_current_task)
    )

    # Banner HTML including the completion figures.
    task_info_html = create_task_info_html(username, annotation_results, task_num)

    return (
        username,
        annotation_results,
        user_current_task,
        inst,
        text,
        audioA_update,
        audioB_update,
        gr.update(interactive=not prev_disabled),
        gr.update(interactive=not next_disabled),
        gr.update(value=task_info_html),
    )
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def update_task_info_after_action(username, annotation_results, current_task_num):
    """Rebuild the task-info banner HTML after a user action.

    Thin wrapper that forwards its arguments to create_task_info_html and
    returns the resulting HTML string.
    """
    refreshed_html = create_task_info_html(username, annotation_results, current_task_num)
    return refreshed_html
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
# Script entry point: build the Gradio UI, wire the event handlers and launch
# the app with the username/password pairs from config.ANNOTATOR.
if __name__ == "__main__":
    print("启动应用...")
    with gr.Blocks(css=css) as demo:
        # Per-session state: user name, task-index -> choice mapping, and the
        # index of the task currently shown.
        username = gr.State(value="unknown")
        annotation_results = gr.State(value={})
        user_current_task = gr.State(value=0)

        # create_ui returns a mapping of widgets and handler callables
        # (looked up by key below).
        ui_components = create_ui(get_current_task(), username, annotation_results)

        # On page load, fill the states and widgets for the logged-in user.
        # The outputs are in the same order as set_user_info's return tuple.
        demo.load(
            set_user_info,
            inputs=None,
            outputs=[
                username,
                annotation_results,
                user_current_task,
                ui_components["instruction"],
                ui_components["text_box"],
                ui_components["audioA"],
                ui_components["audioB"],
                ui_components["btn_prev"],
                ui_components["btn_next"],
                ui_components["task_number"]
            ]
        )

        # Vote buttons: record the choice and refresh the task info as well.
        ui_components["btn_win"].click(
            ui_components["select_result"],
            inputs=[gr.State("win"), ui_components["audioA"], ui_components["audioB"], annotation_results, username,
                    user_current_task],
            outputs=[ui_components["audioA"], ui_components["audioB"], annotation_results, ui_components["task_number"]]
        )

        ui_components["btn_tie"].click(
            ui_components["select_result"],
            inputs=[gr.State("tie"), ui_components["audioA"], ui_components["audioB"], annotation_results, username,
                    user_current_task],
            outputs=[ui_components["audioA"], ui_components["audioB"], annotation_results, ui_components["task_number"]]
        )

        ui_components["btn_lose"].click(
            ui_components["select_result"],
            inputs=[gr.State("lose"), ui_components["audioA"], ui_components["audioB"], annotation_results, username,
                    user_current_task],
            outputs=[ui_components["audioA"], ui_components["audioB"], annotation_results, ui_components["task_number"]]
        )

        # Navigation buttons: move to the previous / next task.
        ui_components["btn_prev"].click(
            ui_components["change_task"],
            inputs=[gr.State("prev"), annotation_results, username, user_current_task],
            outputs=[ui_components["instruction"], ui_components["text_box"], ui_components["audioA"],
                     ui_components["audioB"], ui_components["btn_prev"], ui_components["btn_next"],
                     ui_components["task_number"], annotation_results, user_current_task]
        )

        ui_components["btn_next"].click(
            ui_components["change_task"],
            inputs=[gr.State("next"), annotation_results, username, user_current_task],
            outputs=[ui_components["instruction"], ui_components["text_box"], ui_components["audioA"],
                     ui_components["audioB"], ui_components["btn_prev"], ui_components["btn_next"],
                     ui_components["task_number"], annotation_results, user_current_task]
        )

    # NOTE(review): the original indentation is not recoverable here; launch is
    # assumed to sit after the Blocks context — confirm.
    demo.launch(auth=config.ANNOTATOR)
|
config.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# config.py

# Hugging Face dataset configuration
# PROCESS_REPO_ID is the dataset loaded by data_processing.py;
# SAVE_REPO_ID is the dataset repo annotation.py uploads results to.
PROCESS_REPO_ID = "sunnyzjx/Test_dataset"
SAVE_REPO_ID = "sunnyzjx/annotation_results"

# Dataset field-name configuration
# AUDIO_FIELDS lists the audio columns that are compared pairwise.
AUDIO_FIELDS = ['model1', 'model2']
FIELD_TEXT = "text"
FIELD_INSTRUCTION = "instruction"

# (username, password) pairs passed to demo.launch(auth=...) in app.py.
# NOTE(review): credentials are hard-coded in plain text in a committed file —
# consider loading them from environment variables or secrets instead.
ANNOTATOR = [('zjx', '123'), ('wy', '123')]
|
data_processing.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from datasets import load_dataset
|
| 3 |
+
import os
|
| 4 |
+
import config
|
| 5 |
+
from itertools import combinations
|
| 6 |
+
import random
|
| 7 |
+
|
| 8 |
+
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "true"
|
| 9 |
+
|
| 10 |
+
dataset = load_dataset(config.PROCESS_REPO_ID, split="train")
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def process_audio(audio_obj):
    """Extract a 1-D waveform and integer sample rate from an audio object.

    The object must provide ``get_all_samples()``, whose result exposes
    ``data`` and ``sample_rate``. Data that is not already a NumPy array is
    converted to float32, and data with more than one dimension is averaged
    over axis 0 (presumably the channel axis — confirm against the decoder).

    Args:
        audio_obj: Decoded audio object from the dataset row.

    Returns:
        ``(audio_data, sample_rate)``, or ``(None, None)`` when the object
        lacks ``get_all_samples`` or any step raises.
    """
    try:
        if not hasattr(audio_obj, 'get_all_samples'):
            print("音频对象缺少 get_all_samples 方法")
            return None, None

        decoded = audio_obj.get_all_samples()

        waveform = decoded.data
        if not isinstance(waveform, np.ndarray):
            waveform = np.array(waveform, dtype=np.float32)

        rate = decoded.sample_rate
        if not isinstance(rate, int):
            rate = int(rate)

        if waveform.ndim > 1:
            waveform = waveform.mean(axis=0)

        return waveform, rate
    except Exception as exc:
        print(f"处理音频失败: {exc}")
        return None, None
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def generate_random_pairs(audio_fields, include_reverse=True, shuffle_order=True):
    """Build one comparison pair per unordered combination of audio fields.

    Args:
        audio_fields: List of audio field names.
        include_reverse: When true, each pair is independently swapped with
            probability one half (a pair appears once, in one of its two
            orders). When false, pairs keep their combination order.
        shuffle_order: When true, the list of pairs is shuffled.

    Returns:
        A list of ``(field_a, field_b)`` tuples.
    """
    pairs = list(combinations(audio_fields, 2))

    if include_reverse:
        # One coin flip per pair decides whether it is presented swapped.
        pairs = [
            (second, first) if random.choice([True, False]) else (first, second)
            for first, second in pairs
        ]

    if shuffle_order:
        random.shuffle(pairs)

    return pairs
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def generate_all_permutations(audio_fields, shuffle_order=True):
    """Build every ordered pair of distinct positions in *audio_fields*.

    Args:
        audio_fields: List of audio field names.
        shuffle_order: When true, the list of pairs is shuffled.

    Returns:
        A list of ``(field_a, field_b)`` tuples covering both orders of each
        pair; an element is never paired with itself (by position).
    """
    pairs = [
        (field_a, field_b)
        for pos_a, field_a in enumerate(audio_fields)
        for pos_b, field_b in enumerate(audio_fields)
        if pos_a != pos_b  # never compare a field with itself
    ]

    if shuffle_order:
        random.shuffle(pairs)

    return pairs
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def load_tasks(comparison_mode="random_reverse", seed=None):
    """Build pairwise comparison tasks from the configured audio fields.

    Each dataset row with at least two usable audio fields yields one task
    per generated pair.

    Args:
        comparison_mode: How pairs are generated for each row:
            - "fixed": combinations in a fixed order.
            - "random_reverse": each combination is randomly swapped or not.
            - "all_permutations": every ordered pair.
        seed: Seed for the module-level random generator; only needed when
            the result must be reproducible.

    Returns:
        A list of task dictionaries with the keys ``instruction``, ``text``,
        ``audioA``, ``audioB``, ``audioA_source``, ``audioB_source``,
        ``comparison`` and ``original_index``.

    Raises:
        ValueError: If *comparison_mode* is not one of the supported modes.
        SystemExit: With status 1 when no task could be generated.
    """
    if seed is not None:
        random.seed(seed)
        print(f"使用随机种子: {seed}")
    else:
        print("使用真随机模式")

    print("处理数据集...")

    audio_fields = config.AUDIO_FIELDS
    text_field = config.FIELD_TEXT
    instruction_field = config.FIELD_INSTRUCTION

    print(f"使用音频字段: {audio_fields}")
    print(f"文本字段: {text_field}")
    print(f"指令字段: {instruction_field}")
    print(f"比较模式: {comparison_mode}")

    tasks = []

    for i, row in enumerate(dataset):
        processed_audios = {}
        for field in audio_fields:
            if field not in row or row[field] is None:
                print(f"任务 {i} 缺少音频字段: {field}")
                continue

            audio_data, audio_rate = process_audio(row[field])
            if (audio_data is not None and audio_rate is not None and
                    isinstance(audio_data, np.ndarray) and isinstance(audio_rate, int)):
                processed_audios[field] = (audio_data, audio_rate)
            else:
                print(f"任务 {i} 的音频字段 {field} 处理失败")

        # A comparison needs at least two usable audio clips.
        if len(processed_audios) < 2:
            print(f"跳过任务 {i}:有效音频数量不足")
            continue

        text = row.get(text_field, '')
        instruction = row.get(instruction_field, '请比较这两个音频的质量')

        available_fields = list(processed_audios.keys())

        if comparison_mode == "fixed":
            pairs = list(combinations(available_fields, 2))
        elif comparison_mode == "random_reverse":
            pairs = generate_random_pairs(available_fields, include_reverse=True, shuffle_order=True)
        elif comparison_mode == "all_permutations":
            pairs = generate_all_permutations(available_fields, shuffle_order=True)
        else:
            raise ValueError(f"未知的比较模式: {comparison_mode}")

        for field_a, field_b in pairs:
            tasks.append({
                "instruction": instruction,
                "text": text,
                "audioA": processed_audios[field_a],
                "audioB": processed_audios[field_b],
                "audioA_source": field_a,
                "audioB_source": field_b,
                "comparison": f"{field_a} vs {field_b}",
                "original_index": i,
            })

    print(f"成功生成 {len(tasks)} 个比较任务")
    if not tasks:
        print("没有可用任务!")
        # The site-provided exit() helper is meant for interactive use and
        # reported success; signal the failure with a non-zero status instead.
        raise SystemExit(1)

    comparison_counts = {}
    for task in tasks:
        comp = task["comparison"]
        comparison_counts[comp] = comparison_counts.get(comp, 0) + 1

    print("比较任务统计:")
    for comp, count in sorted(comparison_counts.items()):
        print(f" {comp}: {count} 个任务")

    return tasks
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.44.0
|
| 2 |
+
numpy
|
| 3 |
+
datasets
|
| 4 |
+
huggingface_hub
|
task_manager.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from data_processing import load_tasks
|
| 3 |
+
from annotation import save_annotations
|
| 4 |
+
|
| 5 |
+
tasks = load_tasks(comparison_mode="random_reverse", seed=42)
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_current_task_with_annotations(annotation_results, user_current_task=0):
    """Return the display data for a task, styled by its saved choice.

    Used for the initial page load.

    Args:
        annotation_results: Mapping of task index to choice; may be empty or None.
        user_current_task: Index of the task to show.

    Returns:
        A 7-tuple: instruction, text, audio A update, audio B update,
        whether this is the first task, whether this is the last task, and
        the 1-based task number.
    """
    task = tasks[user_current_task]
    saved_choice = annotation_results.get(user_current_task) if annotation_results else None

    # Tasks store audio as (data, rate); the audio value is passed as (rate, data).
    data_a, rate_a = task["audioA"][0], task["audioA"][1]
    data_b, rate_b = task["audioB"][0], task["audioB"][1]

    # CSS classes for the two players, depending on the saved choice.
    if saved_choice == "win":
        class_a, class_b = "selected", ""
    elif saved_choice == "lose":
        class_a, class_b = "", "selected"
    elif saved_choice == "tie":
        class_a, class_b = "tie-selected", "tie-selected"
    else:
        class_a, class_b = "", ""

    styled_a = gr.update(value=(rate_a, data_a), elem_classes=class_a)
    styled_b = gr.update(value=(rate_b, data_b), elem_classes=class_b)

    is_first = user_current_task == 0
    is_last = user_current_task == len(tasks) - 1
    return (
        task["instruction"],
        task["text"],
        styled_a,
        styled_b,
        is_first,
        is_last,
        user_current_task + 1,
    )
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def get_current_task(user_current_task=0, annotation_results=None, styled=False):
    """Return the display data for a task, optionally styled by its choice.

    Args:
        user_current_task: Index of the task to show.
        annotation_results: Mapping of task index to choice, or None.
        styled: When true and *annotation_results* is not None, the audio
            entries are returned as styled updates.

    Returns:
        Styled: a 7-tuple of instruction, text, audio A update, audio B
        update, is-first flag, is-last flag and 1-based task number.
        Unstyled: a 9-tuple of instruction, text, audio A data, audio A rate,
        audio B data, audio B rate, is-first flag, is-last flag and 1-based
        task number.
    """
    task = tasks[user_current_task]

    if not styled or annotation_results is None:
        # Raw form: audio data and rate are returned as separate items.
        return (
            task["instruction"],
            task["text"],
            task["audioA"][0],
            task["audioA"][1],
            task["audioB"][0],
            task["audioB"][1],
            user_current_task == 0,
            user_current_task == len(tasks) - 1,
            user_current_task + 1,
        )

    saved_choice = annotation_results.get(user_current_task)
    # Tasks store audio as (data, rate); the audio value is passed as (rate, data).
    value_a = (task["audioA"][1], task["audioA"][0])
    value_b = (task["audioB"][1], task["audioB"][0])

    if saved_choice == "win":
        class_a, class_b = "selected", ""
    elif saved_choice == "lose":
        class_a, class_b = "", "selected"
    elif saved_choice == "tie":
        class_a, class_b = "tie-selected", "tie-selected"
    else:
        class_a, class_b = "", ""

    styled_a = gr.update(value=value_a, elem_classes=class_a)
    styled_b = gr.update(value=value_b, elem_classes=class_b)

    return (
        task["instruction"],
        task["text"],
        styled_a,
        styled_b,
        user_current_task == 0,
        user_current_task == len(tasks) - 1,
        user_current_task + 1,
    )
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def apply_selection_style(audioA, audioB, choice):
    """Build the highlight updates for both audio players from ``choice``.

    * ``"win"``  -> A gets the ``selected`` class, B is cleared.
    * ``"lose"`` -> B gets the ``selected`` class, A is cleared.
    * ``"tie"``  -> both get the ``tie-selected`` class.
    * anything else -> both classes are cleared.

    Returns a pair ``(audioA_update, audioB_update)`` of ``gr.update``
    objects; each one re-sends the given audio value together with its class.
    """
    if choice == "tie":
        class_a = class_b = "tie-selected"
    else:
        # Only the winning side is highlighted; every other case clears it.
        class_a = "selected" if choice == "win" else ""
        class_b = "selected" if choice == "lose" else ""

    return (
        gr.update(value=audioA, elem_classes=class_a),
        gr.update(value=audioB, elem_classes=class_b),
    )
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def select_result(choice, audioA, audioB, annotation_results, username, user_current_task):
    """Record ``choice`` for the current task, save, and return highlight updates.

    The choice is written into ``annotation_results`` (mutated in place) under
    the key ``user_current_task``; the full result set is then passed to
    ``save_annotations`` and the outcome is printed.

    Returns ``(audioA_update, audioB_update, annotation_results)``.
    """
    annotation_results[user_current_task] = choice

    # Persist after every single selection.
    outcome = save_annotations(username, annotation_results, tasks)
    print(f"自动保存结果: {outcome}")

    highlighted = apply_selection_style(audioA, audioB, choice)
    audioA_update, audioB_update = highlighted
    return audioA_update, audioB_update, annotation_results
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def change_task(direction, annotation_results, username, user_current_task):
    """Move to the previous/next task and return the refreshed UI values.

    Args:
        direction: ``"prev"`` or ``"next"``; any other value, or a move past
            either end of the task list, keeps the current index.
        annotation_results: mapping of task index to stored choice. ``None``
            is treated as an empty mapping.
        username: name shown in the task-info banner (HTML-escaped).
        user_current_task: 0-based index of the task currently displayed.

    Returns:
        A 9-tuple ``(instruction, text, audioA_update, audioB_update,
        prev_button_update, next_button_update, task_info_update,
        annotation_results, new_task_index)``.
    """
    from html import escape as _escape_html

    # With ``None`` the styled lookup below would fall back to its 9-element
    # plain result and break the 7-name unpacking, so normalise first.
    if annotation_results is None:
        annotation_results = {}

    new_user_current_task = user_current_task
    if direction == "prev" and user_current_task > 0:
        new_user_current_task = user_current_task - 1
    elif direction == "next" and user_current_task < len(tasks) - 1:
        new_user_current_task = user_current_task + 1

    (
        inst,
        text,
        audioA_update,
        audioB_update,
        prev_disabled,
        next_disabled,
        task_num,
    ) = get_current_task(new_user_current_task, annotation_results, styled=True)

    total_tasks = get_total_tasks()

    # The username ends up inside raw HTML, so escape it to keep any markup
    # characters in the name from being interpreted by the browser.
    safe_username = _escape_html(str(username))
    combined_task_info = (
        f'<div class="user-task-info"><span>👤 当前用户: {safe_username}</span>'
        f'<span><strong>任务编号: {task_num} / {total_tasks}</strong></span></div>'
    )

    return (
        inst,
        text,
        audioA_update,
        audioB_update,
        gr.update(interactive=not prev_disabled),
        gr.update(interactive=not next_disabled),
        gr.update(value=combined_task_info),
        annotation_results,
        new_user_current_task,
    )
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def get_total_tasks():
    """Return how many tasks the module-level ``tasks`` collection holds."""
    task_count = len(tasks)
    return task_count
|
ui_components.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from task_manager import get_current_task, select_result, change_task, tasks, get_total_tasks
|
| 3 |
+
from annotation import save_annotations
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def calculate_completion_stats(annotation_results):
    """Compute progress figures for the annotation session.

    Args:
        annotation_results: mapping of task index to stored choice; may be
            empty or ``None`` when nothing has been annotated yet.

    Returns:
        ``(completed_tasks, total_tasks, completion_rate)`` where
        ``completion_rate`` is a percentage in the 0-100 scale and is ``0.0``
        when there are no tasks at all.
    """
    # Always query the real total: an empty result set means "0 of N done",
    # not "0 of 0", so the banner shows the right denominator from the start.
    total_tasks = get_total_tasks()
    completed_tasks = len(annotation_results) if annotation_results else 0
    completion_rate = (completed_tasks / total_tasks * 100) if total_tasks > 0 else 0.0

    return completed_tasks, total_tasks, completion_rate
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def create_task_info_html(username, annotation_results, current_task_num):
    """Build the HTML banner showing user, completion progress and current task.

    Args:
        username: name to display; HTML-escaped before being embedded.
        annotation_results: mapping passed to ``calculate_completion_stats``
            to derive the completed/total counts and the percentage.
        current_task_num: task number to display as-is.

    Returns:
        An HTML string containing a single styled ``<div>``.
    """
    from html import escape as _escape_html

    completed_tasks, total_tasks, completion_rate = calculate_completion_stats(annotation_results)

    # The banner is rendered as raw HTML, so neutralise markup characters in
    # the user-supplied name instead of injecting them verbatim.
    safe_username = _escape_html(str(username))

    task_info_html = f"""
    <div style="font-size: 16px; color: #333; padding: 10px; background-color: #f0f0f0; border-radius: 5px; display: flex; justify-content: space-between; align-items: center;">
        <span>👤 用户: {safe_username}</span>
        <span>完成度: {completed_tasks}/{total_tasks} ({completion_rate:.1f}%)</span>
        <span><strong>当前任务: {current_task_num}</strong></span>
    </div>
    """

    return task_info_html
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def create_ui(init_task, username, annotation_results):
    """Create the Gradio components of the annotation page.

    ``init_task`` must be the 9-element tuple ``(instruction, text,
    audioA_data, audioA_rate, audioB_data, audioB_rate, prev_disabled,
    next_disabled, task_number)``. The ``username`` and ``annotation_results``
    arguments are not read here; the progress banner starts with a
    placeholder user and an empty result set.

    Returns a dict with the created components plus three callables
    (``select_result``, ``change_task``, ``save_annotations``) for the caller
    to wire to events.
    """
    (
        first_inst,
        first_text,
        first_a_data,
        first_a_rate,
        first_b_data,
        first_b_rate,
        first_prev_off,
        first_next_off,
        first_task_num,
    ) = init_task

    gr.Markdown('<div class="center"><h2>🎵 音频对比标注平台</h2></div>')
    user_display = gr.Markdown()

    instruction = gr.Textbox(label="🎯 指令", value=first_inst, interactive=False)
    text_box = gr.Textbox(label="📋 转录文本", value=first_text, interactive=False)

    # Progress banner, rendered through an HTML component rather than Markdown.
    banner_html = create_task_info_html("unknown", {}, first_task_num)
    task_number = gr.HTML(value=banner_html, elem_classes="center")

    with gr.Row():
        with gr.Column(elem_classes="audio-container"):
            audioA = gr.Audio(
                label="🔊 音频 A",
                value=(first_a_rate, first_a_data),
                interactive=False,
                type="numpy",
            )
        with gr.Column(elem_classes="audio-container"):
            audioB = gr.Audio(
                label="🔊 音频 B",
                value=(first_b_rate, first_b_data),
                interactive=False,
                type="numpy",
            )

    with gr.Row():
        btn_win = gr.Button("🏆 Win", variant="primary")
        btn_tie = gr.Button("🤝 Tie", variant="secondary")
        btn_lose = gr.Button("❌ Lose", variant="stop")

    with gr.Row():
        btn_prev = gr.Button("⬅️ 上一题", interactive=not first_prev_off)
        btn_next = gr.Button("➡️ 下一题", interactive=not first_next_off)

    def wrapped_select_result(choice, audioA, audioB, annotation_results, username, user_current_task):
        """Run ``select_result`` and additionally refresh the progress banner."""
        styled_a, styled_b, new_results = select_result(
            choice, audioA, audioB, annotation_results, username, user_current_task
        )

        # Still on the same task: only the completion figures can change.
        # The displayed task number is the 0-based index shifted by one.
        banner = create_task_info_html(username, new_results, user_current_task + 1)

        return styled_a, styled_b, new_results, gr.update(value=banner)

    def wrapped_change_task(direction, annotation_results, username, user_current_task):
        """Run ``change_task`` and swap its banner for the progress banner."""
        result = change_task(direction, annotation_results, username, user_current_task)

        # Anything shorter than the expected nine fields is passed through as-is.
        if len(result) < 9:
            return result

        (
            inst,
            text,
            audio_a_update,
            audio_b_update,
            prev_update,
            next_update,
            _original_banner,
            new_results,
            new_index,
        ) = result[:9]

        # Rebuild the banner from the updated index (shown 1-based).
        banner = create_task_info_html(username, new_results, new_index + 1)

        return (
            inst,
            text,
            audio_a_update,
            audio_b_update,
            prev_update,
            next_update,
            gr.update(value=banner),
            new_results,
            new_index,
        )

    return {
        "user_display": user_display,
        "instruction": instruction,
        "text_box": text_box,
        "task_number": task_number,
        "audioA": audioA,
        "audioB": audioB,
        "btn_win": btn_win,
        "btn_tie": btn_tie,
        "btn_lose": btn_lose,
        "btn_prev": btn_prev,
        "btn_next": btn_next,
        "select_result": wrapped_select_result,
        "change_task": wrapped_change_task,
        "save_annotations": lambda u, a: save_annotations(u, a, tasks),
    }
|