Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -90,7 +90,7 @@ DIMENSIONS_DATA = [
|
|
| 90 |
|
| 91 |
# Titles of every entry in DIMENSIONS_DATA, in the same order as that list.
DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
|
| 92 |
|
| 93 |
-
def load_or_initialize_count_json(audio_paths):
|
| 94 |
if os.path.exists(COUNT_JSON_PATH):
|
| 95 |
with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
|
| 96 |
# 使用 object_pairs_hook 保持原始顺序
|
|
@@ -118,12 +118,41 @@ def load_or_initialize_count_json(audio_paths):
|
|
| 118 |
# 确保写入时也保持顺序
|
| 119 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
return count_data
|
| 122 |
|
| 123 |
def append_cache_buster(audio_path):
    """Return ``audio_path`` with a ``?t=<milliseconds>`` suffix appended.

    The suffix value is the current ``time.time()`` reading converted to
    whole milliseconds, so calls made at different times yield different
    strings for the same path.
    """
    now_ms = int(time.time() * 1000)
    return f"{audio_path}?t={now_ms}"
|
| 125 |
|
| 126 |
-
def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
|
| 127 |
eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
|
| 128 |
|
| 129 |
if len(eligible_paths) < k:
|
|
@@ -142,6 +171,25 @@ def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
|
|
| 142 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 143 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
return selected, count_data
|
| 146 |
|
| 147 |
# Build the per-file counter mapping for every path in all_data_audio_paths.
count_data = load_or_initialize_count_json(all_data_audio_paths)
|
|
@@ -410,7 +458,7 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
|
|
| 410 |
print(f"上传成功: {submission_filename}")
|
| 411 |
|
| 412 |
# 上传 count.json(如果提供)
|
| 413 |
-
if count_data:
|
| 414 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 415 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 416 |
|
|
@@ -422,7 +470,21 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
|
|
| 422 |
token=hf_token,
|
| 423 |
commit_message=f"Update count.json after submission by {username}"
|
| 424 |
)
|
| 425 |
-
print("count.json 上传成功")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
|
| 427 |
except Exception as e:
|
| 428 |
print(f"上传出错: {e}")
|
|
|
|
| 90 |
|
| 91 |
# Titles of every entry in DIMENSIONS_DATA, in the same order as that list.
DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
|
| 92 |
|
| 93 |
+
"""def load_or_initialize_count_json(audio_paths):
|
| 94 |
if os.path.exists(COUNT_JSON_PATH):
|
| 95 |
with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
|
| 96 |
# 使用 object_pairs_hook 保持原始顺序
|
|
|
|
| 118 |
# 确保写入时也保持顺序
|
| 119 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 120 |
|
| 121 |
+
return count_data"""
|
| 122 |
+
|
| 123 |
+
from filelock import FileLock
|
| 124 |
+
|
| 125 |
+
def load_or_initialize_count_json(audio_paths):
    """Load the per-file counters from ``COUNT_JSON_PATH``, adding missing files.

    The whole read / update / write cycle runs while holding a ``FileLock``
    on ``COUNT_JSON_PATH + ".lock"``.

    Args:
        audio_paths: Iterable of audio file paths. Only the basename of each
            path is used as the counter key.

    Returns:
        An ``OrderedDict`` mapping file basename to its counter. Basenames not
        yet present are added with ``0``, except those that appear as the
        ``"audio"`` entry of an item in ``DIMENSIONS_DATA``, which are added
        with ``999``.

    Raises:
        filelock.Timeout: If the lock cannot be acquired within 5 seconds.
    """
    lock_path = COUNT_JSON_PATH + ".lock"
    with FileLock(lock_path, timeout=5):  # wait at most 5 seconds for the lock
        if os.path.exists(COUNT_JSON_PATH):
            with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
                # object_pairs_hook keeps the key order found in the file.
                count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
        else:
            count_data = collections.OrderedDict()

        updated = False
        sample_audio_files = {os.path.basename(d["audio"]) for d in DIMENSIONS_DATA}

        for path in audio_paths:
            filename = os.path.basename(path)
            if filename not in count_data:
                # NOTE(review): 999 presumably keeps the DIMENSIONS_DATA example
                # clips out of sampling as long as max_count stays below it --
                # confirm against the callers of sample_audio_paths.
                count_data[filename] = 999 if filename in sample_audio_files else 0
                updated = True

        # Rewrite the file only when something was added or it does not exist yet.
        if updated or not os.path.exists(COUNT_JSON_PATH):
            with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
                json.dump(count_data, f, indent=4, ensure_ascii=False)

    return count_data
|
| 151 |
|
| 152 |
def append_cache_buster(audio_path):
    """Return ``audio_path`` with a ``?t=<milliseconds>`` suffix appended.

    The suffix value is the current ``time.time()`` reading converted to
    whole milliseconds, so calls made at different times yield different
    strings for the same path.
    """
    now_ms = int(time.time() * 1000)
    return f"{audio_path}?t={now_ms}"
|
| 154 |
|
| 155 |
+
"""def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
|
| 156 |
eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
|
| 157 |
|
| 158 |
if len(eligible_paths) < k:
|
|
|
|
| 171 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 172 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 173 |
|
| 174 |
+
return selected, count_data"""
|
| 175 |
+
|
| 176 |
+
def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
    """Randomly pick ``k`` audio paths whose counter is below ``max_count``.

    Each selected file's counter in ``count_data`` is incremented by one and
    the updated mapping is written to ``COUNT_JSON_PATH``.

    Args:
        audio_paths: Candidate audio file paths; counters are keyed by basename.
        count_data: Mapping of file basename to counter. Mutated in place.
        k: Number of paths to select.
        max_count: A path is eligible only while its counter is below this.

    Returns:
        A ``(selected, count_data)`` tuple: the list of chosen paths and the
        same mapping that was passed in, after the increments.

    Raises:
        ValueError: If fewer than ``k`` paths are eligible.
        filelock.Timeout: If the lock cannot be acquired within 10 seconds.
    """
    lock_path = COUNT_JSON_PATH + ".lock"
    # Hold the lock across the eligibility check, the selection, the increment
    # and the write. With the lock around the write only, two callers sharing
    # ``count_data`` could both see the same file as eligible and push its
    # counter past ``max_count``. Taking the lock first also means the counters
    # are never bumped in memory when the lock cannot be acquired.
    with FileLock(lock_path, timeout=10):
        eligible_paths = [
            p for p in audio_paths
            if count_data.get(os.path.basename(p), 0) < max_count
        ]

        if len(eligible_paths) < k:
            raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")

        selected = random.sample(eligible_paths, k)

        for path in selected:
            filename = os.path.basename(path)
            count_data[filename] = count_data.get(filename, 0) + 1

        with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
            json.dump(count_data, f, indent=4, ensure_ascii=False)

    return selected, count_data
|
| 194 |
|
| 195 |
# Load the per-file counters (creating or extending the JSON file as needed)
# for every path in all_data_audio_paths.
count_data = load_or_initialize_count_json(all_data_audio_paths)
|
|
|
|
| 458 |
print(f"上传成功: {submission_filename}")
|
| 459 |
|
| 460 |
# 上传 count.json(如果提供)
|
| 461 |
+
"""if count_data:
|
| 462 |
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 463 |
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 464 |
|
|
|
|
| 470 |
token=hf_token,
|
| 471 |
commit_message=f"Update count.json after submission by {username}"
|
| 472 |
)
|
| 473 |
+
print("count.json 上传成功")"""
|
| 474 |
+
|
| 475 |
+
if count_data:
|
| 476 |
+
with FileLock(COUNT_JSON_PATH + ".lock", timeout=5):
|
| 477 |
+
with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
|
| 478 |
+
json.dump(count_data, f, indent=4, ensure_ascii=False)
|
| 479 |
+
|
| 480 |
+
api.upload_file(
|
| 481 |
+
path_or_fileobj=COUNT_JSON_PATH,
|
| 482 |
+
path_in_repo=COUNT_JSON_REPO_PATH,
|
| 483 |
+
repo_id=repo_id,
|
| 484 |
+
repo_type="dataset",
|
| 485 |
+
token=hf_token,
|
| 486 |
+
commit_message=f"Update count.json after submission by {username}"
|
| 487 |
+
)
|
| 488 |
|
| 489 |
except Exception as e:
|
| 490 |
print(f"上传出错: {e}")
|