intersteller2887 committed on
Commit
22919c2
·
verified ·
1 Parent(s): 7bedf10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -4
app.py CHANGED
@@ -90,7 +90,7 @@ DIMENSIONS_DATA = [
90
 
91
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
92
 
93
- def load_or_initialize_count_json(audio_paths):
94
  if os.path.exists(COUNT_JSON_PATH):
95
  with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
96
  # 使用 object_pairs_hook 保持原始顺序
@@ -118,12 +118,41 @@ def load_or_initialize_count_json(audio_paths):
118
  # 确保写入时也保持顺序
119
  json.dump(count_data, f, indent=4, ensure_ascii=False)
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  return count_data
122
 
123
  def append_cache_buster(audio_path):
124
  return f"{audio_path}?t={int(time.time() * 1000)}"
125
 
126
- def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
127
  eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
128
 
129
  if len(eligible_paths) < k:
@@ -142,6 +171,25 @@ def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
142
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
143
  json.dump(count_data, f, indent=4, ensure_ascii=False)
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  return selected, count_data
146
 
147
  count_data = load_or_initialize_count_json(all_data_audio_paths)
@@ -410,7 +458,7 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
410
  print(f"上传成功: {submission_filename}")
411
 
412
  # 上传 count.json(如果提供)
413
- if count_data:
414
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
415
  json.dump(count_data, f, indent=4, ensure_ascii=False)
416
 
@@ -422,7 +470,21 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
422
  token=hf_token,
423
  commit_message=f"Update count.json after submission by {username}"
424
  )
425
- print("count.json 上传成功")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
 
427
  except Exception as e:
428
  print(f"上传出错: {e}")
 
90
 
91
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
92
 
93
+ """def load_or_initialize_count_json(audio_paths):
94
  if os.path.exists(COUNT_JSON_PATH):
95
  with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
96
  # 使用 object_pairs_hook 保持原始顺序
 
118
  # 确保写入时也保持顺序
119
  json.dump(count_data, f, indent=4, ensure_ascii=False)
120
 
121
+ return count_data"""
122
+
123
+ from filelock import FileLock
124
+
125
def load_or_initialize_count_json(audio_paths):
    """Load the per-file counts from ``COUNT_JSON_PATH`` and add missing entries.

    The whole read-update-write sequence runs while holding the file lock at
    ``COUNT_JSON_PATH + ".lock"``.

    Args:
        audio_paths: Iterable of audio file paths. Only the basename of each
            path is used as the key in the count mapping.

    Returns:
        collections.OrderedDict: Mapping of audio basename to count. A
        basename that was not present yet is initialised to 999 when it is
        also the basename of an ``"audio"`` entry in ``DIMENSIONS_DATA``, and
        to 0 otherwise.

    Raises:
        filelock.Timeout: If the lock cannot be acquired within 5 seconds.
    """
    lock_path = COUNT_JSON_PATH + ".lock"
    with FileLock(lock_path, timeout=5):  # wait at most 5 seconds for the lock
        try:
            with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
                # object_pairs_hook keeps the key order found in the file.
                count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
            needs_write = False
        except FileNotFoundError:
            # No count file yet: start empty and make sure one gets created.
            count_data = collections.OrderedDict()
            needs_write = True

        sample_audio_files = {os.path.basename(d["audio"]) for d in DIMENSIONS_DATA}

        for path in audio_paths:
            filename = os.path.basename(path)
            if filename not in count_data:
                # 999 keeps files listed in DIMENSIONS_DATA above any
                # realistic max_count threshold used when sampling.
                count_data[filename] = 999 if filename in sample_audio_files else 0
                needs_write = True

        if needs_write:
            with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
                json.dump(count_data, f, indent=4, ensure_ascii=False)

    return count_data
151
 
152
  def append_cache_buster(audio_path):
153
  return f"{audio_path}?t={int(time.time() * 1000)}"
154
 
155
+ """def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
156
  eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
157
 
158
  if len(eligible_paths) < k:
 
171
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
172
  json.dump(count_data, f, indent=4, ensure_ascii=False)
173
 
174
+ return selected, count_data"""
175
+
176
def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
    """Randomly pick ``k`` audio paths whose count is below ``max_count``.

    The lock at ``COUNT_JSON_PATH + ".lock"`` is held for the entire
    read-modify-write cycle. Counts already persisted in ``COUNT_JSON_PATH``
    are merged into ``count_data`` first, so increments written by another
    session or process are neither ignored during selection nor overwritten
    by the final write.

    Args:
        audio_paths: Candidate audio file paths; counts are keyed by basename.
        count_data: Mutable mapping of basename to count. It is updated in
            place and is the same object that is returned.
        k: Number of paths to draw.
        max_count: A path is eligible only while its count is below this.

    Returns:
        tuple: ``(selected, count_data)`` where ``selected`` is a list of
        ``k`` paths and ``count_data`` has each selected basename incremented.

    Raises:
        ValueError: If fewer than ``k`` eligible paths remain.
        filelock.Timeout: If the lock cannot be acquired within 10 seconds.
    """
    lock_path = COUNT_JSON_PATH + ".lock"
    with FileLock(lock_path, timeout=10):
        # Pick up counts written since count_data was loaded. A missing or
        # unreadable file is treated as having nothing to merge, which matches
        # the previous behaviour of simply overwriting it.
        try:
            with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
                on_disk = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            on_disk = {}
        if not isinstance(on_disk, dict):
            on_disk = {}

        # Counts only ever grow, so keep the larger value for every file.
        for filename, persisted in on_disk.items():
            if persisted > count_data.get(filename, 0):
                count_data[filename] = persisted

        eligible_paths = [
            p for p in audio_paths
            if count_data.get(os.path.basename(p), 0) < max_count
        ]

        if len(eligible_paths) < k:
            raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")

        selected = random.sample(eligible_paths, k)

        for path in selected:
            filename = os.path.basename(path)
            count_data[filename] = count_data.get(filename, 0) + 1

        with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
            json.dump(count_data, f, indent=4, ensure_ascii=False)

    return selected, count_data
194
 
195
  count_data = load_or_initialize_count_json(all_data_audio_paths)
 
458
  print(f"上传成功: {submission_filename}")
459
 
460
  # 上传 count.json(如果提供)
461
+ """if count_data:
462
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
463
  json.dump(count_data, f, indent=4, ensure_ascii=False)
464
 
 
470
  token=hf_token,
471
  commit_message=f"Update count.json after submission by {username}"
472
  )
473
+ print("count.json 上传成功")"""
474
+
475
+ if count_data:
476
+ with FileLock(COUNT_JSON_PATH + ".lock", timeout=5):
477
+ with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
478
+ json.dump(count_data, f, indent=4, ensure_ascii=False)
479
+
480
+ api.upload_file(
481
+ path_or_fileobj=COUNT_JSON_PATH,
482
+ path_in_repo=COUNT_JSON_REPO_PATH,
483
+ repo_id=repo_id,
484
+ repo_type="dataset",
485
+ token=hf_token,
486
+ commit_message=f"Update count.json after submission by {username}"
487
+ )
488
 
489
  except Exception as e:
490
  print(f"上传出错: {e}")