intersteller2887 committed on
Commit
d78e376
·
verified ·
1 Parent(s): fe34f00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -107
app.py CHANGED
@@ -6,12 +6,10 @@ import random
6
  import shutil
7
  import time
8
  import collections
9
- from filelock import FileLock
10
- from datasets import load_dataset, Audio
11
  from huggingface_hub import HfApi
12
 
13
  dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
14
- dataset = dataset.cast_column("audio", Audio(decode=False))
15
 
16
  target_audio_dir = "/home/user/app/audio"
17
  os.makedirs(target_audio_dir, exist_ok=True)
@@ -94,20 +92,29 @@ DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
94
  def load_or_initialize_count_json(audio_paths):
95
  if os.path.exists(COUNT_JSON_PATH):
96
  with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
97
- count_data = json.load(f)
 
98
  else:
99
- count_data = {}
100
 
101
  updated = False
102
- # Count initialize
 
 
 
103
  for path in audio_paths:
104
  filename = os.path.basename(path)
105
  if filename not in count_data:
106
- count_data[filename] = 0
 
 
 
 
107
  updated = True
108
 
109
  if updated or not os.path.exists(COUNT_JSON_PATH):
110
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
 
111
  json.dump(count_data, f, indent=4, ensure_ascii=False)
112
 
113
  return count_data
@@ -115,96 +122,25 @@ def load_or_initialize_count_json(audio_paths):
115
  def append_cache_buster(audio_path):
116
  return f"{audio_path}?t={int(time.time() * 1000)}"
117
 
118
- # k for number of questions per test
119
- """def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
120
  eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
121
 
122
  if len(eligible_paths) < k:
123
  raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
124
 
125
- random.shuffle(eligible_paths)
126
- selected = random.sample(eligible_paths, k)
 
 
 
127
 
128
  for path in selected:
129
  filename = os.path.basename(path)
130
- count_data[filename] += 1
131
 
132
  with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
133
  json.dump(count_data, f, indent=4, ensure_ascii=False)
134
 
135
- return selected, count_data"""
136
-
137
- import threading
138
-
139
- LOCK_TEST_LOG = "/home/user/app/lock_test_log.json"
140
-
141
- def test_file_locking(n=5):
142
- """
143
- 测试 FileLock 是否生效,通过并发写入一个共享 JSON 文件观察是否冲突。
144
- """
145
- lock_path = COUNT_JSON_PATH + ".lock"
146
- log_lock = FileLock(lock_path)
147
-
148
- def worker(thread_id):
149
- with log_lock:
150
- try:
151
- # 读取当前日志内容
152
- if os.path.exists(LOCK_TEST_LOG):
153
- with open(LOCK_TEST_LOG, "r", encoding="utf-8") as f:
154
- log_data = json.load(f)
155
- else:
156
- log_data = []
157
-
158
- # 添加一条记录
159
- log_data.append({
160
- "thread": thread_id,
161
- "timestamp": time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()),
162
- "note": f"Thread {thread_id} acquired lock and wrote this entry."
163
- })
164
-
165
- # 写入日志文件
166
- with open(LOCK_TEST_LOG, "w", encoding="utf-8") as f:
167
- json.dump(log_data, f, ensure_ascii=False, indent=2)
168
-
169
- print(f"✅ Thread {thread_id} wrote to log.")
170
- except Exception as e:
171
- print(f"❌ Thread {thread_id} failed: {e}")
172
-
173
- # 启动多个线程并发写入
174
- threads = []
175
- for i in range(n):
176
- t = threading.Thread(target=worker, args=(i,))
177
- threads.append(t)
178
- t.start()
179
-
180
- for t in threads:
181
- t.join()
182
-
183
- print(f"\n✅ 测试完成。日志记录保存在: {LOCK_TEST_LOG}")
184
-
185
-
186
-
187
- def sample_audio_paths(audio_paths, count_data, k=100, max_count=1):
188
- lock_path = COUNT_JSON_PATH + ".lock"
189
- with FileLock(lock_path): # 加锁,确保只有一个用户操作
190
- with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
191
- count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
192
-
193
- eligible_paths = [p for p in audio_paths if count_data.get(os.path.basename(p), 0) < max_count]
194
- if len(eligible_paths) < k:
195
- raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")
196
-
197
- random.seed(int(time.time()))
198
- selected = random.sample(eligible_paths, k)
199
-
200
- # 更新计数
201
- for path in selected:
202
- filename = os.path.basename(path)
203
- count_data[filename] = count_data.get(filename, 0) + 1
204
-
205
- with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
206
- json.dump(count_data, f, indent=4, ensure_ascii=False)
207
-
208
  return selected, count_data
209
 
210
  count_data = load_or_initialize_count_json(all_data_audio_paths)
@@ -405,8 +341,6 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
405
  username = user_data.get("username", "user")
406
  timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
407
  submission_filename = f"submissions_{username}_{timestamp}.json"
408
- lock_log_path = "/home/user/app/lock_test_log.json"
409
- lock_log_repo_path = f"logs/lock_test_log_{username}_{timestamp}.json"
410
 
411
  final_data_package = {
412
  "user_info": user_data,
@@ -448,19 +382,6 @@ def save_all_results_to_file(all_results, user_data, count_data=None):
448
  )
449
  print("count.json 上传成功")
450
 
451
- if os.path.exists(lock_log_path):
452
- api.upload_file(
453
- path_or_fileobj=lock_log_path,
454
- path_in_repo=lock_log_repo_path,
455
- repo_id=repo_id,
456
- repo_type="dataset",
457
- token=hf_token,
458
- commit_message=f"Upload FileLock test log from {username}"
459
- )
460
- print(f"✅ FileLock 测试日志上传成功: {lock_log_repo_path}")
461
- else:
462
- print("ℹ️ 未发现 lock_test_log.json,跳过上传。")
463
-
464
  except Exception as e:
465
  print(f"上传出错: {e}")
466
 
@@ -515,10 +436,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
515
  start_btn = gr.Button("开始挑战", variant="primary")
516
 
517
  with info_page:
518
- gr.Markdown("## 请提供一些基本信息")
519
-
520
  username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
521
-
 
522
  age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
523
  gender_input = gr.Radio(["男", "女", "其他"], label="性别")
524
  education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他(请注明)"], label="学历")
@@ -609,10 +529,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
609
 
610
  start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])
611
 
612
- for comp in [username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input]:
613
  comp.change(
614
  fn=check_info_complete,
615
- # inputs=[age_input, gender_input, education_input, education_other_input, ai_experience_input],
616
  inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input],
617
  outputs=submit_info_btn
618
  )
@@ -681,6 +600,5 @@ if __name__ == "__main__":
681
  for audio_file in set(all_files):
682
  if not os.path.exists(audio_file):
683
  print(f"⚠️ Warning: Audio file not found: {audio_file}")
684
-
685
- test_file_locking(n=5)
686
  demo.launch(debug=True)
 
6
  import shutil
7
  import time
8
  import collections
9
+ from datasets import load_dataset
 
10
  from huggingface_hub import HfApi
11
 
12
  dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
 
13
 
14
  target_audio_dir = "/home/user/app/audio"
15
  os.makedirs(target_audio_dir, exist_ok=True)
 
def load_or_initialize_count_json(audio_paths):
    """Load the per-audio usage counters, creating or extending them as needed.

    Reads ``COUNT_JSON_PATH`` when it exists (key order is preserved by loading
    into an ``OrderedDict``); otherwise starts from an empty mapping. Afterwards
    every basename in ``audio_paths`` has an entry:

    * audio files used as dimension samples (``DIMENSIONS_DATA[i]["audio"]``)
      are pinned to a large count so a ``count < max_count`` filter never
      selects them;
    * every other file that has no entry yet starts at ``0``.

    The file is rewritten only when something changed or it did not exist.

    Args:
        audio_paths: Iterable of audio file paths. Only the basename of each
            path is used as the counter key.

    Returns:
        ``collections.OrderedDict`` mapping audio basename to its counter.
    """
    if os.path.exists(COUNT_JSON_PATH):
        with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
            # object_pairs_hook keeps the key order found in the file.
            count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
    else:
        count_data = collections.OrderedDict()

    # Basenames of the audio clips used as dimension samples.
    sample_audio_files = {os.path.basename(d["audio"]) for d in DIMENSIONS_DATA}
    # Large sentinel count: keeps sample clips out of the eligible pool.
    sample_excluded_count = 999

    updated = False
    for path in audio_paths:
        filename = os.path.basename(path)
        if filename in sample_audio_files:
            # Pin sample clips even when an older count file already holds a
            # smaller value for them; otherwise they would remain selectable.
            if count_data.get(filename, 0) < sample_excluded_count:
                count_data[filename] = sample_excluded_count
                updated = True
        elif filename not in count_data:
            count_data[filename] = 0
            updated = True

    if updated or not os.path.exists(COUNT_JSON_PATH):
        with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
            # An OrderedDict is dumped in insertion order, so order is kept on disk.
            json.dump(count_data, f, indent=4, ensure_ascii=False)

    return count_data
 
def append_cache_buster(audio_path):
    """Return ``audio_path`` followed by ``?t=<current time in milliseconds>``.

    The timestamp is ``time.time()`` scaled to milliseconds and truncated to
    an integer, so successive calls yield different suffixes.
    """
    millis = int(time.time() * 1000)
    return "{}?t={}".format(audio_path, millis)
124
 
def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
    """Randomly pick ``k`` audio files whose counter is still below ``max_count``.

    A path is eligible when the counter stored under its basename in
    ``count_data`` (missing entries count as 0) is less than ``max_count``.
    The counters of the selected files are incremented in ``count_data`` and
    the whole mapping is written to ``COUNT_JSON_PATH``.

    Args:
        audio_paths: Candidate audio file paths.
        count_data: Mapping of audio basename to counter; mutated in place.
        k: Number of files to draw.
        max_count: Exclusive upper bound on a file's counter for eligibility.

    Returns:
        Tuple ``(selected, count_data)``: the list of chosen paths and the
        updated mapping.

    Raises:
        ValueError: If fewer than ``k`` eligible files remain.
    """
    eligible_paths = [
        p for p in audio_paths
        if count_data.get(os.path.basename(p), 0) < max_count
    ]

    if len(eligible_paths) < k:
        raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")

    # Do not reseed the global RNG here: seeding with int(time.time()) makes
    # every call within the same second start from the same state, so two
    # sessions with the same eligible pool would draw the same files. The
    # random module is already seeded from OS entropy at import.
    # random.sample returns a new list and leaves eligible_paths untouched,
    # so no defensive copy is needed.
    selected = random.sample(eligible_paths, k)

    for path in selected:
        filename = os.path.basename(path)
        count_data[filename] = count_data.get(filename, 0) + 1

    # NOTE(review): this update-and-write of the counters is not synchronized
    # inside this function; confirm callers cannot run it concurrently.
    with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(count_data, f, indent=4, ensure_ascii=False)

    return selected, count_data
145
 
146
  count_data = load_or_initialize_count_json(all_data_audio_paths)
 
341
  username = user_data.get("username", "user")
342
  timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
343
  submission_filename = f"submissions_{username}_{timestamp}.json"
 
 
344
 
345
  final_data_package = {
346
  "user_info": user_data,
 
382
  )
383
  print("count.json 上传成功")
384
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  except Exception as e:
386
  print(f"上传出错: {e}")
387
 
 
436
  start_btn = gr.Button("开始挑战", variant="primary")
437
 
438
  with info_page:
 
 
439
  username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
440
+ gr.Markdown("## 请提供一些基本信息")
441
+ username_input = gr.Textbox(label="用户名", placeholder="请输入一个昵称或代号")
442
  age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
443
  gender_input = gr.Radio(["男", "女", "其他"], label="性别")
444
  education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他(请注明)"], label="学历")
 
529
 
530
  start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])
531
 
532
+ for comp in [age_input, gender_input, education_input, education_other_input, ai_experience_input]:
533
  comp.change(
534
  fn=check_info_complete,
 
535
  inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input],
536
  outputs=submit_info_btn
537
  )
 
600
  for audio_file in set(all_files):
601
  if not os.path.exists(audio_file):
602
  print(f"⚠️ Warning: Audio file not found: {audio_file}")
603
+
 
604
  demo.launch(debug=True)