intersteller2887 commited on
Commit
98229e1
·
verified ·
1 Parent(s): 17c7808

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -709
app.py CHANGED
@@ -89,715 +89,6 @@ DIMENSIONS_DATA = [
89
 
90
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
91
 
92
def load_or_initialize_count_json(audio_paths):
    """Load the per-file play-count ledger from COUNT_JSON_PATH, creating or
    extending it so that every file in *audio_paths* has an entry.

    Existing entries keep their on-disk order; newly seen files are appended
    with a count of 0. The ledger is (re)written whenever entries were added
    or the file did not exist yet.

    Returns:
        collections.OrderedDict: basename -> play count.
    """
    ledger_exists = os.path.exists(COUNT_JSON_PATH)
    if ledger_exists:
        with open(COUNT_JSON_PATH, "r", encoding="utf-8") as fh:
            # object_pairs_hook keeps the on-disk key order intact
            counts = json.load(fh, object_pairs_hook=collections.OrderedDict)
    else:
        counts = collections.OrderedDict()

    added_any = False
    for audio_path in audio_paths:
        name = os.path.basename(audio_path)
        if name not in counts:
            counts[name] = 0
            added_any = True

    if added_any or not ledger_exists:
        with open(COUNT_JSON_PATH, "w", encoding="utf-8") as fh:
            # dump preserves the insertion order held in memory
            json.dump(counts, fh, indent=4, ensure_ascii=False)

    return counts
113
-
114
def append_cache_buster(audio_path):
    """Append a millisecond-timestamp query parameter so browsers re-fetch the file."""
    cache_key = int(time.time() * 1000)
    return "{}?t={}".format(audio_path, cache_key)
116
-
117
def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
    """Draw *k* distinct audio files whose play count is still below *max_count*.

    Each selected file's count is incremented and the updated ledger is
    persisted to COUNT_JSON_PATH so concurrent sessions see the new totals.

    Args:
        audio_paths: candidate audio file paths.
        count_data: basename -> play-count mapping (mutated in place).
        k: number of files to draw.
        max_count: a file is eligible only while its count is < max_count.

    Returns:
        (selected_paths, count_data) tuple.

    Raises:
        ValueError: fewer than *k* eligible files remain.
    """
    eligible_paths = [
        p for p in audio_paths
        if count_data.get(os.path.basename(p), 0) < max_count
    ]

    if len(eligible_paths) < k:
        raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")

    # BUG FIX: the previous code called random.seed(int(time.time())) on the
    # module-global RNG. With 1-second seed resolution, two sessions starting
    # within the same second drew an *identical* "random" sample, and any other
    # code sharing the global RNG had its state clobbered. A dedicated
    # generator (seeded from OS entropy by default) avoids both problems.
    rng = random.Random()
    selected = rng.sample(eligible_paths, k)

    for path in selected:
        filename = os.path.basename(path)
        count_data[filename] = count_data.get(filename, 0) + 1

    # Persist immediately so the drawn files can't be over-sampled later.
    with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(count_data, f, indent=4, ensure_ascii=False)

    return selected, count_data
137
-
138
- count_data = load_or_initialize_count_json(all_data_audio_paths)
139
- selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths, count_data, k=5)
140
-
141
-
142
- QUESTION_SET = [
143
- {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
144
- for path in selected_audio_paths
145
- ]
146
-
147
- MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
148
-
149
- # ==============================================================================
150
- # 功能函数定义 (Function Definitions)
151
- # ==============================================================================
152
def start_challenge():
    """Hide the welcome page and reveal the info-collection page."""
    hide_welcome = gr.update(visible=False)
    show_info = gr.update(visible=True)
    return hide_welcome, show_info
154
-
155
def toggle_education_other(choice):
    """Show/enable the free-text education box only when '其他(请注明)' is chosen."""
    needs_custom_entry = choice == "其他(请注明)"
    return gr.update(
        visible=needs_custom_entry,
        interactive=needs_custom_entry,
        value="",
    )
158
-
159
def check_info_complete(username, age, gender, education, education_other, ai_experience):
    """Enable the submit button only when every required field is filled in.

    When '其他(请注明)' is selected for education, the free-text field must be
    non-blank as well.
    """
    all_filled = bool(
        username.strip() and age and gender and education and ai_experience
    )
    custom_edu_missing = (
        education == "其他(请注明)" and not education_other.strip()
    )
    return gr.update(interactive=all_filled and not custom_edu_missing)
165
-
166
def show_sample_page_and_init(username, age, gender, education, education_other, ai_experience, user_data):
    """Store the demographic answers in *user_data*, then switch from the
    info page to the sample page, pre-loaded with the first dimension."""
    resolved_education = (
        education_other if education == "其他(请注明)" else education
    )
    user_data.update(
        username=username.strip(),
        age=age,
        gender=gender,
        education=resolved_education,
        ai_experience=ai_experience,
    )

    first_title = DIMENSION_TITLES[0]
    sample_view_updates = update_sample_view(first_title)

    page_updates = [
        gr.update(visible=False),  # hide info page
        gr.update(visible=True),   # show sample page
        user_data,
        first_title,
    ]
    return page_updates + sample_view_updates
182
-
183
def update_sample_view(dimension_title):
    """Build component updates for the sample page when a dimension is picked.

    Returns [audio, interactive_view, reference_view, reference_btn] followed
    by MAX_SUB_DIMS practice-slider updates and MAX_SUB_DIMS reference-slider
    updates. An unknown title yields no-op updates of the same arity.
    """
    dim = next((d for d in DIMENSIONS_DATA if d["title"] == dimension_title), None)
    if dim is None:
        # Keep the output arity identical even when the title is unknown.
        return [gr.update()] * 4 + [gr.update()] * (MAX_SUB_DIMS * 2)

    header_updates = [
        gr.update(value=dim["audio"]),   # shared sample audio
        gr.update(visible=True),         # practice view shown
        gr.update(visible=False),        # reference view hidden again
        gr.update(value="参考"),          # toggle button back to its base label
    ]

    sub_dims = dim['sub_dims']
    ref_scores = dim.get("reference_scores", [])
    practice_updates = []
    reference_updates = []
    for idx in range(MAX_SUB_DIMS):
        if idx < len(sub_dims):
            label = sub_dims[idx]
            score = ref_scores[idx] if idx < len(ref_scores) else 0
            # Practice sliders reset to the neutral midpoint (3).
            practice_updates.append(gr.update(visible=True, label=label, value=3))
            reference_updates.append(gr.update(visible=True, label=label, value=score))
        else:
            practice_updates.append(gr.update(visible=False, value=0))
            reference_updates.append(gr.update(visible=False, value=0))

    return header_updates + practice_updates + reference_updates
208
-
209
def update_test_dimension_view(d_idx, selections):
    """Render dimension *d_idx* of the current question, restoring any scores
    the user already entered for it (default 3 = neutral)."""
    dim = DIMENSIONS_DATA[d_idx]
    progress_d = f"维度 {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dim['title']}**"

    saved_scores = selections.get(dim['title'], {})

    slider_ups = []
    for pos in range(MAX_SUB_DIMS):
        if pos < len(dim['sub_dims']):
            label = dim['sub_dims'][pos]
            slider_ups.append(
                gr.update(visible=True, label=label, value=saved_scores.get(label, 3))
            )
        else:
            slider_ups.append(gr.update(visible=False, value=0))

    on_last_dim = d_idx == len(DIMENSIONS_DATA) - 1
    prev_up = gr.update(interactive=d_idx > 0)
    next_up = gr.update(
        value="进入最终判断" if on_last_dim else "下一维度",
        interactive=True,
    )

    return [gr.update(value=progress_d), prev_up, next_up] + slider_ups
231
-
232
def init_test_question(user_data, q_idx):
    """Reset all per-question state and show question *q_idx* at dimension 0."""
    d_idx = 0
    question = QUESTION_SET[q_idx]
    progress_label = f"第 {q_idx + 1} / {len(QUESTION_SET)} 题"

    dim_view = update_test_dimension_view(d_idx, {})
    dim_title_up, prev_btn_up, next_btn_up = dim_view[:3]
    slider_ups = tuple(dim_view[3:])

    fixed_updates = (
        gr.update(visible=False),      # pretest page
        gr.update(visible=True),       # test page
        gr.update(visible=False),      # final-judgment page
        gr.update(visible=False),      # result page
        q_idx,
        d_idx,
        {},                            # fresh per-question selections
        gr.update(value=progress_label),
        dim_title_up,
        gr.update(value=question['audio']),
        prev_btn_up,
        next_btn_up,
        gr.update(value=None),         # None (not "") actually clears the radio
        gr.update(interactive=False),  # submit stays disabled until a choice
    )
    return fixed_updates + slider_ups
256
-
257
def navigate_dimensions(direction, q_idx, d_idx, selections, *slider_values):
    """Save the sliders for the current dimension, then move to the previous
    or next dimension — or on to the final-judgment page after the last one.

    *direction* is "next" or "prev"; *slider_values* are the current values
    of the MAX_SUB_DIMS test sliders, in order.
    """
    dim = DIMENSIONS_DATA[d_idx]
    # Only the first len(sub_dims) sliders belong to this dimension.
    selections[dim['title']] = {
        label: slider_values[i] for i, label in enumerate(dim['sub_dims'])
    }

    step = 1 if direction == "next" else -1
    new_d_idx = d_idx + step

    finished_last_dim = direction == "next" and d_idx == len(DIMENSIONS_DATA) - 1
    if finished_last_dim:
        # Leave the test page for the final-judgment page; freeze navigation.
        return (
            gr.update(visible=False),      # test page
            gr.update(visible=True),       # final-judgment page
            q_idx, new_d_idx, selections,
            gr.update(),                   # progress text unchanged
            gr.update(),                   # dimension title unchanged
            gr.update(),                   # audio unchanged
            gr.update(interactive=True),   # enable the human/robot radio
            gr.update(interactive=False),  # submit waits for a radio choice
            gr.update(interactive=False),  # prev disabled
            gr.update(interactive=False),  # next disabled
        ) + (gr.update(),) * MAX_SUB_DIMS

    view = update_test_dimension_view(new_d_idx, selections)
    title_up, prev_up, next_up = view[:3]
    return (
        gr.update(), gr.update(),
        q_idx, new_d_idx, selections,
        gr.update(),
        title_up,
        gr.update(),
        gr.update(),
        gr.update(),
        prev_up,
        next_up,
    ) + tuple(view[3:])
295
-
296
def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
    """Record the finished question, then either load the next question or —
    after the last one — upload everything and show the results page.

    BUG FIX: on the final submission the test_audio output used to be a no-op
    gr.update(), so a clip still playing kept playing on the results page.
    Emitting None for the Audio component clears its value and stops playback.
    (When advancing to the next question, init_test_question already replaces
    the audio value, which stops the old clip and loads the new one.)
    """
    selections["final_choice"] = final_choice

    all_results.append({
        "question_id": q_idx,
        "audio_file": QUESTION_SET[q_idx]['audio'],
        "selections": selections,
    })

    q_idx += 1

    if q_idx < len(QUESTION_SET):
        init_q_updates = init_test_question(user_data, q_idx)
        return init_q_updates + (all_results, gr.update(value=""))

    # All questions answered: build the overview markdown.
    result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
    for res in all_results:
        result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
        for dim_title, dim_data in res['selections'].items():
            if dim_title == 'final_choice':
                continue
            result_str += f"- **{dim_title}**:\n"
            for sub_dim, score in dim_data.items():
                result_str += f" - *{sub_dim[:20]}...*: {score}/5\n"

    save_all_results_to_file(all_results, user_data, count_data=updated_count_data)

    return (
        gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
        q_idx, d_idx, {},
        gr.update(), gr.update(),
        None,  # test_audio: clearing the value stops any clip still playing
        gr.update(), gr.update(),
        gr.update(), gr.update(),
    ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
330
-
331
def save_all_results_to_file(all_results, user_data, count_data=None):
    """Upload the submission JSON (and, when given, the updated play-count
    ledger) to the Hugging Face dataset repo.

    Requires an HF_TOKEN environment variable; without one the upload is
    skipped with a console message. Upload failures are logged, not raised.
    """
    repo_id = "intersteller2887/Turing-test-dataset"
    username = user_data.get("username", "user")
    stamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
    submission_filename = f"submissions_{username}_{stamp}.json"

    payload = json.dumps(
        {"user_info": user_data, "results": all_results},
        ensure_ascii=False,
        indent=4,
    )
    hf_token = os.getenv("HF_TOKEN")

    if not hf_token:
        print("HF_TOKEN not found. Cannot upload to the Hub.")
        return

    try:
        api = HfApi()

        # Upload this user's submission file.
        api.upload_file(
            path_or_fileobj=bytes(payload, "utf-8"),
            path_in_repo=f"submissions/{submission_filename}",
            repo_id=repo_id,
            repo_type="dataset",
            token=hf_token,
            commit_message=f"Add new submission from {username}",
        )
        print(f"上传成功: {submission_filename}")

        # Persist and upload the play-count ledger when provided.
        if count_data:
            with open(COUNT_JSON_PATH, "w", encoding="utf-8") as fh:
                json.dump(count_data, fh, indent=4, ensure_ascii=False)

            api.upload_file(
                path_or_fileobj=COUNT_JSON_PATH,
                path_in_repo=COUNT_JSON_REPO_PATH,
                repo_id=repo_id,
                repo_type="dataset",
                token=hf_token,
                commit_message=f"Update count.json after submission by {username}",
            )
            print("count.json 上传成功")

    except Exception as e:
        # Best-effort upload: report and continue so the UI can still finish.
        print(f"上传出错: {e}")
379
-
380
-
381
def toggle_reference_view(current):
    """Flip between the practice view and the reference-answer view; the
    button label ('参考'/'返回') tracks which view is currently hidden."""
    showing_practice = current == "参考"
    if showing_practice:
        return gr.update(visible=False), gr.update(visible=True), gr.update(value="返回")
    return gr.update(visible=True), gr.update(visible=False), gr.update(value="参考")
386
-
387
def back_to_welcome():
    """Reset every page and all per-session state back to the welcome screen."""
    page_visibility = (
        gr.update(visible=True),   # welcome_page
        gr.update(visible=False),  # info_page
        gr.update(visible=False),  # sample_page
        gr.update(visible=False),  # pretest_page
        gr.update(visible=False),  # test_page
        gr.update(visible=False),  # final_judgment_page
        gr.update(visible=False),  # result_page
    )
    fresh_state = (
        {},   # user_data_state
        0,    # current_question_index
        0,    # current_test_dimension_index
        {},   # current_question_selections
        [],   # test_results
    )
    return page_visibility + fresh_state
402
-
403
- # ==============================================================================
404
- # Gradio 界面定义 (Gradio UI Definition)
405
- # ==============================================================================
406
- with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px !important}") as demo:
407
- user_data_state = gr.State({})
408
- current_question_index = gr.State(0)
409
- current_test_dimension_index = gr.State(0)
410
- current_question_selections = gr.State({})
411
- test_results = gr.State([])
412
-
413
- welcome_page = gr.Column(visible=True)
414
- info_page = gr.Column(visible=False)
415
- sample_page = gr.Column(visible=False)
416
- pretest_page = gr.Column(visible=False)
417
- test_page = gr.Column(visible=False)
418
- final_judgment_page = gr.Column(visible=False)
419
- result_page = gr.Column(visible=False)
420
- pages = {
421
- "welcome": welcome_page, "info": info_page, "sample": sample_page,
422
- "pretest": pretest_page, "test": test_page, "final_judgment": final_judgment_page,
423
- "result": result_page
424
- }
425
-
426
- with welcome_page:
427
- gr.Markdown("# AI 识破者\n你将听到一系列对话,请判断哪个回应者是 AI。")
428
- start_btn = gr.Button("开始挑战", variant="primary")
429
-
430
- with info_page:
431
- gr.Markdown("## 请提供一些基本信息")
432
- username_input = gr.Textbox(label="用户名", placeholder="请输入一个昵称或代号")
433
- age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
434
- gender_input = gr.Radio(["男", "女", "其他"], label="性别")
435
- education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他(请注明)"], label="学历")
436
- education_other_input = gr.Textbox(label="请填写你的学历", visible=False, interactive=False)
437
- ai_experience_input = gr.Radio(["从未使用过", "偶尔接触(如看别人用)", "使用过几次,了解基本功能", "经常使用,有一定操作经验", "非常熟悉,深入使用过多个 AI 工具"], label="对 AI 工具的熟悉程度")
438
- submit_info_btn = gr.Button("提交并开始学习样例", variant="primary", interactive=False)
439
-
440
- with sample_page:
441
-
442
- gr.Markdown("## 样例分析\n请选择一个维度进行学习和打分练习。所有维度共用同一个样例音频。")
443
- sample_dimension_selector = gr.Radio(DIMENSION_TITLES, label="选择学习维度", value=DIMENSION_TITLES[0])
444
- with gr.Row():
445
- with gr.Column(scale=1):
446
- sample_audio = gr.Audio(label="样例音频", value=DIMENSIONS_DATA[0]["audio"])
447
- with gr.Column(scale=2):
448
- with gr.Column(visible=True) as interactive_view:
449
- gr.Markdown("#### 请为以下特征打分 (1-5分。1对应机器,5对应人类)")
450
- sample_sliders = [gr.Slider(minimum=1, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
451
- with gr.Column(visible=False) as reference_view:
452
- gr.Markdown("### 参考答案解析 (1-5分。1对应机器,5对应人类)")
453
- reference_sliders = [gr.Slider(minimum=1, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=False) for i in range(MAX_SUB_DIMS)]
454
- with gr.Row():
455
- reference_btn = gr.Button("参考")
456
- go_to_pretest_btn = gr.Button("我明白了,开始测试", variant="primary")
457
-
458
- with pretest_page:
459
- gr.Markdown("## 测试说明\n"
460
- "- 对于每一道题,你都需要对全部 **5 个维度** 进行评估。\n"
461
- "- 在每个维度下,请为出现的每个特征 **从1到5打分。\n"
462
- "- **评分解释如下:**\n"
463
- " - **1 分:极度符合机器特征**;\n"
464
- " - **2 分:较为符合机器特征**;\n"
465
- " - **3 分:无明显人类或机器倾向或特征无体现**;\n"
466
- " - **4 分:较为符合人类特征**;\n"
467
- " - **5 分:极度符合人类特征**。\n"
468
- "- 完成所有维度后,请根据整体印象对回应方的身份做出做出“人类”或“机器人”的 **最终判断**。\n"
469
- "- 你可以使用“上一维度”和“下一维度”按钮在5个维度间自由切换和修改分数。")
470
- go_to_test_btn = gr.Button("开始测试", variant="primary")
471
-
472
- with test_page:
473
- gr.Markdown("## 正式测试")
474
- question_progress_text = gr.Markdown()
475
- test_dimension_title = gr.Markdown()
476
- test_audio = gr.Audio(label="测试音频")
477
- gr.Markdown("--- \n ### 请为以下特征打分 (1-5分。1对应机器,5对应人类)")
478
- test_sliders = [gr.Slider(minimum=1, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
479
- with gr.Row():
480
- prev_dim_btn = gr.Button("上一维度")
481
- next_dim_btn = gr.Button("下一维度", variant="primary")
482
-
483
- with final_judgment_page:
484
- gr.Markdown("## 最终判断")
485
- gr.Markdown("您已完成对所有维度的评分。请根据您的综合印象,做出最终判断。")
486
- final_human_robot_radio = gr.Radio(["👤 人类", "🤖 机器人"], label="请判断回应者类型 (必填)")
487
- submit_final_answer_btn = gr.Button("提交本题答案", variant="primary", interactive=False)
488
-
489
- with result_page:
490
- gr.Markdown("## 测试完成")
491
- result_text = gr.Markdown()
492
- back_to_welcome_btn = gr.Button("返回主界面", variant="primary")
493
-
494
- # ==============================================================================
495
- # 事件绑定 (Event Binding) & IO 列表定义
496
- # ==============================================================================
497
- sample_init_outputs = [
498
- info_page, sample_page, user_data_state, sample_dimension_selector,
499
- sample_audio, interactive_view, reference_view, reference_btn
500
- ] + sample_sliders + reference_sliders
501
-
502
- test_init_outputs = [
503
- pretest_page, test_page, final_judgment_page, result_page,
504
- current_question_index, current_test_dimension_index, current_question_selections,
505
- question_progress_text, test_dimension_title, test_audio,
506
- prev_dim_btn, next_dim_btn,
507
- final_human_robot_radio, submit_final_answer_btn,
508
- ] + test_sliders
509
-
510
- nav_inputs = [current_question_index, current_test_dimension_index, current_question_selections] + test_sliders
511
- nav_outputs = [
512
- test_page, final_judgment_page,
513
- current_question_index, current_test_dimension_index, current_question_selections,
514
- question_progress_text, test_dimension_title, test_audio,
515
- final_human_robot_radio, submit_final_answer_btn,
516
- prev_dim_btn, next_dim_btn,
517
- ] + test_sliders
518
-
519
- full_outputs_with_results = test_init_outputs + [test_results, result_text]
520
-
521
- start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])
522
-
523
- for comp in [age_input, gender_input, education_input, education_other_input, ai_experience_input]:
524
- comp.change(
525
- fn=check_info_complete,
526
- inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input],
527
- outputs=submit_info_btn
528
- )
529
-
530
- education_input.change(fn=toggle_education_other, inputs=education_input, outputs=education_other_input)
531
-
532
- submit_info_btn.click(
533
- fn=show_sample_page_and_init,
534
- inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input, user_data_state],
535
- outputs=sample_init_outputs
536
- )
537
-
538
- sample_dimension_selector.change(
539
- fn=update_sample_view,
540
- inputs=sample_dimension_selector,
541
- outputs=[sample_audio, interactive_view, reference_view, reference_btn] + sample_sliders + reference_sliders
542
- )
543
-
544
- reference_btn.click(
545
- fn=toggle_reference_view,
546
- inputs=reference_btn,
547
- outputs=[interactive_view, reference_view, reference_btn]
548
- )
549
-
550
- go_to_pretest_btn.click(lambda: (gr.update(visible=False), gr.update(visible=True)), outputs=[sample_page, pretest_page])
551
-
552
- go_to_test_btn.click(
553
- fn=lambda user: init_test_question(user, 0) + ([], gr.update()),
554
- inputs=[user_data_state],
555
- outputs=full_outputs_with_results
556
- )
557
-
558
- prev_dim_btn.click(
559
- fn=lambda q,d,s, *sliders: navigate_dimensions("prev", q,d,s, *sliders),
560
- inputs=nav_inputs, outputs=nav_outputs
561
- )
562
-
563
- next_dim_btn.click(
564
- fn=lambda q,d,s, *sliders: navigate_dimensions("next", q,d,s, *sliders),
565
- inputs=nav_inputs, outputs=nav_outputs
566
- )
567
-
568
- final_human_robot_radio.change(
569
- fn=lambda choice: gr.update(interactive=bool(choice)),
570
- inputs=final_human_robot_radio,
571
- outputs=submit_final_answer_btn
572
- )
573
-
574
- submit_final_answer_btn.click(
575
- fn=submit_question_and_advance,
576
- inputs=[current_question_index, current_test_dimension_index, current_question_selections, final_human_robot_radio, test_results, user_data_state],
577
- outputs=full_outputs_with_results
578
- )
579
-
580
- back_to_welcome_btn.click(fn=back_to_welcome, outputs=list(pages.values()) + [user_data_state, current_question_index, current_test_dimension_index, current_question_selections, test_results])
581
-
582
- # ==============================================================================
583
- # 程序入口 (Entry Point)
584
- # ==============================================================================
585
- if __name__ == "__main__":
586
- if not os.path.exists("audio"):
587
- os.makedirs("audio")
588
- if "SPACE_ID" in os.environ:
589
- print("Running in a Hugging Face Space, checking for audio files...")
590
- all_files = [q["audio"] for q in QUESTION_SET] + [d["audio"] for d in DIMENSIONS_DATA]
591
- for audio_file in set(all_files):
592
- if not os.path.exists(audio_file):
593
- print(f"⚠️ Warning: Audio file not found: {audio_file}")
594
-
595
- demo.launch(debug=True)
596
- 我还需要添加两个功能:第一,sample的音频文件直接计数为max(防止被抽取)
597
- 第二,在最后点击提交按钮时,需要终止可能正在播放的音频
598
- 我将帮你添加这两个功能:
599
-
600
- 首先修改 load_or_initialize_count_json 函数,将样例音频文件直接设置为最大值:
601
-
602
- python
603
- def load_or_initialize_count_json(audio_paths):
604
- if os.path.exists(COUNT_JSON_PATH):
605
- with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
606
- # 使用 object_pairs_hook 保持原始顺序
607
- count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
608
- else:
609
- count_data = collections.OrderedDict()
610
-
611
- updated = False
612
-
613
- # 获取所有样例音频文件名
614
- sample_audio_files = {os.path.basename(d["audio"]) for d in DIMENSIONS_DATA}
615
-
616
- for path in audio_paths:
617
- filename = os.path.basename(path)
618
- if filename not in count_data:
619
- # 如果是样例音频,直接设置为最大值
620
- if filename in sample_audio_files:
621
- count_data[filename] = 999 # 设置为很大的值,确保不会被选中
622
- else:
623
- count_data[filename] = 0
624
- updated = True
625
-
626
- if updated or not os.path.exists(COUNT_JSON_PATH):
627
- with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
628
- # 确保写入时也保持顺序
629
- json.dump(count_data, f, indent=4, ensure_ascii=False)
630
-
631
- return count_data
632
- 添加音频停止功能,修改 Gradio 界面部分:
633
-
634
- 在 submit_question_and_advance 函数中添加停止音频的逻辑:
635
-
636
- python
637
- def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
638
- selections["final_choice"] = final_choice
639
-
640
- final_question_result = {
641
- "question_id": q_idx, "audio_file": QUESTION_SET[q_idx]['audio'],
642
- "selections": selections
643
- }
644
- all_results.append(final_question_result)
645
-
646
- q_idx += 1
647
-
648
- if q_idx < len(QUESTION_SET):
649
- init_q_updates = init_test_question(user_data, q_idx)
650
- # 添加停止音频的逻辑
651
- init_q_updates = list(init_q_updates)
652
- init_q_updates[9] = None # test_audio 的位置设为 None 来停止播放
653
- return tuple(init_q_updates) + (all_results, gr.update(value=""))
654
- else:
655
- result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
656
- for res in all_results:
657
- result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
658
- for dim_title, dim_data in res['selections'].items():
659
- if dim_title == 'final_choice': continue
660
- result_str += f"- **{dim_title}**:\n"
661
- for sub_dim, score in dim_data.items():
662
- result_str += f" - *{sub_dim[:20]}...*: {score}/5\n"
663
-
664
- save_all_results_to_file(all_results, user_data, count_data=updated_count_data)
665
-
666
- return (
667
- gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
668
- q_idx, d_idx, {},
669
- gr.update(), gr.update(), None, gr.update(), gr.update(), # 设置 test_audio 为 None 来停止播放
670
- gr.update(), gr.update(),
671
- ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
672
- 在返回主界面时也停止音频:
673
-
674
- 修改 back_to_welcome 函数:
675
-
676
- python
677
- def back_to_welcome():
678
- return (
679
- gr.update(visible=True), # welcome_page
680
- gr.update(visible=False), # info_page
681
- gr.update(visible=False), # sample_page
682
- gr.update(visible=False), # pretest_page
683
- gr.update(visible=False), # test_page
684
- gr.update(visible=False), # final_judgment_page
685
- gr.update(visible=False), # result_page
686
- {}, # user_data_state
687
- 0, # current_question_index
688
- 0, # current_test_dimension_index
689
- {}, # current_question_selections
690
- [], # test_results
691
- None # 停止音频
692
- )
693
- 更新 back_to_welcome_btn 的点击事件:
694
-
695
- python
696
- back_to_welcome_btn.click(
697
- fn=back_to_welcome,
698
- outputs=list(pages.values()) + [user_data_state, current_question_index, current_test_dimension_index, current_question_selections, test_results, test_audio]
699
- )
700
- 这些修改将实现:
701
-
702
- 样例音频文件不会被选为测试音频(因为它们的计数值被设为999)
703
-
704
- 在提交答案或返回主界面时,当前播放的音频会被停止
705
-
706
- 切换到新题目时,前一个音频会自动停止
707
-
708
- 注意:Gradio 的音频组件设置为 None 时会停止当前播放。
709
-
710
- import gradio as gr
711
- import os
712
- import json
713
- import pandas as pd
714
- import random
715
- import shutil
716
- import time
717
- import collections
718
- from datasets import load_dataset
719
- from huggingface_hub import HfApi
720
-
721
- dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")
722
-
723
- target_audio_dir = "/home/user/app/audio"
724
- os.makedirs(target_audio_dir, exist_ok=True)
725
- COUNT_JSON_PATH = "/home/user/app/count.json"
726
- COUNT_JSON_REPO_PATH = "submissions/count.json"
727
-
728
- local_audio_paths = []
729
-
730
- for item in dataset:
731
- src_path = item["audio"]["path"]
732
- if src_path and os.path.exists(src_path):
733
- filename = os.path.basename(src_path)
734
- dst_path = os.path.join(target_audio_dir, filename)
735
- if not os.path.exists(dst_path):
736
- shutil.copy(src_path, dst_path)
737
- local_audio_paths.append(dst_path)
738
-
739
- all_data_audio_paths = local_audio_paths
740
-
741
- sample1_audio_path = local_audio_paths[0]
742
- # sample1_audio_path = next((p for p in all_data_audio_paths if p.endswith("sample1.wav")), None)
743
- print(sample1_audio_path)
744
-
745
- # ==============================================================================
746
- # 数据定义 (Data Definition)
747
- # ==============================================================================
748
-
749
- DIMENSIONS_DATA = [
750
- {
751
- "title": "语义和语用特征",
752
- "audio": sample1_audio_path,
753
- "sub_dims": [
754
- "记忆一致性:人类会选择性记忆并自我修正错误;机器出现前后矛盾时无法自主察觉或修正(如:遗忘关键细节但坚持错误答案)", "逻辑连贯性:人类逻辑自然流畅,允许合理跳跃;机器逻辑转折生硬或自相矛盾(如:突然切换话题无过渡)",
755
- "读音正确性:人类大部分情况下发音正确、自然,会结合语境使用、区分多音字;机器存在不自然的发音错误,且对多音字语境的判断能力有限", "多语言混杂:人类多语言混杂流畅,且带有语境色彩;机器多语言混杂生硬,无语言切换逻辑",
756
- "语言不精确性:人类说话时会使用带有犹豫语气的表达,且会出现自我修正的行为;机器的回应通常不存在模糊表达,回答准确、肯定", "填充词使用:人类填充词(如‘嗯’‘那个’)随机且带有思考痕迹;机器填充词规律重复或完全缺失",
757
- "隐喻与语用用意:人类会使用隐喻、反语、委婉来表达多重含义;机器表达直白,仅能字面理解或生硬使用修辞,缺乏语义多样性"
758
- ],
759
- "reference_scores": [5, 5, 3, 3, 5, 5, 3]
760
- },
761
- {
762
- "title": "非生理性副语言特征",
763
- "audio": sample1_audio_path,
764
- "sub_dims": [
765
- "节奏:人类语速随语义起伏,偶尔卡顿或犹豫;机器节奏均匀,几乎无停顿或停顿机械", "语调:人类在表达疑问、惊讶、强调时,音调会自然上扬或下降;机器语调单一或变化过于规律,不符合语境",
766
- "重读:人类会有意识地加强重要词语,从而突出信息焦点;机器的词语强度一致性强,或出现强调部位异常", "辅助性发声:人类会发出符合语境的非语言声音,如笑声、叹气等;机器的辅助性发声语义错误,或完全无辅助性发声"
767
- ],
768
- "reference_scores": [4, 5, 4, 3]
769
- },
770
- {
771
- "title": "生理性副语言特征",
772
- "audio": sample1_audio_path,
773
- "sub_dims": [
774
- "微生理杂音:人类说话存在呼吸声、口水音、气泡音等无意识发声,且自然地穿插在语流节奏当中;机器没有微生理杂音、语音过于干净,或添加不自然杂音",
775
- "发音不稳定性:人类存在个体化波动(如偶尔咬字不清、鼻音丰富等);机器发音过于标准或统一,缺乏个性", "口音:人类存在自然的地区口音或语音特征;机器元音辅音机械拼接,或口音模式统一无差异"
776
- ],
777
- "reference_scores": [3, 3, 4]
778
- },
779
- {
780
- "title": "机械人格",
781
- "audio": sample1_audio_path,
782
- "sub_dims": [
783
- "谄媚现象:人类会根据语境判断是否同意,有时提出不同意见;机器频繁同意、感谢、道歉,过度认同对方观点,缺乏真实互动感",
784
- "书面化表达:人类表达灵活,;机器回应句式工整、规范,内容过于书面化、用词泛泛"
785
- ],
786
- "reference_scores": [5, 5]
787
- },
788
- {
789
- "title": "情感表达",
790
- "audio": sample1_audio_path,
791
- "sub_dims": [
792
- "语义层面:人类能对悲伤、开心等语境有符合人类的情感反应;机器回应情绪淡漠,或情感词泛泛、脱离语境",
793
- "声学层面:人类音调、音量随情绪动态变化;机器情感语调模式化,或与语境不符"
794
- ],
795
- "reference_scores": [3, 3]
796
- }
797
- ]
798
-
799
- DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
800
-
801
  def load_or_initialize_count_json(audio_paths):
802
  if os.path.exists(COUNT_JSON_PATH):
803
  with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
 
89
 
90
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def load_or_initialize_count_json(audio_paths):
93
  if os.path.exists(COUNT_JSON_PATH):
94
  with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f: