intersteller2887 committed on
Commit
17c7808
·
verified ·
1 Parent(s): 6f51e22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +709 -0
app.py CHANGED
@@ -89,6 +89,715 @@ DIMENSIONS_DATA = [
89
 
90
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def load_or_initialize_count_json(audio_paths):
93
  if os.path.exists(COUNT_JSON_PATH):
94
  with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
 
89
 
90
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
91
 
92
def load_or_initialize_count_json(audio_paths):
    """Load the per-file play-count JSON, adding a zero entry for any new audio file.

    Reads COUNT_JSON_PATH if present (preserving on-disk key order), registers
    every not-yet-known basename with a count of 0, and persists the file
    whenever something changed or the file did not exist yet.
    """
    if os.path.exists(COUNT_JSON_PATH):
        with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
            # object_pairs_hook keeps the original on-disk ordering intact
            count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
    else:
        count_data = collections.OrderedDict()

    # Basenames that are not tracked yet, in input order.
    new_names = [os.path.basename(p) for p in audio_paths
                 if os.path.basename(p) not in count_data]
    for name in new_names:
        count_data[name] = 0

    if new_names or not os.path.exists(COUNT_JSON_PATH):
        with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
            # Write back preserving insertion order.
            json.dump(count_data, f, indent=4, ensure_ascii=False)

    return count_data
113
+
114
def append_cache_buster(audio_path):
    """Append a millisecond-resolution timestamp query parameter.

    Forces browsers/players to re-fetch the file instead of serving a
    cached copy.
    """
    millis = int(time.time() * 1000)
    return "{}?t={}".format(audio_path, millis)
116
+
117
def sample_audio_paths(audio_paths, count_data, k=5, max_count=1):
    """Randomly pick k audio paths whose play count is still below max_count.

    Increments the count of every selected file and persists the updated
    counts to COUNT_JSON_PATH.

    Args:
        audio_paths: candidate audio file paths.
        count_data: mapping of basename -> times already served (mutated).
        k: number of clips to draw.
        max_count: counts at or above this value exclude a file.

    Returns:
        (selected_paths, count_data)

    Raises:
        ValueError: if fewer than k eligible paths remain.
    """
    eligible_paths = [p for p in audio_paths
                      if count_data.get(os.path.basename(p), 0) < max_count]

    if len(eligible_paths) < k:
        raise ValueError(f"可用音频数量不足(只剩 {len(eligible_paths)} 条 count<{max_count} 的音频),无法抽取 {k} 条")

    # BUG FIX: the original called random.seed(int(time.time())) before
    # sampling. Second-resolution seeding makes two calls within the same
    # second return IDENTICAL "random" samples, and it clobbers global RNG
    # state for the rest of the process. random.sample is already driven by
    # a properly seeded RNG; it also never mutates its input, so the
    # defensive .copy() was dropped as well.
    selected = random.sample(eligible_paths, k)

    for path in selected:
        filename = os.path.basename(path)
        count_data[filename] = count_data.get(filename, 0) + 1

    # Persist immediately so a crash doesn't lose the increments.
    with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(count_data, f, indent=4, ensure_ascii=False)

    return selected, count_data
137
+
138
# Initialize the per-file play counts and draw this session's 5 test clips.
count_data = load_or_initialize_count_json(all_data_audio_paths)
selected_audio_paths, updated_count_data = sample_audio_paths(all_data_audio_paths, count_data, k=5)


# One question per sampled clip; "desc" is currently unused by the UI.
QUESTION_SET = [
    {"audio": path, "desc": f"这是音频文件 {os.path.basename(path)} 的描述"}
    for path in selected_audio_paths
]

# Widest sub-dimension list across all dimensions; sliders are pre-built to
# this count and selectively hidden per dimension.
MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
148
+
149
# ==============================================================================
# Function Definitions
# ==============================================================================
def start_challenge():
    # Hide the welcome page, show the participant-info page.
    return gr.update(visible=False), gr.update(visible=True)
154
+
155
def toggle_education_other(choice):
    """Show and enable the free-text education box only for the 'other' option."""
    show_box = choice == "其他(请注明)"
    return gr.update(visible=show_box, interactive=show_box, value="")
158
+
159
def check_info_complete(username, age, gender, education, education_other, ai_experience):
    """Enable the submit button only once every required field is filled in."""
    # All base fields must be non-empty (username after stripping whitespace).
    if not (username.strip() and age and gender and education and ai_experience):
        return gr.update(interactive=False)
    # "Other" education additionally requires the free-text box.
    if education == "其他(请注明)" and not education_other.strip():
        return gr.update(interactive=False)
    return gr.update(interactive=True)
165
+
166
def show_sample_page_and_init(username, age, gender, education, education_other, ai_experience, user_data):
    """Store the participant's profile and switch from the info page to the sample page.

    Returns page-visibility updates, the mutated user_data state, the first
    dimension title, then every update produced by update_sample_view — the
    order must match sample_init_outputs exactly.
    """
    # Substitute the free-text answer when "other" was chosen.
    final_edu = education_other if education == "其他(请注明)" else education
    user_data.update({
        "username": username.strip(),
        "age": age,
        "gender": gender,
        "education": final_edu,
        "ai_experience": ai_experience
    })
    first_dim_title = DIMENSION_TITLES[0]

    # Pre-populate the sample page with the first dimension's sliders.
    initial_updates = update_sample_view(first_dim_title)

    return [
        gr.update(visible=False), gr.update(visible=True), user_data, first_dim_title
    ] + initial_updates
182
+
183
def update_sample_view(dimension_title):
    """Refresh the sample page for the chosen dimension.

    Returns [audio, interactive_view, reference_view, reference_btn] updates
    followed by MAX_SUB_DIMS practice-slider updates and MAX_SUB_DIMS
    reference-slider updates, in that exact order.
    """
    dim_data = next((d for d in DIMENSIONS_DATA if d["title"] == dimension_title), None)
    if dim_data:
        audio_up = gr.update(value=dim_data["audio"])
        # audio_up = gr.update(value=append_cache_buster(dim_data["audio"]))
        interactive_view_up = gr.update(visible=True)
        reference_view_up = gr.update(visible=False)
        reference_btn_up = gr.update(value="参考")
        sample_slider_ups = []
        ref_slider_ups = []
        scores = dim_data.get("reference_scores", [])

        for i in range(MAX_SUB_DIMS):
            if i < len(dim_data['sub_dims']):
                label = dim_data['sub_dims'][i]
                # Missing reference scores fall back to 0.
                score = scores[i] if i < len(scores) else 0
                # Practice sliders reset to the neutral score 3.
                sample_slider_ups.append(gr.update(visible=True, label=label, value=3))
                ref_slider_ups.append(gr.update(visible=True, label=label, value=score))
            else:
                # Hide the spare sliders beyond this dimension's sub-dims.
                sample_slider_ups.append(gr.update(visible=False, value=0))
                ref_slider_ups.append(gr.update(visible=False, value=0))
        return [audio_up, interactive_view_up, reference_view_up, reference_btn_up] + sample_slider_ups + ref_slider_ups
    # Unknown title: emit no-op updates of matching arity.
    empty_updates = [gr.update()] * 4
    slider_empty_updates = [gr.update()] * (MAX_SUB_DIMS * 2)
    return empty_updates + slider_empty_updates
208
+
209
def update_test_dimension_view(d_idx, selections):
    """Build the UI updates for showing dimension d_idx of the current question.

    Previously saved slider values are restored from `selections`
    (defaulting to 3, the neutral score). Returns [progress markdown,
    prev-button, next-button] followed by MAX_SUB_DIMS slider updates.
    """
    dimension = DIMENSIONS_DATA[d_idx]
    progress_d = f"维度 {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dimension['title']}**"

    existing_scores = selections.get(dimension['title'], {})

    slider_updates = []
    for i in range(MAX_SUB_DIMS):
        if i < len(dimension['sub_dims']):
            sub_dim_label = dimension['sub_dims'][i]
            value = existing_scores.get(sub_dim_label, 3)
            slider_updates.append(gr.update(visible=True, label=sub_dim_label, value=value))
        else:
            # Hide the spare sliders beyond this dimension's sub-dims.
            slider_updates.append(gr.update(visible=False, value=0))

    # "prev" is disabled on the first dimension; "next" becomes the
    # go-to-final-judgment button on the last one.
    prev_btn_update = gr.update(interactive=(d_idx > 0))
    next_btn_update = gr.update(
        value="进入最终判断" if d_idx == len(DIMENSIONS_DATA) - 1 else "下一维度",
        interactive=True
    )

    return [gr.update(value=progress_d), prev_btn_update, next_btn_update] + slider_updates
231
+
232
def init_test_question(user_data, q_idx):
    """Prepare question q_idx: reset to dimension 0 with empty selections.

    The returned tuple order must match test_init_outputs exactly.
    """
    d_idx = 0
    question = QUESTION_SET[q_idx]
    progress_q = f"第 {q_idx + 1} / {len(QUESTION_SET)} 题"

    # Fresh view of the first dimension (no saved scores yet).
    initial_updates = update_test_dimension_view(d_idx, {})
    dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
    slider_updates = initial_updates[3:]

    return (
        gr.update(visible=False),   # pretest_page
        gr.update(visible=True),    # test_page
        gr.update(visible=False),   # final_judgment_page
        gr.update(visible=False),   # result_page
        q_idx, d_idx, {},
        gr.update(value=progress_q),
        dim_title_update,
        gr.update(value=question['audio']),
        # gr.update(value=append_cache_buster(question['audio'])),
        prev_btn_update,
        next_btn_update,
        gr.update(value=None),  # BUG FIX: Changed from "" to None to correctly clear the radio button
        gr.update(interactive=False),
    ) + tuple(slider_updates)
256
+
257
def navigate_dimensions(direction, q_idx, d_idx, selections, *slider_values):
    """Save the current dimension's slider scores, then move prev/next.

    Moving "next" past the last dimension switches to the final-judgment
    page instead of advancing. The returned tuple order must match
    nav_outputs exactly.

    Args:
        direction: "next" or "prev".
        slider_values: all MAX_SUB_DIMS slider values; only the first
            len(sub_dims) of the current dimension are meaningful.
    """
    current_dim_data = DIMENSIONS_DATA[d_idx]
    current_sub_dims = current_dim_data['sub_dims']
    scores = {sub_dim: slider_values[i] for i, sub_dim in enumerate(current_sub_dims)}
    selections[current_dim_data['title']] = scores

    new_d_idx = d_idx + (1 if direction == "next" else -1)

    if direction == "next" and d_idx == len(DIMENSIONS_DATA) - 1:
        # All dimensions scored: reveal the final-judgment page with a
        # cleared, disabled submit flow.
        return (
            gr.update(visible=False),
            gr.update(visible=True),
            q_idx, new_d_idx, selections,
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(interactive=True),
            gr.update(interactive=False),
            gr.update(interactive=False),
            gr.update(interactive=False),
        ) + (gr.update(),) * MAX_SUB_DIMS

    else:
        view_updates = update_test_dimension_view(new_d_idx, selections)
        dim_title_update, prev_btn_update, next_btn_update = view_updates[:3]
        slider_updates = view_updates[3:]

        return (
            gr.update(), gr.update(),
            q_idx, new_d_idx, selections,
            gr.update(),
            dim_title_update,
            gr.update(),
            gr.update(),
            gr.update(),
            prev_btn_update,
            next_btn_update,
        ) + tuple(slider_updates)
295
+
296
def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
    """Record the finished question, then show the next one or the results page.

    On the final question the accumulated results (plus the updated play
    counts) are uploaded via save_all_results_to_file. The returned tuple
    order must match full_outputs_with_results exactly.
    """
    selections["final_choice"] = final_choice

    final_question_result = {
        "question_id": q_idx, "audio_file": QUESTION_SET[q_idx]['audio'],
        "selections": selections
    }
    all_results.append(final_question_result)

    q_idx += 1

    if q_idx < len(QUESTION_SET):
        # More questions left: reset the test page for the next one.
        init_q_updates = init_test_question(user_data, q_idx)
        return init_q_updates + (all_results, gr.update(value=""))
    else:
        # Last question: build the markdown summary shown on the result page.
        result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
        for res in all_results:
            # result_str += f"\n#### 题目: {res['audio_file']}\n"
            result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
            for dim_title, dim_data in res['selections'].items():
                if dim_title == 'final_choice': continue
                result_str += f"- **{dim_title}**:\n"
                for sub_dim, score in dim_data.items():
                    result_str += f" - *{sub_dim[:20]}...*: {score}/5\n"

        # save_all_results_to_file(all_results, user_data)
        save_all_results_to_file(all_results, user_data, count_data=updated_count_data)

        return (
            gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
            q_idx, d_idx, {},
            gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
            gr.update(), gr.update(),
        ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
330
+
331
def save_all_results_to_file(all_results, user_data, count_data=None):
    """Upload the submission JSON (and optionally count.json) to the HF dataset repo.

    Requires the HF_TOKEN environment variable; without it the upload is
    skipped with a log message. Upload errors are caught and logged, never
    raised, so the UI flow is not interrupted.
    """
    repo_id = "intersteller2887/Turing-test-dataset"
    username = user_data.get("username", "user")
    timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
    submission_filename = f"submissions_{username}_{timestamp}.json"

    final_data_package = {
        "user_info": user_data,
        "results": all_results
    }
    json_string = json.dumps(final_data_package, ensure_ascii=False, indent=4)
    hf_token = os.getenv("HF_TOKEN")

    if not hf_token:
        print("HF_TOKEN not found. Cannot upload to the Hub.")
        return

    try:
        api = HfApi()

        # Upload the submission file (as an in-memory bytes payload).
        api.upload_file(
            path_or_fileobj=bytes(json_string, "utf-8"),
            path_in_repo=f"submissions/{submission_filename}",
            repo_id=repo_id,
            repo_type="dataset",
            token=hf_token,
            commit_message=f"Add new submission from {username}"
        )
        print(f"上传成功: {submission_filename}")

        # Upload count.json as well, when provided (written to disk first).
        if count_data:
            with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
                json.dump(count_data, f, indent=4, ensure_ascii=False)

            api.upload_file(
                path_or_fileobj=COUNT_JSON_PATH,
                path_in_repo=COUNT_JSON_REPO_PATH,
                repo_id=repo_id,
                repo_type="dataset",
                token=hf_token,
                commit_message=f"Update count.json after submission by {username}"
            )
            print("count.json 上传成功")

    except Exception as e:
        # Best-effort upload: log and continue.
        print(f"上传出错: {e}")
379
+
380
+
381
def toggle_reference_view(current):
    """Flip between the interactive scoring view and the reference-answer view.

    `current` is the button's own label: "参考" means the interactive view is
    showing, so switch to the reference view (and vice versa).
    """
    showing_interactive = current == "参考"
    return (
        gr.update(visible=not showing_interactive),
        gr.update(visible=showing_interactive),
        gr.update(value="返回" if showing_interactive else "参考"),
    )
386
+
387
def back_to_welcome():
    """Reset every page and all session state back to the welcome screen."""
    return (
        gr.update(visible=True),   # welcome_page
        gr.update(visible=False),  # info_page
        gr.update(visible=False),  # sample_page
        gr.update(visible=False),  # pretest_page
        gr.update(visible=False),  # test_page
        gr.update(visible=False),  # final_judgment_page
        gr.update(visible=False),  # result_page
        {},                        # user_data_state
        0,                         # current_question_index
        0,                         # current_test_dimension_index
        {},                        # current_question_selections
        []                         # test_results
    )
402
+
403
# ==============================================================================
# Gradio UI Definition
# ==============================================================================
with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px !important}") as demo:
    # Per-session state (one copy per browser session).
    user_data_state = gr.State({})
    current_question_index = gr.State(0)
    current_test_dimension_index = gr.State(0)
    current_question_selections = gr.State({})
    test_results = gr.State([])

    # One column per "page"; navigation toggles visibility.
    welcome_page = gr.Column(visible=True)
    info_page = gr.Column(visible=False)
    sample_page = gr.Column(visible=False)
    pretest_page = gr.Column(visible=False)
    test_page = gr.Column(visible=False)
    final_judgment_page = gr.Column(visible=False)
    result_page = gr.Column(visible=False)
    pages = {
        "welcome": welcome_page, "info": info_page, "sample": sample_page,
        "pretest": pretest_page, "test": test_page, "final_judgment": final_judgment_page,
        "result": result_page
    }

    with welcome_page:
        gr.Markdown("# AI 识破者\n你将听到一系列对话,请判断哪个回应者是 AI。")
        start_btn = gr.Button("开始挑战", variant="primary")

    with info_page:
        gr.Markdown("## 请提供一些基本信息")
        username_input = gr.Textbox(label="用户名", placeholder="请输入一个昵称或代号")
        age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
        gender_input = gr.Radio(["男", "女", "其他"], label="性别")
        education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他(请注明)"], label="学历")
        education_other_input = gr.Textbox(label="请填写你的学历", visible=False, interactive=False)
        ai_experience_input = gr.Radio(["从未使用过", "偶尔接触(如看别人用)", "使用过几次,了解基本功能", "经常使用,有一定操作经验", "非常熟悉,深入使用过多个 AI 工具"], label="对 AI 工具的熟悉程度")
        submit_info_btn = gr.Button("提交并开始学习样例", variant="primary", interactive=False)

    with sample_page:

        gr.Markdown("## 样例分析\n请选择一个维度进行学习和打分练习。所有维度共用同一个样例音频。")
        sample_dimension_selector = gr.Radio(DIMENSION_TITLES, label="选择学习维度", value=DIMENSION_TITLES[0])
        with gr.Row():
            with gr.Column(scale=1):
                sample_audio = gr.Audio(label="样例音频", value=DIMENSIONS_DATA[0]["audio"])
            with gr.Column(scale=2):
                with gr.Column(visible=True) as interactive_view:
                    gr.Markdown("#### 请为以下特征打分 (1-5分。1对应机器,5对应人类)")
                    sample_sliders = [gr.Slider(minimum=1, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
                with gr.Column(visible=False) as reference_view:
                    gr.Markdown("### 参考答案解析 (1-5分。1对应机器,5对应人类)")
                    reference_sliders = [gr.Slider(minimum=1, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=False) for i in range(MAX_SUB_DIMS)]
        with gr.Row():
            reference_btn = gr.Button("参考")
            go_to_pretest_btn = gr.Button("我明白了,开始测试", variant="primary")

    with pretest_page:
        # BUG FIX in the instructions markdown: closed the previously
        # unbalanced "**" around "从1到5打分" and removed the duplicated
        # "做出做出" in the final-judgment line.
        gr.Markdown("## 测试说明\n"
                    "- 对于每一道题,你都需要对全部 **5 个维度** 进行评估。\n"
                    "- 在每个维度下,请为出现的每个特征 **从1到5打分**。\n"
                    "- **评分解释如下:**\n"
                    " - **1 分:极度符合机器特征**;\n"
                    " - **2 分:较为符合机器特征**;\n"
                    " - **3 分:无明显人类或机器倾向或特征无体现**;\n"
                    " - **4 分:较为符合人类特征**;\n"
                    " - **5 分:极度符合人类特征**。\n"
                    "- 完成所有维度后,请根据整体印象对回应方的身份做出“人类”或“机器人”的 **最终判断**。\n"
                    "- 你可以使用“上一维度”和“下一维度”按钮在5个维度间自由切换和修改分数。")
        go_to_test_btn = gr.Button("开始测试", variant="primary")

    with test_page:
        gr.Markdown("## 正式测试")
        question_progress_text = gr.Markdown()
        test_dimension_title = gr.Markdown()
        test_audio = gr.Audio(label="测试音频")
        gr.Markdown("--- \n ### 请为以下特征打分 (1-5分。1对应机器,5对应人类)")
        test_sliders = [gr.Slider(minimum=1, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
        with gr.Row():
            prev_dim_btn = gr.Button("上一维度")
            next_dim_btn = gr.Button("下一维度", variant="primary")

    with final_judgment_page:
        gr.Markdown("## 最终判断")
        gr.Markdown("您已完成对所有维度的评分。请根据您的综合印象,做出最终判断。")
        final_human_robot_radio = gr.Radio(["👤 人类", "🤖 机器人"], label="请判断回应者类型 (必填)")
        submit_final_answer_btn = gr.Button("提交本题答案", variant="primary", interactive=False)

    with result_page:
        gr.Markdown("## 测试完成")
        result_text = gr.Markdown()
        back_to_welcome_btn = gr.Button("返回主界面", variant="primary")

    # ==============================================================================
    # Event Binding & output-list definitions
    # ==============================================================================
    # NOTE: these lists define the positional contracts that the handler
    # functions' return tuples must match element-for-element.
    sample_init_outputs = [
        info_page, sample_page, user_data_state, sample_dimension_selector,
        sample_audio, interactive_view, reference_view, reference_btn
    ] + sample_sliders + reference_sliders

    test_init_outputs = [
        pretest_page, test_page, final_judgment_page, result_page,
        current_question_index, current_test_dimension_index, current_question_selections,
        question_progress_text, test_dimension_title, test_audio,
        prev_dim_btn, next_dim_btn,
        final_human_robot_radio, submit_final_answer_btn,
    ] + test_sliders

    nav_inputs = [current_question_index, current_test_dimension_index, current_question_selections] + test_sliders
    nav_outputs = [
        test_page, final_judgment_page,
        current_question_index, current_test_dimension_index, current_question_selections,
        question_progress_text, test_dimension_title, test_audio,
        final_human_robot_radio, submit_final_answer_btn,
        prev_dim_btn, next_dim_btn,
    ] + test_sliders

    full_outputs_with_results = test_init_outputs + [test_results, result_text]

    start_btn.click(fn=start_challenge, outputs=[welcome_page, info_page])

    # Re-validate the info form whenever any field changes.
    for comp in [age_input, gender_input, education_input, education_other_input, ai_experience_input]:
        comp.change(
            fn=check_info_complete,
            inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input],
            outputs=submit_info_btn
        )

    education_input.change(fn=toggle_education_other, inputs=education_input, outputs=education_other_input)

    submit_info_btn.click(
        fn=show_sample_page_and_init,
        inputs=[username_input, age_input, gender_input, education_input, education_other_input, ai_experience_input, user_data_state],
        outputs=sample_init_outputs
    )

    sample_dimension_selector.change(
        fn=update_sample_view,
        inputs=sample_dimension_selector,
        outputs=[sample_audio, interactive_view, reference_view, reference_btn] + sample_sliders + reference_sliders
    )

    reference_btn.click(
        fn=toggle_reference_view,
        inputs=reference_btn,
        outputs=[interactive_view, reference_view, reference_btn]
    )

    go_to_pretest_btn.click(lambda: (gr.update(visible=False), gr.update(visible=True)), outputs=[sample_page, pretest_page])

    go_to_test_btn.click(
        fn=lambda user: init_test_question(user, 0) + ([], gr.update()),
        inputs=[user_data_state],
        outputs=full_outputs_with_results
    )

    prev_dim_btn.click(
        fn=lambda q,d,s, *sliders: navigate_dimensions("prev", q,d,s, *sliders),
        inputs=nav_inputs, outputs=nav_outputs
    )

    next_dim_btn.click(
        fn=lambda q,d,s, *sliders: navigate_dimensions("next", q,d,s, *sliders),
        inputs=nav_inputs, outputs=nav_outputs
    )

    final_human_robot_radio.change(
        fn=lambda choice: gr.update(interactive=bool(choice)),
        inputs=final_human_robot_radio,
        outputs=submit_final_answer_btn
    )

    submit_final_answer_btn.click(
        fn=submit_question_and_advance,
        inputs=[current_question_index, current_test_dimension_index, current_question_selections, final_human_robot_radio, test_results, user_data_state],
        outputs=full_outputs_with_results
    )

    back_to_welcome_btn.click(fn=back_to_welcome, outputs=list(pages.values()) + [user_data_state, current_question_index, current_test_dimension_index, current_question_selections, test_results])
581
+
582
# ==============================================================================
# Entry Point
# ==============================================================================
if __name__ == "__main__":
    # Ensure a local audio dir exists relative to the working directory.
    # NOTE(review): earlier setup code uses the absolute /home/user/app/audio;
    # this relative "audio" may be a different directory — confirm intent.
    if not os.path.exists("audio"):
        os.makedirs("audio")
    if "SPACE_ID" in os.environ:
        # Running inside a Hugging Face Space: sanity-check the audio files.
        print("Running in a Hugging Face Space, checking for audio files...")
        all_files = [q["audio"] for q in QUESTION_SET] + [d["audio"] for d in DIMENSIONS_DATA]
        for audio_file in set(all_files):
            if not os.path.exists(audio_file):
                print(f"⚠️ Warning: Audio file not found: {audio_file}")

    demo.launch(debug=True)
596
+ 我还需要添加两个功能:第一,sample的音频文件直接计数为max(防止被抽取)
597
+ 第二,在最后点击提交按钮时,需要终止可能正在播放的音频
598
+ 我将帮你添加这两个功能:
599
+
600
+ 首先修改 load_or_initialize_count_json 函数,将样例音频文件直接设置为最大值:
601
+
602
+ python
603
+ def load_or_initialize_count_json(audio_paths):
604
+ if os.path.exists(COUNT_JSON_PATH):
605
+ with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
606
+ # 使用 object_pairs_hook 保持原始顺序
607
+ count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
608
+ else:
609
+ count_data = collections.OrderedDict()
610
+
611
+ updated = False
612
+
613
+ # 获取所有样例音频文件名
614
+ sample_audio_files = {os.path.basename(d["audio"]) for d in DIMENSIONS_DATA}
615
+
616
+ for path in audio_paths:
617
+ filename = os.path.basename(path)
618
+ if filename not in count_data:
619
+ # 如果是样例音频,直接设置为最大值
620
+ if filename in sample_audio_files:
621
+ count_data[filename] = 999 # 设置为很大的值,确保不会被选中
622
+ else:
623
+ count_data[filename] = 0
624
+ updated = True
625
+
626
+ if updated or not os.path.exists(COUNT_JSON_PATH):
627
+ with open(COUNT_JSON_PATH, "w", encoding="utf-8") as f:
628
+ # 确保写入时也保持顺序
629
+ json.dump(count_data, f, indent=4, ensure_ascii=False)
630
+
631
+ return count_data
632
+ 添加音频停止功能,修改 Gradio 界面部分:
633
+
634
+ 在 submit_question_and_advance 函数中添加停止音频的逻辑:
635
+
636
+ python
637
+ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_results, user_data):
638
+ selections["final_choice"] = final_choice
639
+
640
+ final_question_result = {
641
+ "question_id": q_idx, "audio_file": QUESTION_SET[q_idx]['audio'],
642
+ "selections": selections
643
+ }
644
+ all_results.append(final_question_result)
645
+
646
+ q_idx += 1
647
+
648
+ if q_idx < len(QUESTION_SET):
649
+ init_q_updates = init_test_question(user_data, q_idx)
650
+ # 添加停止音频的逻辑
651
+ init_q_updates = list(init_q_updates)
652
+ init_q_updates[9] = None # test_audio 的位置设为 None 来停止播放
653
+ return tuple(init_q_updates) + (all_results, gr.update(value=""))
654
+ else:
655
+ result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
656
+ for res in all_results:
657
+ result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
658
+ for dim_title, dim_data in res['selections'].items():
659
+ if dim_title == 'final_choice': continue
660
+ result_str += f"- **{dim_title}**:\n"
661
+ for sub_dim, score in dim_data.items():
662
+ result_str += f" - *{sub_dim[:20]}...*: {score}/5\n"
663
+
664
+ save_all_results_to_file(all_results, user_data, count_data=updated_count_data)
665
+
666
+ return (
667
+ gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
668
+ q_idx, d_idx, {},
669
+ gr.update(), gr.update(), None, gr.update(), gr.update(), # 设置 test_audio 为 None 来停止播放
670
+ gr.update(), gr.update(),
671
+ ) + (gr.update(),) * MAX_SUB_DIMS + (all_results, result_str)
672
+ 在返回主界面时也停止音频:
673
+
674
+ 修改 back_to_welcome 函数:
675
+
676
+ python
677
+ def back_to_welcome():
678
+ return (
679
+ gr.update(visible=True), # welcome_page
680
+ gr.update(visible=False), # info_page
681
+ gr.update(visible=False), # sample_page
682
+ gr.update(visible=False), # pretest_page
683
+ gr.update(visible=False), # test_page
684
+ gr.update(visible=False), # final_judgment_page
685
+ gr.update(visible=False), # result_page
686
+ {}, # user_data_state
687
+ 0, # current_question_index
688
+ 0, # current_test_dimension_index
689
+ {}, # current_question_selections
690
+ [], # test_results
691
+ None # 停止音频
692
+ )
693
+ 更新 back_to_welcome_btn 的点击事件:
694
+
695
+ python
696
+ back_to_welcome_btn.click(
697
+ fn=back_to_welcome,
698
+ outputs=list(pages.values()) + [user_data_state, current_question_index, current_test_dimension_index, current_question_selections, test_results, test_audio]
699
+ )
700
+ 这些修改将实现:
701
+
702
+ 样例音频文件不会被选为测试音频(因为它们的计数值被设为999)
703
+
704
+ 在提交答案或返回主界面时,当前播放的音频会被停止
705
+
706
+ 切换到新题目时,前一个音频会自动停止
707
+
708
+ 注意:Gradio 的音频组件设置为 None 时会停止当前播放。
709
+
710
+ import gradio as gr
711
+ import os
712
+ import json
713
+ import pandas as pd
714
+ import random
715
+ import shutil
716
+ import time
717
+ import collections
718
+ from datasets import load_dataset
719
+ from huggingface_hub import HfApi
720
+
721
# Download the dataset and stage every audio file into the app's own directory.
dataset = load_dataset("intersteller2887/Turing-test-dataset", split="train")

target_audio_dir = "/home/user/app/audio"
os.makedirs(target_audio_dir, exist_ok=True)
COUNT_JSON_PATH = "/home/user/app/count.json"          # local play-count file
COUNT_JSON_REPO_PATH = "submissions/count.json"        # its path in the HF repo

local_audio_paths = []

for item in dataset:
    src_path = item["audio"]["path"]
    if src_path and os.path.exists(src_path):
        filename = os.path.basename(src_path)
        dst_path = os.path.join(target_audio_dir, filename)
        # Copy once; later runs reuse the staged file.
        if not os.path.exists(dst_path):
            shutil.copy(src_path, dst_path)
        local_audio_paths.append(dst_path)

all_data_audio_paths = local_audio_paths

# First staged clip doubles as the worked sample on the sample page.
# NOTE(review): assumes the dataset yields at least one usable audio file —
# an empty dataset raises IndexError here. Confirm upstream guarantees.
sample1_audio_path = local_audio_paths[0]
# sample1_audio_path = next((p for p in all_data_audio_paths if p.endswith("sample1.wav")), None)
print(sample1_audio_path)
744
+
745
# ==============================================================================
# Data Definition
# ==============================================================================
# One entry per evaluation dimension. All dimensions share the same sample
# audio; "sub_dims" are the per-dimension slider labels and
# "reference_scores" the worked-answer values shown on the sample page
# (parallel to sub_dims).
DIMENSIONS_DATA = [
    {
        "title": "语义和语用特征",
        "audio": sample1_audio_path,
        "sub_dims": [
            "记忆一致性:人类会选择性记忆并自我修正错误;机器出现前后矛盾时无法自主察觉或修正(如:遗忘关键细节但坚持错误答案)", "逻辑连贯性:人类逻辑自然流畅,允许合理跳跃;机器逻辑转折生硬或自相矛盾(如:突然切换话题无过渡)",
            "读音正确性:人类大部分情况下发音正确、自然,会结合语境使用、区分多音字;机器存在不自然的发音错误,且对多音字语境的判断能力有限", "多语言混杂:人类多语言混杂流畅,且带有语境色彩;机器多语言混杂生硬,无语言切换逻辑",
            "语言不精确性:人类说话时会使用带有犹豫语气的表达,且会出现自我修正的行为;机器的回应通常不存在模糊表达,回答准确、肯定", "填充词使用:人类填充词(如‘嗯’‘那个’)随机且带有思考痕迹;机器填充词规律重复或完全缺失",
            "隐喻与语用用意:人类会使用隐喻、反语、委婉来表达多重含义;机器表达直白,仅能字面理解或生硬使用修辞,缺乏语义多样性"
        ],
        "reference_scores": [5, 5, 3, 3, 5, 5, 3]
    },
    {
        "title": "非生理性副语言特征",
        "audio": sample1_audio_path,
        "sub_dims": [
            "节奏:人类语速随语义起伏,偶尔卡顿或犹豫;机器节奏均匀,几乎无停顿或停顿机械", "语调:人类在表达疑问、惊讶、强调时,音调会自然上扬或下降;机器语调单一或变化过于规律,不符合语境",
            "重读:人类会有意识地加强重要词语,从而突出信息焦点;机器的词语强度一致性强,或出现强调部位异常", "辅助性发声:人类会发出符合语境的非语言声音,如笑声、叹气等;机器的辅助性发声语义错误,或完全无辅助性发声"
        ],
        "reference_scores": [4, 5, 4, 3]
    },
    {
        "title": "生理性副语言特征",
        "audio": sample1_audio_path,
        "sub_dims": [
            "微生理杂音:人类说话存在呼吸声、口水音、气泡音等无意识发声,且自然地穿插在语流节奏当中;机器没有微生理杂音、语音过于干净,或添加不自然杂音",
            "发音不稳定性:人类存在个体化波动(如偶尔咬字不清、鼻音丰富等);机器发音过于标准或统一,缺乏个性", "口音:人类存在自然的地区口音或语音特征;机器元音辅音机械拼接,或口音模式统一无差异"
        ],
        "reference_scores": [3, 3, 4]
    },
    {
        "title": "机械人格",
        "audio": sample1_audio_path,
        "sub_dims": [
            "谄媚现象:人类会根据语境判断是否同意,有时提出不同意见;机器频繁同意、感谢、道歉,过度认同对方观点,缺乏真实互动感",
            # BUG FIX: removed the stray ",;" that appeared mid-sentence in
            # this user-facing label ("人类表达灵活,;机器回应…").
            "书面化表达:人类表达灵活;机器回应句式工整、规范,内容过于书面化、用词泛泛"
        ],
        "reference_scores": [5, 5]
    },
    {
        "title": "情感表达",
        "audio": sample1_audio_path,
        "sub_dims": [
            "语义层面:人类能对悲伤、开心等语境有符合人类的情感反应;机器回应情绪淡漠,或情感词泛泛、脱离语境",
            "声学层面:人类音调、音量随情绪动态变化;机器情感语调模式化,或与语境不符"
        ],
        "reference_scores": [3, 3]
    }
]

# Ordered list of dimension titles, used by selectors and navigation.
DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
800
+
801
  def load_or_initialize_count_json(audio_paths):
802
  if os.path.exists(COUNT_JSON_PATH):
803
  with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f: