Spark Chou committed
Commit 5d0ff5c · 1 Parent(s): f44f7cc
Files changed (1)
  1. app.py +108 -110
app.py CHANGED
@@ -47,63 +47,62 @@ print(sample1_audio_path)
47
 
48
  DIMENSIONS_DATA = [
49
  {
50
- "title": "Semantic and Pragmatic Features",
51
  "audio": sample1_audio_path,
52
  "sub_dims": [
53
- "Memory Consistency: Human-like: Consistent memory in short contexts, and asks for clarification when memory deviations occur; Machine-like: Inconsistent memory across contexts and unable to detect or correct errors (e.g., forgetting key information and insisting on incorrect answers)",
54
- "Logical Coherence: Human-like: Natural and smooth logic; Machine-like: Abrupt logical transitions or self-contradictions (e.g., suddenly changing topics without transition)",
55
- "Pronunciation Accuracy: Human-like: Correct and natural pronunciation of words, with proper usage of polyphonic characters based on context; Machine-like: Unnatural pronunciation errors, mispronunciation of common polyphonic characters",
56
- "Multilingual Mixing: Human-like: Multilingual mixing is often context-dependent (e.g., proper nouns, idiomatic expressions), with awkward or unnatural language switching; Machine-like: Rigid multilingual mixing without logical language switching",
57
- "Imprecision in Language: Human-like: Uses vague expressions like 'more or less', 'probably', and may self-correct (e.g., 'no, no'); Machine-like: Rarely uses vague expressions, responses are precise and affirmative",
58
- "Use of Fillers: Human-like: Frequently uses fillers (e.g., 'um', 'like') while thinking; Machine-like: Rare use of fillers or unnatural usage",
59
- "Metaphor and Pragmatic Intent: Human-like: Uses metaphor, irony, and euphemism to convey layered meanings; Machine-like: Literal and direct, lacking semantic diversity, only capable of surface-level interpretation"
60
  ],
61
  "reference_scores": [5, 5, 5, 0, 5, 5, 0]
62
  },
63
  {
64
- "title": "Non-Physiological Paralinguistic Features",
65
  "audio": sample1_audio_path,
66
  "sub_dims": [
67
- "Rhythm: Human-like: Speaking rate varies with semantic flow, occasional pauses or hesitations; Machine-like: Almost no pauses or mechanical pauses",
68
- "Intonation: Human-like: Natural pitch rise or fall when expressing questions, surprise, or emphasis; Machine-like: Monotonous or overly regular pitch changes, inappropriate to the context",
69
- "Stress: Human-like: Consciously emphasizes key words to highlight focus; Machine-like: No emphasis on words or abnormal emphasis placement",
70
- "Auxiliary Vocalizations: Human-like: Produces context-appropriate non-verbal sounds, such as laughter or sighs; Machine-like: Contextually incorrect or mechanical auxiliary sounds, or completely absent"
71
  ],
72
  "reference_scores": [5, 5, 5, 5]
73
  },
74
  {
75
- "title": "Physiological Paralinguistic Features",
76
  "audio": sample1_audio_path,
77
  "sub_dims": [
78
- "Micro-physiological Noise: Human-like: Presence of breathing sounds, saliva sounds, bubble noise, etc., naturally occurring during speech; Machine-like: Speech is overly clean or emits unnatural noises (e.g., electrical static)",
79
- "Instability in Pronunciation: Human-like: Some irregularities in pronunciation (e.g., liaison, tremolo, slurred speech, nasal sounds); Machine-like: Pronunciation is overly clear and regular",
80
- "Accent: Human-like: Natural regional accent or vocal traits; Machine-like: Stiff or unnatural accent"
81
  ],
82
  "reference_scores": [5, 4, 4]
83
  },
84
  {
85
- "title": "Mechanical Persona",
86
  "audio": sample1_audio_path,
87
  "sub_dims": [
88
- "Sycophancy: Human-like: Judges whether to agree with requests or opinions based on context, doesn't always agree or echo; Machine-like: Frequently agrees, thanks, apologizes, excessively aligns with the other’s opinion, lacking genuine interaction",
89
- "Written-style Expression: Human-like: Conversational, flexible, and varied expression; Machine-like: Responses are well-structured and formal, overly formal wording, frequent listing, and vague word choice"
90
  ],
91
  "reference_scores": [5, 5]
92
  },
93
  {
94
- "title": "Emotional Expression",
95
  "audio": sample1_audio_path,
96
  "sub_dims": [
97
- "Semantic Level: Human-like: Displays human-like emotional responses to contexts such as sadness or joy; Machine-like: Fails to respond emotionally to the other’s feelings, or uses vague and context-inappropriate emotional language",
98
- "Acoustic Level: Human-like: Pitch, volume, and rhythm dynamically change with emotion; Machine-like: Emotional tone is patterned or context-inappropriate"
99
  ],
100
  "reference_scores": [5, 5]
101
  }
102
  ]
103
 
104
-
105
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
106
- SPECIAL_KEYWORDS = ["Multilingual Mixing", "Metaphor and Pragmatic Intent", "Auxiliary Vocalizations", "Accent"]
107
  MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
108
  THE_SUB_DIMS = [d['sub_dims'] for d in DIMENSIONS_DATA]
109
 
@@ -346,7 +345,7 @@ def update_sample_view(dimension_title):
346
  # audio_up = gr.update(value=append_cache_buster(dim_data["audio"]))
347
  interactive_view_up = gr.update(visible=True)
348
  reference_view_up = gr.update(visible=False)
349
- reference_btn_up = gr.update(value="Reference")
350
  sample_slider_ups = []
351
  ref_slider_ups = []
352
  scores = dim_data.get("reference_scores", [])
@@ -372,13 +371,13 @@ def update_test_dimension_view(d_idx, selections):
372
  sub_dims = dim_data["sub_dims"]
373
  dim_title = dim_data["title"]
374
  existing_scores = selections.get(dim_data['title'], {})
375
- progress_d = f"Dimension {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dim_data['title']}**"
376
 
377
  for i in range(MAX_SUB_DIMS):
378
  if i < len(sub_dims):
379
  desc = sub_dims[i]
380
  # print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
381
- name = desc.split(":")[0].strip()
382
  default_value = 0 if name in SPECIAL_KEYWORDS else 1
383
  value = existing_scores.get(desc, default_value)
384
 
@@ -401,7 +400,7 @@ def update_test_dimension_view(d_idx, selections):
401
  # ))
402
  else:
403
  slider_updates.append(gr.update(visible=False))
404
- # print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
405
  # for i in range(MAX_SUB_DIMS):
406
  # if i < len(dimension['sub_dims']):
407
  # sub_dim_label = dimension['sub_dims'][i]
@@ -412,7 +411,7 @@ def update_test_dimension_view(d_idx, selections):
412
 
413
  prev_btn_update = gr.update(interactive=(d_idx > 0))
414
  next_btn_update = gr.update(
415
- value="Proceed to Final Judgement" if d_idx == len(DIMENSIONS_DATA) - 1 else "Next Dimension",
416
  interactive=True
417
  )
418
 
@@ -421,7 +420,7 @@ def update_test_dimension_view(d_idx, selections):
421
  def init_test_question(user_data, q_idx):
422
  d_idx = 0
423
  question = user_data["question_set"][q_idx]
424
- progress_q = f"Question {q_idx + 1} / {len(user_data['question_set'])} "
425
 
426
  initial_updates = update_test_dimension_view(d_idx, {})
427
  dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
@@ -483,10 +482,10 @@ def navigate_dimensions(direction, q_idx, d_idx, selections, *slider_values):
483
  ) + tuple(slider_updates)
484
 
485
  def toggle_reference_view(current):
486
- if current == "Reference":
487
- return gr.update(visible=False), gr.update(visible=True), gr.update(value="Back")
488
  else:
489
- return gr.update(visible=True), gr.update(visible=False), gr.update(value="Reference")
490
 
491
  def back_to_welcome():
492
  return (
@@ -665,9 +664,9 @@ def submit_question_and_advance(q_idx, d_idx, selections, final_choice, all_resu
665
  return init_q_updates + (all_results, gr.update(value=""))
666
  else:
667
  # 准备完整结果数据
668
- result_str = "### Test Fineshed!\n\nOverview of your submission: \n"
669
  for res in all_results:
670
- result_str += f"##### Final Judegement: **{res['selections'].get('final_choice', 'empty')}**\n" # empty 代表未选择
671
  for dim_title, dim_data in res['selections'].items():
672
  if dim_title == 'final_choice': continue
673
  result_str += f"- **{dim_title}**:\n"
@@ -858,80 +857,79 @@ with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 960px
858
  "result": result_page
859
  }
860
 
861
- with welcome_page:
862
- gr.Markdown("# AI Detective: Can you spot the AI?\nYou will hear a series of dialogues. Please determine which respondent is AI.")
863
- start_btn = gr.Button("Start Challenge", variant="primary")
864
-
865
- with info_page:
866
- gr.Markdown("## 请提供一些基本信息")
867
- username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
868
- age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
869
- gender_input = gr.Radio(["男", "女", "其他"], label="性别")
870
- education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他"], label="学历")
871
- education_other_input = gr.Textbox(label="请填写你的学历", visible=False, interactive=False)
872
- ai_experience_input = gr.Radio(["从未使用过", "偶尔接触(如看别人用)", "使用过几次,了解基本功能", "经常使用,有一定操作经验", "非常熟悉,深入使用过多个 AI 工具"], label="对 AI 工具的熟悉程度")
873
- submit_info_btn = gr.Button("提交并开始学习样例", variant="primary", interactive=False)
874
-
875
- with sample_page:
876
- gr.Markdown("## Sample Analysis\nPlease select a dimension to study and practice scoring. All dimensions use the same sample audio.")
877
- sample_dimension_selector = gr.Radio(DIMENSION_TITLES, label="Select a Dimension", value=DIMENSION_TITLES[0])
878
- with gr.Row():
879
- with gr.Column(scale=1):
880
- sample_audio = gr.Audio(label="Sample Audio", value=DIMENSIONS_DATA[0]["audio"])
881
- with gr.Column(scale=2):
882
- with gr.Column(visible=True) as interactive_view:
883
- gr.Markdown("#### Please rate the following features (0–5. 0 = Not present; 1 = Machine-like; 3 = Neutral; 5 = Human-like)")
884
- sample_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
885
- with gr.Column(visible=False) as reference_view:
886
- gr.Markdown("### Reference Answer Explanation")
887
- reference_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=False) for i in range(MAX_SUB_DIMS)]
888
- with gr.Row():
889
- reference_btn = gr.Button("Reference")
890
- go_to_pretest_btn = gr.Button("Got it. Start", variant="primary")
891
-
892
- with pretest_page:
893
- gr.Markdown("## Test Instructions\n"
894
- "- For each question, you will need to evaluate **all 5 dimensions**.\n"
895
- "- Within each dimension, please rate each feature **from 0 to 5**.\n"
896
- "- **Scoring guidelines:**\n"
897
- " - **0: Feature not present** (some features will always be present, so score from 1–5);\n"
898
- " - **1: Strongly machine-like**;\n"
899
- " - **2: Somewhat machine-like**;\n"
900
- " - **3: Neutral** (no obvious human or machine tendency);\n"
901
- " - **4: Somewhat human-like**;\n"
902
- " - **5: Strongly human-like**.\n"
903
- "- After rating all dimensions, please make a final judgment on whether the respondent is **human or AI** based on your overall impression.\n"
904
- "- You can use the 'Previous Dimension' and 'Next Dimension' buttons to freely switch between dimensions and adjust your scores.\n"
905
- "## Important Notes\n"
906
- "- We ask you to judge **whether the respondent’s behavior in each dimension leans more human-like or machine-like**, not how **strongly the feature is expressed**.\n"
907
- "(For example, correct pronunciation does not necessarily mean it’s human; incorrect pronunciation does not necessarily mean it’s a machine. Your judgment should be: *Does the pronunciation sound more like a human or a machine?*)\n"
908
- "- Even if you are confident about the respondent’s identity from the start, please **evaluate each dimension independently**. For example, even if you believe the respondent is AI, do not assign all dimensions as machine-like by default — judge each one carefully.")
909
- go_to_test_btn = gr.Button("Start Test", variant="primary")
910
-
911
- with test_page:
912
- gr.Markdown("## Official Test")
913
- question_progress_text = gr.Markdown()
914
- test_dimension_title = gr.Markdown()
915
- test_audio = gr.Audio(label="Test Audio")
916
- gr.Markdown("---\n### Please rate the respondent (not the initiator) on the following features (0 = Not present; 1 = Machine-like; 3 = Neutral; 5 = Human-like)")
917
-
918
- test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True, show_label=True) for i in range(MAX_SUB_DIMS)]
919
-
920
- with gr.Row():
921
- prev_dim_btn = gr.Button("Previous Dimension")
922
- next_dim_btn = gr.Button("Next Dimension", variant="primary")
923
-
924
- with final_judgment_page:
925
- gr.Markdown("## Final Judgment")
926
- gr.Markdown("You’ve completed all dimension scores. Please give your final judgment based on your overall impression.")
927
- final_human_robot_radio = gr.Radio(["👤 Human", "🤖 AI"], label="Please identify the respondent (required)")
928
- submit_final_answer_btn = gr.Button("Submit Answer", variant="primary", interactive=False)
929
-
930
- with result_page:
931
- gr.Markdown("## Test Complete")
932
- result_text = gr.Markdown()
933
- back_to_welcome_btn = gr.Button("Return to Main Page", variant="primary")
934
-
935
 
936
  # ==============================================================================
937
  # 事件绑定 (Event Binding) & IO 列表定义
 
47
 
48
  DIMENSIONS_DATA = [
49
  {
50
+ "title": "语义和语用特征",
51
  "audio": sample1_audio_path,
52
  "sub_dims": [
53
+ "记忆一致性:偏人类:在短上下文中记忆一致,若出现记忆偏差也会提问确认;偏机器:出现上下文记忆不一致且无法察觉或修正(如遗忘掉关键信息坚持错误回答)",
54
+ "逻辑连贯性:偏人类:逻辑自然流畅;偏机器:逻辑转折生硬或自相矛盾(如:突然切换话题无过渡)",
55
+ "读音正确性:偏人类:用字发音正确、自然,会结合语境正确使用常见多音字;偏机器:存在不自然的发音错误,常见多音字发音错误",
56
+ "多语言混杂:偏人类:说话多语言混杂往往和语境相关(专有名词、习惯用法),语言切换生硬卡顿,不自然;偏机器:多语言混杂生硬,无语言切换逻辑",
57
+ "语言不精确性:偏人类:说话存在含糊表达:如“差不多”、“应该是吧”,且会出现自我修正(“不对不对”)的行为;偏机器:回应通常不存在模糊表达,回答准确、肯定",
58
+ "填充词使用:偏人类:在思考时经常使用填充词(如‘嗯’‘那个’);偏机器:很少使用填充词或填充词使用不自然",
59
+ "隐喻与语用用意:偏人类:使用隐喻、反语、委婉来表达多重含义;偏机器:表达直白,缺乏语义多样性,仅能字面理解语义"
60
  ],
61
  "reference_scores": [5, 5, 5, 0, 5, 5, 0]
62
  },
63
  {
64
+ "title": "非生理性副语言特征",
65
  "audio": sample1_audio_path,
66
  "sub_dims": [
67
+ "节奏:偏人类:语速随语义起伏,偶尔卡顿或犹豫;偏机器:说话几乎无停顿或停顿机械",
68
+ "语调:偏人类:在表达如疑问、惊讶、强调时,音调会自然上扬或下降;偏机器:语调单一或变化过于规律,不符合语境",
69
+ "重读:偏人类:有意识地重读重要词语,突出重点;偏机器:没有重读词语或或出现强调部位异常",
70
+ "辅助性发声:偏人类:发出符合语境的非语言声音,如笑声、叹气等;偏机器:辅助性发声语境错误或机械化,或完全无辅助性发声"
71
  ],
72
  "reference_scores": [5, 5, 5, 5]
73
  },
74
  {
75
+ "title": "生理性副语言特征",
76
  "audio": sample1_audio_path,
77
  "sub_dims": [
78
+ "微生理杂音:偏人类:说话存在呼吸声、口水音、气泡音等无意识发声,且自然地出现在说话中;偏机器:语音过于干净,或发出不自然杂音(电流声)",
79
+ "发音不稳定性:偏人类:发音存在一定不规则性(诸如连读、颤音、含糊发音、鼻音等);偏机器:发音过于清晰规则",
80
+ "口音:偏人类:存在自然的地区口音或语音特征;偏机器:口音生硬"
81
  ],
82
  "reference_scores": [5, 4, 4]
83
  },
84
  {
85
+ "title": "机械人格",
86
  "audio": sample1_audio_path,
87
  "sub_dims": [
88
+ "谄媚现象:偏人类:根据语境判断是否同意对方提出的请求或表达的观点,不总是表示同意或进行附和;偏机器:频繁同意、感谢、道歉,过度认同对方观点,缺乏真实互动感",
89
+ "书面化表达:偏人类:口语化,表达灵活多变;偏机器:回应句式工整、规范,用词过于正式、频繁列举、用词泛泛"
90
  ],
91
  "reference_scores": [5, 5]
92
  },
93
  {
94
+ "title": "情感表达",
95
  "audio": sample1_audio_path,
96
  "sub_dims": [
97
+ "语义层面:偏人类:对悲伤、开心等语境有符合人类的情绪反应;偏机器:未能针对对方情绪作出正常的情感反应,或表达情感的词语空泛、脱离语境",
98
+ "声学层面:偏人类:音调、音量、节奏等声学特征随情绪动态变化;偏机器:情感语调模式化或与语境不符"
99
  ],
100
  "reference_scores": [5, 5]
101
  }
102
  ]
103
 
 
104
  DIMENSION_TITLES = [d["title"] for d in DIMENSIONS_DATA]
105
+ SPECIAL_KEYWORDS = ["多语言混杂", "隐喻与语用用意", "辅助性发声", "口音"]
106
  MAX_SUB_DIMS = max(len(d['sub_dims']) for d in DIMENSIONS_DATA)
107
  THE_SUB_DIMS = [d['sub_dims'] for d in DIMENSIONS_DATA]
108
 
 
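Because the default-score rule later in this change keys on the feature name at the start of each sub-dimension string, the localized SPECIAL_KEYWORDS list has to match the new Chinese prefixes exactly. A small consistency check along these lines (an illustrative sketch, not part of app.py) would surface a mismatch immediately:

```python
# Sketch only: assumes DIMENSIONS_DATA and SPECIAL_KEYWORDS exactly as defined above.
# Every special keyword should be the leading feature name of some sub-dimension
# string, whichever colon variant separates the name from its description.
missing = [
    kw for kw in SPECIAL_KEYWORDS
    if not any(desc.startswith(kw) for dim in DIMENSIONS_DATA for desc in dim["sub_dims"])
]
assert not missing, f"SPECIAL_KEYWORDS with no matching sub-dimension: {missing}"
```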
345
  # audio_up = gr.update(value=append_cache_buster(dim_data["audio"]))
346
  interactive_view_up = gr.update(visible=True)
347
  reference_view_up = gr.update(visible=False)
348
+ reference_btn_up = gr.update(value="参考")
349
  sample_slider_ups = []
350
  ref_slider_ups = []
351
  scores = dim_data.get("reference_scores", [])
 
371
  sub_dims = dim_data["sub_dims"]
372
  dim_title = dim_data["title"]
373
  existing_scores = selections.get(dim_data['title'], {})
374
+ progress_d = f"维度 {d_idx + 1} / {len(DIMENSIONS_DATA)}: **{dim_data['title']}**"
375
 
376
  for i in range(MAX_SUB_DIMS):
377
  if i < len(sub_dims):
378
  desc = sub_dims[i]
379
  # print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
380
+ name = desc.split("：")[0].strip()
381
  default_value = 0 if name in SPECIAL_KEYWORDS else 1
382
  value = existing_scores.get(desc, default_value)
383
 
 
400
  # ))
401
  else:
402
  slider_updates.append(gr.update(visible=False))
403
+ print(f"{desc} -> default value: {existing_scores.get(desc, 0)}")
404
  # for i in range(MAX_SUB_DIMS):
405
  # if i < len(dimension['sub_dims']):
406
  # sub_dim_label = dimension['sub_dims'][i]
 
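The loop above uses the common Gradio pattern for a variable number of sub-dimensions: a fixed pool of MAX_SUB_DIMS sliders is created once, and each view function returns one gr.update per slider, configuring the first len(sub_dims) and hiding the surplus. A minimal sketch of that pattern (hypothetical names, not the app's exact code):

```python
import gradio as gr

def build_slider_updates(sub_dims, defaults, max_sub_dims):
    """Return one gr.update per pre-created slider: relabel and show the
    sliders this dimension needs, hide the rest."""
    updates = []
    for i in range(max_sub_dims):
        if i < len(sub_dims):
            updates.append(gr.update(visible=True, label=sub_dims[i], value=defaults[i]))
        else:
            updates.append(gr.update(visible=False))
    return updates
```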
411
 
412
  prev_btn_update = gr.update(interactive=(d_idx > 0))
413
  next_btn_update = gr.update(
414
+ value="进入最终判断" if d_idx == len(DIMENSIONS_DATA) - 1 else "下一维度",
415
  interactive=True
416
  )
417
 
 
420
  def init_test_question(user_data, q_idx):
421
  d_idx = 0
422
  question = user_data["question_set"][q_idx]
423
+ progress_q = f" {q_idx + 1} / {len(user_data['question_set'])} "
424
 
425
  initial_updates = update_test_dimension_view(d_idx, {})
426
  dim_title_update, prev_btn_update, next_btn_update = initial_updates[:3]
 
482
  ) + tuple(slider_updates)
483
 
484
  def toggle_reference_view(current):
485
+ if current == "参考":
486
+ return gr.update(visible=False), gr.update(visible=True), gr.update(value="返回")
487
  else:
488
+ return gr.update(visible=True), gr.update(visible=False), gr.update(value="参考")
489
 
490
  def back_to_welcome():
491
  return (
 
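toggle_reference_view now branches on the localized button label, so the literal "参考" must stay in sync with the value set in update_sample_view (earlier hunk) and with gr.Button("参考") in the sample page further down. One way to keep them aligned (a suggestion, not part of this commit) is to hoist the labels into module-level constants:

```python
import gradio as gr

# Hypothetical refactor: a single source of truth for the two button labels.
REFERENCE_LABEL = "参考"  # "Reference"
BACK_LABEL = "返回"        # "Back"

def toggle_reference_view(current):
    if current == REFERENCE_LABEL:
        return gr.update(visible=False), gr.update(visible=True), gr.update(value=BACK_LABEL)
    return gr.update(visible=True), gr.update(visible=False), gr.update(value=REFERENCE_LABEL)
```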
664
  return init_q_updates + (all_results, gr.update(value=""))
665
  else:
666
  # 准备完整结果数据
667
+ result_str = "### 测试全部完成!\n\n你的提交结果概览:\n"
668
  for res in all_results:
669
+ result_str += f"##### 最终判断: **{res['selections'].get('final_choice', '未选择')}**\n"
670
  for dim_title, dim_data in res['selections'].items():
671
  if dim_title == 'final_choice': continue
672
  result_str += f"- **{dim_title}**:\n"
 
857
  "result": result_page
858
  }
859
 
860
+ with welcome_page:
861
+ gr.Markdown("# AI 识破者\n你将听到一系列对话,请判断哪个回应者是 AI")
862
+ start_btn = gr.Button("开始挑战", variant="primary")
863
+
864
+ with info_page:
865
+ gr.Markdown("## 请提供一些基本信息")
866
+ username_input = gr.Textbox(label="用户名", placeholder="请输入你的昵称")
867
+ age_input = gr.Radio(["18岁以下", "18-25岁", "26-35岁", "36-50岁", "50岁以上"], label="年龄")
868
+ gender_input = gr.Radio(["男", "女", "其他"], label="性别")
869
+ education_input = gr.Radio(["高中及以下", "本科", "硕士", "博士", "其他"], label="学历")
870
+ education_other_input = gr.Textbox(label="请填写你的学历", visible=False, interactive=False)
871
+ ai_experience_input = gr.Radio(["从未使用过", "偶尔接触(如看别人用)", "使用过几次,了解基本功能", "经常使用,有一定操作经验", "非常熟悉,深入使用过多个 AI 工具"], label="对 AI 工具的熟悉程度")
872
+ submit_info_btn = gr.Button("提交并开始学习样例", variant="primary", interactive=False)
873
+
874
+ with sample_page:
875
+
876
+ gr.Markdown("## 样例分析\n请选择一个维度进行学习和打分练习。所有维度共用同一个样例音频。")
877
+ sample_dimension_selector = gr.Radio(DIMENSION_TITLES, label="选择学习维度", value=DIMENSION_TITLES[0])
878
+ with gr.Row():
879
+ with gr.Column(scale=1):
880
+ sample_audio = gr.Audio(label="样例音频", value=DIMENSIONS_DATA[0]["audio"])
881
+ with gr.Column(scale=2):
882
+ with gr.Column(visible=True) as interactive_view:
883
+ gr.Markdown("#### 请为以下特征打分 (0-5分。0-特征无体现;1-机器;3-特征无偏向;5-人类)")
884
+ sample_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True) for i in range(MAX_SUB_DIMS)]
885
+ with gr.Column(visible=False) as reference_view:
886
+ gr.Markdown("### 参考答案解析")
887
+ reference_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=False) for i in range(MAX_SUB_DIMS)]
888
+ with gr.Row():
889
+ reference_btn = gr.Button("参考")
890
+ go_to_pretest_btn = gr.Button("我明白了,开始测试", variant="primary")
891
+
892
+ with pretest_page:
893
+ gr.Markdown("## 测试说明\n"
894
+ "- 对于每一道题,你都需要对全部 **5 个维度** 进行评估。\n"
895
+ "- 在每个维度下,请为出现的每个特征 **从0到5打分**\n"
896
+ "- **评分解释如下:**\n"
897
+ " - **0 分:特征未体现** (有些特征一定会体现,所以按1到5打分);\n"
898
+ " - **1 分:极度符合机器特征**\n"
899
+ " - **2 分:较为符合机器特征**\n"
900
+ " - **3 分:无明显人类或机器倾向**\n"
901
+ " - **4 分:较为符合人类特征**\n"
902
+ " - **5 分:极度符合人类特征**\n"
903
+ "- 完成所有维度后,请根据整体印象对回应方的身份做出做出“人类”或“机器人”的 **最终判断**。\n"
904
+ "- 你可以使用“上一维度”和“下一维度”按钮在5个维度间自由切换和修改分数。\n"
905
+ "## 特别注意\n"
906
+ "- 我们希望您能判断每个维度上**回应者**的表现是**偏向人还是机器**,分数的大小反映回应者的语音类人的程度,而**不是**这个维度体现的程度多少\n(如读音正确也不代表是人类,读音错误也不代表是机器,您应当判断的是“听到的发音更偏向机器还是人类”)\n"
907
+ "- 即使您一开始就已经很肯定回应方的身份,同样应当**独立地**对每个维度上回应方的表现进行细致的评判。比如您很肯定回应方是机器,也需要独立地对每个维度判断,而非简单地将每个维度归为偏机器。")
908
+ go_to_test_btn = gr.Button("开始测试", variant="primary")
909
+
910
+ with test_page:
911
+ gr.Markdown("## 正式测试")
912
+ question_progress_text = gr.Markdown()
913
+ test_dimension_title = gr.Markdown()
914
+ test_audio = gr.Audio(label="测试音频")
915
+ gr.Markdown("--- \n ### 请为对话中的回应者(非发起者)针对以下特征打分 (0-5分。0-特征无体现;1-机器;3-特征无偏向;5-人类)")
916
+
917
+ test_sliders = [gr.Slider(minimum=0, maximum=5, step=1, label=f"Sub-dim {i+1}", visible=False, interactive=True, show_label=True) for i in range(MAX_SUB_DIMS)]
918
+
919
+ with gr.Row():
920
+ prev_dim_btn = gr.Button("上一维度")
921
+ next_dim_btn = gr.Button("下一维度", variant="primary")
922
+
923
+ with final_judgment_page:
924
+ gr.Markdown("## 最终判断")
925
+ gr.Markdown("您已完成对所有维度的评分。请根据您的综合印象,做出最终判断。")
926
+ final_human_robot_radio = gr.Radio(["👤 人类", "🤖 机器人"], label="请判断回应者类型 (必填)")
927
+ submit_final_answer_btn = gr.Button("提交本题答案", variant="primary", interactive=False)
928
+
929
+ with result_page:
930
+ gr.Markdown("## 测试完成")
931
+ result_text = gr.Markdown()
932
+ back_to_welcome_btn = gr.Button("返回主界面", variant="primary")
 
933
 
934
  # ==============================================================================
935
  # 事件绑定 (Event Binding) & IO 列表定义
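The event-binding section that starts here is unchanged by this commit and therefore omitted from the diff. For orientation only, the reference toggle above is presumably wired roughly like the following (a guess at the shape of the binding, reusing component names from the sample page; the real wiring lives outside this diff):

```python
# Hypothetical sketch, not taken from app.py: the button's current label is the
# input, and the three updates returned by toggle_reference_view target the two
# views plus the button itself.
reference_btn.click(
    fn=toggle_reference_view,
    inputs=reference_btn,
    outputs=[interactive_view, reference_view, reference_btn],
)
```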