Spaces:

agenticx
/

TxAgentCrowdsourcingEval

Running

App Files Files Community

richard_dev

by RichardZhu52 - opened Jul 10, 2025

base: refs/heads/main

←

from: refs/pr/3

Discussion Files changed

+196

-113

Files changed (1) hide show

app.py +196 -113

app.py CHANGED Viewed

@@ -287,7 +287,7 @@ def get_evaluator_questions(email, disease_map_data, drug_map_data, user_all_spe
         disease_subspecs = set(specs.get('subspecialties', []))
         # Check for intersection
-        if user_all_specs.intersection(disease_specs) or user_all_specs.intersection(disease_subspecs) or len(user_all_specs)==0:
             relevant_diseases.append(disease)
     relevant_drugs = []
@@ -553,9 +553,9 @@ def go_to_page0_from_minus1(question_in_progress_state):
 def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, our_methods=our_methods):
     # 校验用户信息
-    if not name or not email or not years_exp_radio: # or not specialty_dd
-        gr.Info("Please fill out all the required fields (name, email, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", duration=5)
-        return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.HTML(), gr.State()
     gr.Info("Loading the data...", duration=3)
     user_info, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, page1_reference_answer, question_for_eval, remaining_count = get_next_eval_question(
@@ -591,7 +591,136 @@ def go_to_page1(show_page_1):
     return updates
 # Define restrict function for each criterion
 def make_restrict_function(base_choices):
     def restrict_choices_page1(radio_choice, score_a, score_b):
         """
@@ -723,7 +852,7 @@ def make_restrict_function(base_choices):
 # --- Define Callback Functions for Confirmation Flow ---
-def build_row_dict(data_subset_state, user_info, question_quality_comments, pairwise, comparisons_reasons, *args):
     num_criteria = len(criteria)
     ratings_A_vals = list(args[:num_criteria])
     ratings_B_vals = list(args[num_criteria:])
@@ -746,7 +875,7 @@ def build_row_dict(data_subset_state, user_info, question_quality_comments, pair
         "Prompt": prompt_text,
         "ResponseA_Model": response_A_model,
         "ResponseB_Model": response_B_model,
-        "Question Quality Comments": question_quality_comments,
     }
     pairwise = [mapping.get(val, val) for val in pairwise]
@@ -760,10 +889,10 @@ def build_row_dict(data_subset_state, user_info, question_quality_comments, pair
     return row
-def final_submit(data_subset_state, user_info, question_quality_comments, pairwise, comparisons_reasons, *args):
     # --- Part 1: Submit the current results (Existing Logic) ---
-    row_dict = build_row_dict(data_subset_state, user_info, question_quality_comments,
-                              pairwise, comparisons_reasons, *args)
     append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(
         TXAGENT_RESULTS_SHEET_BASE_NAME), add_header_when_create_sheet=True)
@@ -804,15 +933,14 @@ def final_submit(data_subset_state, user_info, question_quality_comments, pairwi
 # Function to validate page1 inputs and directly submit if valid
 def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
-    # combined_values contains question_quality_comments + pairwise choices + comparison reasons + ratings
     criteria_count = len(criteria_for_comparison)
-    question_quality_comments = combined_values[0]
-    pairwise_list = list(combined_values[1:criteria_count+1])
     comparison_reasons_list = list(
-        combined_values[criteria_count+1:criteria_count*2+1])
     ratings_A_list = list(
-        combined_values[criteria_count*2+1:criteria_count*3+1])
-    ratings_B_list = list(combined_values[criteria_count*3+1:])
     # Check if all pairwise comparisons are filled
     if any(answer is None for answer in pairwise_list):
@@ -873,8 +1001,8 @@ def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
         )
     gr.Info("Submitting your evaluation and loading the next question...")
     # If validation passes, call final_submit and handle form reset
-    submit_result = final_submit(data_subset_state, user_info, question_quality_comments, pairwise_list,
-                                 comparison_reasons_list, *ratings_A_list, *ratings_B_list)
     # Check if there are more questions by looking at the page1 update dict
     # submit_result[1] is the page1 update, submit_result[2] is the final_page update
@@ -960,18 +1088,6 @@ centered_col_css = """
     width: 100% !important;          /* Occupy full width of its column */
     white-space: normal !important;  /* Allow text to wrap onto multiple lines */
 }
-#txagent-demo-btn {
-    background-color: #4CAF50 !important;
-    color: white !important;
-    border-color: #4CAF50 !important;
-    margin-top: 10px !important;
-}
-#api-key-btn {
-    background-color: #FF9800 !important;
-    color: white !important;
-    border-color: #FF9800 !important;
-    margin-top: 10px !important;
-}
 .criteria-radio-score-label [role="radiogroup"],
 .criteria-radio-score-label .gr-radio-group,
 .criteria-radio-score-label .flex {
@@ -1005,6 +1121,7 @@ with gr.Blocks(css=centered_col_css) as demo:
     pairwise_state = gr.State()
     scores_A_state = gr.State()
     comparison_reasons = gr.State()
     unqualified_A_state = gr.State()
     data_subset_state = gr.State()
     question_in_progress = gr.State(0)
@@ -1033,38 +1150,11 @@ with gr.Blocks(css=centered_col_css) as demo:
     with gr.Column(visible=True, elem_id="page-1") as page_minus1:
         gr.HTML("""
         <div>
-            <h1>TxAgent: An AI Agent for Therapeutics</h1>
         </div>
         """)
-        # Add TxAgent demo and API key buttons
-        with gr.Column(scale=1):
-            api_key_btn = gr.Button(
-                value="Request Access",
-                variant="secondary",
-                size="lg",
-                elem_id="api-key-btn"
-            )
-        with gr.Column(scale=1):
-            txagent_demo_btn = gr.Button(
-                value="Access TxAgent",
-                variant="secondary",
-                size="lg",
-                elem_id="txagent-demo-btn"
-            )
-            gr.Markdown(
-                """
-                For live access to TxAgent, you can:
-                - Access TxAgent if you have an account.
-                - Request access if you do not have an account yet.
-                We look forward to your feedback!
-                """
-            )
-        # Add extra white space between sections
-        gr.HTML("<br>")
         with gr.Column(scale=1):
             participate_eval_btn = gr.Button(
                 value="Evaluate TxAgent",
@@ -1083,32 +1173,27 @@ with gr.Blocks(css=centered_col_css) as demo:
                 Thank you for helping improve TxAgent!
                 """
             )
-        # with gr.Column(scale=1):
-        #     submit_questions_btn = gr.Button(
-        #         value="Submit Your Therapeutic Questions",
-        #         variant="primary",
-        #         size="lg",
-        #         elem_id="submit-btn"
-        #     )
         # with gr.Row(elem_classes=["center-row"]):
             # 第二行：分别放两段说明文字
-        # with gr.Column(scale=1):
-        #     gr.Markdown(
-        #         """
-        #         By submitting therapeutic questions, you will:
-        #         - Help identify edge cases and blind spots for AI models.
-        #         - Help extend AI models to reason in new domains.
-        #         - Directly shape future model improvements.
-        #         We look forward to seeing your feedback!
-        #         """
-        #     )
             # Add contact information in Markdown format
             contact_info_markdown = """
@@ -1125,29 +1210,11 @@ with gr.Blocks(css=centered_col_css) as demo:
         # For the Google Form button, we'll use JavaScript to open a new tab.
         # The URL for the Google Form should be replaced with the actual link.
         google_form_url = "https://forms.gle/pYvyvEQQwS5gdupQA"
-        # submit_questions_btn.click(
-        #     fn=None,
-        #     inputs=None,
-        #     outputs=None,
-        #     js=f"() => {{ window.open('{google_form_url}', '_blank'); }}"
-        # )
-        # TxAgent demo button click handler
-        txagent_demo_url = "https://txagent.curebench.ai/"
-        txagent_demo_btn.click(
             fn=None,
             inputs=None,
             outputs=None,
-            js=f"() => {{ window.open('{txagent_demo_url}', '_blank'); }}"
-        )
-        # API key application button click handler
-        api_key_url = "https://docs.google.com/forms/d/e/1FAIpQLScEFhgT1X0wOkpWjEOMGpvhDFyIfoSMzJZ2HA9o0F0BaNcQPw/viewform?usp=dialog"
-        api_key_btn.click(
-            fn=None,
-            inputs=None,
-            outputs=None,
-            js=f"() => {{ window.open('{api_key_url}', '_blank'); }}"
         )
     # Page 0: Welcome / Informational page.
@@ -1158,9 +1225,9 @@ with gr.Blocks(css=centered_col_css) as demo:
         email = gr.Textbox(
             label="Email (required). Use the same email each time you log into this evaluation portal to avoid receiving repeat questions.")
         specialty_dd = gr.Dropdown(
-            choices=specialties_list, label="Primary Medical Specialty (optional). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
         subspecialty_dd = gr.Dropdown(
-            choices=subspecialties_list, label="Subspecialty (optional). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
         npi_id = gr.Textbox(
             label="National Provider Identifier ID (optional). Visit https://npiregistry.cms.hhs.gov/search to find your NPI ID. Leave blank if you do not have an NPI ID.")
         years_exp_radio = gr.Radio(
@@ -1195,7 +1262,7 @@ with gr.Blocks(css=centered_col_css) as demo:
     # Page 1: Pairwise Comparison.
     with gr.Column(visible=False) as page1:
-        with gr.Accordion("Click to See Instructions", open=False):
             gr.Markdown("""
                     ## Instructions:
                     Please review these instructions and enter your information to begin:
@@ -1213,13 +1280,21 @@ with gr.Blocks(css=centered_col_css) as demo:
         # gr.Markdown("Comparison")
         # Add small red button and comments text box in the same row
         page1_prompt = gr.HTML()
-        question_quality_comments = gr.Textbox(
-            placeholder="(Optional) Comments on the question quality, question relevance, or your suitability to evaluate it.",
-            show_label=False,
-            scale=3,
-            container=False,
-        )
         page1_error_box = gr.Markdown("")  # ADDED: display validation errors
@@ -1389,14 +1464,22 @@ with gr.Blocks(css=centered_col_css) as demo:
                  chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, data_subset_state],
         scroll_to_output=True
     )
     # Transition from Page 1 to direct submission (no confirmation modal)
     submit_btn_1.click(
         fn=validate_and_submit_page1,
-        inputs=[data_subset_state, user_info_state, question_quality_comments, *pairwise_inputs,
                 *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
         outputs=[page1_error_box, page1, final_page, page0_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
-                 page1_prompt, data_subset_state, user_info_state, question_quality_comments, *pairwise_inputs, *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
         scroll_to_output=True
     )

         disease_subspecs = set(specs.get('subspecialties', []))
         # Check for intersection
+        if user_all_specs.intersection(disease_specs) or user_all_specs.intersection(disease_subspecs):
             relevant_diseases.append(disease)
     relevant_drugs = []
 def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, our_methods=our_methods):
     # 校验用户信息
+    if not name or not email or not specialty_dd or not years_exp_radio:
+        gr.Info("Please fill out all the required fields (name, email, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", duration=5)
+        return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.HTML(), gr.State()
     gr.Info("Loading the data...", duration=3)
     user_info, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, page1_reference_answer, question_for_eval, remaining_count = get_next_eval_question(
     return updates
+# --- Skip Question Modal Callbacks ---
+def skip_question_and_load_new(user_info_state, our_methods):
+    """
+    Fetch the next question for the evaluator described by ``user_info_state``.
+
+    ``user_info_state`` is either ``None`` or an 8-tuple:
+    (name, email, specialty_dd, subspecialty_dd, years_exp_radio,
+    exp_explanation_tb, npi_id, q_id). ``our_methods`` is forwarded
+    unchanged to ``get_next_eval_question``.
+
+    Every path returns an 11-item tuple: two visibility updates, the user
+    info, a status message, four chat values, the prompt, the reference
+    answer, and the question data.
+    """
+    def _placeholder_outputs(message):
+        # Both visibility updates hidden, no user info, and freshly
+        # constructed components in the remaining seven slots.
+        return (
+            gr.update(visible=False),
+            gr.update(visible=False),
+            None,
+            message,
+            gr.Chatbot(),
+            gr.Chatbot(),
+            gr.Chatbot(),
+            gr.Chatbot(),
+            gr.HTML(),
+            gr.Markdown(),
+            gr.State(),
+        )
+
+    # Defensive: without user info there is nothing to look up.
+    if user_info_state is None:
+        return _placeholder_outputs("")
+
+    # The trailing element (the current question id) is not needed here.
+    (name, email, specialty_dd, subspecialty_dd,
+     years_exp_radio, exp_explanation_tb, npi_id, _) = user_info_state
+
+    (user_info,
+     chat_a_answer,
+     chat_b_answer,
+     chat_a_reasoning,
+     chat_b_reasoning,
+     page1_prompt,
+     page1_reference_answer,
+     question_for_eval,
+     remaining_count) = get_next_eval_question(
+        name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, our_methods
+    )
+
+    # Nothing left to evaluate: report that through the status message.
+    if remaining_count == 0:
+        return _placeholder_outputs(
+            "Based on your submitted data, you have no more questions to evaluate. You may exit the page; we will follow-up if we require anything else from you. Thank you!"
+        )
+
+    return (
+        gr.update(visible=False),
+        gr.update(visible=True),
+        user_info,
+        "",
+        chat_a_answer,
+        chat_b_answer,
+        chat_a_reasoning,
+        chat_b_reasoning,
+        page1_prompt,
+        page1_reference_answer,
+        question_for_eval,
+    )
+# --- Skip-question logic reused by the "Skip Question" button handler ---------
+def skip_current_question(user_info_state, our_methods: list = our_methods):
+    """
+    Load the next question for the current evaluator and return refreshed
+    values for the Page 1 components.
+
+    Args:
+        user_info_state: ``None`` if no session has started, otherwise an
+            8-tuple (name, email, specialty, subspecialty, years of
+            experience, experience description, NPI id, question id).
+        our_methods: forwarded unchanged to ``get_next_eval_question``.
+
+    Returns:
+        An 8-item tuple on every path, in this order: user info, error-box
+        update, chat A answer, chat B answer, chat A reasoning,
+        chat B reasoning, prompt HTML, question data. The click handler
+        that reaches this function lists eight outputs, so all branches
+        must return the same number of values.
+    """
+    # Guard: user clicked before the session started. Checked before the
+    # "Skipping…" toast so we never announce a skip that does not happen.
+    if user_info_state is None:
+        return (
+            None,
+            gr.update(
+                value="Please start the evaluation before skipping questions."),
+            gr.update(value=[]),  # Chatbot A answer
+            gr.update(value=[]),  # Chatbot B answer
+            gr.update(value=[]),  # Chatbot A reasoning
+            gr.update(value=[]),  # Chatbot B reasoning
+            gr.update(value=""),  # Prompt HTML
+            gr.State()            # data_subset_state
+        )
+    gr.Info("Skipping this question and loading the next one…", duration=5)
+    # Unpack evaluator identity; the trailing question id is not needed here.
+    name, email, specialty_dd, subspecialty_dd, yrs_exp, exp_desc, npi_id, _ = user_info_state
+    # Pull the next unused question. Only the new user info, the question
+    # itself, and the remaining count are used below.
+    (
+        user_info_new,
+        _chat_a_answer,
+        _chat_b_answer,
+        _chat_a_reasoning,
+        _chat_b_reasoning,
+        _prompt_comp,
+        _ref_comp,
+        question_for_eval,
+        remaining,
+    ) = get_next_eval_question(
+        name, email, specialty_dd, subspecialty_dd, yrs_exp, exp_desc, npi_id, our_methods
+    )
+    # If the pool is exhausted, just notify the evaluator
+    if remaining == 0 or question_for_eval is None:
+        final_msg = (
+            "Based on your submitted data, you have no more questions to evaluate. "
+            "You may exit the page; we will follow‑up if we require anything else from you. "
+            "Thank you!"
+        )
+        return (
+            user_info_state,
+            gr.update(value=final_msg),
+            gr.update(value=[]),
+            gr.update(value=[]),
+            gr.update(value=[]),
+            gr.update(value=[]),
+            gr.update(value=""),
+            gr.State()
+        )
+    # --- Build fresh values for the existing UI components ---
+    chat_a_answer, chat_a_reasoning, _ = format_chat(
+        question_for_eval['models'][0]['reasoning_trace'], tool_database_labels)
+    chat_b_answer, chat_b_reasoning, _ = format_chat(
+        question_for_eval['models'][1]['reasoning_trace'], tool_database_labels)
+    prompt_html = (
+        f"<div style='background-color: #FFEFD5; border: 2px solid #FF8C00; padding: 10px; "
+        f"border-radius: 5px; color: black;'><strong style='color: black;'>Question:</strong> "
+        f"{question_for_eval['question']}</div>"
+    )
+    gr.Info("New question loaded…", duration=3)
+    # Return updates to refresh Page 1 in place
+    return (
+        user_info_new,
+        gr.update(value=""),                 # clear any previous error text
+        gr.update(value=chat_a_answer),       # Chatbot A answer
+        gr.update(value=chat_b_answer),       # Chatbot B answer
+        gr.update(value=chat_a_reasoning),    # Chatbot A reasoning
+        gr.update(value=chat_b_reasoning),    # Chatbot B reasoning
+        gr.update(value=prompt_html),        # Prompt
+        question_for_eval                    # store for later pages
+    )
+# --- Handler for the "Skip Question" button: flags the question and skips it
+def flag_nonsense_and_skip(user_info_state, skip_comments=""):
+    """
+    Record that the current question was flagged, then advance to the next.
+
+    When ``user_info_state`` is not ``None`` a row holding the timestamp,
+    evaluator name and email, question id, the flag, and ``skip_comments``
+    is appended to the results sheet first, so the feedback is kept even
+    if the evaluator stops here. In every case the return value is
+    whatever ``skip_current_question(user_info_state)`` returns.
+    """
+    # No session yet: nothing to record, let the skip logic handle it.
+    if user_info_state is None:
+        return skip_current_question(user_info_state)
+
+    # Only name, email and the question id are written to the sheet.
+    (name, email, _specialty, _subspecialty,
+     _yrs_exp, _exp_desc, _npi_id, q_id) = user_info_state
+
+    flagged_row = {
+        "Timestamp": datetime.datetime.now().isoformat(),
+        "Name": name,
+        "Email": email,
+        "Question ID": q_id,
+        "Question Makes No Sense or Biomedically Irrelevant": True,
+        "Skip Comments": skip_comments,
+    }
+    sheet_name = str(TXAGENT_RESULTS_SHEET_BASE_NAME)
+    append_to_sheet(
+        user_data=None,
+        custom_row_dict=flagged_row,
+        custom_sheet_name=sheet_name,
+        add_header_when_create_sheet=True,
+    )
+
+    # Reuse the existing skip logic to advance the UI.
+    return skip_current_question(user_info_state)
 # Define restrict function for each criterion
 def make_restrict_function(base_choices):
     def restrict_choices_page1(radio_choice, score_a, score_b):
         """
 # --- Define Callback Functions for Confirmation Flow ---
+def build_row_dict(data_subset_state, user_info, pairwise, comparisons_reasons, nonsense_btn_clicked, *args):
     num_criteria = len(criteria)
     ratings_A_vals = list(args[:num_criteria])
     ratings_B_vals = list(args[num_criteria:])
         "Prompt": prompt_text,
         "ResponseA_Model": response_A_model,
         "ResponseB_Model": response_B_model,
+        "Question Makes No Sense or Biomedically Irrelevant": nonsense_btn_clicked,
     }
     pairwise = [mapping.get(val, val) for val in pairwise]
     return row
+def final_submit(data_subset_state, user_info, pairwise, comparisons_reasons, nonsense_btn_clicked, *args):
     # --- Part 1: Submit the current results (Existing Logic) ---
+    row_dict = build_row_dict(data_subset_state, user_info,
+                              pairwise, comparisons_reasons, nonsense_btn_clicked, *args)
     append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(
         TXAGENT_RESULTS_SHEET_BASE_NAME), add_header_when_create_sheet=True)
 # Function to validate page1 inputs and directly submit if valid
 def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
+    # combined_values contains pairwise choices + comparison reasons + ratings
     criteria_count = len(criteria_for_comparison)
+    pairwise_list = list(combined_values[:criteria_count])
     comparison_reasons_list = list(
+        combined_values[criteria_count:criteria_count*2])
     ratings_A_list = list(
+        combined_values[criteria_count*2:criteria_count*3])
+    ratings_B_list = list(combined_values[criteria_count*3:])
     # Check if all pairwise comparisons are filled
     if any(answer is None for answer in pairwise_list):
         )
     gr.Info("Submitting your evaluation and loading the next question...")
     # If validation passes, call final_submit and handle form reset
+    submit_result = final_submit(data_subset_state, user_info, pairwise_list,
+                                 comparison_reasons_list, False, *ratings_A_list, *ratings_B_list)
     # Check if there are more questions by looking at the page1 update dict
     # submit_result[1] is the page1 update, submit_result[2] is the final_page update
     width: 100% !important;          /* Occupy full width of its column */
     white-space: normal !important;  /* Allow text to wrap onto multiple lines */
 }
 .criteria-radio-score-label [role="radiogroup"],
 .criteria-radio-score-label .gr-radio-group,
 .criteria-radio-score-label .flex {
     pairwise_state = gr.State()
     scores_A_state = gr.State()
     comparison_reasons = gr.State()
+    nonsense_btn_clicked = gr.State(False)
     unqualified_A_state = gr.State()
     data_subset_state = gr.State()
     question_in_progress = gr.State(0)
     with gr.Column(visible=True, elem_id="page-1") as page_minus1:
         gr.HTML("""
         <div>
+            <h1>TxAgent Portal: AI Evaluation and Crowdsourcing of Therapeutic Questions</h1>
         </div>
         """)
+        # with gr.Row(elem_classes=["center-row"]):
+        # 第一行：并排放两个按钮
         with gr.Column(scale=1):
             participate_eval_btn = gr.Button(
                 value="Evaluate TxAgent",
                 Thank you for helping improve TxAgent!
                 """
             )
+        with gr.Column(scale=1):
+            submit_questions_btn = gr.Button(
+                value="Submit Your Therapeutic Questions",
+                variant="primary",
+                size="lg",
+                elem_id="submit-btn"
+            )
         # with gr.Row(elem_classes=["center-row"]):
             # 第二行：分别放两段说明文字
+        with gr.Column(scale=1):
+            gr.Markdown(
+                """
+                By submitting therapeutic questions, you will:
+                - Help identify edge cases and blind spots for AI models.
+                - Help extend AI models to reason in new domains.
+                - Directly shape future model improvements.
+                We look forward to seeing your feedback!
+                """
+            )
             # Add contact information in Markdown format
             contact_info_markdown = """
         # For the Google Form button, we'll use JavaScript to open a new tab.
         # The URL for the Google Form should be replaced with the actual link.
         google_form_url = "https://forms.gle/pYvyvEQQwS5gdupQA"
+        submit_questions_btn.click(
             fn=None,
             inputs=None,
             outputs=None,
+            js=f"() => {{ window.open('{google_form_url}', '_blank'); }}"
         )
     # Page 0: Welcome / Informational page.
         email = gr.Textbox(
             label="Email (required). Use the same email each time you log into this evaluation portal to avoid receiving repeat questions.")
         specialty_dd = gr.Dropdown(
+            choices=specialties_list, label="Primary Medical Specialty (required). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
         subspecialty_dd = gr.Dropdown(
+            choices=subspecialties_list, label="Subspecialty (if applicable). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
         npi_id = gr.Textbox(
             label="National Provider Identifier ID (optional). Visit https://npiregistry.cms.hhs.gov/search to find your NPI ID. Leave blank if you do not have an NPI ID.")
         years_exp_radio = gr.Radio(
     # Page 1: Pairwise Comparison.
     with gr.Column(visible=False) as page1:
+        with gr.Accordion("Instructions", open=False):
             gr.Markdown("""
                     ## Instructions:
                     Please review these instructions and enter your information to begin:
         # gr.Markdown("Comparison")
         # Add small red button and comments text box in the same row
         page1_prompt = gr.HTML()
+        with gr.Row():
+            nonsense_btn = gr.Button(
+                "Skip Question",
+                size="sm",
+                variant="stop",  # red variant
+                elem_id="invalid-question-btn",
+                elem_classes=["short-btn"],
+                scale=1
+            )
+            skip_comments = gr.Textbox(
+                placeholder="(Optional) Why do you want to skip this question...",
+                show_label=False,
+                scale=3,
+                container=False,
+            )
         page1_error_box = gr.Markdown("")  # ADDED: display validation errors
                  chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, data_subset_state],
         scroll_to_output=True
     )
+    # Skip the current question and load a new one when the evaluator flags it
+    nonsense_btn.click(
+        fn=flag_nonsense_and_skip,
+        inputs=[user_info_state, skip_comments],
+        outputs=[user_info_state, page1_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
+                 page1_prompt, data_subset_state],
+        scroll_to_output=True
+    )
     # Transition from Page 1 to direct submission (no confirmation modal)
     submit_btn_1.click(
         fn=validate_and_submit_page1,
+        inputs=[data_subset_state, user_info_state, *pairwise_inputs,
                 *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
         outputs=[page1_error_box, page1, final_page, page0_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
+                 page1_prompt, data_subset_state, user_info_state, *pairwise_inputs, *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
         scroll_to_output=True
     )