richard_dev
#3
by
RichardZhu52 - opened
app.py
CHANGED
|
@@ -287,7 +287,7 @@ def get_evaluator_questions(email, disease_map_data, drug_map_data, user_all_spe
|
|
| 287 |
disease_subspecs = set(specs.get('subspecialties', []))
|
| 288 |
|
| 289 |
# Check for intersection
|
| 290 |
-
if user_all_specs.intersection(disease_specs) or user_all_specs.intersection(disease_subspecs)
|
| 291 |
relevant_diseases.append(disease)
|
| 292 |
|
| 293 |
relevant_drugs = []
|
|
@@ -553,9 +553,9 @@ def go_to_page0_from_minus1(question_in_progress_state):
|
|
| 553 |
|
| 554 |
def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, our_methods=our_methods):
|
| 555 |
# 校验用户信息
|
| 556 |
-
if not name or not email or not
|
| 557 |
-
gr.Info("Please fill out all the required fields (name, email, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", duration=5)
|
| 558 |
-
return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.HTML(), gr.State()
|
| 559 |
|
| 560 |
gr.Info("Loading the data...", duration=3)
|
| 561 |
user_info, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, page1_reference_answer, question_for_eval, remaining_count = get_next_eval_question(
|
|
@@ -591,7 +591,136 @@ def go_to_page1(show_page_1):
|
|
| 591 |
return updates
|
| 592 |
|
| 593 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
# Define restrict function for each criterion
|
|
|
|
|
|
|
| 595 |
def make_restrict_function(base_choices):
|
| 596 |
def restrict_choices_page1(radio_choice, score_a, score_b):
|
| 597 |
"""
|
|
@@ -723,7 +852,7 @@ def make_restrict_function(base_choices):
|
|
| 723 |
# --- Define Callback Functions for Confirmation Flow ---
|
| 724 |
|
| 725 |
|
| 726 |
-
def build_row_dict(data_subset_state, user_info,
|
| 727 |
num_criteria = len(criteria)
|
| 728 |
ratings_A_vals = list(args[:num_criteria])
|
| 729 |
ratings_B_vals = list(args[num_criteria:])
|
|
@@ -746,7 +875,7 @@ def build_row_dict(data_subset_state, user_info, question_quality_comments, pair
|
|
| 746 |
"Prompt": prompt_text,
|
| 747 |
"ResponseA_Model": response_A_model,
|
| 748 |
"ResponseB_Model": response_B_model,
|
| 749 |
-
"Question
|
| 750 |
}
|
| 751 |
|
| 752 |
pairwise = [mapping.get(val, val) for val in pairwise]
|
|
@@ -760,10 +889,10 @@ def build_row_dict(data_subset_state, user_info, question_quality_comments, pair
|
|
| 760 |
return row
|
| 761 |
|
| 762 |
|
| 763 |
-
def final_submit(data_subset_state, user_info,
|
| 764 |
# --- Part 1: Submit the current results (Existing Logic) ---
|
| 765 |
-
row_dict = build_row_dict(data_subset_state, user_info,
|
| 766 |
-
pairwise, comparisons_reasons, *args)
|
| 767 |
append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(
|
| 768 |
TXAGENT_RESULTS_SHEET_BASE_NAME), add_header_when_create_sheet=True)
|
| 769 |
|
|
@@ -804,15 +933,14 @@ def final_submit(data_subset_state, user_info, question_quality_comments, pairwi
|
|
| 804 |
|
| 805 |
# Function to validate page1 inputs and directly submit if valid
|
| 806 |
def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
|
| 807 |
-
# combined_values contains
|
| 808 |
criteria_count = len(criteria_for_comparison)
|
| 809 |
-
|
| 810 |
-
pairwise_list = list(combined_values[1:criteria_count+1])
|
| 811 |
comparison_reasons_list = list(
|
| 812 |
-
combined_values[criteria_count
|
| 813 |
ratings_A_list = list(
|
| 814 |
-
combined_values[criteria_count*2
|
| 815 |
-
ratings_B_list = list(combined_values[criteria_count*3
|
| 816 |
|
| 817 |
# Check if all pairwise comparisons are filled
|
| 818 |
if any(answer is None for answer in pairwise_list):
|
|
@@ -873,8 +1001,8 @@ def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
|
|
| 873 |
)
|
| 874 |
gr.Info("Submitting your evaluation and loading the next question...")
|
| 875 |
# If validation passes, call final_submit and handle form reset
|
| 876 |
-
submit_result = final_submit(data_subset_state, user_info,
|
| 877 |
-
comparison_reasons_list, *ratings_A_list, *ratings_B_list)
|
| 878 |
|
| 879 |
# Check if there are more questions by looking at the page1 update dict
|
| 880 |
# submit_result[1] is the page1 update, submit_result[2] is the final_page update
|
|
@@ -960,18 +1088,6 @@ centered_col_css = """
|
|
| 960 |
width: 100% !important; /* Occupy full width of its column */
|
| 961 |
white-space: normal !important; /* Allow text to wrap onto multiple lines */
|
| 962 |
}
|
| 963 |
-
#txagent-demo-btn {
|
| 964 |
-
background-color: #4CAF50 !important;
|
| 965 |
-
color: white !important;
|
| 966 |
-
border-color: #4CAF50 !important;
|
| 967 |
-
margin-top: 10px !important;
|
| 968 |
-
}
|
| 969 |
-
#api-key-btn {
|
| 970 |
-
background-color: #FF9800 !important;
|
| 971 |
-
color: white !important;
|
| 972 |
-
border-color: #FF9800 !important;
|
| 973 |
-
margin-top: 10px !important;
|
| 974 |
-
}
|
| 975 |
.criteria-radio-score-label [role="radiogroup"],
|
| 976 |
.criteria-radio-score-label .gr-radio-group,
|
| 977 |
.criteria-radio-score-label .flex {
|
|
@@ -1005,6 +1121,7 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1005 |
pairwise_state = gr.State()
|
| 1006 |
scores_A_state = gr.State()
|
| 1007 |
comparison_reasons = gr.State()
|
|
|
|
| 1008 |
unqualified_A_state = gr.State()
|
| 1009 |
data_subset_state = gr.State()
|
| 1010 |
question_in_progress = gr.State(0)
|
|
@@ -1033,38 +1150,11 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1033 |
with gr.Column(visible=True, elem_id="page-1") as page_minus1:
|
| 1034 |
gr.HTML("""
|
| 1035 |
<div>
|
| 1036 |
-
<h1>TxAgent:
|
| 1037 |
</div>
|
| 1038 |
""")
|
| 1039 |
-
#
|
| 1040 |
-
|
| 1041 |
-
api_key_btn = gr.Button(
|
| 1042 |
-
value="Request Access",
|
| 1043 |
-
variant="secondary",
|
| 1044 |
-
size="lg",
|
| 1045 |
-
elem_id="api-key-btn"
|
| 1046 |
-
)
|
| 1047 |
-
with gr.Column(scale=1):
|
| 1048 |
-
txagent_demo_btn = gr.Button(
|
| 1049 |
-
value="Access TxAgent",
|
| 1050 |
-
variant="secondary",
|
| 1051 |
-
size="lg",
|
| 1052 |
-
elem_id="txagent-demo-btn"
|
| 1053 |
-
)
|
| 1054 |
-
|
| 1055 |
-
gr.Markdown(
|
| 1056 |
-
"""
|
| 1057 |
-
For live access to TxAgent, you can:
|
| 1058 |
-
- Access TxAgent if you have an account.
|
| 1059 |
-
- Request access if you do not have an account yet.
|
| 1060 |
-
|
| 1061 |
-
We look forward to your feedback!
|
| 1062 |
-
"""
|
| 1063 |
-
)
|
| 1064 |
-
|
| 1065 |
-
# Add extra white space between sections
|
| 1066 |
-
gr.HTML("<br>")
|
| 1067 |
-
|
| 1068 |
with gr.Column(scale=1):
|
| 1069 |
participate_eval_btn = gr.Button(
|
| 1070 |
value="Evaluate TxAgent",
|
|
@@ -1083,32 +1173,27 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1083 |
Thank you for helping improve TxAgent!
|
| 1084 |
"""
|
| 1085 |
)
|
| 1086 |
-
|
| 1087 |
-
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
|
| 1091 |
-
|
| 1092 |
-
|
| 1093 |
-
# variant="primary",
|
| 1094 |
-
# size="lg",
|
| 1095 |
-
# elem_id="submit-btn"
|
| 1096 |
-
# )
|
| 1097 |
|
| 1098 |
# with gr.Row(elem_classes=["center-row"]):
|
| 1099 |
# 第二行:分别放两段说明文字
|
| 1100 |
-
|
| 1101 |
-
|
| 1102 |
-
|
| 1103 |
-
|
| 1104 |
-
|
| 1105 |
-
|
| 1106 |
-
|
| 1107 |
-
|
| 1108 |
-
# We look forward to seeing your feedback!
|
| 1109 |
-
# """
|
| 1110 |
-
# )
|
| 1111 |
|
|
|
|
|
|
|
|
|
|
| 1112 |
|
| 1113 |
# Add contact information in Markdown format
|
| 1114 |
contact_info_markdown = """
|
|
@@ -1125,29 +1210,11 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1125 |
# For the Google Form button, we'll use JavaScript to open a new tab.
|
| 1126 |
# The URL for the Google Form should be replaced with the actual link.
|
| 1127 |
google_form_url = "https://forms.gle/pYvyvEQQwS5gdupQA"
|
| 1128 |
-
|
| 1129 |
-
# fn=None,
|
| 1130 |
-
# inputs=None,
|
| 1131 |
-
# outputs=None,
|
| 1132 |
-
# js=f"() => {{ window.open('{google_form_url}', '_blank'); }}"
|
| 1133 |
-
# )
|
| 1134 |
-
|
| 1135 |
-
# TxAgent demo button click handler
|
| 1136 |
-
txagent_demo_url = "https://txagent.curebench.ai/"
|
| 1137 |
-
txagent_demo_btn.click(
|
| 1138 |
fn=None,
|
| 1139 |
inputs=None,
|
| 1140 |
outputs=None,
|
| 1141 |
-
js=f"() => {{ window.open('{
|
| 1142 |
-
)
|
| 1143 |
-
|
| 1144 |
-
# API key application button click handler
|
| 1145 |
-
api_key_url = "https://docs.google.com/forms/d/e/1FAIpQLScEFhgT1X0wOkpWjEOMGpvhDFyIfoSMzJZ2HA9o0F0BaNcQPw/viewform?usp=dialog"
|
| 1146 |
-
api_key_btn.click(
|
| 1147 |
-
fn=None,
|
| 1148 |
-
inputs=None,
|
| 1149 |
-
outputs=None,
|
| 1150 |
-
js=f"() => {{ window.open('{api_key_url}', '_blank'); }}"
|
| 1151 |
)
|
| 1152 |
|
| 1153 |
# Page 0: Welcome / Informational page.
|
|
@@ -1158,9 +1225,9 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1158 |
email = gr.Textbox(
|
| 1159 |
label="Email (required). Use the same email each time you log into this evaluation portal to avoid receiving repeat questions.")
|
| 1160 |
specialty_dd = gr.Dropdown(
|
| 1161 |
-
choices=specialties_list, label="Primary Medical Specialty (
|
| 1162 |
subspecialty_dd = gr.Dropdown(
|
| 1163 |
-
choices=subspecialties_list, label="Subspecialty (
|
| 1164 |
npi_id = gr.Textbox(
|
| 1165 |
label="National Provider Identifier ID (optional). Visit https://npiregistry.cms.hhs.gov/search to find your NPI ID. Leave blank if you do not have an NPI ID.")
|
| 1166 |
years_exp_radio = gr.Radio(
|
|
@@ -1195,7 +1262,7 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1195 |
|
| 1196 |
# Page 1: Pairwise Comparison.
|
| 1197 |
with gr.Column(visible=False) as page1:
|
| 1198 |
-
with gr.Accordion("
|
| 1199 |
gr.Markdown("""
|
| 1200 |
## Instructions:
|
| 1201 |
Please review these instructions and enter your information to begin:
|
|
@@ -1213,13 +1280,21 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1213 |
# gr.Markdown("Comparison")
|
| 1214 |
# Add small red button and comments text box in the same row
|
| 1215 |
page1_prompt = gr.HTML()
|
| 1216 |
-
|
| 1217 |
-
|
| 1218 |
-
|
| 1219 |
-
|
| 1220 |
-
|
| 1221 |
-
|
| 1222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1223 |
|
| 1224 |
page1_error_box = gr.Markdown("") # ADDED: display validation errors
|
| 1225 |
|
|
@@ -1389,14 +1464,22 @@ with gr.Blocks(css=centered_col_css) as demo:
|
|
| 1389 |
chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, data_subset_state],
|
| 1390 |
scroll_to_output=True
|
| 1391 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1392 |
|
| 1393 |
# Transition from Page 1 to direct submission (no confirmation modal)
|
| 1394 |
submit_btn_1.click(
|
| 1395 |
fn=validate_and_submit_page1,
|
| 1396 |
-
inputs=[data_subset_state, user_info_state,
|
| 1397 |
*comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
|
| 1398 |
outputs=[page1_error_box, page1, final_page, page0_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
|
| 1399 |
-
page1_prompt, data_subset_state, user_info_state,
|
| 1400 |
scroll_to_output=True
|
| 1401 |
)
|
| 1402 |
|
|
|
|
| 287 |
disease_subspecs = set(specs.get('subspecialties', []))
|
| 288 |
|
| 289 |
# Check for intersection
|
| 290 |
+
if user_all_specs.intersection(disease_specs) or user_all_specs.intersection(disease_subspecs):
|
| 291 |
relevant_diseases.append(disease)
|
| 292 |
|
| 293 |
relevant_drugs = []
|
|
|
|
| 553 |
|
| 554 |
def go_to_eval_progress_modal(name, email, specialty_dd, subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, our_methods=our_methods):
|
| 555 |
# 校验用户信息
|
| 556 |
+
if not name or not email or not specialty_dd or not years_exp_radio:
|
| 557 |
+
gr.Info("Please fill out all the required fields (name, email, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", duration=5)
|
| 558 |
+
return gr.update(visible=True), gr.update(visible=False), None, "Please fill out all the required fields (name, email, specialty, years of experience). If you are not a licensed physician with a specific specialty, please choose the specialty that most closely aligns with your biomedical expertise.", gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.Chatbot(), gr.HTML(), gr.State()
|
| 559 |
|
| 560 |
gr.Info("Loading the data...", duration=3)
|
| 561 |
user_info, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, page1_reference_answer, question_for_eval, remaining_count = get_next_eval_question(
|
|
|
|
| 591 |
return updates
|
| 592 |
|
| 593 |
|
| 594 |
+
# --- Skip Question Modal Callbacks ---
|
| 595 |
+
def skip_question_and_load_new(user_info_state, our_methods):
    """Fetch the next evaluation question for the evaluator in ``user_info_state``.

    Args:
        user_info_state: ``None`` or an 8-tuple ``(name, email, specialty_dd,
            subspecialty_dd, years_exp_radio, exp_explanation_tb, npi_id, q_id)``.
        our_methods: Passed through unchanged to ``get_next_eval_question``.

    Returns:
        An 11-tuple: two visibility updates, the new user info (or ``None``),
        a status message, four chatbot values, the prompt, the reference
        answer and the question record.  When there is no user info or no
        question remains, the slots after the message hold freshly constructed
        placeholder components.
        NOTE(review): which UI components these map to depends on the event
        wiring, which is not visible from this function -- confirm there.
    """

    def _placeholder_outputs(message):
        # Shared shape for the "nothing to show" returns: both visibility
        # updates are False and every content slot is an empty component.
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            None,
            message,
            gr.Chatbot(),
            gr.Chatbot(),
            gr.Chatbot(),
            gr.Chatbot(),
            gr.HTML(),
            gr.Markdown(),
            gr.State(),
        )

    # Defensive: without user info there is nothing to look up.
    if user_info_state is None:
        return _placeholder_outputs("")

    # The trailing element (the current question id) is not needed here.
    (
        name,
        email,
        specialty,
        subspecialty,
        years_exp,
        exp_explanation,
        npi,
        _current_q_id,
    ) = user_info_state

    next_question = get_next_eval_question(
        name, email, specialty, subspecialty, years_exp, exp_explanation, npi, our_methods
    )
    (
        user_info,
        answer_a,
        answer_b,
        reasoning_a,
        reasoning_b,
        prompt,
        reference_answer,
        question,
        questions_left,
    ) = next_question

    # No more questions for this evaluator: report that instead of loading one.
    if questions_left == 0:
        return _placeholder_outputs(
            "Based on your submitted data, you have no more questions to evaluate. You may exit the page; we will follow-up if we require anything else from you. Thank you!"
        )

    return (
        gr.update(visible=False),
        gr.update(visible=True),
        user_info,
        "",
        answer_a,
        answer_b,
        reasoning_a,
        reasoning_b,
        prompt,
        reference_answer,
        question,
    )
|
| 609 |
+
|
| 610 |
+
# --- Skip‑question handler for the "Wrong Question?" button -------------------
|
| 611 |
+
|
| 612 |
+
|
| 613 |
+
def skip_current_question(user_info_state, our_methods: list = our_methods):
    """Replace the question currently shown on page 1 with the next one.

    Args:
        user_info_state: ``None`` if no evaluation session has started,
            otherwise an 8-tuple ``(name, email, specialty, subspecialty,
            years_of_experience, experience_description, npi_id, question_id)``.
        our_methods: Passed through unchanged to ``get_next_eval_question``.

    Returns:
        An 8-tuple on every path, in the order the ``nonsense_btn.click``
        handler lists its outputs: user info state, page-1 error/message box,
        chatbot A answer, chatbot B answer, chatbot A reasoning, chatbot B
        reasoning, prompt HTML, and the question record stored in
        ``data_subset_state``.
    """
    # Guard: the evaluator clicked before a session was started.  This path
    # must return one value per wired output (eight), so all four chatbot
    # slots are cleared, not just the two answer slots.
    if user_info_state is None:
        return (
            None,
            gr.update(
                value="Please start the evaluation before skipping questions."),
            gr.update(value=[]),  # Chatbot A answer
            gr.update(value=[]),  # Chatbot B answer
            gr.update(value=[]),  # Chatbot A reasoning
            gr.update(value=[]),  # Chatbot B reasoning
            gr.update(value=""),  # Prompt HTML
            gr.State(),  # data_subset_state
        )

    # Only announce the skip once we know there is a session to skip within.
    gr.Info("Skipping this question and loading the next one…", duration=5)

    # Unpack evaluator identity; the current question id is not needed here.
    name, email, specialty_dd, subspecialty_dd, yrs_exp, exp_desc, npi_id, _ = user_info_state

    # Pull the next unused question.  The pre-formatted chat/prompt values
    # returned alongside it are ignored; the display values are rebuilt from
    # the question record below.
    (
        user_info_new,
        _,
        _,
        _,
        _,
        _,
        _,
        question_for_eval,
        remaining,
    ) = get_next_eval_question(
        name, email, specialty_dd, subspecialty_dd, yrs_exp, exp_desc, npi_id, our_methods
    )

    # If the pool is exhausted, just notify the evaluator and blank the page.
    if remaining == 0 or question_for_eval is None:
        final_msg = (
            "Based on your submitted data, you have no more questions to evaluate. "
            "You may exit the page; we will follow‑up if we require anything else from you. "
            "Thank you!"
        )
        return (
            user_info_state,
            gr.update(value=final_msg),
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(value=[]),
            gr.update(value=""),
            gr.State(),
        )

    # --- Build fresh values for the existing UI components ---
    chat_a_answer, chat_a_reasoning, _ = format_chat(
        question_for_eval['models'][0]['reasoning_trace'], tool_database_labels)
    chat_b_answer, chat_b_reasoning, _ = format_chat(
        question_for_eval['models'][1]['reasoning_trace'], tool_database_labels)

    prompt_html = (
        f"<div style='background-color: #FFEFD5; border: 2px solid #FF8C00; padding: 10px; "
        f"border-radius: 5px; color: black;'><strong style='color: black;'>Question:</strong> "
        f"{question_for_eval['question']}</div>"
    )
    gr.Info("New question loaded…", duration=3)

    # Return updates to refresh Page 1 in place.
    return (
        user_info_new,
        gr.update(value=""),  # clear any previous error text
        gr.update(value=chat_a_answer),  # Chatbot A answer
        gr.update(value=chat_b_answer),  # Chatbot B answer
        gr.update(value=chat_a_reasoning),  # Chatbot A reasoning
        gr.update(value=chat_b_reasoning),  # Chatbot B reasoning
        gr.update(value=prompt_html),  # Prompt
        question_for_eval,  # stored in data_subset_state for later use
    )
|
| 688 |
+
|
| 689 |
+
# --- Handler for the "Skip Question" button: flags the question as nonsensical, then skips it
|
| 690 |
+
|
| 691 |
+
|
| 692 |
+
def flag_nonsense_and_skip(user_info_state, skip_comments=""):
    """Record that the current question was flagged, then advance to the next.

    When user info is available, a row marking the question as nonsensical or
    biomedically irrelevant is appended to the results sheet before skipping,
    so the feedback is kept even if the evaluator stops here.

    Args:
        user_info_state: ``None`` or an 8-tuple whose first two elements are
            the evaluator's name and email and whose last is the question id.
        skip_comments: Free-text reason supplied by the evaluator.

    Returns:
        Whatever ``skip_current_question(user_info_state)`` returns.
    """
    # Without user info there is nothing to attribute the flag to; go straight
    # to the shared skip logic, which handles the missing-session case itself.
    if user_info_state is None:
        return skip_current_question(user_info_state)

    # Only name, email and question id are written to the sheet.
    (
        name,
        email,
        _specialty,
        _subspecialty,
        _yrs_exp,
        _exp_desc,
        _npi_id,
        q_id,
    ) = user_info_state

    flag_row = {
        "Timestamp": datetime.datetime.now().isoformat(),
        "Name": name,
        "Email": email,
        "Question ID": q_id,
        "Question Makes No Sense or Biomedically Irrelevant": True,
        "Skip Comments": skip_comments,
    }
    append_to_sheet(
        user_data=None,
        custom_row_dict=flag_row,
        custom_sheet_name=str(TXAGENT_RESULTS_SHEET_BASE_NAME),
        add_header_when_create_sheet=True,
    )

    # Reuse the existing skip logic to advance the UI.
    return skip_current_question(user_info_state)
|
| 720 |
+
|
| 721 |
# Define restrict function for each criterion
|
| 722 |
+
|
| 723 |
+
|
| 724 |
def make_restrict_function(base_choices):
|
| 725 |
def restrict_choices_page1(radio_choice, score_a, score_b):
|
| 726 |
"""
|
|
|
|
| 852 |
# --- Define Callback Functions for Confirmation Flow ---
|
| 853 |
|
| 854 |
|
| 855 |
+
def build_row_dict(data_subset_state, user_info, pairwise, comparisons_reasons, nonsense_btn_clicked, *args):
|
| 856 |
num_criteria = len(criteria)
|
| 857 |
ratings_A_vals = list(args[:num_criteria])
|
| 858 |
ratings_B_vals = list(args[num_criteria:])
|
|
|
|
| 875 |
"Prompt": prompt_text,
|
| 876 |
"ResponseA_Model": response_A_model,
|
| 877 |
"ResponseB_Model": response_B_model,
|
| 878 |
+
"Question Makes No Sense or Biomedically Irrelevant": nonsense_btn_clicked,
|
| 879 |
}
|
| 880 |
|
| 881 |
pairwise = [mapping.get(val, val) for val in pairwise]
|
|
|
|
| 889 |
return row
|
| 890 |
|
| 891 |
|
| 892 |
+
def final_submit(data_subset_state, user_info, pairwise, comparisons_reasons, nonsense_btn_clicked, *args):
|
| 893 |
# --- Part 1: Submit the current results (Existing Logic) ---
|
| 894 |
+
row_dict = build_row_dict(data_subset_state, user_info,
|
| 895 |
+
pairwise, comparisons_reasons, nonsense_btn_clicked, *args)
|
| 896 |
append_to_sheet(user_data=None, custom_row_dict=row_dict, custom_sheet_name=str(
|
| 897 |
TXAGENT_RESULTS_SHEET_BASE_NAME), add_header_when_create_sheet=True)
|
| 898 |
|
|
|
|
| 933 |
|
| 934 |
# Function to validate page1 inputs and directly submit if valid
|
| 935 |
def validate_and_submit_page1(data_subset_state, user_info, *combined_values):
|
| 936 |
+
# combined_values contains pairwise choices + comparison reasons + ratings
|
| 937 |
criteria_count = len(criteria_for_comparison)
|
| 938 |
+
pairwise_list = list(combined_values[:criteria_count])
|
|
|
|
| 939 |
comparison_reasons_list = list(
|
| 940 |
+
combined_values[criteria_count:criteria_count*2])
|
| 941 |
ratings_A_list = list(
|
| 942 |
+
combined_values[criteria_count*2:criteria_count*3])
|
| 943 |
+
ratings_B_list = list(combined_values[criteria_count*3:])
|
| 944 |
|
| 945 |
# Check if all pairwise comparisons are filled
|
| 946 |
if any(answer is None for answer in pairwise_list):
|
|
|
|
| 1001 |
)
|
| 1002 |
gr.Info("Submitting your evaluation and loading the next question...")
|
| 1003 |
# If validation passes, call final_submit and handle form reset
|
| 1004 |
+
submit_result = final_submit(data_subset_state, user_info, pairwise_list,
|
| 1005 |
+
comparison_reasons_list, False, *ratings_A_list, *ratings_B_list)
|
| 1006 |
|
| 1007 |
# Check if there are more questions by looking at the page1 update dict
|
| 1008 |
# submit_result[1] is the page1 update, submit_result[2] is the final_page update
|
|
|
|
| 1088 |
width: 100% !important; /* Occupy full width of its column */
|
| 1089 |
white-space: normal !important; /* Allow text to wrap onto multiple lines */
|
| 1090 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1091 |
.criteria-radio-score-label [role="radiogroup"],
|
| 1092 |
.criteria-radio-score-label .gr-radio-group,
|
| 1093 |
.criteria-radio-score-label .flex {
|
|
|
|
| 1121 |
pairwise_state = gr.State()
|
| 1122 |
scores_A_state = gr.State()
|
| 1123 |
comparison_reasons = gr.State()
|
| 1124 |
+
nonsense_btn_clicked = gr.State(False)
|
| 1125 |
unqualified_A_state = gr.State()
|
| 1126 |
data_subset_state = gr.State()
|
| 1127 |
question_in_progress = gr.State(0)
|
|
|
|
| 1150 |
with gr.Column(visible=True, elem_id="page-1") as page_minus1:
|
| 1151 |
gr.HTML("""
|
| 1152 |
<div>
|
| 1153 |
+
<h1>TxAgent Portal: AI Evaluation and Crowdsourcing of Therapeutic Questions</h1>
|
| 1154 |
</div>
|
| 1155 |
""")
|
| 1156 |
+
# with gr.Row(elem_classes=["center-row"]):
|
| 1157 |
+
# 第一行:并排放两个按钮
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1158 |
with gr.Column(scale=1):
|
| 1159 |
participate_eval_btn = gr.Button(
|
| 1160 |
value="Evaluate TxAgent",
|
|
|
|
| 1173 |
Thank you for helping improve TxAgent!
|
| 1174 |
"""
|
| 1175 |
)
|
| 1176 |
+
with gr.Column(scale=1):
|
| 1177 |
+
submit_questions_btn = gr.Button(
|
| 1178 |
+
value="Submit Your Therapeutic Questions",
|
| 1179 |
+
variant="primary",
|
| 1180 |
+
size="lg",
|
| 1181 |
+
elem_id="submit-btn"
|
| 1182 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1183 |
|
| 1184 |
# with gr.Row(elem_classes=["center-row"]):
|
| 1185 |
# 第二行:分别放两段说明文字
|
| 1186 |
+
with gr.Column(scale=1):
|
| 1187 |
+
gr.Markdown(
|
| 1188 |
+
"""
|
| 1189 |
+
By submitting therapeutic questions, you will:
|
| 1190 |
+
- Help identify edge cases and blind spots for AI models.
|
| 1191 |
+
- Help extend AI models to reason in new domains.
|
| 1192 |
+
- Directly shape future model improvements.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1193 |
|
| 1194 |
+
We look forward to seeing your feedback!
|
| 1195 |
+
"""
|
| 1196 |
+
)
|
| 1197 |
|
| 1198 |
# Add contact information in Markdown format
|
| 1199 |
contact_info_markdown = """
|
|
|
|
| 1210 |
# For the Google Form button, we'll use JavaScript to open a new tab.
|
| 1211 |
# The URL for the Google Form should be replaced with the actual link.
|
| 1212 |
google_form_url = "https://forms.gle/pYvyvEQQwS5gdupQA"
|
| 1213 |
+
submit_questions_btn.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1214 |
fn=None,
|
| 1215 |
inputs=None,
|
| 1216 |
outputs=None,
|
| 1217 |
+
js=f"() => {{ window.open('{google_form_url}', '_blank'); }}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1218 |
)
|
| 1219 |
|
| 1220 |
# Page 0: Welcome / Informational page.
|
|
|
|
| 1225 |
email = gr.Textbox(
|
| 1226 |
label="Email (required). Use the same email each time you log into this evaluation portal to avoid receiving repeat questions.")
|
| 1227 |
specialty_dd = gr.Dropdown(
|
| 1228 |
+
choices=specialties_list, label="Primary Medical Specialty (required). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
|
| 1229 |
subspecialty_dd = gr.Dropdown(
|
| 1230 |
+
choices=subspecialties_list, label="Subspecialty (if applicable). Visit https://www.abms.org/member-boards/specialty-subspecialty-certificates/ for categories.", multiselect=True)
|
| 1231 |
npi_id = gr.Textbox(
|
| 1232 |
label="National Provider Identifier ID (optional). Visit https://npiregistry.cms.hhs.gov/search to find your NPI ID. Leave blank if you do not have an NPI ID.")
|
| 1233 |
years_exp_radio = gr.Radio(
|
|
|
|
| 1262 |
|
| 1263 |
# Page 1: Pairwise Comparison.
|
| 1264 |
with gr.Column(visible=False) as page1:
|
| 1265 |
+
with gr.Accordion("Instructions", open=False):
|
| 1266 |
gr.Markdown("""
|
| 1267 |
## Instructions:
|
| 1268 |
Please review these instructions and enter your information to begin:
|
|
|
|
| 1280 |
# gr.Markdown("Comparison")
|
| 1281 |
# Add small red button and comments text box in the same row
|
| 1282 |
page1_prompt = gr.HTML()
|
| 1283 |
+
with gr.Row():
|
| 1284 |
+
nonsense_btn = gr.Button(
|
| 1285 |
+
"Skip Question",
|
| 1286 |
+
size="sm",
|
| 1287 |
+
variant="stop", # red variant
|
| 1288 |
+
elem_id="invalid-question-btn",
|
| 1289 |
+
elem_classes=["short-btn"],
|
| 1290 |
+
scale=1
|
| 1291 |
+
)
|
| 1292 |
+
skip_comments = gr.Textbox(
|
| 1293 |
+
placeholder="(Optional) Why do you want to skip this question...",
|
| 1294 |
+
show_label=False,
|
| 1295 |
+
scale=3,
|
| 1296 |
+
container=False,
|
| 1297 |
+
)
|
| 1298 |
|
| 1299 |
page1_error_box = gr.Markdown("") # ADDED: display validation errors
|
| 1300 |
|
|
|
|
| 1464 |
chat_b_answer, chat_a_reasoning, chat_b_reasoning, page1_prompt, data_subset_state],
|
| 1465 |
scroll_to_output=True
|
| 1466 |
)
|
| 1467 |
+
# Skip the current question and load a new one when the evaluator flags it
|
| 1468 |
+
nonsense_btn.click(
|
| 1469 |
+
fn=flag_nonsense_and_skip,
|
| 1470 |
+
inputs=[user_info_state, skip_comments],
|
| 1471 |
+
outputs=[user_info_state, page1_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
|
| 1472 |
+
page1_prompt, data_subset_state],
|
| 1473 |
+
scroll_to_output=True
|
| 1474 |
+
)
|
| 1475 |
|
| 1476 |
# Transition from Page 1 to direct submission (no confirmation modal)
|
| 1477 |
submit_btn_1.click(
|
| 1478 |
fn=validate_and_submit_page1,
|
| 1479 |
+
inputs=[data_subset_state, user_info_state, *pairwise_inputs,
|
| 1480 |
*comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
|
| 1481 |
outputs=[page1_error_box, page1, final_page, page0_error_box, chat_a_answer, chat_b_answer, chat_a_reasoning, chat_b_reasoning,
|
| 1482 |
+
page1_prompt, data_subset_state, user_info_state, *pairwise_inputs, *comparison_reasons_inputs, *ratings_A_page1, *ratings_B_page1],
|
| 1483 |
scroll_to_output=True
|
| 1484 |
)
|
| 1485 |
|