"""Gradio app for collecting difficulty ratings of paired medical texts.

Flow: the annotator enters a name, answers a short health-literacy quiz,
then rates a queue of (original text, wiki anchor) pairs on a 1-5 scale.
Each session writes its quiz answers and ratings as JSON files into its own
folder under SAVE_ROOT.
"""

import json
import os
import random
from datetime import datetime

import gradio as gr

# --- PATH CONFIGURATION ---
DATA_PATH = "/home/mshahidul/readctrl/data/data_annotator_data/manual_selections_en.json"
SAVE_ROOT = "/home/mshahidul/readctrl/data/annotators_validate_data"
QUESTIONS_FILE = "/home/mshahidul/readctrl/code/interface/sp50_questions_en.json"

# --- SESSION CONFIGURATION ---
NUM_QUESTIONS = 20          # distinct items sampled per session
NUM_DUPLICATES = 4          # items shown a second time within the session
NUM_LITERACY_QUERIES = 10   # quiz questions shown before the task
DUPLICATE_INTERVAL = 8      # queue index where the repeated items start

# --- UI HTML COMPONENTS ---
GUIDE_HTML = """

Rating Guide: Medical Text Difficulty

Score Description
1Very Easy: Simple words, no medical jargon. Clear to a child.
2Easy: Conversational medical terms (e.g., "flu", "broken bone").
3Moderate: Standard patient education material. Requires some focus.
4Hard: Significant technical jargon. Likely a clinical summary.
5Very Hard: Specialist-level / Academic. Extremely dense.
"""

EXAMPLES_HTML = """

Reference Examples (Calibration)

Use these examples of the same medical case to calibrate your ratings:

Level 1-2 (Easy)

"This is about a 20-year-old woman. She had a kidney problem... The problem first showed up when a big blood clot blocked veins in her brain... She took blood thinners and steroid pills."

Reasoning: Uses "kidney problem" instead of "nephrotic syndrome" and "blood thinners" instead of "anticoagulants".

Level 3 (Medium)

"A 20-year-old woman had a 12-year history of idiopathic nephrotic syndrome... treated with anticoagulation and oral corticosteroids... CT showed acute superior mesenteric artery thrombosis."

Reasoning: Uses standard medical terminology but keeps sentences relatively concise and structured.

Level 4-5 (Hard)

"20-year-old woman... idiopathic NS inaugurated by cerebral venous thrombosis extended to the right jugular vein... Hemogasanalysis results showed metabolic acidosis with respiratory compensation."

Reasoning: Highly technical, academic language, specific lab values, and complex physiological processes.
"""


# --- DATA LOADING ---
def load_all_literacy_questions():
    """Return the parsed contents of QUESTIONS_FILE, or [] if loading fails.

    Failures (missing/unreadable file, invalid JSON or encoding) are printed
    and swallowed so the app can still start without the quiz.
    """
    try:
        with open(QUESTIONS_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        print(f"Error loading questions: {e}")
        return []


with open(DATA_PATH, "r", encoding="utf-8") as f:
    FULL_DATASET = json.load(f)

# Loaded once so that the questions rendered in the UI and the questions the
# answers are scored against are guaranteed to be the same objects, in the
# same order.
DISPLAYED_QUESTIONS = load_all_literacy_questions()[:NUM_LITERACY_QUERIES]


# --- SESSION CLASS ---
class AnnotationSession:
    """Per-user state: the rating queue, progress, results and quiz questions.

    Args:
        dataset: sequence of items to sample the rating queue from.
        all_questions: literacy questions in the order they are displayed.
    """

    def __init__(self, dataset, all_questions):
        k = min(len(dataset), NUM_QUESTIONS)
        base_samples = random.sample(dataset, k)
        self.queue = list(base_samples)

        # Queue indices that hold a repeated item. Recorded explicitly because
        # list.insert clamps out-of-range indices: with a small dataset the
        # repeats land at the end of the queue, not at DUPLICATE_INTERVAL.
        self.duplicate_positions = set()
        for i in range(min(NUM_DUPLICATES, k)):
            position = min(DUPLICATE_INTERVAL + i, len(self.queue))
            self.queue.insert(position, base_samples[i])
            self.duplicate_positions.add(position)

        self.current_index = 0
        self.results = []
        # The radio buttons are built from the first NUM_LITERACY_QUERIES
        # questions, and answers arrive positionally, so the session must use
        # exactly that prefix (a random sample would score each answer
        # against a different question than the one the user saw).
        self.session_questions = list(all_questions[:NUM_LITERACY_QUERIES])
        self.session_folder = None


# --- LOGIC FUNCTIONS ---
def start_and_save_literacy(username, *args):
    """Create a session, save the quiz answers, and show the first item.

    Args:
        username: free-text name/ID; sanitised before use in the folder name.
        *args: the selected answer of each literacy radio button, in the
            order the buttons were created.

    Returns:
        A 6-tuple matching the click handler's outputs: updates for the
        intro and task containers, the two texts, the progress string and
        the new session object (None if no items are available).
    """
    new_session = AnnotationSession(FULL_DATASET, DISPLAYED_QUESTIONS)

    if not new_session.queue:
        # Nothing to rate: stay on the intro screen instead of failing on
        # queue[0] below.
        gr.Warning("No items available to annotate.")
        return (gr.update(), gr.update(), "", "", "No items available", None)

    clean_username = "".join(
        c for c in (username or "") if c.isalnum() or c in (" ", "_", "-")
    ).strip() or "anonymous"
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    folder_name = f"{clean_username}_{timestamp}"
    session_folder = os.path.join(SAVE_ROOT, folder_name)
    os.makedirs(session_folder, exist_ok=True)
    new_session.session_folder = session_folder

    # zip stops at the shorter sequence, so surplus answers are ignored.
    literacy_data = [
        {
            "question_id": q_info["id"],
            "question_text": q_info["question"],
            "user_answer": ans,
            "is_correct": ans == q_info["correct"],
        }
        for ans, q_info in zip(args, new_session.session_questions)
    ]

    literacy_file = os.path.join(session_folder, "literacy_results.json")
    with open(literacy_file, "w", encoding="utf-8") as f:
        json.dump(literacy_data, f, indent=4)

    first_item = new_session.queue[0]
    return (
        gr.update(visible=False),
        gr.update(visible=True),
        first_item["original_text"],
        first_item["selected_wiki_anchor"],
        f"Item 1 of {len(new_session.queue)}",
        new_session,
    )


def _finished_outputs(current_session):
    """Build the handler outputs shown once every queue item is rated."""
    return (
        "✅ ALL TASKS COMPLETED",
        "The data has been saved. You may close this tab.",
        "Status: Finished",
        1,
        1,
        current_session,
    )


def submit_rating(doc_slider, wiki_slider, current_session):
    """Record the ratings for the current item and advance the queue.

    Args:
        doc_slider: rating given to Text A.
        wiki_slider: rating given to Text B.
        current_session: the AnnotationSession held in gr.State, or None.

    Returns:
        A 6-tuple matching the click handler's outputs: the next two texts,
        the progress string, reset slider values and the session.
    """
    if current_session is None:
        gr.Warning("Session lost! Please refresh.")  # Pop-up for errors
        return "", "", "Error: Session lost", 3, 3, None

    if current_session.current_index >= len(current_session.queue):
        # The button stays visible after the last item; a further click must
        # not index past the end of the queue or write a bogus result.
        return _finished_outputs(current_session)

    current_pair = current_session.queue[current_session.current_index]

    result_entry = {
        "queue_position": current_session.current_index,
        "doc_id": current_pair.get("index", "no_id"),
        "health_literacy_label": current_pair.get("label", "no_label"),
        "doc_rating": doc_slider,
        "wiki_rating": wiki_slider,
        "is_duplicate": (
            current_session.current_index in current_session.duplicate_positions
        ),
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    current_session.results.append(result_entry)

    # The full result list is rewritten after every rating so progress
    # survives an abandoned session.
    annotation_file = os.path.join(
        current_session.session_folder, "annotation_results.json"
    )
    with open(annotation_file, "w", encoding="utf-8") as f:
        json.dump(current_session.results, f, indent=4)

    current_session.current_index += 1

    # Check if there are more items
    if current_session.current_index < len(current_session.queue):
        # Trigger the "Success" pop-up
        gr.Info(f"Rating {current_session.current_index} saved successfully!")
        print(f"Progress Saved: Item {current_session.current_index}")
        next_pair = current_session.queue[current_session.current_index]
        return (
            next_pair["original_text"],
            next_pair["selected_wiki_anchor"],
            f"Item {current_session.current_index + 1} of {len(current_session.queue)}",
            3,
            3,
            current_session,
        )

    # Trigger the "Finished" pop-up
    gr.Info("Final rating saved. Task complete!")
    return _finished_outputs(current_session)


# --- UI INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # State object to keep data separate for each user
    session_state = gr.State()

    gr.Markdown("# Medical Text Readability Annotation")

    with gr.Accordion("See Annotation Instructions & Calibration Examples", open=True):
        gr.HTML(GUIDE_HTML)
        gr.HTML(EXAMPLES_HTML)

    with gr.Column(visible=True) as intro_box:
        username_input = gr.Textbox(
            label="Enter Your Name/ID", placeholder="e.g., user_1", max_lines=1
        )
        gr.Markdown(
            f"### Pre-Task: Health Literacy Check ({NUM_LITERACY_QUERIES} Questions)"
        )
        # One radio group per displayed question; the same list, in the same
        # order, is what start_and_save_literacy scores the answers against.
        all_possible_q = DISPLAYED_QUESTIONS
        literacy_inputs = []
        for q in all_possible_q:
            radio = gr.Radio(choices=q["options"], label=q["question"])
            literacy_inputs.append(radio)
        btn_start = gr.Button("Start Annotation", variant="primary")

    with gr.Column(visible=False) as task_box:
        progress_label = gr.Label(label="Progress")
        with gr.Row():
            with gr.Column():
                doc_display = gr.Textbox(interactive=False, lines=15, label="Text A")
                doc_slider = gr.Slider(1, 5, step=1, label="Difficulty (1-5)", value=3)
            with gr.Column():
                wiki_display = gr.Textbox(interactive=False, lines=15, label="Text B")
                wiki_slider = gr.Slider(1, 5, step=1, label="Difficulty (1-5)", value=3)
        btn_submit = gr.Button("Submit & Next", variant="primary")

    # --- EVENT HANDLERS ---
    # Start button: inputs must include username + all radio buttons
    btn_start.click(
        fn=start_and_save_literacy,
        inputs=[username_input] + literacy_inputs,
        outputs=[
            intro_box,
            task_box,
            doc_display,
            wiki_display,
            progress_label,
            session_state,
        ],
    )

    # Submit button: session_state is both read and written so each user's
    # progress round-trips through the handler.
    btn_submit.click(
        fn=submit_rating,
        inputs=[doc_slider, wiki_slider, session_state],
        outputs=[
            doc_display,
            wiki_display,
            progress_label,
            doc_slider,
            wiki_slider,
            session_state,
        ],
    )

if __name__ == "__main__":
    demo.launch(share=True)