"""Gradio app for human evaluation of information loss in medical summaries.

For a chosen evaluation record and literacy level, the app shows the
generated text plus the subclaims the automatic evaluation marked as
``not_supported``. The annotator ticks the omissions they consider
unacceptable and the judgment is appended to a JSON results file.
"""

import gradio as gr
import json
import os

# -----------------------------
# CONFIGURATION & DATA LOADING
# -----------------------------
EVAL_FILE = "/home/mshahidul/readctrl/data/factual_testing/full_details_evaluation_0_20_qwen3-32B_v2.json"
SOURCE_FILE = "/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json"
SAVE_PATH = "/home/mshahidul/readctrl/data/human_eval_results.json"


def load_data():
    """Load the evaluation results and the source records.

    Returns:
        A tuple ``(eval_data, source_map)`` where ``eval_data`` is the parsed
        content of ``EVAL_FILE`` and ``source_map`` maps each source item's
        ``'index'`` value to the item itself.
    """
    # Explicit UTF-8 so reading does not depend on the machine's locale.
    with open(EVAL_FILE, 'r', encoding="utf-8") as f:
        eval_data = json.load(f)

    with open(SOURCE_FILE, 'r', encoding="utf-8") as f:
        source_data = json.load(f)

    # Create a mapping for quick lookup
    source_map = {item['index']: item for item in source_data}
    return eval_data, source_map


def _load_saved_feedback():
    """Return judgments already stored in ``SAVE_PATH`` (empty list if none).

    Every save rewrites the whole file from the in-memory list, so starting
    from an empty list would erase earlier sessions on the first save.

    Raises:
        ValueError: if the file exists but does not contain a JSON list.
            Failing loudly is preferred over overwriting unexpected content.
    """
    if not os.path.exists(SAVE_PATH):
        return []

    with open(SAVE_PATH, 'r', encoding="utf-8") as f:
        raw = f.read()

    if not raw.strip():
        return []

    saved = json.loads(raw)
    if not isinstance(saved, list):
        raise ValueError(f"Expected a JSON list in {SAVE_PATH}, found {type(saved).__name__}")
    return saved


def _write_feedback(records):
    """Write ``records`` to ``SAVE_PATH`` via a temporary file.

    The temp file is swapped in with ``os.replace`` so an interrupted write
    cannot leave a truncated results file behind.
    """
    tmp_path = SAVE_PATH + ".tmp"
    with open(tmp_path, 'w', encoding="utf-8") as f:
        json.dump(records, f, indent=2)
    os.replace(tmp_path, SAVE_PATH)


eval_results, source_lookup = load_data()
human_feedback = _load_saved_feedback()


# -----------------------------
# LOGIC
# -----------------------------
def _coerce_index(record_idx):
    """Return ``record_idx`` as a valid position in ``eval_results``, or None.

    The value comes from a numeric input box, so it may be ``None`` (field
    cleared), a float, or out of range. Negative values are rejected rather
    than being allowed to wrap around to the end of the list.
    """
    if record_idx is None:
        return None
    try:
        idx = int(record_idx)
    except (TypeError, ValueError, OverflowError):
        return None
    if 0 <= idx < len(eval_results):
        return idx
    return None


def get_record(index):
    """Fetch one evaluation entry together with its source item.

    Args:
        index: Position of the entry in ``eval_results``.

    Returns:
        A tuple ``(entry, source_item, levels)``: the evaluation entry, the
        matching item from ``source_lookup`` (``{}`` when there is none), and
        the list of literacy-level keys present in the entry.

    Raises:
        IndexError: if ``index`` is not a valid position in ``eval_results``.
    """
    idx = _coerce_index(index)
    if idx is None:
        raise IndexError(f"Record index {index!r} is out of range")

    entry = eval_results[idx]
    idx_val = entry['index']
    source_item = source_lookup.get(idx_val, {})

    # Literacy Levels available in this entry
    levels = list(entry.get('literacy_levels', {}).keys())
    return entry, source_item, levels


def update_ui(record_idx, level):
    """Compute the display text and checkbox choices for a record and level.

    Args:
        record_idx: Value of the record-index input.
        level: Selected literacy-level key.

    Returns:
        A 3-tuple for ``(display_text, ref_check, full_check)``: the generated
        text (or an explanatory message) and two ``gr.update`` objects that
        replace the checkbox choices and clear any previous selection.
    """
    try:
        entry, source_item, _ = get_record(record_idx)
    except IndexError:
        return (
            f"No record at index {record_idx!r}. Valid range is 0 to {len(eval_results) - 1}.",
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=[]),
        )

    level_data = entry.get('literacy_levels', {}).get(level)
    if level_data is None:
        return (
            f"Literacy level '{level}' is not available for this record.",
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=[]),
        )

    gen_text = source_item.get('diff_label_texts', {}).get(level, "Text not found.")
    details = level_data.get('details', {})

    # Extract missing subclaims as lists of strings
    missing_from_ref = [
        d['source_fact']
        for d in details.get('completeness', [])
        if d.get('status') == 'not_supported'
    ]
    missing_from_full = [
        d['source_subclaim']
        for d in details.get('source_coverage', [])
        if d.get('status') == 'not_supported'
    ]

    # Return the lists directly to the CheckboxGroup components
    return (
        gen_text,
        gr.update(choices=missing_from_ref, value=[]),
        gr.update(choices=missing_from_full, value=[]),
    )


def save_judgment(record_idx, level, selected_ref, selected_full, comments):
    """Append one judgment to ``human_feedback`` and persist the full list.

    Args:
        record_idx: Value of the record-index input.
        level: Selected literacy-level key.
        selected_ref: Ticked items from the reference-summary checkbox group.
        selected_full: Ticked items from the full-source checkbox group.
        comments: Free-text notes.

    Returns:
        A status string: a confirmation, or an explanation when the record
        index is invalid (nothing is saved in that case).
    """
    idx = _coerce_index(record_idx)
    if idx is None:
        return f"Could not save: no record at index {record_idx!r}."

    entry = eval_results[idx]

    result = {
        "index": entry['index'],
        "label": level,
        "unacceptable_ref_claims": selected_ref or [],  # These are the claims the user "ticked"
        "unacceptable_full_claims": selected_full or [],
        "comments": comments,
    }

    human_feedback.append(result)
    _write_feedback(human_feedback)

    return f"Saved judgment for index {entry['index']} ({level})"


# -----------------------------
# GRADIO INTERFACE
# -----------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🩺 Medical Summary: Human Evaluation of Information Loss")
    gr.Markdown("Select the specific subclaims that constitute an **unacceptable omission** for this literacy level.")

    with gr.Row():
        # The upper bound is derived from the loaded data so the label stays
        # correct if the evaluation file changes size.
        record_num = gr.Number(
            label=f"Record Index (0 to {max(len(eval_results) - 1, 0)})",
            value=0,
            precision=0,
        )
        lit_level = gr.Dropdown(
            choices=['low_health_literacy', 'intermediate_health_literacy', 'proficient_health_literacy'],
            label="Target Literacy Level",
            value='low_health_literacy'
        )

    gr.Markdown("### Generated Text")
    display_text = gr.Textbox(interactive=False, show_label=False, lines=5)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Missing from Reference Summary")
            # Checkbox group so individual omissions can be ticked
            ref_check = gr.CheckboxGroup(label="Select Unacceptable Omissions", choices=[])

        with gr.Column():
            gr.Markdown("### Missing from Full Source Text")
            # Checkbox group so individual omissions can be ticked
            full_check = gr.CheckboxGroup(label="Select Unacceptable Omissions", choices=[])

    comment_box = gr.Textbox(label="Additional Notes (Optional)")
    submit_btn = gr.Button("Save Judgment", variant="primary")
    status_msg = gr.Markdown("")

    # Event Listeners
    record_num.change(update_ui, inputs=[record_num, lit_level], outputs=[display_text, ref_check, full_check])
    lit_level.change(update_ui, inputs=[record_num, lit_level], outputs=[display_text, ref_check, full_check])

    submit_btn.click(
        save_judgment,
        inputs=[record_num, lit_level, ref_check, full_check, comment_box],
        outputs=status_msg
    )

    # Populate the view for the default record and level on page load
    demo.load(update_ui, inputs=[record_num, lit_level], outputs=[display_text, ref_check, full_check])

# Only start the server when run as a script, so importing this module
# (e.g. from a reloader or a notebook) does not block on launch().
if __name__ == "__main__":
    demo.launch(share=True)