| | import gradio as gr |
| | import json |
| | import os |
| |
|
| | |
| | |
| | |
# Input/output locations. Each path can be overridden through an environment
# variable so the app can run outside the original author's home directory;
# the defaults are the original hard-coded paths.
EVAL_FILE = os.environ.get(
    "READCTRL_EVAL_FILE",
    "/home/mshahidul/readctrl/data/factual_testing/full_details_evaluation_0_20_qwen3-32B_v2.json",
)
SOURCE_FILE = os.environ.get(
    "READCTRL_SOURCE_FILE",
    "/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json",
)
SAVE_PATH = os.environ.get(
    "READCTRL_SAVE_PATH",
    "/home/mshahidul/readctrl/data/human_eval_results.json",
)
| |
|
def load_data():
    """Load the evaluation results and index the source records.

    Returns:
        A ``(eval_data, source_map)`` tuple. ``eval_data`` is the parsed
        content of ``EVAL_FILE``. ``source_map`` maps the ``'index'`` value
        of every item in ``SOURCE_FILE`` to that item, for direct lookup.

    Raises:
        OSError: if either file cannot be opened.
        json.JSONDecodeError: if either file is not valid JSON.
        KeyError: if a source item has no ``'index'`` key.
    """
    # Explicit UTF-8 so decoding does not depend on the machine's locale.
    with open(EVAL_FILE, "r", encoding="utf-8") as f:
        eval_data = json.load(f)
    with open(SOURCE_FILE, "r", encoding="utf-8") as f:
        source_data = json.load(f)

    # Key the source items by their 'index' field for quick lookup.
    source_map = {item["index"]: item for item in source_data}
    return eval_data, source_map
| |
|
# Loaded once at start-up and shared by all UI callbacks.
eval_results, source_lookup = load_data()

# Judgments collected so far. save_judgment rewrites SAVE_PATH with this whole
# list, so start from whatever an earlier session already stored there;
# otherwise the first save after a restart would discard those judgments.
human_feedback = []
if os.path.isfile(SAVE_PATH):
    try:
        with open(SAVE_PATH, "r", encoding="utf-8") as f:
            _previous_feedback = json.load(f)
    except ValueError:
        # Not valid JSON (or not valid UTF-8): treated as unreadable below.
        _previous_feedback = None
    if isinstance(_previous_feedback, list):
        human_feedback = _previous_feedback
    else:
        # Keep the unreadable file under a different name instead of letting
        # the next save overwrite it.
        os.replace(SAVE_PATH, f"{SAVE_PATH}.bak")
| |
|
| | |
| | |
| | |
| |
|
def get_record(index):
    """Return the evaluation entry at ``index`` with its source item.

    Args:
        index: Zero-based position in ``eval_results``. Integral floats are
            accepted and converted with ``int()``.

    Returns:
        A ``(entry, source_item, levels)`` tuple: the evaluation entry, the
        source item whose ``'index'`` matches the entry's (an empty dict when
        there is none), and the list of keys of ``entry['literacy_levels']``.

    Raises:
        IndexError: if ``index`` is negative or past the last record.
        TypeError: if ``index`` cannot be converted to an int (e.g. ``None``).
        KeyError: if the entry lacks ``'index'`` or ``'literacy_levels'``.
    """
    position = int(index)
    # Reject negatives explicitly: plain list indexing would silently wrap
    # around and return a record counted from the end.
    if not 0 <= position < len(eval_results):
        raise IndexError(f"record index {position} is out of range")

    entry = eval_results[position]
    source_item = source_lookup.get(entry["index"], {})
    levels = list(entry["literacy_levels"])
    return entry, source_item, levels
def update_ui(record_idx, level):
    """Build the display values for one record at one literacy level.

    Args:
        record_idx: Zero-based record position taken from the number field;
            may be ``None`` when the field has been cleared.
        level: Key into the entry's ``'literacy_levels'`` mapping.

    Returns:
        A 3-tuple ``(generated_text, ref_update, full_update)``: the text for
        the read-only box, and a ``gr.update`` for each checkbox group that
        replaces its choices and clears the current selection. When the
        record index is invalid, or the entry has no data for ``level``, both
        groups are emptied instead of raising.
    """
    no_choices = gr.update(choices=[], value=[])

    # A cleared or out-of-range number field would otherwise raise on every
    # change event.
    if record_idx is None or not 0 <= record_idx < len(eval_results):
        return "Record index out of range.", no_choices, no_choices

    entry, source_item, _ = get_record(int(record_idx))
    gen_text = source_item.get("diff_label_texts", {}).get(level, "Text not found.")

    level_data = entry["literacy_levels"].get(level)
    if level_data is None:
        return gen_text, no_choices, no_choices

    details = level_data.get("details", {})

    # Facts of the reference summary flagged 'not_supported'.
    missing_from_ref = [
        d["source_fact"]
        for d in details.get("completeness", [])
        if d["status"] == "not_supported"
    ]
    # Subclaims of the full source text flagged 'not_supported'.
    missing_from_full = [
        d["source_subclaim"]
        for d in details.get("source_coverage", [])
        if d["status"] == "not_supported"
    ]

    return (
        gen_text,
        gr.update(choices=missing_from_ref, value=[]),
        gr.update(choices=missing_from_full, value=[]),
    )
| |
|
def save_judgment(record_idx, level, selected_ref, selected_full, comments):
    """Record one human judgment and rewrite the results file.

    Args:
        record_idx: Zero-based position of the record in ``eval_results``;
            may be ``None`` when the number field has been cleared.
        level: Literacy level the judgment applies to.
        selected_ref: Items ticked in the reference-summary checkbox group.
        selected_full: Items ticked in the full-source checkbox group.
        comments: Free-text notes.

    Returns:
        A status message: a confirmation after a successful save, or an
        explanation when the record index is invalid (nothing is saved then).

    Raises:
        OSError: if the results file cannot be written.
    """
    if record_idx is None or not 0 <= record_idx < len(eval_results):
        return f"Nothing saved: record index {record_idx} is out of range."

    entry = eval_results[int(record_idx)]
    result = {
        "index": entry["index"],
        "label": level,
        "unacceptable_ref_claims": selected_ref,
        "unacceptable_full_claims": selected_full,
        "comments": comments,
    }
    human_feedback.append(result)

    # Write to a sibling temp file and swap it in, so a crash mid-write cannot
    # leave SAVE_PATH truncated and lose every earlier judgment.
    tmp_path = f"{SAVE_PATH}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(human_feedback, f, indent=2, ensure_ascii=False)
    os.replace(tmp_path, SAVE_PATH)

    return f"Saved judgment for index {entry['index']} ({level})"
| |
|
| | |
| | |
| | |
| |
|
# UI layout and event wiring.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🩺 Medical Summary: Human Evaluation of Information Loss")
    gr.Markdown("Select the specific subclaims that constitute an **unacceptable omission** for this literacy level.")

    with gr.Row():
        record_num = gr.Number(
            # Range shown to the annotator follows the loaded data.
            label=f"Record Index (0 to {len(eval_results) - 1})",
            value=0,
            precision=0,
        )
        lit_level = gr.Dropdown(
            choices=['low_health_literacy', 'intermediate_health_literacy', 'proficient_health_literacy'],
            label="Target Literacy Level",
            value='low_health_literacy',
        )

    gr.Markdown("### Generated Text")
    display_text = gr.Textbox(interactive=False, show_label=False, lines=5)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Missing from Reference Summary")
            ref_check = gr.CheckboxGroup(label="Select Unacceptable Omissions", choices=[])

        with gr.Column():
            gr.Markdown("### Missing from Full Source Text")
            full_check = gr.CheckboxGroup(label="Select Unacceptable Omissions", choices=[])

    comment_box = gr.Textbox(label="Additional Notes (Optional)")
    submit_btn = gr.Button("Save Judgment", variant="primary")
    status_msg = gr.Markdown("")

    # The same refresh runs when the record changes, when the level changes,
    # and once when the page first loads.
    view_inputs = [record_num, lit_level]
    view_outputs = [display_text, ref_check, full_check]
    record_num.change(update_ui, inputs=view_inputs, outputs=view_outputs)
    lit_level.change(update_ui, inputs=view_inputs, outputs=view_outputs)
    demo.load(update_ui, inputs=view_inputs, outputs=view_outputs)

    submit_btn.click(
        save_judgment,
        inputs=[record_num, lit_level, ref_check, full_check, comment_box],
        outputs=status_msg,
    )

# Start the server only when run as a script, so importing this module does
# not launch it as a side effect.
if __name__ == "__main__":
    demo.launch(share=True)