| | import gradio as gr |
| | import json |
| | import os |
| | import random |
| | from datetime import datetime |
| |
|
| | |
# Source dataset: a JSON array of records. load_example() reads the keys
# 'fulltext', 'fulltext_subclaims', 'summary' and 'summary_subclaims' from
# each record.
DATA_PATH = '/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json'
# Output file that accumulates one entry per submitted annotation. The path
# is relative, so it resolves against the process's working directory.
SAVE_PATH = 'annotated_subclaims_triplet.json'

# Load every record once at import time. JSON text is UTF-8 by specification,
# so decode it explicitly rather than relying on the platform's locale
# default, which is not UTF-8 everywhere.
with open(DATA_PATH, 'r', encoding='utf-8') as f:
    data = json.load(f)
| |
|
| | |
def load_example(index):
    """Build the values that populate the UI for the record at *index*.

    The returned list always has nine items, in this order: header markdown,
    reference text, subclaim list (kept in state), three coverage labels and
    three checkbox-group updates.

    When *index* is past the last record, a completion screen with empty
    checkbox choices is returned instead. Otherwise the reference text is
    drawn at random, on every call, from either the record's full text or
    its gold summary, together with the matching subclaim list.
    """
    total = len(data)

    if index < total:
        record = data[index]

        source_type = random.choice(["Full Original Text", "Gold Summary"])
        use_fulltext = source_type == "Full Original Text"
        text_key = 'fulltext' if use_fulltext else 'summary'
        claims_key = 'fulltext_subclaims' if use_fulltext else 'summary_subclaims'

        text_content = record[text_key]
        subclaims = record[claims_key]
        header = f"### Instance: {index + 1}/{total} | Source: **{source_type}**"

        # One independent update per literacy level, each starting unticked.
        checkbox_resets = [gr.update(choices=subclaims, value=[]) for _ in range(3)]
        return [header, text_content, subclaims, "0%", "0%", "0%", *checkbox_resets]

    # Nothing left to annotate: show the completion message and clear choices.
    return [
        gr.update(value="### π All Done!"),
        gr.update(value="You have completed all records in this dataset."),
        [], "0%", "0%", "0%",
        gr.update(choices=[], value=[]),
        gr.update(choices=[], value=[]),
        gr.update(choices=[], value=[]),
    ]
| |
|
def calc_pct(selected, total_list):
    """Return how much of *total_list* is covered by *selected*, as a percent string.

    Args:
        selected: The ticked subclaims; ``None`` is treated as no selection.
        total_list: All subclaims available for the current record.

    Returns:
        ``"0%"`` when *total_list* is empty or ``None``; otherwise the ratio
        ``len(selected) / len(total_list)`` formatted with one decimal place,
        e.g. ``"50.0%"``.
    """
    if not total_list:
        return "0%"
    # A cleared selection may arrive as None instead of []; count it as zero
    # rather than failing on len(None).
    chosen_count = len(selected) if selected else 0
    return f"{chosen_count / len(total_list) * 100:.1f}%"
| |
|
def _coverage_entry(selected, subclaims):
    """Build the saved entry for one literacy level: chosen subclaims and their fraction."""
    chosen = selected or []
    fraction = len(chosen) / len(subclaims) if subclaims else 0
    return {"subclaims": chosen, "pct": fraction}


def _read_saved_annotations(path):
    """Return the records already stored at *path*, or an empty list if none can be read."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return []
    except (OSError, ValueError):
        # Best effort, as before: an unreadable or malformed file starts a
        # fresh list. ValueError covers JSON and Unicode decoding errors.
        return []


def _write_annotations(path, records):
    """Write *records* to *path* through a temporary file so the save is all-or-nothing."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(records, f, indent=4)
    # Swap the finished file into place; an interrupted dump therefore never
    # leaves a truncated save file behind.
    os.replace(tmp_path, path)


def save_and_next(username, index, source_info, low_sel, int_sel, prof_sel, subclaims):
    """Persist the current annotation and advance to the next record.

    Args:
        username: Annotator name typed into the UI; must not be blank.
        index: Position of the record being annotated.
        source_info: Header markdown of the current record; the source type
            is recovered from this text.
        low_sel, int_sel, prof_sel: Subclaims ticked for the low,
            intermediate and proficient levels.
        subclaims: All subclaims offered for the current record.

    Returns:
        A 10-item list: the index to keep in state, followed by the nine
        values in the order produced by ``load_example``.
    """
    annotator = (username or "").strip()
    if not annotator:
        gr.Warning("Please enter your name/username before submitting!")
        # Keep the screen exactly as it is. Reloading the record here would
        # re-roll the random source type and discard the ticks already made.
        return [index, gr.update(), gr.update(), subclaims] + [gr.update() for _ in range(6)]

    if index >= len(data):
        # Every record is already done; saving again would only append an
        # empty entry and push the index further past the end.
        gr.Info("All records have already been annotated; nothing to save.")
        return [index] + load_example(index)

    stype = "Full Original Text" if "Full Original Text" in source_info else "Gold Summary"
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    result = {
        "annotator": annotator,
        "timestamp": timestamp,
        "index": index,
        "source_type": stype,
        "annotations": {
            "low": _coverage_entry(low_sel, subclaims),
            "intermediate": _coverage_entry(int_sel, subclaims),
            "proficient": _coverage_entry(prof_sel, subclaims),
        },
    }

    existing = _read_saved_annotations(SAVE_PATH)
    existing.append(result)
    _write_annotations(SAVE_PATH, existing)

    gr.Info(f"Success! Saved at {timestamp}")

    next_idx = index + 1
    return [next_idx] + load_example(next_idx)
| |
|
| | |
# Build the annotation interface: a left panel with the annotator name, level
# guidelines and the reference text, and a right area with one column of
# subclaim checkboxes per literacy level plus the submit button.
with gr.Blocks(theme=gr.themes.Soft(), title="Health Literacy Annotator") as demo:
    # Index of the record currently shown; starts at the first record.
    index_state = gr.State(0)
    # Subclaims of the record currently shown; used by calc_pct as the
    # denominator and passed to save_and_next on submit.
    subclaim_list_state = gr.State([])

    # NOTE(review): the "π…" sequences in this and the other headings below
    # look like mis-decoded emoji — confirm the intended glyphs and restore.
    # NOTE(review): the text says some items are pre-selected, but
    # load_example resets every checkbox group with value=[] — confirm.
    gr.Markdown("# π₯ Health Literacy Subclaim Annotation\n## Texts labeled as low health literacy include less information than those labeled as intermediate health literacy, and intermediate health literacy texts include less information than proficient health literacy texts.\nSome key information has already been pre-selected to ensure that each label contains a minimum required amount of information. If you believe additional information should be included for a given label, please select the corresponding checkboxes.")

    with gr.Row():

        # Left panel: annotator identity, guidelines, and the text to annotate.
        with gr.Column(scale=1, variant="panel"):
            user_input = gr.Textbox(label="Annotator Name", placeholder="Enter your name...", interactive=True)
            gr.HTML("<hr>")
            gr.Markdown("### π Level Guidelines")
            with gr.Accordion("1. Low Literacy", open=False):
                gr.Markdown("- Simple terms, 'living room' language.\n- High paraphrasing.")
            with gr.Accordion("2. Intermediate Literacy", open=False):
                gr.Markdown("- News-reading level.\n- Balanced context.")
            with gr.Accordion("3. Proficient Literacy", open=False):
                gr.Markdown("- Academic/Clinical level.\n- Full technical details.")

            gr.HTML("<hr>")
            # Both are filled by load_example: the header line and the text body.
            source_display = gr.Markdown("### Initializing...")
            text_viewer = gr.Textbox(label="Reference Text Content", interactive=False, lines=12)

        # Right area: three parallel columns, each with a coverage label and
        # a checkbox group over the same subclaim list.
        with gr.Column(scale=2):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### π’ Low")
                    low_pct = gr.Label(value="0%", label="Coverage")
                    low_check = gr.CheckboxGroup(label="Subclaims", choices=[])

                with gr.Column():
                    gr.Markdown("### π‘ Intermediate")
                    int_pct = gr.Label(value="0%", label="Coverage")
                    int_check = gr.CheckboxGroup(label="Subclaims", choices=[])

                with gr.Column():
                    gr.Markdown("### π΄ Proficient")
                    prof_pct = gr.Label(value="0%", label="Coverage")
                    prof_check = gr.CheckboxGroup(label="Subclaims", choices=[])

            submit_btn = gr.Button("Submit & Next Record", variant="primary", size="lg")

    # On page load, show the record at index_state. The output order must
    # match the nine values returned by load_example.
    demo.load(
        load_example,
        [index_state],
        [source_display, text_viewer, subclaim_list_state, low_pct, int_pct, prof_pct, low_check, int_check, prof_check]
    )

    # Recompute a level's coverage label whenever its checkbox selection changes.
    low_check.change(calc_pct, [low_check, subclaim_list_state], low_pct)
    int_check.change(calc_pct, [int_check, subclaim_list_state], int_pct)
    prof_check.change(calc_pct, [prof_check, subclaim_list_state], prof_pct)

    # Save the current selections and move on. save_and_next returns the new
    # index first, followed by the same nine values as load_example.
    submit_btn.click(
        save_and_next,
        [user_input, index_state, source_display, low_check, int_check, prof_check, subclaim_list_state],
        [index_state, source_display, text_viewer, subclaim_list_state, low_pct, int_pct, prof_pct, low_check, int_check, prof_check]
    )
| |
|
# Start the web app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # share=True asks Gradio to create a public share link for the app.
    demo.launch(share=True)