File size: 6,399 Bytes
9c6961c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import gradio as gr
import json
import os
import random
from datetime import datetime

# --- Configuration ---
# Input dataset (JSON). It is loaded once below and indexed by position; the
# records are read through the 'fulltext', 'fulltext_subclaims', 'summary' and
# 'summary_subclaims' keys by load_example.
DATA_PATH = '/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json'
# Output file (JSON list) that submitted annotations are appended to. The path
# is relative, so it resolves against the process's working directory.
SAVE_PATH = 'annotated_subclaims_triplet.json'

# Read the whole dataset at import time. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's locale default.
with open(DATA_PATH, 'r', encoding='utf-8') as f:
    data = json.load(f)

# --- Logic Functions ---
def load_example(index):
    """Build the UI values for the record at position ``index``.

    Args:
        index: Zero-based position in the module-level ``data`` list.

    Returns:
        A 9-item list: header markdown, reference text, subclaim list, three
        coverage strings (always ``"0%"``), and three checkbox-group updates
        that share the same choices and start with nothing selected. When
        ``index`` is past the last record, a completion message is returned
        together with an empty subclaim list and empty checkbox choices.
    """
    if index >= len(data):
        return [
            # The heading emoji was previously stored as mojibake; it is
            # restored to the intended character here.
            gr.update(value="### 🎉 All Done!"),
            gr.update(value="You have completed all records in this dataset."),
            [], "0%", "0%", "0%",
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=[])
        ]

    record = data[index]
    # Each call picks at random whether the annotator sees the full text or
    # the summary. The chosen label is embedded in the header string, which is
    # how save_and_next later recovers the source type.
    source_type = random.choice(["Full Original Text", "Gold Summary"])

    if source_type == "Full Original Text":
        text_content = record['fulltext']
        subclaims = record['fulltext_subclaims']
    else:
        text_content = record['summary']
        subclaims = record['summary_subclaims']

    source_info = f"### Instance: {index + 1}/{len(data)} | Source: **{source_type}**"

    return [
        source_info,
        text_content,
        subclaims,
        "0%", "0%", "0%",
        gr.update(choices=subclaims, value=[]),
        gr.update(choices=subclaims, value=[]),
        gr.update(choices=subclaims, value=[])
    ]

def calc_pct(selected, total_list):
    """Return how much of ``total_list`` is covered by ``selected``, as a percent string.

    Args:
        selected: The currently ticked items. ``None`` is treated as an empty
            selection instead of raising ``TypeError``.
        total_list: Every selectable item.

    Returns:
        ``"0%"`` when ``total_list`` is empty or ``None``; otherwise
        ``len(selected) / len(total_list)`` as a percentage with one decimal
        place, e.g. ``"50.0%"``.
    """
    # Truthiness already covers both None and the empty list.
    if not total_list:
        return "0%"
    chosen = selected or []
    return f"{(len(chosen) / len(total_list)) * 100:.1f}%"

def _read_saved_annotations(path):
    """Return the annotation list stored at ``path``, or ``[]`` if none is usable.

    A file that exists but cannot be parsed into a list is moved aside under a
    timestamped name before an empty list is returned, so the next write does
    not silently destroy earlier annotations.
    """
    if not os.path.exists(path):
        return []
    try:
        with open(path, 'r', encoding='utf-8') as f:
            stored = json.load(f)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        stored = None
    if isinstance(stored, list):
        return stored

    backup_path = f"{path}.unreadable-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    try:
        os.replace(path, backup_path)
        gr.Warning(f"Existing save file could not be read; it was moved to {backup_path}.")
    except OSError:
        gr.Warning("Existing save file could not be read or backed up; it will be overwritten.")
    return []


def save_and_next(username, index, source_info, low_sel, int_sel, prof_sel, subclaims):
    """Append the current annotation to ``SAVE_PATH`` and advance to the next record.

    Args:
        username: Annotator name; must contain a non-whitespace character.
        index: Zero-based position of the record being annotated.
        source_info: Header markdown produced by ``load_example``; the source
            type is recovered from it.
        low_sel: Subclaims ticked for the low level.
        int_sel: Subclaims ticked for the intermediate level.
        prof_sel: Subclaims ticked for the proficient level.
        subclaims: All subclaims offered for this record.

    Returns:
        A 10-item list: the record index to store, followed by the nine values
        ``load_example`` produces. When validation fails, the index is
        unchanged and every component receives a no-op update so the
        annotator's selections stay on screen.
    """
    # Once every record is done there is nothing to annotate; keep showing the
    # completion screen without writing an empty record.
    if index >= len(data):
        return [index] + load_example(index)

    # Validation
    if not username or not username.strip():
        gr.Warning("Please enter your name/username before submitting!")
        # Re-running load_example here would re-randomise the source and clear
        # the checkboxes, so leave the components untouched instead.
        return [
            index,
            gr.update(),
            gr.update(),
            subclaims,
            gr.update(), gr.update(), gr.update(),
            gr.update(), gr.update(), gr.update(),
        ]

    stype = "Full Original Text" if "Full Original Text" in source_info else "Gold Summary"

    # Capture current date and time
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    total = len(subclaims) if subclaims else 0

    def _level(selection):
        # pct is a 0..1 fraction here (not the percent string shown in the UI).
        picked = list(selection or [])
        return {"subclaims": picked, "pct": len(picked) / total if total else 0}

    result = {
        "annotator": username,
        "timestamp": timestamp,
        "index": index,
        "source_type": stype,
        "annotations": {
            "low": _level(low_sel),
            "intermediate": _level(int_sel),
            "proficient": _level(prof_sel),
        },
    }

    # Saving logic: read everything, append, write everything back.
    existing = _read_saved_annotations(SAVE_PATH)
    existing.append(result)
    with open(SAVE_PATH, 'w', encoding='utf-8') as f:
        json.dump(existing, f, indent=4)

    gr.Info(f"Success! Saved at {timestamp}")

    next_idx = index + 1
    return [next_idx] + load_example(next_idx)

# --- UI Definition ---
# Layout: a sidebar (annotator name, level guidelines, reference text) next to
# a main area with one checkbox column per literacy level. The emoji in the
# headings were previously stored as mojibake and are restored here.
with gr.Blocks(theme=gr.themes.Soft(), title="Health Literacy Annotator") as demo:
    # Per-session state: position in the dataset and the subclaims currently on offer.
    index_state = gr.State(0)
    subclaim_list_state = gr.State([])

    # NOTE(review): the intro text says some items are pre-selected, but
    # load_example initialises every checkbox group with value=[] — confirm
    # which of the two is intended.
    gr.Markdown("# 🏥 Health Literacy Subclaim Annotation\n## Texts labeled as low health literacy include less information than those labeled as intermediate health literacy, and intermediate health literacy texts include less information than proficient health literacy texts.\nSome key information has already been pre-selected to ensure that each label contains a minimum required amount of information. If you believe additional information should be included for a given label, please select the corresponding checkboxes.")

    with gr.Row():
        # Sidebar
        with gr.Column(scale=1, variant="panel"):
            user_input = gr.Textbox(label="Annotator Name", placeholder="Enter your name...", interactive=True)
            gr.HTML("<hr>")
            gr.Markdown("### 📖 Level Guidelines")
            with gr.Accordion("1. Low Literacy", open=False):
                gr.Markdown("- Simple terms, 'living room' language.\n- High paraphrasing.")
            with gr.Accordion("2. Intermediate Literacy", open=False):
                gr.Markdown("- News-reading level.\n- Balanced context.")
            with gr.Accordion("3. Proficient Literacy", open=False):
                gr.Markdown("- Academic/Clinical level.\n- Full technical details.")

            gr.HTML("<hr>")
            source_display = gr.Markdown("### Initializing...")
            text_viewer = gr.Textbox(label="Reference Text Content", interactive=False, lines=12)

        # Main Area
        with gr.Column(scale=2):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 🟢 Low")
                    low_pct = gr.Label(value="0%", label="Coverage")
                    low_check = gr.CheckboxGroup(label="Subclaims", choices=[])

                with gr.Column():
                    gr.Markdown("### 🟡 Intermediate")
                    int_pct = gr.Label(value="0%", label="Coverage")
                    int_check = gr.CheckboxGroup(label="Subclaims", choices=[])

                with gr.Column():
                    gr.Markdown("### 🔴 Proficient")
                    prof_pct = gr.Label(value="0%", label="Coverage")
                    prof_check = gr.CheckboxGroup(label="Subclaims", choices=[])

            submit_btn = gr.Button("Submit & Next Record", variant="primary", size="lg")

    # --- Events ---
    # Populate the page with the first record. The output order must match the
    # 9-item list returned by load_example.
    demo.load(
        load_example,
        [index_state],
        [source_display, text_viewer, subclaim_list_state, low_pct, int_pct, prof_pct, low_check, int_check, prof_check]
    )

    # Recompute a column's coverage label whenever its checkboxes change.
    low_check.change(calc_pct, [low_check, subclaim_list_state], low_pct)
    int_check.change(calc_pct, [int_check, subclaim_list_state], int_pct)
    prof_check.change(calc_pct, [prof_check, subclaim_list_state], prof_pct)

    # save_and_next returns the new index followed by the same nine values as
    # load_example, hence index_state leads the output list.
    submit_btn.click(
        save_and_next,
        [user_input, index_state, source_display, low_check, int_check, prof_check, subclaim_list_state],
        [index_state, source_display, text_viewer, subclaim_list_state, low_pct, int_pct, prof_pct, low_check, int_check, prof_check]
    )

# Launch the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    # NOTE(review): share=True presumably requests a publicly reachable Gradio
    # link — confirm that exposing the annotation UI this way is intended.
    demo.launch(share=True)