# File size: 7,596 Bytes
# 1db7196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import gradio as gr
import json
import os
from datetime import datetime

# --- PATH CONFIGURATION ---
# Earlier dataset location, kept for reference only:
# DATA_PATH = "/home/mshahidul/readctrl/data/synthetic_dataset_diff_labels/syn_data_with_gs_summary_en_0_20.json"
# JSON file holding the items to annotate; it is read once at import time.
DATA_PATH = "/home/mshahidul/readctrl/data/data_annotator_data/syn_data_diff_labels_en_0_80.json"
# Root folder under which every annotator gets a sub-directory for saved files.
SAVE_ROOT = "/home/mshahidul/readctrl/data/annotators_validate_data_(20_80)"
# Create the root eagerly (no error if it already exists) so per-user
# directories can be created beneath it later.
os.makedirs(SAVE_ROOT, exist_ok=True)

# --- UI HTML COMPONENTS (Kept same as original) ---
# Static HTML for the rating guide: a two-column table that maps each score
# on the 1-5 difficulty scale to a short description. Rendered via gr.HTML
# inside the "Instructions & Calibration" accordion.
GUIDE_HTML = """
<div style="background-color: #f9f9f9; padding: 15px; border-left: 6px solid #4CAF50; border-radius: 4px; margin-bottom: 20px;">
    <h3>Rating Guide: Medical Text Difficulty</h3>
    <table style="width:100%; border-collapse: collapse; text-align: left;">
        <tr style="background-color: #e8f5e9;">
            <th style="padding: 8px; border: 1px solid #ddd;">Score</th>
            <th style="padding: 8px; border: 1px solid #ddd;">Description</th>
        </tr>
        <tr><td><b>1</b></td><td><b>Very Easy:</b> Simple words, no medical jargon.</td></tr>
        <tr><td><b>2</b></td><td><b>Easy:</b> Conversational medical terms.</td></tr>
        <tr><td><b>3</b></td><td><b>Moderate:</b> Standard patient education material.</td></tr>
        <tr><td><b>4</b></td><td><b>Hard:</b> Significant technical jargon.</td></tr>
        <tr><td><b>5</b></td><td><b>Very Hard:</b> Specialist-level / Academic.</td></tr>
    </table>
</div>
"""

# Static HTML showing two calibration snippets side by side: one easy
# (level 1-2) and one hard (level 4-5) phrasing of the same clinical content.
# Rendered via gr.HTML next to the rating guide.
EXAMPLES_HTML = """
<div style="background-color: #ffffff; padding: 15px; border: 1px solid #ddd; border-radius: 4px;">
    <h3 style="color: #2e7d32;">Reference Examples</h3>
    <div style="display: flex; gap: 15px;">
        <div style="flex: 1; background-color: #f1f8e9; padding: 10px; border-radius: 4px;">
            <h4>Level 1-2</h4>
            <p>"She had a kidney problem... a big blood clot blocked veins in her brain."</p>
        </div>
        <div style="flex: 1; background-color: #ffebee; padding: 10px; border-radius: 4px;">
            <h4>Level 4-5</h4>
            <p>"Idiopathic NS inaugurated by cerebral venous thrombosis extended to the right jugular vein."</p>
        </div>
    </div>
</div>
"""

# --- DATA LOADING ---
# Fail fast at import time when the dataset is missing. A real exception is
# raised instead of `assert False`, because assertions are stripped under
# `python -O`, which would leave FULL_DATASET undefined and surface later as a
# confusing NameError.
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"Data file not found at {DATA_PATH}")

# JSON is read as UTF-8 explicitly so decoding does not depend on the
# machine's locale default.
with open(DATA_PATH, "r", encoding="utf-8") as f:
    # Only the items from list position 60 onward are served by this app.
    FULL_DATASET = json.load(f)[60:]

# --- PERSISTENCE HELPERS ---
def get_user_dir(username):
    """Return the save directory path for *username* under SAVE_ROOT.

    Only alphanumeric characters, spaces, underscores and hyphens are kept
    from the name; surrounding whitespace is then trimmed. If nothing is
    left, the folder name falls back to "anonymous". The directory itself is
    not created here.
    """
    allowed_extras = (' ', '_', '-')
    kept_chars = [ch for ch in username if ch.isalnum() or ch in allowed_extras]
    folder_name = "".join(kept_chars).strip()
    if not folder_name:
        folder_name = "anonymous"
    return os.path.join(SAVE_ROOT, folder_name)

def save_state(user_dir, state_dict):
    """Persist an annotator's session state as ``state.json`` in *user_dir*.

    The JSON is written to a temporary sibling file first and then moved over
    ``state.json`` with ``os.replace``. An interrupted or failed write
    therefore never leaves a truncated state file behind, which would make the
    session impossible to resume.

    Args:
        user_dir: Existing directory holding this annotator's files.
        state_dict: JSON-serialisable session state.

    Raises:
        TypeError: If *state_dict* contains values ``json`` cannot serialise.
        OSError: If the file cannot be written or replaced.
    """
    state_path = os.path.join(user_dir, "state.json")
    tmp_path = state_path + ".tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(state_dict, f, indent=4)
        # Swap the fully written file into place in a single step.
        os.replace(tmp_path, state_path)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up the partial file left by a failed dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def load_state(user_dir):
    """Read back the session state stored as ``state.json`` in *user_dir*.

    Returns the decoded JSON content, or None when no state file exists yet.
    A state file that exists but cannot be parsed raises, rather than being
    treated as "no state".
    """
    state_file = os.path.join(user_dir, "state.json")
    if not os.path.exists(state_file):
        return None
    with open(state_file, "r") as handle:
        saved = json.load(handle)
    return saved

# --- LOGIC FUNCTIONS ---
def get_current_ui_values(state):
    """Build the values shown in the UI for the item at the current index.

    Returns a ``(text, progress, rating)`` tuple: the item's
    'generated_summary', an "Item X of N" progress string, and the rating
    already stored for this queue position (3 when none has been saved yet).
    """
    position = state['current_index']
    item = state['queue'][position]

    # First stored result for this position wins; fall back to the default 3.
    saved_rating = next(
        (entry['rating'] for entry in state['results']
         if entry['queue_position'] == position),
        3,
    )

    total = len(state['queue'])
    progress_text = f"Item {position + 1} of {total}"
    return item['generated_summary'], progress_text, saved_rating

def start_session(username):
    """Start a new annotation session for *username* or resume a saved one.

    Creates the user's save directory if needed. When a saved state exists it
    is reused as-is; otherwise a fresh state over the full dataset is created
    and written to disk immediately.

    Args:
        username: Name/ID typed into the intro textbox.

    Returns:
        Six values, one per output wired to the start button: visibility
        update for the intro box, visibility update for the task box, item
        text, progress string, slider rating, and the session state dict.
        When no username was entered, a warning is shown and nothing changes.
    """
    if not username:
        gr.Warning("Please enter a username!")
        # The handler is wired to six outputs, so six values must come back:
        # five no-op updates for the visible components plus None for the
        # (still unset) session state.
        return [gr.update()] * 5 + [None]

    user_dir = get_user_dir(username)
    os.makedirs(user_dir, exist_ok=True)
    existing_state = load_state(user_dir)

    if existing_state:
        gr.Info(f"Welcome back! Resuming from item {existing_state['current_index'] + 1}.")
        state = existing_state
    else:
        state = {
            "username": username,
            "current_index": 0,
            # Copy so the per-user queue is a separate list from the module-level dataset.
            "queue": list(FULL_DATASET),
            "results": [],
            "completed": False
        }
        save_state(user_dir, state)

    text, progress, rating = get_current_ui_values(state)
    # Hide the intro box, reveal the task box, and populate the task widgets.
    return (gr.update(visible=False), gr.update(visible=True), text, progress, rating, state)

def submit_rating(doc_slider, state):
    """Store the rating for the current item and move on to the next one.

    Any earlier rating for the same queue position is replaced. After every
    submission, including the last one, both ``state.json`` and
    ``annotation_results.json`` are rewritten in the user's directory.

    Args:
        doc_slider: Rating currently selected on the difficulty slider.
        state: Session state dict, or None if no session was started.

    Returns:
        Four values, one per output wired to the submit button: item text,
        progress string, slider value, and the session state.
    """
    if state is None:
        # Exactly one value per wired output (text, progress, slider, state).
        return "", "Error", 3, None

    user_dir = get_user_dir(state['username'])
    idx = state['current_index']
    current_item = state['queue'][idx]

    new_result = {
        "queue_position": idx,
        "doc_id": current_item.get('index', 'no_id'),
        "label": current_item.get('label', 'no_label'),
        "rating": doc_slider,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }

    # Drop any previous rating for this position, then re-insert and keep the
    # list ordered by queue position.
    state['results'] = [r for r in state['results'] if r['queue_position'] != idx]
    state['results'].append(new_result)
    state['results'].sort(key=lambda x: x['queue_position'])

    is_last_item = idx + 1 >= len(state['queue'])
    if is_last_item:
        state['completed'] = True
    else:
        state['current_index'] += 1

    save_state(user_dir, state)
    # Write the results export on every submission, so the rating given to
    # the final item also reaches annotation_results.json.
    with open(os.path.join(user_dir, "annotation_results.json"), "w") as f:
        json.dump(state['results'], f, indent=4)

    if is_last_item:
        # Leave the slider on the value just submitted: current_index still
        # points at the last item, so a repeated click on Submit re-saves the
        # same rating instead of overwriting it with a reset value.
        return "✅ ALL TASKS COMPLETED", "Status: Finished", doc_slider, state

    text, progress, rating = get_current_ui_values(state)
    return text, progress, rating, state

def go_back(state):
    """Step back to the previous item in the queue.

    Returns the four values expected by the "Previous" button outputs: item
    text, progress string, slider rating and the session state. When there is
    no session or the first item is already shown, a warning is raised and
    the three visible widgets are left untouched.
    """
    can_step_back = state is not None and state['current_index'] > 0
    if not can_step_back:
        gr.Warning("Already at the first item.")
        no_change = gr.update()
        return [no_change, no_change, no_change, state]

    state['current_index'] = state['current_index'] - 1
    summary, progress_text, saved_rating = get_current_ui_values(state)
    return summary, progress_text, saved_rating, state

# --- UI INTERFACE ---
# Two-stage layout: an intro box asking for a name/ID, and a task box that is
# hidden until start_session returns visibility updates for both.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Session state holder; starts empty and is filled by start_session.
    session_state = gr.State()

    gr.Markdown("# Medical Text Readability Annotation")
    
    # Collapsed by default; holds the rating guide and the reference examples.
    with gr.Accordion("Instructions & Calibration", open=False):
        gr.HTML(GUIDE_HTML)
        gr.HTML(EXAMPLES_HTML)

    # Stage 1: identify the annotator.
    with gr.Column(visible=True) as intro_box:
        username_input = gr.Textbox(label="Enter Your Name/ID", placeholder="e.g., user_101")
        btn_start = gr.Button("Start / Resume Annotation", variant="primary")

    # Stage 2: the annotation task itself.
    with gr.Column(visible=False) as task_box:
        progress_label = gr.Label(label="Overall Progress")
        doc_display = gr.Textbox(interactive=False, lines=12, label="Medical Text")
        doc_slider = gr.Slider(1, 5, step=1, label="Difficulty (1=Easy, 5=Hard)", value=3)
        
        with gr.Row():
            btn_prev = gr.Button("⬅️ Previous", variant="secondary")
            btn_submit = gr.Button("Submit & Next ➡️", variant="primary")

    # --- EVENT HANDLERS ---
    # Each handler must return one value per entry in its `outputs` list, in
    # the same order.
    # start_session -> 6 outputs.
    btn_start.click(
        fn=start_session, 
        inputs=[username_input],
        outputs=[intro_box, task_box, doc_display, progress_label, doc_slider, session_state]
    )
    
    # submit_rating -> 4 outputs.
    btn_submit.click(
        fn=submit_rating,
        inputs=[doc_slider, session_state],
        outputs=[doc_display, progress_label, doc_slider, session_state]
    )

    # go_back -> 4 outputs.
    btn_prev.click(
        fn=go_back,
        inputs=[session_state],
        outputs=[doc_display, progress_label, doc_slider, session_state]
    )

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a publicly reachable share
    # link - confirm that exposing the annotation UI this way is intended.
    demo.launch(share=True)