| import sys |
| import json |
| import html |
| from pathlib import Path |
| import gradio as gr |
|
|
# Root of the bundled test fixtures, located next to this module.
TEST_PAPERS_DIR = Path(__file__).parent.joinpath("test_papers")
# Paper text files read by _read_text.
PAPERS_DIR = TEST_PAPERS_DIR.joinpath("papers")
# Gold annotation JSON files resolved by _gold_path_for.
GOLD_DIR = TEST_PAPERS_DIR.joinpath("annotated")
|
|
# Labels an annotator can assign; one button is created per entry, in order.
CATEGORIES = [
    "Unsupported claim",
    "Format",
    "Coherence",
    "Lacks synthesis",
]

# Highlight background colour used for each label.
CATEGORY_COLORS = {
    "Unsupported claim": "#ffb3b3",
    "Format": "#ffe8a3",
    "Coherence": "#b7f0d2",
    "Lacks synthesis": "#bcd8ff",
}

# CSS-friendly identifier per label (lower-cased, spaces turned into hyphens).
CATEGORY_SLUGS = {
    label: label.lower().replace(" ", "-") for label in CATEGORY_COLORS
}
|
|
# Static colour legend rendered with gr.HTML in the UI. The swatch colours are
# hard-coded here and must be kept in sync with CATEGORY_COLORS by hand.
# Every swatch carries the same right margin so all labels line up.
LEGEND_HTML = """
<div style="margin:8px 0 4px 0;font-size:0.9em">
  <div style="font-weight:600;margin-bottom:4px">Legend</div>
  <div style="display:flex;flex-direction:column;gap:4px">
    <div><span style="display:inline-block;width:14px;height:14px;background:#ffb3b3;border-radius:3px;margin-right:6px;vertical-align:middle"></span>Unsupported claim</div>
    <div><span style="display:inline-block;width:14px;height:14px;background:#ffe8a3;border-radius:3px;margin-right:6px;vertical-align:middle"></span>Format</div>
    <div><span style="display:inline-block;width:14px;height:14px;background:#b7f0d2;border-radius:3px;margin-right:6px;vertical-align:middle"></span>Coherence</div>
    <div><span style="display:inline-block;width:14px;height:14px;background:#bcd8ff;border-radius:3px;margin-right:6px;vertical-align:middle"></span>Lacks synthesis</div>
  </div>
</div>
"""
|
|
# Client-side JavaScript passed as the `js=` handler of the selection timer's
# tick event. It is an arrow function `(start, end) => [start, end, message]`.
#
# It looks up the viewer element by id ("paper-content", falling back to
# "paper_view") in the document or in the <gradio-app> shadow root, then turns
# the current window selection into character offsets:
#   * start = length of the text between the beginning of the viewer and the
#     start of the selection; end = start + length of the selected text;
#   * if the selection's common ancestor is not inside the viewer, it instead
#     uses the first occurrence of the selected text in the viewer's
#     textContent;
#   * if there is no selection (or the text is not found) the incoming
#     start/end are returned unchanged with an empty message.
# NOTE(review): offsets are JavaScript string lengths (UTF-16 code units) while
# the Python side slices by code point; these may diverge for text containing
# characters outside the BMP - confirm against the paper files.
CATEGORY_JS = """
(start, end) => {
  const findById = (id) => {
    const direct = document.getElementById(id);
    if (direct) return direct;
    const app = document.querySelector('gradio-app');
    if (app && app.shadowRoot) {
      const inShadow = app.shadowRoot.getElementById(id);
      if (inShadow) return inShadow;
    }
    return null;
  };
  const view = findById('paper_view');
  const root = findById('paper-content') || view;
  if (!root) return [start, end, "Text viewer not found."];
  const sel = window.getSelection();
  if (!sel || sel.rangeCount === 0) return [start, end, ""];
  const range = sel.getRangeAt(0);
  let node = range.commonAncestorContainer;
  if (node && node.nodeType === 3) node = node.parentNode;
  const sameRoot = (a, b) => {
    if (!a || !b || !a.getRootNode || !b.getRootNode) return false;
    return a.getRootNode() === b.getRootNode();
  };
  function inViewer(n){
    while (n) {
      if (n.id === 'paper-content' || n.id === 'paper_view') return true;
      if (n.host) { n = n.host; continue; }
      n = n.parentNode;
    }
    return false;
  }
  const selected = range.toString();
  if (!selected) return [start, end, ""];
  if (!(inViewer(node) && (root.contains(node) || sameRoot(root, node)))) {
    const text = root.textContent || "";
    const idx = text.indexOf(selected);
    if (idx === -1) return [start, end, ""];
    const s = idx;
    const e = idx + selected.length;
    return [String(s), String(e), `Captured selection (${s}-${e})`];
  }
  const preRange = document.createRange();
  preRange.selectNodeContents(root);
  preRange.setEnd(range.startContainer, range.startOffset);
  const s = preRange.toString().length;
  const e = s + selected.length;
  return [String(s), String(e), `Captured selection (${s}-${e})`];
}
"""
|
|
|
|
def _test_files():
    """Return the names of the test papers that exist on disk.

    The candidates are ``paper_6.txt`` through ``paper_10.txt``; only those
    present in ``PAPERS_DIR`` are returned, in ascending numeric order.
    """
    found = []
    for number in range(6, 11):
        name = f"paper_{number}.txt"
        if (PAPERS_DIR / name).exists():
            found.append(name)
    return found


# Computed once at import time.
TEST_FILES = _test_files()
|
|
|
|
def _read_text(filename):
    """Load a paper from ``PAPERS_DIR``.

    Returns a ``(text, error_html)`` pair where exactly one element is
    ``None``: the decoded text on success, or an ``<em>`` snippet describing
    why the file could not be loaded. Bytes that are not valid UTF-8 are
    replaced instead of raising.
    """
    target = PAPERS_DIR / filename
    if target.exists():
        try:
            contents = target.read_text(encoding="utf-8", errors="replace")
        except Exception as exc:
            return None, f"<em>Error reading file: {html.escape(str(exc))}</em>"
        return contents, None
    return None, f"<em>File not found: {html.escape(filename)}</em>"
|
|
|
|
| def _render_with_highlights(text, annotations): |
| if not annotations: |
| escaped = html.escape(text) |
| return ( |
| "<style>" |
| ".hl{padding:0 2px;border-radius:3px;}" |
| "#paper-content{white-space:pre-wrap;font-family:inherit;padding:12px;" |
| "border:1px solid #ddd;border-radius:8px;min-height:420px;" |
| "max-height:420px;overflow:auto;box-sizing:border-box;" |
| "user-select:text;}" |
| "#paper-content:focus{outline:none;}" |
| "</style>" |
| f"<div id='paper-content' contenteditable='false' tabindex='0'>{escaped}</div>" |
| ) |
| slug_for = {k: k.lower().replace(" ", "-") for k in CATEGORY_COLORS.keys()} |
| sorted_anns = sorted(annotations, key=lambda r: (int(r.get("start", -1)), int(r.get("end", -1)))) |
| pieces = [] |
| cursor = 0 |
| text_len = len(text) |
| for ann in sorted_anns: |
| try: |
| start = int(ann.get("start")) |
| end = int(ann.get("end")) |
| except Exception: |
| continue |
| if start < cursor or start < 0 or end < 0 or end > text_len or end <= start: |
| continue |
| pieces.append(html.escape(text[cursor:start])) |
| label = ann.get("label", "") |
| cls = slug_for.get(label, "unknown") |
| color = CATEGORY_COLORS.get(label, "#ddd") |
| span = html.escape(text[start:end]) |
| pieces.append(f"<span class='hl {cls}' style='background:{color}'>{span}</span>") |
| cursor = end |
| pieces.append(html.escape(text[cursor:])) |
| styles = ( |
| "<style>" |
| ".hl{padding:0 2px;border-radius:3px;}" |
| "#paper-content{white-space:pre-wrap;font-family:inherit;padding:12px;" |
| "border:1px solid #ddd;border-radius:8px;min-height:420px;" |
| "max-height:420px;overflow:auto;box-sizing:border-box;" |
| "user-select:text;}" |
| "#paper-content:focus{outline:none;}" |
| "</style>" |
| ) |
| return f"{styles}<div id='paper-content' contenteditable='false' tabindex='0'>{''.join(pieces)}</div>" |
|
|
|
|
| def _get_annotations(state, filename): |
| if not state: |
| return [] |
| return list(state.get(filename, [])) |
|
|
|
|
def read_paper(filename, ann_state):
    """Build the viewer HTML and the annotations JSON for ``filename``.

    Returns a ``(html, json_text)`` pair. When no file is selected or the file
    cannot be read, the HTML is an explanatory message and the JSON is
    ``"[]"``.
    """
    empty_json = "[]"
    if not filename:
        return "<em>No file selected.</em>", empty_json

    text, err = _read_text(filename)
    if err:
        return err, empty_json

    spans = _get_annotations(ann_state, filename)
    rendered = _render_with_highlights(text, spans)
    return rendered, json.dumps(spans, ensure_ascii=False, indent=2)
|
|
|
|
def save_annotation(filename, start, end, category, ann_state):
    """Store a new labelled span for ``filename`` and re-render the viewer.

    Args:
        filename: Name of the paper being annotated.
        start: Start character offset of the selection (int or numeric string).
        end: End character offset of the selection (exclusive).
        category: Label to attach to the span.
        ann_state: Mapping of filename to its list of annotation records.

    Returns:
        ``(status, viewer_html, annotations_json, state)``. On any rejection
        the viewer and JSON are no-op ``gr.update()`` values and the state is
        returned unchanged; on success a new state dict is returned and the
        input mapping is left untouched.

    A span is rejected when the offsets are missing, non-numeric, negative,
    empty (``end <= start``) or run past the end of the file, and when an
    identical record (same file, offsets and label) is already stored.
    """

    def rejected(message):
        # Leave the viewer, the JSON panel and the state exactly as they are.
        return message, gr.update(), gr.update(), ann_state

    if not filename:
        return rejected("No file selected.")
    if not category:
        return rejected("No category selected.")
    if start in (None, "") or end in (None, ""):
        return rejected("No selection captured yet.")
    try:
        start_i = int(start)
        end_i = int(end)
    except (TypeError, ValueError, OverflowError):
        return rejected(
            f"Invalid start/end positions. start={start!r} end={end!r}"
        )
    # An empty span (end == start) would be stored but never rendered, so it
    # is treated as invalid as well.
    if start_i < 0 or end_i <= start_i:
        return rejected(f"Invalid span range. start={start_i} end={end_i}")

    text, err = _read_text(filename)
    if err:
        return rejected(err)
    if end_i > len(text):
        return rejected("Span exceeds file length.")

    record = {
        "file": filename,
        "start": start_i,
        "end": end_i,
        "label": category,
        "text": text[start_i:end_i],
    }
    records = _get_annotations(ann_state, filename)

    # Compare against every stored record, not just the most recent one, so a
    # repeated save is caught even after other annotations were added.
    identity = (record["file"], record["start"], record["end"], record["label"])
    already_saved = any(
        (r.get("file"), r.get("start"), r.get("end"), r.get("label")) == identity
        for r in records
    )
    if already_saved:
        return rejected(f"Annotation already saved for {filename}.")

    records.append(record)
    new_state = dict(ann_state or {})
    new_state[filename] = records
    content = _render_with_highlights(text, records)
    annotations_json = json.dumps(records, ensure_ascii=False, indent=2)
    return f"Saved annotation for {filename}.", content, annotations_json, new_state
|
|
|
|
def remove_annotation(filename, start, end, ann_state):
    """Drop every stored span that overlaps the selection ``[start, end)``.

    Returns ``(status, viewer_html, annotations_json, state)``. Validation
    failures return no-op ``gr.update()`` values and the unchanged state.
    Records whose own offsets cannot be parsed are always kept.
    """

    def bail(message):
        return message, gr.update(), gr.update(), ann_state

    if not filename:
        return bail("No file selected.")
    if start in (None, "") or end in (None, ""):
        return bail("No selection captured yet.")
    try:
        lo = int(start)
        hi = int(end)
    except Exception:
        return bail(f"Invalid start/end positions. start={start!r} end={end!r}")
    if lo < 0 or hi < 0 or hi < lo:
        return bail(f"Invalid span range. start={lo} end={hi}")

    text, err = _read_text(filename)
    if err:
        return bail(err)

    def survives(rec):
        # A record stays when it is unparseable or lies fully outside [lo, hi).
        try:
            rec_start = int(rec.get("start"))
            rec_end = int(rec.get("end"))
        except Exception:
            return True
        return rec_end <= lo or rec_start >= hi

    existing = _get_annotations(ann_state, filename)
    kept = [rec for rec in existing if survives(rec)]
    removed = len(existing) - len(kept)

    new_state = dict(ann_state or {})
    new_state[filename] = kept
    content = _render_with_highlights(text, kept)
    annotations_json = json.dumps(kept, ensure_ascii=False, indent=2)

    if removed:
        message = f"Removed {removed} highlight(s)."
    else:
        message = "No overlapping highlights to remove."
    return message, content, annotations_json, new_state
|
|
|
|
def clear_annotations(filename, ann_state):
    """Remove every annotation stored for ``filename``.

    Returns ``(status, viewer_html, annotations_json, state)``. The state is a
    new dict in which ``filename`` maps to an empty list; if no file is
    selected or it cannot be read, the state is returned unchanged alongside
    no-op ``gr.update()`` values.
    """
    if not filename:
        return "No file selected.", gr.update(), gr.update(), ann_state

    text, err = _read_text(filename)
    if err:
        return err, gr.update(), gr.update(), ann_state

    cleared_state = {**(ann_state or {}), filename: []}
    plain_view = _render_with_highlights(text, [])
    return "Cleared annotations for current paper.", plain_view, "[]", cleared_state
|
|
|
|
def _gold_path_for(filename):
    """Return the gold-annotation JSON path in ``GOLD_DIR`` for ``filename``.

    The file name is ``first_ten_agreed_<stem>.json`` where ``<stem>`` is the
    paper's file name without its extension.
    """
    gold_name = "first_ten_agreed_" + Path(filename).stem + ".json"
    return GOLD_DIR.joinpath(gold_name)
|
|
|
|
def _load_gold(filename):
    """Load the gold annotations for ``filename``.

    Returns the parsed JSON when it is a list. Returns ``[]`` when the gold
    file is missing, cannot be read or parsed, or holds something other than a
    list.
    """
    gold_file = _gold_path_for(filename)
    if gold_file.exists():
        try:
            with open(gold_file, "r", encoding="utf-8") as handle:
                parsed = json.load(handle)
        except Exception:
            return []
        if isinstance(parsed, list):
            return parsed
    return []
|
|
|
|
| def _overlap(a_start, a_end, b_start, b_end): |
| return not (a_end <= b_start or a_start >= b_end) |
|
|
|
|
| def _score_annotations(filename, ann_state): |
| if not filename: |
| return "No file selected." |
| user_anns = _get_annotations(ann_state, filename) |
| gold_anns = _load_gold(filename) |
| if not gold_anns: |
| return "Gold annotations not found for this paper." |
| matched_user = 0 |
| matched_gold = 0 |
| for u in user_anns: |
| try: |
| us = int(u.get("start")) |
| ue = int(u.get("end")) |
| except Exception: |
| continue |
| ul = u.get("label") |
| ok = False |
| for g in gold_anns: |
| try: |
| gs = int(g.get("start")) |
| ge = int(g.get("end")) |
| except Exception: |
| continue |
| gl = g.get("label") |
| if ul == gl and _overlap(us, ue, gs, ge): |
| ok = True |
| break |
| if ok: |
| matched_user += 1 |
| for g in gold_anns: |
| try: |
| gs = int(g.get("start")) |
| ge = int(g.get("end")) |
| except Exception: |
| continue |
| gl = g.get("label") |
| ok = False |
| for u in user_anns: |
| try: |
| us = int(u.get("start")) |
| ue = int(u.get("end")) |
| except Exception: |
| continue |
| ul = u.get("label") |
| if ul == gl and _overlap(us, ue, gs, ge): |
| ok = True |
| break |
| if ok: |
| matched_gold += 1 |
| total_user = len(user_anns) |
| total_gold = len(gold_anns) |
| precision = matched_user / total_user if total_user else 0.0 |
| recall = matched_gold / total_gold if total_gold else 0.0 |
| if precision + recall: |
| f1 = 2 * precision * recall / (precision + recall) |
| else: |
| f1 = 0.0 |
| return ( |
| f"Score for {filename}: matched {matched_user}/{total_user} user spans and " |
| f"{matched_gold}/{total_gold} gold spans. " |
| f"Precision={precision:.2f} Recall={recall:.2f} F1={f1:.2f}" |
| ) |
|
|
|
|
| def _render_gold(filename): |
| if not filename: |
| return "<em>No file selected.</em>" |
| text, err = _read_text(filename) |
| if err: |
| return err |
| gold_anns = _load_gold(filename) |
| if not gold_anns: |
| return "<em>Gold annotations not found for this paper.</em>" |
| return _render_with_highlights(text, gold_anns) |
|
|
|
|
| def _submit_check(filename, ann_state, attempts_state): |
| if not filename: |
| return "No file selected.", "<em>No file selected.</em>", attempts_state |
| attempts = dict(attempts_state or {}) |
| tries = int(attempts.get(filename, 0)) |
| user_anns = _get_annotations(ann_state, filename) |
| gold_anns = _load_gold(filename) |
| if not gold_anns: |
| return "Gold annotations not found for this paper.", "<em>Gold annotations not found.</em>", attempts |
| matched_gold = 0 |
| for g in gold_anns: |
| try: |
| gs = int(g.get("start")) |
| ge = int(g.get("end")) |
| except Exception: |
| continue |
| gl = g.get("label") |
| ok = False |
| for u in user_anns: |
| try: |
| us = int(u.get("start")) |
| ue = int(u.get("end")) |
| except Exception: |
| continue |
| ul = u.get("label") |
| if ul == gl and _overlap(us, ue, gs, ge): |
| ok = True |
| break |
| if ok: |
| matched_gold += 1 |
| total_gold = len(gold_anns) |
| recall = matched_gold / total_gold if total_gold else 0.0 |
| tries += 1 |
| attempts[filename] = tries |
| if recall >= 0.6: |
| msg = ( |
| f"Passed: matched {matched_gold}/{total_gold} gold spans " |
| f"(Recall={recall:.2f})." |
| ) |
| return msg, _render_gold(filename), attempts |
| if tries < 3: |
| remaining = 3 - tries |
| msg = ( |
| f"Try again: matched {matched_gold}/{total_gold} gold spans " |
| f"(Recall={recall:.2f}). {remaining} attempt(s) left." |
| ) |
| return msg, "<em>Gold highlights will appear after 3 attempts or a pass.</em>", attempts |
| msg = ( |
| f"Attempts used. Matched {matched_gold}/{total_gold} gold spans " |
| f"(Recall={recall:.2f}). Showing gold highlights." |
| ) |
| return msg, _render_gold(filename), attempts |
|
|
|
|
| def _reset_attempts_for(filename, attempts_state): |
| if not filename: |
| return attempts_state |
| attempts = dict(attempts_state or {}) |
| attempts[filename] = 0 |
| return attempts |
|
|
|
|
| def _attempts_label(filename, attempts_state): |
| if not filename: |
| return "<div style='text-align:right;font-size:1.35em;'>Attempts: 0/3</div>" |
| attempts = dict(attempts_state or {}) |
| tries = int(attempts.get(filename, 0)) |
| return f"<div style='text-align:right;font-size:1.35em;'>Attempts: {tries}/3</div>" |
|
|
|
|
def _cycle_paper(current, direction):
    """Return a ``gr.update`` selecting the paper ``direction`` steps away.

    Navigation wraps around ``TEST_FILES``. If ``current`` is not one of the
    test files the first paper is selected; if there are no test files a no-op
    update is returned.
    """
    if not TEST_FILES:
        return gr.update()
    try:
        position = TEST_FILES.index(current)
    except ValueError:
        return gr.update(value=TEST_FILES[0])
    wrapped = (position + direction) % len(TEST_FILES)
    return gr.update(value=TEST_FILES[wrapped])
|
|
|
|
def _progress_label(filename):
    """Return the ``Test i of n`` HTML for ``filename``.

    ``i`` is the 1-based position of the file in ``TEST_FILES``. When the file
    is not one of the test papers the label reads ``Test 0 of 0``.
    """
    total = len(TEST_FILES)
    try:
        ordinal = TEST_FILES.index(filename) + 1
    except ValueError:
        ordinal, total = 0, 0
    return f"<div style='font-size:1.35em;'>Test {ordinal} of {total}</div>"
|
|
|
|
# Gradio UI: a paper viewer with previous/next navigation on the left and the
# category buttons, legend, status box and annotations JSON on the right.
# NOTE(review): the nesting of components inside rows/columns was
# reconstructed from statement order; confirm the intended layout (in
# particular where `status` and `annotations_view` live).
with gr.Blocks(
    title="Annotation Check",
    css="""
    #cat_btn_unsupported-claim button,
    #cat_btn_unsupported-claim .gr-button { background:#ffb3b3 !important; border-color:#ffb3b3 !important; color:#222 !important; }
    #cat_btn_format button,
    #cat_btn_format .gr-button { background:#ffe8a3 !important; border-color:#ffe8a3 !important; color:#222 !important; }
    #cat_btn_coherence button,
    #cat_btn_coherence .gr-button { background:#b7f0d2 !important; border-color:#b7f0d2 !important; color:#222 !important; }
    #cat_btn_lacks-synthesis button,
    #cat_btn_lacks-synthesis .gr-button { background:#bcd8ff !important; border-color:#bcd8ff !important; color:#222 !important; }
    #top_intro { font-size:1.50em; line-height:1.3; }
    #progress_label { font-size:1.25em; }
    #attempts_label { font-size:1.25em; }
    """,
) as demo:
    gr.HTML(
        "<div id='top_intro'>"
        "This is a test-only annotation app.<br>"
        "Highlight spans and label them exactly like the main task. "
        "Gold spans appear after you reach at least 60% recall (or after 3 tries)."
        "</div>"
    )
    # Per-session state: filename -> annotation records / attempts used.
    ann_state = gr.State({})
    attempts_state = gr.State({})
    with gr.Row():
        with gr.Column(scale=3):
            current_file = gr.State(TEST_FILES[0] if TEST_FILES else None)
            with gr.Row():
                progress = gr.Markdown(
                    _progress_label(TEST_FILES[0] if TEST_FILES else None),
                    elem_id="progress_label",
                )
                attempts_label = gr.Markdown(
                    _attempts_label(TEST_FILES[0] if TEST_FILES else None, {}),
                    elem_id="attempts_label",
                )
            if TEST_FILES:
                initial_text, initial_err = _read_text(TEST_FILES[0])
                initial_html = (
                    _render_with_highlights(initial_text, []) if not initial_err else initial_err
                )
            else:
                initial_html = "<em>No file selected.</em>"
            content = gr.HTML(initial_html, elem_id="paper_view")
            with gr.Row():
                prev_btn = gr.Button("Previous test")
                next_btn = gr.Button("Next test")
            gold_content = gr.HTML("<em>Gold highlighted spans will appear here after you click 'Submit & check' and pass or ran out of attempts.</em>")
        with gr.Column(scale=1):
            gr.Markdown("**Category**")
            category_buttons = {}
            for cat in CATEGORIES:
                slug = CATEGORY_SLUGS.get(cat, cat.lower().replace(" ", "-"))
                category_buttons[cat] = gr.Button(cat, elem_id=f"cat_btn_{slug}")
            remove_btn = gr.Button("Remove highlight", variant="secondary")
            gr.HTML(LEGEND_HTML)
            clear_btn = gr.Button("Clear annotations", variant="stop")
            submit_btn = gr.Button("Submit & check")
            status = gr.Textbox(label="Status", interactive=False, elem_id="status_box")
            annotations_view = gr.Textbox(
                label="Annotations for current file (JSON)", lines=10, interactive=False, elem_id="annotations_view"
            )

    # Hidden fields that carry the selection offsets written by CATEGORY_JS.
    start_pos = gr.Textbox(label="Start (char)", elem_id="start_pos", visible="hidden")
    end_pos = gr.Textbox(label="End (char)", elem_id="end_pos", visible="hidden")
    selection_timer = gr.Timer(0.3)

    prev_btn.click(lambda f: _cycle_paper(f, -1), inputs=current_file, outputs=current_file)
    next_btn.click(lambda f: _cycle_paper(f, 1), inputs=current_file, outputs=current_file)
    current_file.change(read_paper, inputs=[current_file, ann_state], outputs=[content, annotations_view])
    current_file.change(lambda: gr.update(value=""), None, start_pos)
    current_file.change(lambda: gr.update(value=""), None, end_pos)
    current_file.change(lambda f: _progress_label(f), inputs=current_file, outputs=progress)
    current_file.change(
        lambda: "<em>Gold highlighted spans will appear here after you click 'Submit & check' and pass or ran out of attempts.</em>",
        None,
        gold_content,
    )
    # Reset the counter first, then refresh the label. The two steps are
    # chained with .then() so the label always reads the state *after* it has
    # been reset; as independent listeners on the same trigger their order is
    # not guaranteed.
    current_file.change(
        _reset_attempts_for,
        inputs=[current_file, attempts_state],
        outputs=[attempts_state],
    ).then(
        _attempts_label,
        inputs=[current_file, attempts_state],
        outputs=[attempts_label],
    )

    # Poll the browser selection; the handler runs entirely client-side.
    selection_timer.tick(
        fn=None,
        inputs=[start_pos, end_pos],
        outputs=[start_pos, end_pos, status],
        js=CATEGORY_JS,
    )
    for cat, btn in category_buttons.items():
        btn.click(
            # `c=cat` binds the label at definition time (avoids late binding).
            lambda filename, start, end, state, c=cat: save_annotation(filename, start, end, c, state),
            inputs=[current_file, start_pos, end_pos, ann_state],
            outputs=[status, content, annotations_view, ann_state],
        )
    remove_btn.click(
        remove_annotation,
        inputs=[current_file, start_pos, end_pos, ann_state],
        outputs=[status, content, annotations_view, ann_state],
    )
    clear_btn.click(
        clear_annotations,
        inputs=[current_file, ann_state],
        outputs=[status, content, annotations_view, ann_state],
    )
    # Grade first, then refresh the attempts label from the updated state.
    submit_btn.click(
        _submit_check,
        inputs=[current_file, ann_state, attempts_state],
        outputs=[status, gold_content, attempts_state],
    ).then(
        _attempts_label,
        inputs=[current_file, attempts_state],
        outputs=[attempts_label],
    )

    if TEST_FILES:
        _, annotations_val = read_paper(TEST_FILES[0], {})
        annotations_view.value = annotations_val
|
|
|
|
if __name__ == "__main__":
    # Usage: python <this file> [PORT]
    # The optional first argument overrides the default port. The parsed
    # value is passed to launch(); previously it was computed but never used,
    # so the app always started on Gradio's own default port.
    port = 7861
    if len(sys.argv) > 1:
        try:
            port = int(sys.argv[1])
        except ValueError:
            print(
                f"Ignoring invalid port {sys.argv[1]!r}; using {port}.",
                file=sys.stderr,
            )
    demo.launch(server_port=port)
|
|