# AerdnaNami
# fixed launch
# 61544ea
import sys
import json
import html
from pathlib import Path
import gradio as gr
# Test papers live next to this file: raw text under "papers", gold
# annotation JSON under "annotated".
TEST_PAPERS_DIR = Path(__file__).parent / "test_papers"
PAPERS_DIR = TEST_PAPERS_DIR / "papers"
GOLD_DIR = TEST_PAPERS_DIR / "annotated"

# Annotation labels, in the order their buttons are created.
CATEGORIES = [
    "Unsupported claim",
    "Format",
    "Coherence",
    "Lacks synthesis",
]

# Highlight background colour for each label.
CATEGORY_COLORS = {
    "Unsupported claim": "#ffb3b3",
    "Format": "#ffe8a3",
    "Coherence": "#b7f0d2",
    "Lacks synthesis": "#bcd8ff",
}

# CSS-friendly identifier for each label (lower-case, spaces -> hyphens).
CATEGORY_SLUGS = {
    name: name.lower().replace(" ", "-") for name in CATEGORY_COLORS
}
# Static colour legend shown in the side column. The swatch colours are the
# same hex values as CATEGORY_COLORS; every swatch carries the same right
# margin so that none of them touches its label.
LEGEND_HTML = """
<div style="margin:8px 0 4px 0;font-size:0.9em">
<div style="font-weight:600;margin-bottom:4px">Legend</div>
<div style="display:flex;flex-direction:column;gap:4px">
<div><span style="display:inline-block;width:14px;height:14px;background:#ffb3b3;border-radius:3px;margin-right:6px;vertical-align:middle"></span>Unsupported claim</div>
<div><span style="display:inline-block;width:14px;height:14px;background:#ffe8a3;border-radius:3px;margin-right:6px;vertical-align:middle"></span>Format</div>
<div><span style="display:inline-block;width:14px;height:14px;background:#b7f0d2;border-radius:3px;margin-right:6px;vertical-align:middle"></span>Coherence</div>
<div><span style="display:inline-block;width:14px;height:14px;background:#bcd8ff;border-radius:3px;margin-right:6px;vertical-align:middle"></span>Lacks synthesis</div>
</div>
</div>
"""
# Client-side JavaScript that converts the browser's current text selection
# into character offsets. It is passed as `js=` to the selection timer's tick
# handler, receives the current (start, end) values and returns
# [start, end, status].
#
# Behaviour, as written below:
#   * Looks up the elements with ids 'paper-content' / 'paper_view', first in
#     the document and then in the <gradio-app> shadow root.
#   * With no selection (or an empty one) it returns start/end unchanged and
#     an empty status string.
#   * When the selection lies inside the viewer, start is the length of the
#     text between the beginning of the viewer and the selection start, and
#     end is start plus the selected text's length.
#   * Otherwise it falls back to the first occurrence of the selected text in
#     the viewer's textContent.
#
# NOTE(review): offsets are JavaScript string lengths (UTF-16 code units);
# the Python side slices by code point, so the two may disagree for text with
# characters outside the BMP -- confirm against real papers.
# NOTE(review): the empty status returned when nothing is selected is written
# to the status box on every tick -- confirm it does not wipe messages set by
# the other handlers.
CATEGORY_JS = """
(start, end) => {
const findById = (id) => {
const direct = document.getElementById(id);
if (direct) return direct;
const app = document.querySelector('gradio-app');
if (app && app.shadowRoot) {
const inShadow = app.shadowRoot.getElementById(id);
if (inShadow) return inShadow;
}
return null;
};
const view = findById('paper_view');
const root = findById('paper-content') || view;
if (!root) return [start, end, "Text viewer not found."];
const sel = window.getSelection();
if (!sel || sel.rangeCount === 0) return [start, end, ""];
const range = sel.getRangeAt(0);
let node = range.commonAncestorContainer;
if (node && node.nodeType === 3) node = node.parentNode;
const sameRoot = (a, b) => {
if (!a || !b || !a.getRootNode || !b.getRootNode) return false;
return a.getRootNode() === b.getRootNode();
};
function inViewer(n){
while (n) {
if (n.id === 'paper-content' || n.id === 'paper_view') return true;
if (n.host) { n = n.host; continue; }
n = n.parentNode;
}
return false;
}
const selected = range.toString();
if (!selected) return [start, end, ""];
if (!(inViewer(node) && (root.contains(node) || sameRoot(root, node)))) {
const text = root.textContent || "";
const idx = text.indexOf(selected);
if (idx === -1) return [start, end, ""];
const s = idx;
const e = idx + selected.length;
return [String(s), String(e), `Captured selection (${s}-${e})`];
}
const preRange = document.createRange();
preRange.selectNodeContents(root);
preRange.setEnd(range.startContainer, range.startOffset);
const s = preRange.toString().length;
const e = s + selected.length;
return [String(s), String(e), `Captured selection (${s}-${e})`];
}
"""
def _test_files():
    """Return the test-paper filenames that exist under ``PAPERS_DIR``.

    Candidates are ``paper_6.txt`` through ``paper_10.txt``; the ones found
    on disk are returned in ascending numeric order.
    """
    found = []
    for number in range(6, 11):
        candidate = f"paper_{number}.txt"
        if (PAPERS_DIR / candidate).exists():
            found.append(candidate)
    return found


# Computed once at import time and shared by the navigation helpers.
TEST_FILES = _test_files()
def _read_text(filename):
    """Read a paper from ``PAPERS_DIR`` as UTF-8 text.

    Returns a ``(text, error_html)`` pair. On success ``error_html`` is
    ``None``; on failure ``text`` is ``None`` and ``error_html`` is an
    HTML-escaped ``<em>`` message. Undecodable bytes are replaced rather
    than raising.
    """
    target = PAPERS_DIR / filename
    if target.exists():
        try:
            contents = target.read_text(encoding="utf-8", errors="replace")
        except Exception as exc:
            return None, f"<em>Error reading file: {html.escape(str(exc))}</em>"
        return contents, None
    return None, f"<em>File not found: {html.escape(filename)}</em>"
def _render_with_highlights(text, annotations):
    """Render ``text`` as HTML, wrapping annotated spans in coloured ``<span>``s.

    Args:
        text: Raw paper text. It is HTML-escaped before being embedded.
        annotations: Iterable of dicts carrying ``start``/``end`` character
            offsets and a ``label``. May be empty or ``None``.

    Returns:
        A ``<style>`` block followed by a ``<div id='paper-content'>`` that
        holds the escaped text with highlights applied.

    Annotations whose offsets cannot be read as integers are skipped, as are
    spans that are empty, fall outside the text, or start before the end of
    a span that has already been rendered (overlaps are not drawn).
    """
    styles = (
        "<style>"
        ".hl{padding:0 2px;border-radius:3px;}"
        "#paper-content{white-space:pre-wrap;font-family:inherit;padding:12px;"
        "border:1px solid #ddd;border-radius:8px;min-height:420px;"
        "max-height:420px;overflow:auto;box-sizing:border-box;"
        "user-select:text;}"
        "#paper-content:focus{outline:none;}"
        "</style>"
    )

    # Parse offsets *before* sorting so that one malformed record (for
    # example a null "start" in a gold file) is skipped instead of raising
    # from inside the sort key.
    spans = []
    for ann in annotations or ():
        try:
            start = int(ann.get("start"))
            end = int(ann.get("end"))
        except (AttributeError, TypeError, ValueError, OverflowError):
            continue
        spans.append((start, end, ann))
    spans.sort(key=lambda item: item[:2])

    pieces = []
    cursor = 0
    text_len = len(text)
    for start, end, ann in spans:
        # cursor is never negative, so ``start < cursor`` also rejects
        # negative starts; ``end <= start`` rejects empty/inverted spans.
        if start < cursor or end <= start or end > text_len:
            continue
        pieces.append(html.escape(text[cursor:start]))
        label = ann.get("label", "")
        cls = CATEGORY_SLUGS.get(label, "unknown")
        color = CATEGORY_COLORS.get(label, "#ddd")
        span = html.escape(text[start:end])
        pieces.append(f"<span class='hl {cls}' style='background:{color}'>{span}</span>")
        cursor = end
    pieces.append(html.escape(text[cursor:]))

    return f"{styles}<div id='paper-content' contenteditable='false' tabindex='0'>{''.join(pieces)}</div>"
def _get_annotations(state, filename):
    """Return a new list holding the annotations stored for ``filename``.

    An empty list is returned when ``state`` is falsy or has no entry for
    the file. The list itself is a copy; the records inside it are shared.
    """
    stored = state.get(filename, []) if state else []
    return list(stored)
def read_paper(filename, ann_state):
    """Build the viewer HTML and annotation JSON for ``filename``.

    Returns ``(html, json_text)``. When no file is selected or the file
    cannot be read, the HTML is a short message and the JSON is ``"[]"``.
    """
    empty_json = "[]"
    if not filename:
        return "<em>No file selected.</em>", empty_json

    text, error_html = _read_text(filename)
    if error_html:
        return error_html, empty_json

    current = _get_annotations(ann_state, filename)
    rendered = _render_with_highlights(text, current)
    as_json = json.dumps(current, ensure_ascii=False, indent=2)
    return rendered, as_json
def save_annotation(filename, start, end, category, ann_state):
    """Store a new labelled span for ``filename`` and re-render the viewer.

    Args:
        filename: Name of the paper being annotated.
        start: Start character offset (string or int) of the selection.
        end: End character offset (string or int), exclusive.
        category: Label to attach to the span.
        ann_state: Mapping of filename -> list of annotation records.

    Returns:
        ``(status, content, annotations_json, state)``. When the request is
        rejected, ``content`` and ``annotations_json`` are no-op
        ``gr.update()`` values and ``state`` is ``ann_state`` unchanged.
        On success a new state dict is returned; ``ann_state`` itself is not
        mutated.
    """

    def _rejected(message):
        # Leave the viewer, the JSON panel and the stored state untouched.
        return message, gr.update(), gr.update(), ann_state

    if not filename:
        return _rejected("No file selected.")
    if not category:
        return _rejected("No category selected.")
    if start in (None, "") or end in (None, ""):
        return _rejected("No selection captured yet.")

    try:
        start_i = int(start)
        end_i = int(end)
    except (TypeError, ValueError):
        return _rejected(f"Invalid start/end positions. start={start!r} end={end!r}")

    # Zero-length spans are rejected as well: they would be stored but could
    # never be drawn as a highlight.
    if start_i < 0 or end_i <= start_i:
        return _rejected(f"Invalid span range. start={start_i} end={end_i}")

    text, err = _read_text(filename)
    if err:
        return _rejected(err)
    if end_i > len(text):
        return _rejected("Span exceeds file length.")

    record = {
        "file": filename,
        "start": start_i,
        "end": end_i,
        "label": category,
        "text": text[start_i:end_i],
    }

    records = _get_annotations(ann_state, filename)
    # Compare against every stored record, not only the most recent one, so
    # a duplicate cannot slip in after an unrelated annotation.
    identity_keys = ("file", "start", "end", "label")
    if any(all(r.get(k) == record[k] for k in identity_keys) for r in records):
        return _rejected(f"Annotation already saved for {filename}.")

    records.append(record)
    new_state = dict(ann_state or {})
    new_state[filename] = records
    content = _render_with_highlights(text, records)
    annotations_json = json.dumps(records, ensure_ascii=False, indent=2)
    return f"Saved annotation for {filename}.", content, annotations_json, new_state
def remove_annotation(filename, start, end, ann_state):
    """Drop every stored highlight that overlaps the selected range.

    Returns ``(status, content, annotations_json, state)``. Validation
    failures return no-op ``gr.update()`` values and the original state.
    Records whose offsets cannot be parsed are always kept.
    """
    if not filename:
        return "No file selected.", gr.update(), gr.update(), ann_state
    if start in (None, "") or end in (None, ""):
        return "No selection captured yet.", gr.update(), gr.update(), ann_state

    try:
        start_i, end_i = int(start), int(end)
    except Exception:
        return f"Invalid start/end positions. start={start!r} end={end!r}", gr.update(), gr.update(), ann_state

    if start_i < 0 or end_i < 0 or end_i < start_i:
        return f"Invalid span range. start={start_i} end={end_i}", gr.update(), gr.update(), ann_state

    text, err = _read_text(filename)
    if err:
        return err, gr.update(), gr.update(), ann_state

    def _survives(rec):
        # A record is kept when it is unparsable or lies wholly outside
        # the selected range.
        try:
            rec_start = int(rec.get("start"))
            rec_end = int(rec.get("end"))
        except Exception:
            return True
        return rec_end <= start_i or rec_start >= end_i

    records = _get_annotations(ann_state, filename)
    kept = [rec for rec in records if _survives(rec)]
    removed = len(records) - len(kept)

    new_state = dict(ann_state or {})
    new_state[filename] = kept
    content = _render_with_highlights(text, kept)
    annotations_json = json.dumps(kept, ensure_ascii=False, indent=2)

    if removed:
        message = f"Removed {removed} highlight(s)."
    else:
        message = "No overlapping highlights to remove."
    return message, content, annotations_json, new_state
def clear_annotations(filename, ann_state):
    """Remove all annotations stored for ``filename``.

    Returns ``(status, content, annotations_json, state)``; when no file is
    selected or the file cannot be read, the state is returned unchanged
    together with no-op ``gr.update()`` values.
    """
    if not filename:
        return "No file selected.", gr.update(), gr.update(), ann_state

    text, error_html = _read_text(filename)
    if error_html:
        return error_html, gr.update(), gr.update(), ann_state

    # Copy first so the incoming state object is never mutated.
    updated = dict(ann_state or {})
    updated[filename] = []
    rendered = _render_with_highlights(text, [])
    return "Cleared annotations for current paper.", rendered, "[]", updated
def _gold_path_for(filename):
    """Return the gold-annotation JSON path that corresponds to ``filename``.

    The file name is ``first_ten_agreed_<stem>.json`` inside ``GOLD_DIR``,
    where ``<stem>`` is ``filename`` without its final suffix.
    """
    gold_name = "first_ten_agreed_" + Path(filename).stem + ".json"
    return GOLD_DIR / gold_name
def _load_gold(filename):
    """Load the gold annotations for ``filename``.

    Returns the parsed JSON when it is a list. Returns ``[]`` when the gold
    file is missing, cannot be read or parsed, or holds anything other than
    a list.
    """
    gold_file = _gold_path_for(filename)
    if not gold_file.exists():
        return []
    try:
        with open(gold_file, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
    except Exception:
        return []
    return payload if isinstance(payload, list) else []
def _overlap(a_start, a_end, b_start, b_end):
    """Return True when half-open ranges [a_start, a_end) and [b_start, b_end) intersect.

    Ranges that merely touch at an endpoint do not count as overlapping.
    """
    a_is_before_b = a_end <= b_start
    a_is_after_b = a_start >= b_end
    return not (a_is_before_b or a_is_after_b)
def _score_annotations(filename, ann_state):
    """Summarise how the user's spans for ``filename`` compare with gold.

    A user span and a gold span match when they carry the same label and
    overlap. Precision is matched user spans over all user records, recall
    is matched gold spans over all gold records, and F1 combines the two.
    Records whose offsets cannot be parsed never match but still count in
    the totals. Returns a one-line status message.
    """
    if not filename:
        return "No file selected."

    user_anns = _get_annotations(ann_state, filename)
    gold_anns = _load_gold(filename)
    if not gold_anns:
        return "Gold annotations not found for this paper."

    def _parsed(records):
        # Keep only records whose offsets parse; yield (start, end, label).
        usable = []
        for rec in records:
            try:
                lo = int(rec.get("start"))
                hi = int(rec.get("end"))
            except Exception:
                continue
            usable.append((lo, hi, rec.get("label")))
        return usable

    def _has_partner(span, candidates):
        lo, hi, label = span
        return any(
            label == other_label and _overlap(lo, hi, other_lo, other_hi)
            for other_lo, other_hi, other_label in candidates
        )

    user_spans = _parsed(user_anns)
    gold_spans = _parsed(gold_anns)
    matched_user = sum(1 for span in user_spans if _has_partner(span, gold_spans))
    matched_gold = sum(1 for span in gold_spans if _has_partner(span, user_spans))

    total_user = len(user_anns)
    total_gold = len(gold_anns)
    precision = matched_user / total_user if total_user else 0.0
    recall = matched_gold / total_gold if total_gold else 0.0
    denominator = precision + recall
    f1 = 2 * precision * recall / denominator if denominator else 0.0

    return (
        f"Score for {filename}: matched {matched_user}/{total_user} user spans and "
        f"{matched_gold}/{total_gold} gold spans. "
        f"Precision={precision:.2f} Recall={recall:.2f} F1={f1:.2f}"
    )
def _render_gold(filename):
    """Return HTML showing ``filename`` with its gold spans highlighted.

    Falls back to a short ``<em>`` message when no file is selected, the
    paper cannot be read, or no gold annotations are available.
    """
    if not filename:
        return "<em>No file selected.</em>"

    text, error_html = _read_text(filename)
    if error_html:
        return error_html

    gold = _load_gold(filename)
    if gold:
        return _render_with_highlights(text, gold)
    return "<em>Gold annotations not found for this paper.</em>"
def _submit_check(filename, ann_state, attempts_state):
    """Grade the user's annotations against gold and track attempts.

    Args:
        filename: Paper currently being annotated.
        ann_state: Mapping of filename -> list of user annotation records.
        attempts_state: Mapping of filename -> number of submissions so far.

    Returns:
        ``(status, gold_html, attempts)``. Gold highlights are rendered once
        recall reaches 0.6 or the third attempt has been used; before that a
        placeholder message is returned instead. ``attempts`` is a new dict;
        ``attempts_state`` is not mutated.

    A gold span counts as matched when some user span has the same label and
    overlaps it. Records with unparsable offsets never match.
    """
    max_attempts = 3
    pass_recall = 0.6

    if not filename:
        return "No file selected.", "<em>No file selected.</em>", attempts_state

    attempts = dict(attempts_state or {})
    user_anns = _get_annotations(ann_state, filename)
    gold_anns = _load_gold(filename)
    if not gold_anns:
        return "Gold annotations not found for this paper.", "<em>Gold annotations not found.</em>", attempts

    def _parsed(records):
        # Keep only records whose offsets parse; yield (start, end, label).
        usable = []
        for rec in records:
            try:
                lo = int(rec.get("start"))
                hi = int(rec.get("end"))
            except (AttributeError, TypeError, ValueError, OverflowError):
                continue
            usable.append((lo, hi, rec.get("label")))
        return usable

    user_spans = _parsed(user_anns)
    matched_gold = sum(
        1
        for gs, ge, gl in _parsed(gold_anns)
        if any(ul == gl and _overlap(us, ue, gs, ge) for us, ue, ul in user_spans)
    )
    total_gold = len(gold_anns)
    recall = matched_gold / total_gold if total_gold else 0.0

    # Cap the counter so that submitting again after the limit cannot push
    # it past the maximum shown to the user (e.g. "4/3").
    tries = min(int(attempts.get(filename, 0)) + 1, max_attempts)
    attempts[filename] = tries

    if recall >= pass_recall:
        msg = (
            f"Passed: matched {matched_gold}/{total_gold} gold spans "
            f"(Recall={recall:.2f})."
        )
        return msg, _render_gold(filename), attempts

    if tries < max_attempts:
        remaining = max_attempts - tries
        msg = (
            f"Try again: matched {matched_gold}/{total_gold} gold spans "
            f"(Recall={recall:.2f}). {remaining} attempt(s) left."
        )
        return msg, "<em>Gold highlights will appear after 3 attempts or a pass.</em>", attempts

    msg = (
        f"Attempts used. Matched {matched_gold}/{total_gold} gold spans "
        f"(Recall={recall:.2f}). Showing gold highlights."
    )
    return msg, _render_gold(filename), attempts
def _reset_attempts_for(filename, attempts_state):
    """Return the attempts mapping with the counter for ``filename`` set to 0.

    With no filename the incoming state is handed back untouched; otherwise
    a copy is returned and the original mapping is left unmodified.
    """
    if filename:
        refreshed = dict(attempts_state or {})
        refreshed.update({filename: 0})
        return refreshed
    return attempts_state
def _attempts_label(filename, attempts_state):
    """Return the right-aligned "Attempts: n/3" HTML snippet for ``filename``.

    The count is 0 when no file is selected or none has been recorded.
    """
    used = 0
    if filename:
        used = int(dict(attempts_state or {}).get(filename, 0))
    return f"<div style='text-align:right;font-size:1.35em;'>Attempts: {used}/3</div>"
def _cycle_paper(current, direction):
    """Return a Gradio update selecting the paper ``direction`` steps away.

    Navigation wraps around at both ends of ``TEST_FILES``. If ``current``
    is not a known paper the first one is selected; with no papers at all a
    no-op update is returned.
    """
    if not TEST_FILES:
        return gr.update()
    try:
        position = TEST_FILES.index(current)
    except ValueError:
        return gr.update(value=TEST_FILES[0])
    target = (position + direction) % len(TEST_FILES)
    return gr.update(value=TEST_FILES[target])
def _progress_label(filename):
    """Return the "Test i of n" HTML snippet for ``filename``.

    Shows "Test 0 of 0" when there are no test papers or ``filename`` is
    not one of them.
    """
    if filename in TEST_FILES:
        position = TEST_FILES.index(filename) + 1
        total = len(TEST_FILES)
        return f"<div style='font-size:1.35em;'>Test {position} of {total}</div>"
    return "<div style='font-size:1.35em;'>Test 0 of 0</div>"
# ---------------------------------------------------------------------------
# UI definition.
#
# Layout: a wide column with the paper viewer, navigation and gold view, and
# a narrow column with the category buttons, legend, actions and status.
# The attempts label is refreshed with ``.then(...)`` after the handler that
# updates ``attempts_state`` has finished; registering it as a second,
# independent listener on the same trigger would let it read the state
# before the update was written.
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="Annotation Check",
    css="""
#cat_btn_unsupported-claim button,
#cat_btn_unsupported-claim .gr-button { background:#ffb3b3 !important; border-color:#ffb3b3 !important; color:#222 !important; }
#cat_btn_format button,
#cat_btn_format .gr-button { background:#ffe8a3 !important; border-color:#ffe8a3 !important; color:#222 !important; }
#cat_btn_coherence button,
#cat_btn_coherence .gr-button { background:#b7f0d2 !important; border-color:#b7f0d2 !important; color:#222 !important; }
#cat_btn_lacks-synthesis button,
#cat_btn_lacks-synthesis .gr-button { background:#bcd8ff !important; border-color:#bcd8ff !important; color:#222 !important; }
#top_intro { font-size:1.50em; line-height:1.3; }
#progress_label { font-size:1.25em; }
#attempts_label { font-size:1.25em; }
""",
) as demo:
    gr.HTML(
        "<div id='top_intro'>"
        "This is a test-only annotation app.<br>"
        "Highlight spans and label them exactly like the main task. "
        "Gold spans appear after you reach at least 60% recall (or after 3 tries)."
        "</div>"
    )

    # Per-session state: annotations and submission counts, keyed by filename.
    ann_state = gr.State({})
    attempts_state = gr.State({})

    with gr.Row():
        with gr.Column(scale=3):
            current_file = gr.State(TEST_FILES[0] if TEST_FILES else None)
            with gr.Row():
                progress = gr.Markdown(
                    _progress_label(TEST_FILES[0] if TEST_FILES else None),
                    elem_id="progress_label",
                )
                attempts_label = gr.Markdown(
                    _attempts_label(TEST_FILES[0] if TEST_FILES else None, {}),
                    elem_id="attempts_label",
                )

            # Render the first paper up front so the viewer is not empty on load.
            if TEST_FILES:
                initial_text, initial_err = _read_text(TEST_FILES[0])
                initial_html = (
                    _render_with_highlights(initial_text, []) if not initial_err else initial_err
                )
            else:
                initial_html = "<em>No file selected.</em>"
            content = gr.HTML(initial_html, elem_id="paper_view")

            with gr.Row():
                prev_btn = gr.Button("Previous test")
                next_btn = gr.Button("Next test")
            gold_content = gr.HTML("<em>Gold highlighted spans will appear here after you click 'Submit & check' and pass or ran out of attempts.</em>")

        with gr.Column(scale=1):
            gr.Markdown("**Category**")
            category_buttons = {}
            for cat in CATEGORIES:
                slug = CATEGORY_SLUGS.get(cat, cat.lower().replace(" ", "-"))
                category_buttons[cat] = gr.Button(cat, elem_id=f"cat_btn_{slug}")
            remove_btn = gr.Button("Remove highlight", variant="secondary")
            gr.HTML(LEGEND_HTML)
            clear_btn = gr.Button("Clear annotations", variant="stop")
            submit_btn = gr.Button("Submit & check")
            status = gr.Textbox(label="Status", interactive=False, elem_id="status_box")
            annotations_view = gr.Textbox(
                label="Annotations for current file (JSON)", lines=10, interactive=False, elem_id="annotations_view"
            )
            # Hidden fields that carry the selection offsets captured by the
            # client-side script.
            start_pos = gr.Textbox(label="Start (char)", elem_id="start_pos", visible="hidden")
            end_pos = gr.Textbox(label="End (char)", elem_id="end_pos", visible="hidden")

    # Polls the browser selection every 0.3 s.
    selection_timer = gr.Timer(0.3)

    # --- Navigation -------------------------------------------------------
    prev_btn.click(lambda f: _cycle_paper(f, -1), inputs=current_file, outputs=current_file)
    next_btn.click(lambda f: _cycle_paper(f, 1), inputs=current_file, outputs=current_file)

    # --- Reactions to a change of paper -----------------------------------
    current_file.change(read_paper, inputs=[current_file, ann_state], outputs=[content, annotations_view])
    current_file.change(lambda: gr.update(value=""), None, start_pos)
    current_file.change(lambda: gr.update(value=""), None, end_pos)
    current_file.change(lambda f: _progress_label(f), inputs=current_file, outputs=progress)
    current_file.change(
        lambda: "<em>Gold highlighted spans will appear here after you click 'Submit & check' and pass or ran out of attempts.</em>",
        None,
        gold_content,
    )
    # Reset the counter first, then redraw the label from the updated state.
    current_file.change(
        _reset_attempts_for,
        inputs=[current_file, attempts_state],
        outputs=[attempts_state],
    ).then(
        _attempts_label,
        inputs=[current_file, attempts_state],
        outputs=[attempts_label],
    )

    # --- Selection capture (runs in the browser only) ----------------------
    selection_timer.tick(
        fn=None,
        inputs=[start_pos, end_pos],
        outputs=[start_pos, end_pos, status],
        js=CATEGORY_JS,
    )

    # --- Annotation actions -------------------------------------------------
    for cat, btn in category_buttons.items():
        # ``c=cat`` binds the label at definition time; a plain closure would
        # see only the last value of the loop variable.
        btn.click(
            lambda filename, start, end, state, c=cat: save_annotation(filename, start, end, c, state),
            inputs=[current_file, start_pos, end_pos, ann_state],
            outputs=[status, content, annotations_view, ann_state],
        )
    remove_btn.click(
        remove_annotation,
        inputs=[current_file, start_pos, end_pos, ann_state],
        outputs=[status, content, annotations_view, ann_state],
    )
    clear_btn.click(
        clear_annotations,
        inputs=[current_file, ann_state],
        outputs=[status, content, annotations_view, ann_state],
    )

    # Grade first, then redraw the attempts label from the updated state.
    submit_btn.click(
        _submit_check,
        inputs=[current_file, ann_state, attempts_state],
        outputs=[status, gold_content, attempts_state],
    ).then(
        _attempts_label,
        inputs=[current_file, attempts_state],
        outputs=[attempts_label],
    )

    # Seed the JSON panel for the first paper.
    if TEST_FILES:
        _, annotations_val = read_paper(TEST_FILES[0], {})
        annotations_view.value = annotations_val
if __name__ == "__main__":
    # An optional first command-line argument selects the server port.
    # Without it (or when it is not an integer) Gradio's own default port
    # selection is used, exactly as a bare ``demo.launch()`` would do.
    launch_kwargs = {}
    if len(sys.argv) > 1:
        try:
            launch_kwargs["server_port"] = int(sys.argv[1])
        except ValueError:
            print(
                f"Ignoring invalid port {sys.argv[1]!r}; using Gradio's default.",
                file=sys.stderr,
            )
    demo.launch(**launch_kwargs)