"""
TEI Annotator — Gradio demo for HuggingFace Spaces.

Set HF_TOKEN as a Space secret. All inference calls use that token;
visitors use the app without any login or token input.

Deploy on HF Spaces (SDK: gradio). Required secret: HF_TOKEN.
"""

from __future__ import annotations

import json
import os
import random
import time
import urllib.error
import urllib.request
from pathlib import Path

import gradio as gr

# ---------------------------------------------------------------------------
# Config (mirrors webservice/main.py — keep in sync if you change models)
# ---------------------------------------------------------------------------

# Token used for every inference request; an empty string means "not configured".
_HF_TOKEN = os.getenv("HF_TOKEN", "")

# Model identifiers offered in the UI dropdowns; the first entry is the default.
_HF_MODELS = [
    "meta-llama/Llama-3.3-70B-Instruct:nscale",
    "meta-llama/Llama-3.1-70B-Instruct:scaleway",
    "Qwen/Qwen3-32B:nscale",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B:nscale",
    "Qwen/Qwen2.5-Coder-32B-Instruct:nscale",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B:nscale",
    "meta-llama/Llama-3.1-8B-Instruct:nscale",
    "Qwen/Qwen3-14B:nscale",
    "openai/gpt-oss-20b:nscale",
    "Qwen/QwQ-32B:nscale",
]

# Base URL of the HuggingFace router; endpoint paths are appended to it.
_HF_BASE_URL = "https://router.huggingface.co/v1"

# Gold-standard TEI fixture, located relative to this file.
_FIXTURE_PATH = Path(__file__).parent.joinpath(
    "tests", "fixtures", "blbl-examples.tei.xml"
)

# Build the schema once at startup — it's immutable and expensive-ish to rebuild per request.
from tei_annotator.schemas.blbl import build_blbl_schema as _build_blbl_schema

# Module-level schema instance reused by do_annotate() and do_evaluate().
_SCHEMA = _build_blbl_schema()

# XPath matching every TEI-namespaced <bibl> element in the fixture file.
_TEI_BIBL_XPATH = ".//{http://www.tei-c.org/ns/1.0}bibl"


# ---------------------------------------------------------------------------
# HTTP helper
# ---------------------------------------------------------------------------


def _post_json(url: str, payload: dict, headers: dict, timeout: int = 300) -> dict:
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    Args:
        url: Target URL.
        payload: JSON-serialisable request body.
        headers: HTTP headers to send with the request.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        RuntimeError: On an HTTP error status; the message carries the status
            code and the (lossily decoded) response body.
    """
    body = json.dumps(payload).encode()
    req = urllib.request.Request(url, data=body, headers=headers, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as exc:
        # The error body usually explains the failure, so surface it.
        detail = exc.read().decode(errors="replace")
        raise RuntimeError(f"HTTP {exc.code}: {detail}") from exc


def _make_call_fn(model: str, timeout: int = 300):
    """Return a ``prompt -> completion text`` callable bound to *model*.

    The callable posts a single-user-message chat completion request to the
    router's ``/chat/completions`` endpoint, authenticated with ``_HF_TOKEN``.

    Args:
        model: Model identifier sent in the request payload.
        timeout: Per-request timeout in seconds.

    Returns:
        A function taking the prompt string and returning the content of the
        first choice's message. Its ``__name__`` is set to ``hf/<model>``.
    """
    url = f"{_HF_BASE_URL}/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {_HF_TOKEN}",
    }

    def call_fn(prompt: str) -> str:
        payload = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.1,
        }
        result = _post_json(url, payload, headers, timeout)
        try:
            return result["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as exc:
            # A bare KeyError message ("'choices'") tells the user nothing;
            # include a bounded excerpt of what actually came back.
            snippet = repr(result)[:500]
            raise RuntimeError(
                f"Unexpected response from {model}: {snippet}"
            ) from exc

    call_fn.__name__ = f"hf/{model}"
    return call_fn


# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------


def _make_endpoint(model: str):
    """Build the text-generation ``EndpointConfig`` for *model*."""
    from tei_annotator.inference.endpoint import EndpointCapability, EndpointConfig

    return EndpointConfig(
        capability=EndpointCapability.TEXT_GENERATION,
        call_fn=_make_call_fn(model),
    )


def _sample_bibls(n: int) -> list:
    """Return up to *n* randomly chosen ``<bibl>`` elements from the fixture.

    The caller is responsible for checking that ``_FIXTURE_PATH`` exists.
    The sample size is clamped to ``[0, number of available elements]`` so a
    negative *n* yields an empty sample instead of a ``ValueError``.
    """
    from lxml import etree

    tree = etree.parse(str(_FIXTURE_PATH))
    bibls = tree.findall(_TEI_BIBL_XPATH)
    count = max(0, min(int(n), len(bibls)))
    return random.sample(bibls, count)


def _parse_prediction(fragment: str):
    """Parse annotated text into a ``<bibl>`` element, never returning ``None``.

    The fragment is wrapped in a ``<bibl>`` root before parsing so that text
    with inline tags (which has no single root element) becomes a well-formed
    document. Any parse failure yields an empty ``<bibl>`` element, i.e. a
    prediction without spans.

    NOTE(review): this assumes ``annotate()`` returns inline-tagged text
    without a root element. The batch path already relies on that by
    splitting the XML on a text separator — confirm against the pipeline.
    """
    from lxml import etree

    parser = etree.XMLParser(recover=True)
    try:
        root = etree.fromstring(f"<bibl>{fragment}</bibl>".encode(), parser)
    except Exception:
        # Deliberately best-effort: an unparsable prediction scores as empty.
        root = None
    if root is None:
        # A recovering parser can give up without raising.
        root = etree.Element("bibl")
    return root


# ---------------------------------------------------------------------------
# Action functions
# ---------------------------------------------------------------------------


def do_annotate(text: str, model: str):
    """Annotate *text* with *model* and return ``(xml, status_message)``.

    On any failure (missing token, empty input, annotation error) the XML
    part is an empty string and the status message explains the problem.
    """
    if not _HF_TOKEN:
        return "", "HF_TOKEN is not set. Add it as a Space secret."
    if not text or not text.strip():
        return "", "Please enter some text to annotate."

    from tei_annotator.pipeline import annotate

    endpoint = _make_endpoint(model)

    t0 = time.monotonic()
    try:
        result = annotate(
            text=text, schema=_SCHEMA, endpoint=endpoint, gliner_model=None
        )
    except Exception as exc:
        # UI boundary: report the failure in the status box instead of raising.
        return "", f"Error: {exc}"
    elapsed = round(time.monotonic() - t0, 1)

    return result.xml, f"Done in {elapsed}s"


def do_load_samples(n: int):
    """Return the plain text of up to *n* random fixture records.

    Records are separated by a blank line. If the fixture file is missing,
    an explanatory message is returned instead.
    """
    if not _FIXTURE_PATH.exists():
        return "Fixture file not found. Make sure tests/fixtures/ is present."

    from tei_annotator.evaluation.extractor import extract_spans

    # extract_spans() returns (plain_text, spans); only the text is shown.
    return "\n\n".join(extract_spans(el)[0] for el in _sample_bibls(n))


# Separator placed between records when several are sent in one LLM call.
_BATCH_SEP = "\n---RECORD|||SEP|||BOUNDARY---\n"


def do_evaluate(model: str, n: int, batch_size: int = 1):
    """Evaluate *model* on up to *n* random gold-standard records.

    Args:
        model: Model identifier to annotate with.
        n: Number of fixture records to sample.
        batch_size: Records per LLM call; values below 1 are treated as 1.

    Returns:
        ``(rows, summary)`` where *rows* is a list of
        ``[element, precision, recall, f1, TP, FP, FN]`` lists sorted by
        descending F1, and *summary* is a one-line text. On failure *rows*
        is ``None`` and *summary* holds the error message.
    """
    if not _HF_TOKEN:
        return None, "HF_TOKEN is not set. Add it as a Space secret."

    from lxml import etree

    from tei_annotator.evaluation.extractor import extract_spans
    from tei_annotator.evaluation.metrics import MatchMode, aggregate, compute_metrics
    from tei_annotator.pipeline import annotate

    if not _FIXTURE_PATH.exists():
        return None, "Fixture file not found. Make sure tests/fixtures/ is present."

    samples = _sample_bibls(n)
    schema = _SCHEMA
    endpoint = _make_endpoint(model)
    batch_size = max(1, int(batch_size))

    def _score(gold_spans, pred_el):
        """Compare the spans found in *pred_el* with *gold_spans*."""
        _, pred_spans = extract_spans(pred_el)
        return compute_metrics(gold_spans, pred_spans, mode=MatchMode.TEXT)

    def _annotate_one(el):
        """Annotate a single record; a failed call scores as an empty prediction."""
        plain_text, gold_spans = extract_spans(el)
        try:
            xml = annotate(plain_text, schema, endpoint, gliner_model=None).xml
        except Exception:
            # Best-effort by design: one bad record must not abort the run.
            pred_el = etree.Element("bibl")
        else:
            pred_el = _parse_prediction(xml)
        return _score(gold_spans, pred_el)

    def _annotate_batch(batch_els):
        """Annotate several records in one call and score each separately."""
        plain_texts = []
        gold_spans_list = []
        for el in batch_els:
            plain_text, gold_spans = extract_spans(el)
            plain_texts.append(plain_text)
            gold_spans_list.append(gold_spans)

        # If a record contains the separator itself, splitting the output
        # would be ambiguous, so annotate this batch record by record.
        if any(_BATCH_SEP in t for t in plain_texts):
            return [_annotate_one(el) for el in batch_els]

        combined = _BATCH_SEP.join(plain_texts)
        try:
            ann_result = annotate(combined, schema, endpoint, gliner_model=None)
        except Exception as exc:
            raise RuntimeError(f"Error during batch annotation: {exc}") from exc

        pieces = ann_result.xml.split(_BATCH_SEP)
        if len(pieces) != len(batch_els):
            # The separators did not survive annotation, so output cannot be
            # attributed to records; score every record as an empty prediction.
            return [
                compute_metrics(gs, [], mode=MatchMode.TEXT) for gs in gold_spans_list
            ]

        return [
            _score(gs, _parse_prediction(piece))
            for piece, gs in zip(pieces, gold_spans_list)
        ]

    per_result = []
    t0 = time.monotonic()
    try:
        if batch_size <= 1:
            for el in samples:
                per_result.append(_annotate_one(el))
        else:
            for start in range(0, len(samples), batch_size):
                per_result.extend(_annotate_batch(samples[start : start + batch_size]))
    except Exception as exc:
        # UI boundary: report the failure in the summary box instead of raising.
        return None, f"Error during annotation: {exc}"
    elapsed = round(time.monotonic() - t0, 1)

    agg = aggregate(per_result)
    rows = [
        [
            tag,
            round(m.precision, 3),
            round(m.recall, 3),
            round(m.f1, 3),
            m.true_positives,
            m.false_positives,
            m.false_negatives,
        ]
        for tag, m in sorted(agg.per_element.items(), key=lambda kv: -kv[1].f1)
    ]
    summary = (
        f"n={len(samples)} samples | "
        f"micro P={agg.micro_precision:.3f} R={agg.micro_recall:.3f} F1={agg.micro_f1:.3f} | "
        f"{elapsed}s"
    )
    return rows, summary


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

with gr.Blocks(title="TEI Annotator") as demo:
    gr.Markdown(
        "# TEI Annotator\n"
        "[GitHub Repo](https://github.com/cboulanger/tei-annotator)\n"
        "This demo annotates bibliographic plain text with TEI XML tags (tei:author, tei:title etc.) "
        "using open LLMs via the HuggingFace Inference Router."
    )

    if not _HF_TOKEN:
        gr.Markdown("> **Setup required:** Set `HF_TOKEN` as a Space secret and restart.")

    with gr.Tabs():
        # ── Annotation tab ────────────────────────────────────────────────
        with gr.Tab("Annotate"):
            model_dd = gr.Dropdown(
                choices=_HF_MODELS, value=_HF_MODELS[0], label="Model"
            )
            text_in = gr.Textbox(
                lines=5,
                label="Input text",
                placeholder="Paste a bibliographic reference here…",
            )
            annotate_btn = gr.Button("Annotate", variant="primary")
            xml_out = gr.Code(label="XML output", language="html", interactive=False)
            ann_status = gr.Textbox(label="Status", interactive=False, max_lines=1)

            annotate_btn.click(
                do_annotate,
                inputs=[text_in, model_dd],
                outputs=[xml_out, ann_status],
            )

        # ── Evaluation tab ────────────────────────────────────────────────
        with gr.Tab("Evaluate"):
            eval_model_dd = gr.Dropdown(
                choices=_HF_MODELS, value=_HF_MODELS[0], label="Model"
            )
            n_slider = gr.Slider(1, 20, value=5, step=1, label="Number of samples")
            batch_size_slider = gr.Slider(
                1,
                20,
                value=min(n_slider.value, 5),
                step=1,
                label="Batch size",
                info="Records per LLM call. Values > 1 reduce latency but may reduce quality.",
            )
            with gr.Row():
                sample_btn = gr.Button("Load sample texts")
                eval_btn = gr.Button("Run evaluation", variant="primary")

            sample_out = gr.Textbox(
                lines=8,
                label="Sampled texts (from gold standard)",
                interactive=False,
            )
            eval_table = gr.Dataframe(
                headers=["Element", "Precision", "Recall", "F1", "TP", "FP", "FN"],
                label="Per-element metrics (sorted by F1)",
            )
            eval_status = gr.Textbox(label="Summary", interactive=False, max_lines=2)

            # Changing the sample count resets the batch size to min(n, 5).
            n_slider.change(
                lambda n: min(int(n), 5),
                inputs=[n_slider],
                outputs=[batch_size_slider],
            )
            sample_btn.click(do_load_samples, inputs=[n_slider], outputs=[sample_out])
            eval_btn.click(
                do_evaluate,
                inputs=[eval_model_dd, n_slider, batch_size_slider],
                outputs=[eval_table, eval_status],
            )


if __name__ == "__main__":
    demo.launch(ssr_mode=False)