Spaces:

gyorilab
/

variants_ner

Sleeping

App Files Files Community

enoriega committed on Mar 11

Commit

8252e17

0 Parent(s):

Initial commit with the app

Browse files

Files changed (8) hide show

.gitignore +10 -0
.python-version +1 -0
README.md +45 -0
backend/__init__.py +1 -0
backend/app.py +607 -0
main.py +5 -0
pyproject.toml +28 -0
uv.lock +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,10 @@

+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+# Virtual environments
+.venv

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.13

README.md ADDED Viewed

	@@ -0,0 +1,45 @@

+# NER UI
+Demo project that exposes a Hugging Face NER experience through:
+- `backend/`: a FastAPI API that fetches available model revisions from the Hugging Face Hub and runs a token-classification pipeline
+- a mounted Gradio UI at `/` for model selection, text submission, and highlighted entity rendering
+## Requirements
+- Python 3.13+
+- `uv`
+## Install
+Install Python dependencies:
+```bash
+uv sync
+```
+## Run the app
+Start the FastAPI server with the mounted Gradio frontend:
+```bash
+uv run ner-ui
+```
+Then open `http://localhost:8000/`.
+API endpoints:
+- `GET /api/health`
+- `GET /api/models/revisions?model_name=dslim/bert-base-NER`
+- `POST /api/ner`
+Request body for `/api/ner`:
+```json
+{
+  "text": "Hugging Face Inc. is based in New York City.",
+  "model_name": "dslim/bert-base-NER",
+  "revision": "main"
+}
+```

backend/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Backend package for the NER demo application."""

backend/app.py ADDED Viewed

	@@ -0,0 +1,607 @@

+from __future__ import annotations
+import logging
+from functools import lru_cache
+from typing import Any
+from html import escape
+import gradio as gr
+from fastapi import FastAPI, HTTPException, Query
+from fastapi.middleware.cors import CORSMiddleware
+from huggingface_hub import HfApi
+from pydantic import BaseModel, Field
+from transformers import pipeline
+# Module-level logger, named after this module.
+LOGGER = logging.getLogger(__name__)
+# Model repo id pre-filled in the UI's "Model name" textbox.
+DEFAULT_MODEL_NAME = "gyorilab/variants-ner-modernbert-base"
+# Revision used when the Hub reports no refs for a repo or the UI submits none.
+DEFAULT_REVISION = "main"
+DEFAULT_TEXT = (
+    "In our cohort we analyzed variants affecting neuronal signaling and cytoskeletal stability. Sequencing identified several recurrent protein mutations including TP53 R248W, EGFR L858R, and a truncating MAPT Q336* variant predicted to disrupt microtubule binding. At the DNA level we detected substitutions such as TP53 c.743G>A and EGFR c.2573T>G, along with a small deletion c.152_153del causing a frameshift in downstream transcripts. Structural variation analysis further revealed a copy number gain consistent with chr7:55,019,017-55,242,524, overlapping the EGFR locus and resembling CNV strings commonly reported in PubTator3-style annotations. Population-associated polymorphisms were also present, including rs429358 and rs7412 within APOE, as well as rs1801133 in MTHFR. Together, these protein-altering mutations, nucleotide substitutions, and regional copy number changes suggest combined effects on cellular stress responses, signaling pathways, and metabolic regulation in the studied samples."
+)
+# Highlight background colors. render_highlighted_html hands them out to
+# entity labels as each new label is encountered, wrapping around when there
+# are more labels than colors.
+LABEL_COLORS = [
+    "#d4a373",
+    "#2a9d8f",
+    "#577590",
+    "#e76f51",
+    "#8d99ae",
+    "#6a994e",
+]
+# Gradio theme handed to gr.mount_gradio_app in create_app.
+GRADIO_THEME = gr.themes.Base(
+    primary_hue="cyan",
+    secondary_hue="blue",
+    neutral_hue="slate",
+    radius_size="md",
+)
+GRADIO_CSS = """
+:root {
+  --page-bg: #f3f3f3;
+  --page-text: #66717d;
+  --muted-text: #7d858e;
+  --accent-text: #4ea9b8;
+  --panel-bg: #ffffff;
+  --panel-border: #d5dadd;
+  --panel-shadow: 0 1px 2px rgba(76, 93, 108, 0.08);
+  --card-bg: #f7f8f8;
+  --card-border: #dde3e6;
+  --field-bg: #f7f8f8;
+  --field-border: #d7dde0;
+  --field-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.9);
+  --highlight-text: #20303a;
+  --empty-bg: #eaf4f6;
+  --link-color: #6aa0d6;
+  --secondary-button-bg: #eef3f4;
+  --secondary-button-border: #d3dade;
+  --table-header-bg: #eef3f4;
+  --table-row-alt: #fafbfb;
+  --focus-ring: 0 0 0 3px rgba(83, 173, 188, 0.14);
+}
+.dark,
+body.dark,
+html.dark,
+[data-theme="dark"],
+.dark .gradio-container,
+body.dark .gradio-container,
+html.dark .gradio-container,
+[data-theme="dark"] .gradio-container,
+.gradio-container.dark {
+  --page-bg: #111922;
+  --page-text: #d4dde4;
+  --muted-text: #aeb8c2;
+  --accent-text: #73c6d2;
+  --panel-bg: #18232f;
+  --panel-border: #2a3b4b;
+  --panel-shadow: 0 10px 30px rgba(0, 0, 0, 0.22);
+  --card-bg: #111b24;
+  --card-border: #2a3b4b;
+  --field-bg: #213142;
+  --field-border: #314658;
+  --field-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.03);
+  --highlight-text: #0d1821;
+  --empty-bg: #17313a;
+  --link-color: #8bb8e8;
+  --secondary-button-bg: #223342;
+  --secondary-button-border: #314658;
+  --table-header-bg: #213142;
+  --table-row-alt: #15212c;
+  --focus-ring: 0 0 0 3px rgba(115, 198, 210, 0.2);
+}
+body, .gradio-container {
+  background: var(--page-bg);
+}
+.gradio-container {
+  font-family: "Segoe UI", "Helvetica Neue", Arial, sans-serif;
+  color: var(--page-text);
+  transition: background-color 0.2s ease, color 0.2s ease;
+}
+.hero {
+  padding: 1rem 0 0.4rem;
+}
+.eyebrow {
+  margin: 0 0 0.5rem;
+  text-transform: uppercase;
+  letter-spacing: 0.12em;
+  font-size: 0.74rem;
+  color: var(--accent-text);
+}
+.hero-title {
+  margin: 0;
+  font-size: clamp(1.85rem, 3.2vw, 2.9rem);
+  line-height: 1.05;
+  max-width: 18ch;
+  font-weight: 600;
+  letter-spacing: -0.02em;
+  color: var(--page-text);
+}
+.hero-copy {
+  max-width: 60ch;
+  color: var(--muted-text);
+}
+.panel {
+  border: 1px solid var(--panel-border);
+  background: var(--panel-bg);
+  box-shadow: var(--panel-shadow);
+  border-radius: 10px;
+  padding: 1rem;
+  transition: background-color 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease;
+}
+.panel > .gap {
+  gap: 0.9rem !important;
+}
+.result-card {
+  min-height: 180px;
+  padding: 1.1rem;
+  border-radius: 8px;
+  background: var(--card-bg);
+  border: 1px solid var(--card-border);
+  transition: background-color 0.2s ease, border-color 0.2s ease;
+}
+.result-text {
+  margin: 0;
+  font-size: 1rem;
+  white-space: pre-wrap;
+  line-height: 1.65;
+  color: var(--page-text);
+}
+.entity-highlight {
+  display: inline-flex;
+  align-items: center;
+  gap: 0.35rem;
+  margin: 0 0.08rem;
+  padding: 0.15rem 0.35rem;
+  border-radius: 6px;
+  color: var(--highlight-text);
+}
+.entity-chip {
+  font-size: 0.72rem;
+  font-weight: 700;
+  text-transform: uppercase;
+}
+.empty-state {
+  margin: 0;
+  border-radius: 8px;
+  padding: 0.8rem 1rem;
+  background: var(--empty-bg);
+  color: var(--page-text);
+}
+.gradio-container a {
+  color: var(--link-color);
+}
+.gradio-container table {
+  color: var(--page-text);
+}
+[data-testid="block-label"] {
+  color: var(--page-text) !important;
+  font-size: 0.84rem !important;
+  font-weight: 600 !important;
+  letter-spacing: 0.01em;
+}
+[data-testid="textbox"],
+[data-testid="dropdown"],
+[data-testid="textbox"] > label,
+[data-testid="dropdown"] > label,
+[data-testid="dataframe"] {
+  background: transparent !important;
+  border: none !important;
+  box-shadow: none !important;
+}
+[data-testid="textbox"] textarea,
+[data-testid="textbox"] input,
+[data-testid="dropdown"] button {
+  background: var(--field-bg) !important;
+  color: var(--page-text) !important;
+  border: 1px solid var(--field-border) !important;
+  border-radius: 8px !important;
+  box-shadow: var(--field-shadow) !important;
+}
+[data-testid="textbox"] textarea,
+[data-testid="textbox"] input {
+  padding: 0.8rem 0.9rem !important;
+}
+[data-testid="dropdown"] button {
+  min-height: 3rem !important;
+}
+[data-testid="textbox"] textarea:focus,
+[data-testid="textbox"] input:focus,
+[data-testid="dropdown"] button:focus,
+[data-testid="dropdown"] button[aria-expanded="true"] {
+  border-color: var(--accent-text) !important;
+  box-shadow: var(--focus-ring) !important;
+}
+[data-testid="dropdown-options"] {
+  background: var(--panel-bg) !important;
+  border: 1px solid var(--field-border) !important;
+  border-radius: 8px !important;
+  box-shadow: 0 8px 24px rgba(76, 93, 108, 0.12) !important;
+}
+[data-testid="dropdown-options"] [role="option"] {
+  color: var(--page-text) !important;
+}
+[data-testid="dropdown-options"] [aria-selected="true"] {
+  background: var(--empty-bg) !important;
+}
+button.primary,
+button.lg.primary {
+  background: #53adbc !important;
+  border: 1px solid #53adbc !important;
+  color: #ffffff !important;
+  border-radius: 8px !important;
+  box-shadow: none !important;
+}
+button.secondary,
+button.lg.secondary {
+  background: var(--secondary-button-bg) !important;
+  border: 1px solid var(--secondary-button-border) !important;
+  color: var(--page-text) !important;
+  border-radius: 8px !important;
+  box-shadow: none !important;
+}
+button.primary:hover,
+button.secondary:hover {
+  filter: brightness(0.98);
+}
+button.primary:focus,
+button.secondary:focus {
+  box-shadow: var(--focus-ring) !important;
+}
+[data-testid="dataframe"] {
+  overflow: hidden !important;
+  border: 1px solid var(--field-border) !important;
+  border-radius: 8px !important;
+  background: var(--panel-bg) !important;
+}
+[data-testid="dataframe"] table {
+  background: var(--panel-bg) !important;
+}
+[data-testid="dataframe"] thead th {
+  background: var(--table-header-bg) !important;
+  color: var(--page-text) !important;
+  border-bottom: 1px solid var(--field-border) !important;
+  font-weight: 600 !important;
+}
+[data-testid="dataframe"] tbody td {
+  color: var(--page-text) !important;
+  background: var(--panel-bg) !important;
+  border-color: var(--card-border) !important;
+}
+[data-testid="dataframe"] tbody tr:nth-child(even) td {
+  background: var(--table-row-alt) !important;
+}
+[data-testid="markdown"] p,
+.gr-markdown p {
+  color: var(--muted-text) !important;
+}
+"""
+# Request body of POST /api/ner; run_ner_for_ui builds the same model for the
+# Gradio path. min_length=1 makes validation reject empty text / model_name.
+class NerRequest(BaseModel):
+    text: str = Field(min_length=1, description="Input text to annotate.")
+    model_name: str = Field(min_length=1, description="Hugging Face model repo id.")
+    revision: str | None = Field(default=None, description="Optional model revision.")
+# One predicted entity span, as produced by run_ner_inference.
+class EntityPrediction(BaseModel):
+    # Taken from the pipeline's "entity_group" key.
+    label: str
+    score: float
+    # start/end are the offsets used to slice the request text: text[start:end].
+    start: int
+    end: int
+    text: str
+# Response of POST /api/ner: echoes the request's text, model_name and
+# revision and adds the predicted entities.
+class NerResponse(BaseModel):
+    text: str
+    model_name: str
+    revision: str | None
+    entities: list[EntityPrediction]
+# A single git ref of a model repo.
+class ModelRevision(BaseModel):
+    name: str
+    # get_model_revisions_data sets this to "branch" or "tag".
+    kind: str
+# Response of GET /api/models/revisions: the queried repo id and its refs.
+class ModelRevisionResponse(BaseModel):
+    model_name: str
+    revisions: list[ModelRevision]
+def get_model_revisions_data(model_name: str) -> ModelRevisionResponse:
+    try:
+        refs = get_hf_api().list_repo_refs(model_name, repo_type="model")
+    except Exception as exc:  # pragma: no cover - network/runtime integration
+        raise HTTPException(
+            status_code=502,
+            detail=f"Unable to fetch revisions for '{model_name}': {exc}",
+        ) from exc
+    revisions = [
+        ModelRevision(name=branch.name, kind="branch")
+        for branch in refs.branches
+    ]
+    revisions.extend(
+        ModelRevision(name=tag.name, kind="tag")
+        for tag in refs.tags
+    )
+    if not revisions:
+        revisions.append(ModelRevision(name=DEFAULT_REVISION, kind="branch"))
+    return ModelRevisionResponse(model_name=model_name, revisions=revisions)
+def run_ner_inference(request: NerRequest) -> NerResponse:
+    try:
+        ner_pipeline = get_ner_pipeline(request.model_name, request.revision)
+        predictions = ner_pipeline(request.text)
+    except Exception as exc:  # pragma: no cover - model/runtime integration
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    entities = [
+        EntityPrediction(
+            label=prediction["entity_group"],
+            score=float(prediction["score"]),
+            start=int(prediction["start"]),
+            end=int(prediction["end"]),
+            text=request.text[prediction["start"] : prediction["end"]],
+        )
+        for prediction in predictions
+    ]
+    return NerResponse(
+        text=request.text,
+        model_name=request.model_name,
+        revision=request.revision,
+        entities=entities,
+    )
+def render_highlighted_html(text: str, entities: list[EntityPrediction]) -> str:
+    if not text:
+        return '<div class="result-card"><p class="empty-state">Enter text to annotate.</p></div>'
+    if not entities:
+        return (
+            '<div class="result-card">'
+            f'<p class="result-text">{escape(text)}</p>'
+            "</div>"
+        )
+    label_colors: dict[str, str] = {}
+    fragments: list[str] = []
+    cursor = 0
+    for entity in entities:
+        label_color = label_colors.setdefault(
+            entity.label,
+            LABEL_COLORS[len(label_colors) % len(LABEL_COLORS)],
+        )
+        if cursor < entity.start:
+            fragments.append(escape(text[cursor : entity.start]))
+        entity_text = escape(text[entity.start : entity.end])
+        entity_label = escape(entity.label)
+        fragments.append(
+            '<mark class="entity-highlight" '
+            f'style="background-color: {label_color};">'
+            f"{entity_text}"
+            f'<span class="entity-chip">{entity_label}</span>'
+            "</mark>"
+        )
+        cursor = entity.end
+    if cursor < len(text):
+        fragments.append(escape(text[cursor:]))
+    return (
+        '<div class="result-card">'
+        f'<p class="result-text">{"".join(fragments)}</p>'
+        "</div>"
+    )
+def render_entity_table(entities: list[EntityPrediction]) -> list[list[str]]:
+    if not entities:
+        return []
+    return [
+        [
+            entity.label,
+            entity.text,
+            str(entity.start),
+            str(entity.end),
+            f"{entity.score * 100:.1f}%",
+        ]
+        for entity in entities
+    ]
+def load_revisions_for_ui(model_name: str, selected_revision: str | None) -> tuple[gr.Dropdown, str]:
+    trimmed_model_name = model_name.strip()
+    if not trimmed_model_name:
+        return gr.Dropdown(choices=[], value=None), "Enter a Hugging Face model id to load revisions."
+    revision_response = get_model_revisions_data(trimmed_model_name)
+    revision_choices = [
+        (f"{item.name} ({item.kind})", item.name)
+        for item in revision_response.revisions
+    ]
+    revision_names = [item.name for item in revision_response.revisions]
+    revision_value = selected_revision if selected_revision in revision_names else revision_names[0]
+    return (
+        gr.Dropdown(choices=revision_choices, value=revision_value),
+        f"Loaded {len(revision_choices)} revision(s) for `{trimmed_model_name}`.",
+    )
+def run_ner_for_ui(text: str, model_name: str, revision: str | None) -> tuple[str, list[list[str]], str]:
+    trimmed_model_name = model_name.strip()
+    trimmed_text = text.strip()
+    if not trimmed_model_name:
+        raise gr.Error("Model name is required.")
+    if not trimmed_text:
+        raise gr.Error("Input text is required.")
+    response = run_ner_inference(
+        NerRequest(
+            text=trimmed_text,
+            model_name=trimmed_model_name,
+            revision=revision or DEFAULT_REVISION,
+        )
+    )
+    return (
+        render_highlighted_html(response.text, response.entities),
+        render_entity_table(response.entities),
+        f"Found {len(response.entities)} entity span(s) using `{response.model_name}` at revision `{response.revision or DEFAULT_REVISION}`.",
+    )
+def build_gradio_app() -> gr.Blocks:
+    with gr.Blocks(title="NER UI") as demo:
+        gr.HTML(
+            """
+            <section class="hero">
+              <p class="eyebrow">Transformer NER Demo</p>
+              <h1 class="hero-title">Run Hugging Face token classification models against live text.</h1>
+              <p class="hero-copy">
+                Pick a model, choose a revision from the Hub, submit text, and inspect the
+                predicted named entities with class-based highlighting.
+              </p>
+            </section>
+            """
+        )
+        with gr.Row(equal_height=False):
+            with gr.Column(scale=4, elem_classes=["panel"]):
+                model_name = gr.Textbox(
+                    label="Model name",
+                    value=DEFAULT_MODEL_NAME,
+                    placeholder="dslim/bert-base-NER",
+                )
+                with gr.Row():
+                    revision = gr.Dropdown(
+                        label="Revision",
+                        choices=[],
+                        value=None,
+                        allow_custom_value=False,
+                    )
+                    load_revisions = gr.Button("Load revisions", variant="secondary")
+                text = gr.Textbox(
+                    label="Input text",
+                    value=DEFAULT_TEXT,
+                    lines=10,
+                    placeholder="Paste a sentence or paragraph to annotate.",
+                )
+                run_button = gr.Button("Run NER", variant="primary")
+            with gr.Column(scale=5, elem_classes=["panel"]):
+                status = gr.Markdown("Loading available revisions...")
+                highlighted = gr.HTML(
+                    '<div class="result-card"><p class="empty-state">Run NER to see highlighted predictions.</p></div>',
+                    label="Highlighted text",
+                )
+                entity_table = gr.Dataframe(
+                    headers=["Label", "Text", "Start", "End", "Score"],
+                    datatype=["str", "str", "str", "str", "str"],
+                    row_count=(0, "dynamic"),
+                    column_count=(5, "fixed"),
+                    interactive=False,
+                    label="Predicted entities",
+                )
+        revision_event = load_revisions.click(
+            fn=load_revisions_for_ui,
+            inputs=[model_name, revision],
+            outputs=[revision, status],
+            api_name=False,
+        )
+        model_name.submit(
+            fn=load_revisions_for_ui,
+            inputs=[model_name, revision],
+            outputs=[revision, status],
+            api_name=False,
+        )
+        model_name.blur(
+            fn=load_revisions_for_ui,
+            inputs=[model_name, revision],
+            outputs=[revision, status],
+            api_name=False,
+        )
+        run_button.click(
+            fn=run_ner_for_ui,
+            inputs=[text, model_name, revision],
+            outputs=[highlighted, entity_table, status],
+            api_name=False,
+        )
+        text.submit(
+            fn=run_ner_for_ui,
+            inputs=[text, model_name, revision],
+            outputs=[highlighted, entity_table, status],
+            api_name=False,
+        )
+        demo.load(
+            fn=load_revisions_for_ui,
+            inputs=[model_name, revision],
+            outputs=[revision, status],
+            api_name=False,
+        )
+    return demo
+@lru_cache(maxsize=1)
+def get_hf_api() -> HfApi:
+    return HfApi()
+@lru_cache(maxsize=8)
+def get_ner_pipeline(model_name: str, revision: str | None):
+    LOGGER.info("Loading NER pipeline for model=%s revision=%s", model_name, revision)
+    return pipeline(
+        task="token-classification",
+        model=model_name,
+        revision=revision,
+        aggregation_strategy="simple",
+    )
+def create_app() -> FastAPI:
+    app = FastAPI(title="NER UI", version="0.1.0")
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    @app.get("/api/health")
+    async def healthcheck() -> dict[str, str]:
+        return {"status": "ok"}
+    @app.get("/api/models/revisions", response_model=ModelRevisionResponse)
+    async def get_model_revisions(
+        model_name: str = Query(..., min_length=1, description="Hugging Face model repo id"),
+    ) -> ModelRevisionResponse:
+        return get_model_revisions_data(model_name)
+    @app.post("/api/ner", response_model=NerResponse)
+    async def run_ner(request: NerRequest) -> NerResponse:
+        return run_ner_inference(request)
+    demo = build_gradio_app()
+    return gr.mount_gradio_app(app, demo, path="/", theme=GRADIO_THEME, css=GRADIO_CSS)
+# Module-level ASGI application; main() points uvicorn at it as "backend.app:app".
+app = create_app()
+def main() -> None:
+    import uvicorn
+    uvicorn.run("backend.app:app", host="0.0.0.0", port=8000, reload=True)

main.py ADDED Viewed

	@@ -0,0 +1,5 @@

+# Launcher script: running this file calls backend.app.main, the same
+# function the `ner-ui` console script in pyproject.toml points at.
+from backend.app import main
+if __name__ == "__main__":
+    main()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,28 @@

+[build-system]
+requires = ["setuptools>=69"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "ner-ui"
+version = "0.1.0"
+description = "FastAPI and Gradio demo for Hugging Face NER models"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+    "fastapi>=0.115.12",
+    "gradio>=5.25.2",
+    "huggingface-hub>=0.31.1",
+    "jinja2>=3.1.6",
+    "torch>=2.7.0",
+    "transformers>=4.52.0",
+    "uvicorn>=0.34.0",
+]
+[project.scripts]
+ner-ui = "backend.app:main"
+[tool.setuptools]
+py-modules = ["main"]
+[tool.setuptools.packages.find]
+include = ["backend*"]

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff