Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import logging | |
| from dataclasses import dataclass | |
| from functools import lru_cache | |
| from html import escape | |
| import gradio as gr | |
| from huggingface_hub import HfApi | |
| from transformers import AutoModelForTokenClassification, AutoTokenizer | |
| from transformers.pipelines.token_classification import AggregationStrategy | |
| from gilda_pipeline import GildaNERPipeline | |
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
# Hugging Face collection whose model repos are listed in the model dropdown.
MODEL_COLLECTION_SLUG = "gyorilab/indra-bert"
# Model preselected in the UI when it is present in the collection.
DEFAULT_MODEL_NAME = "gyorilab/variants-ner-modernbert-base"
# Revision used when the caller supplies none, and the fallback entry when a
# repository reports no branches or tags.
DEFAULT_REVISION = "softlabels"
# Sample paragraph preloaded into the input textbox; also the value of the
# first exemplar option.
DEFAULT_TEXT = (
    "In our cohort we analyzed variants affecting neuronal signaling and cytoskeletal stability. Sequencing identified several recurrent protein mutations including TP53 R248W, EGFR L858R, and a truncating MAPT Q336* variant predicted to disrupt microtubule binding. At the DNA level we detected substitutions such as TP53 c.743G>A and EGFR c.2573T>G, along with a small deletion c.152_153del causing a frameshift in downstream transcripts. Structural variation analysis further revealed a copy number gain consistent with chr7:55,019,017-55,242,524, overlapping the EGFR locus and resembling CNV strings commonly reported in PubTator3-style annotations. Population-associated polymorphisms were also present, including rs429358 and rs7412 within APOE, as well as rs1801133 in MTHFR. Together, these protein-altering mutations, nucleotide substitutions, and regional copy number changes suggest combined effects on cellular stress responses, signaling pathways, and metabolic regulation in the studied samples."
)
# (description, sample text) pairs passed as the choices of the "Exemplar text"
# dropdown. The second element is the value handed to select_exemplar_text and
# copied into the input textbox.
EXEMPLAR_OPTIONS = [
    (
        "Default cohort example - mixed protein, DNA, CNV, and rsID mentions",
        DEFAULT_TEXT,
    ),
    (
        "p53 substitution example - single protein mutation described in prose",
        "One example of a protein mutation identified in the cohort was a substitution in the tumor suppressor protein p53 in which the amino acid arginine at position 248 is replaced by tryptophan, a change that alters the structure of the DNA-binding domain and is thought to impair the protein’s ability to regulate target genes involved in cell cycle control.",
    ),
    (
        "Targeted sequencing panel - protein, cDNA, splice, and truncation variants",
        "Targeted sequencing of signaling and DNA repair genes revealed several non-synonymous substitutions affecting conserved residues. In particular, the catalytic cysteine of IDH1 was replaced by arginine, altering the active site chemistry, while a separate substitution replaced valine with glutamic acid in the activation loop of BRAF. At the nucleotide level we also detected splice-proximal and frameshifting events including BRCA1 c.68_69delAG, KRAS c.35G>T, and a donor-site mutation TP53 c.673-1G>A predicted to disrupt normal transcript processing. Additional protein-altering variants included PIK3CA E545K and PTEN R233*, both of which affect key regulators of the PI3K signaling pathway.",
    ),
    (
        "Comprehensive profiling example - dense mix of substitutions, indels, rsIDs, and CNVs",
        "Comprehensive variant profiling revealed a dense cluster of alterations across oncogenic signaling and DNA repair pathways. Protein-level substitutions included KRAS G12D, NRAS Q61R, BRAF V600E, PIK3CA H1047R, IDH2 R140Q, PTEN R130Q, and KIT D816V, each affecting conserved residues within catalytic or regulatory domains. At the nucleotide level, sequencing identified multiple coding and splice-region variants such as KRAS c.35G>A, NRAS c.182A>G, BRCA2 c.5946delT, TP53 c.818G>A, DNMT3A c.2645G>A, NPM1 c.860_863dupTCTG, and FLT3 c.2503G>T. Additional population-linked polymorphisms including rs121913529, rs28934578, rs1042522, and rs121434568 were present in several samples. Structural analysis also revealed a focal copy-number gain reported as chr12:25,380,000-25,465,000 encompassing the KRAS locus and a deletion chr10:89,600,000-89,750,000 overlapping PTEN, suggesting that point mutations, small indels, and regional copy-number changes jointly contribute to the genomic landscape observed in these specimens.",
    ),
    (
        "Plasmid operon example - bidirectional promoter and trfA/trb operon transcription",
        "As in IncP alpha plasmids , these operons are transcribed from a bidirectional promoter region consisting of trfAp for the trfA operon and trbAp and trbBp for the trb operon ."
    ),
    (
        "Premature ovarian failure study - meiotic gene mutations in MSH5 and DMC1",
        "Genetic investigation of four meiotic genes in women with premature ovarian failure. OBJECTIVE: The goal of this study was to determine whether mutations of meiotic genes, such as disrupted meiotic cDNA (DMC1), MutS homolog (MSH4), MSH5, and S. cerevisiae homolog (SPO11), were associated with premature ovarian failure (POF). DESIGN: Case-control study. METHODS: Blood sampling, karyotype, hormonal dosage, ultrasound, and ovarian biopsy were carried out on most patients. However, the main outcome measure was the sequencing of genomic DNA from peripheral blood samples of 41 women with POF and 36 fertile women (controls). RESULTS: A single heterozygous missense mutation, substitution of a cytosine residue with thymidine in exon 2 of MSH5, was found in two Caucasian women in whom POF developed at 18 and 36 years of age. This mutation resulted in replacement of a non-polar amino acid (proline) with a polar amino acid (serine) at position 29 (P29S). Neither 36 control women nor 39 other patients with POF possessed this genetic perturbation. Another POF patient of African origin showed a homozygous nucleotide change in the tenth of DMC1 gene that led to an alteration of the amino acid composition of the protein (M200V). CONCLUSIONS: The symptoms of infertility observed in the DMC1 homozygote mutation carrier and in both patients with a heterozygous substitution in exon 2 of the MSH5 gene provide indirect evidence of the role of genes involved in meiotic recombination in the regulation of ovarian function. MSH5 and DMC1 mutations may be one explanation for POF, albeit uncommon."
    ),
    (
        "Kinase signaling example - MEKK/JNK1 pathway and MMP-9 promoter regulation",
        "Similarly , interfering with MEKK , which lies upstream of JNK1 , using a dominant negative expression vector reduced MMP - 9 promoter activity over the same concentration range which repressed the AP - 1 - thymidine kinase CAT reporter construct "
    ),
    (
        "HIV/HBV co-infection study - lamivudine resistance substitutions in HBV polymerase",
        "High frequency of lamivudine resistance mutations in Brazilian patients co-infected with HIV and hepatitis B. This study analyzed the genotype distribution and frequency of lamivudine (LAM) and tenofovir (TDF) resistance mutations in a group of patients co-infected with HIV and hepatitis B virus (HBV). A cross-sectional study of 847 patients with HIV was conducted. Patients provided blood samples for HBsAg detection. The load of HBV was determined using an \"in-house\" real-time polymerase chain reaction. HBV genotypes/subgenotypes, antiviral resistance, basal core promoter (BCP), and precore mutations were detected by DNA sequencing. Twenty-eight patients with co-infection were identified. The distribution of HBV genotypes among these patients was A (n = 9; 50%), D (n = 4; 22.2%), G (n = 3; 16.7%), and F (n = 2; 11.1%). Eighteen patients were treated with LAM and six patients were treated with LAM plus TDF. The length of exposure to LAM and TDF varied from 4 to 216 months. LAM resistance substitutions (rtL180M + rtM204V) were detected in 10 (50%) of the 20 patients with viremia. This pattern and an accompanying rtV173L mutation was found in four patients. Three patients with the triple polymerase substitution pattern (rtV173L + rtL180M + rtM204V) had associated changes in the envelope gene (sE164D + sI195M). Mutations in the BCP region (A1762T, G1764A) and in the precore region (G1896A, G1899A) were also found. No putative TDF resistance substitution was detected. The data suggest that prolonged LAM use is associated with the emergence of particular changes in the HBV genome, including substitutions that may elicit a vaccine escape phenotype. No putative TDF resistance change was detected after prolonged use of TDF."
    )
]
# Background colors for highlighted entities. render_highlighted_html assigns
# them to labels in order of first appearance and wraps around when there are
# more distinct labels than colors.
LABEL_COLORS = [
    "#d4a373",
    "#2a9d8f",
    "#577590",
    "#e76f51",
    "#8d99ae",
    "#6a994e",
]
# Base Gradio theme passed to gr.Blocks in build_demo; GRADIO_CSS is supplied
# alongside it as custom CSS.
GRADIO_THEME = gr.themes.Base(
    primary_hue="cyan",
    secondary_hue="blue",
    neutral_hue="slate",
    radius_size="md",
)
# Custom stylesheet passed to gr.Blocks(css=...). It declares light-mode CSS
# variables on :root, overrides them for the dark-mode selectors, and styles
# the classes emitted by this module's HTML (hero, panel, result-card,
# entity-highlight, entity-link, entity-chip, empty-state) as well as Gradio
# widgets addressed through data-testid selectors.
GRADIO_CSS = """
:root {
--page-bg: #f3f3f3;
--page-text: #66717d;
--muted-text: #7d858e;
--accent-text: #4ea9b8;
--panel-bg: #ffffff;
--panel-border: #d5dadd;
--panel-shadow: 0 1px 2px rgba(76, 93, 108, 0.08);
--card-bg: #f7f8f8;
--card-border: #dde3e6;
--field-bg: #f7f8f8;
--field-border: #d7dde0;
--field-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.9);
--highlight-text: #20303a;
--empty-bg: #eaf4f6;
--link-color: #6aa0d6;
--secondary-button-bg: #eef3f4;
--secondary-button-border: #d3dade;
--table-header-bg: #eef3f4;
--table-row-alt: #fafbfb;
--focus-ring: 0 0 0 3px rgba(83, 173, 188, 0.14);
}
.dark,
body.dark,
html.dark,
[data-theme="dark"],
.dark .gradio-container,
body.dark .gradio-container,
html.dark .gradio-container,
[data-theme="dark"] .gradio-container,
.gradio-container.dark {
--page-bg: #111922;
--page-text: #d4dde4;
--muted-text: #aeb8c2;
--accent-text: #73c6d2;
--panel-bg: #18232f;
--panel-border: #2a3b4b;
--panel-shadow: 0 10px 30px rgba(0, 0, 0, 0.22);
--card-bg: #111b24;
--card-border: #2a3b4b;
--field-bg: #213142;
--field-border: #314658;
--field-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.03);
--highlight-text: #0d1821;
--empty-bg: #17313a;
--link-color: #8bb8e8;
--secondary-button-bg: #223342;
--secondary-button-border: #314658;
--table-header-bg: #213142;
--table-row-alt: #15212c;
--focus-ring: 0 0 0 3px rgba(115, 198, 210, 0.2);
}
body, .gradio-container {
background: var(--page-bg);
}
.gradio-container {
font-family: "Segoe UI", "Helvetica Neue", Arial, sans-serif;
color: var(--page-text);
transition: background-color 0.2s ease, color 0.2s ease;
}
.hero {
padding: 1rem 0 0.4rem;
}
.eyebrow {
margin: 0 0 0.5rem;
text-transform: uppercase;
letter-spacing: 0.12em;
font-size: 0.74rem;
color: var(--accent-text);
}
.hero-title {
margin: 0;
font-size: clamp(1.85rem, 3.2vw, 2.9rem);
line-height: 1.05;
max-width: 18ch;
font-weight: 600;
letter-spacing: -0.02em;
color: var(--page-text);
}
.hero-copy {
max-width: 60ch;
color: var(--muted-text);
}
.panel {
border: 1px solid var(--panel-border);
background: var(--panel-bg);
box-shadow: var(--panel-shadow);
border-radius: 10px;
padding: 1rem;
transition: background-color 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease;
}
.panel > .gap {
gap: 0.9rem !important;
}
.result-card {
min-height: 180px;
padding: 1.1rem;
border-radius: 8px;
background: var(--card-bg);
border: 1px solid var(--card-border);
transition: background-color 0.2s ease, border-color 0.2s ease;
}
.result-text {
margin: 0;
font-size: 1rem;
white-space: pre-wrap;
line-height: 1.65;
color: var(--page-text);
}
.entity-highlight {
display: inline-flex;
align-items: center;
gap: 0.35rem;
margin: 0 0.08rem;
padding: 0.15rem 0.35rem;
border-radius: 6px;
color: var(--highlight-text);
position: relative;
}
.entity-highlight[data-tooltip] {
cursor: help;
}
.entity-highlight[data-tooltip]::after {
content: attr(data-tooltip);
position: absolute;
bottom: calc(100% + 6px);
left: 50%;
transform: translateX(-50%);
background: #1a2733;
color: #d4dde4;
padding: 0.3rem 0.6rem;
border-radius: 5px;
font-size: 0.75rem;
font-family: monospace;
white-space: nowrap;
opacity: 0;
pointer-events: none;
transition: opacity 0.15s ease;
z-index: 100;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.25);
}
.entity-highlight[data-tooltip]:hover::after {
opacity: 1;
}
.entity-highlight[data-tooltip] {
box-shadow: inset 0 0 0 1.5px rgba(0, 0, 0, 0.18);
}
.entity-link {
text-decoration: none;
color: inherit;
}
.entity-link .entity-highlight {
cursor: pointer;
transition: filter 0.15s ease, transform 0.15s ease;
}
.entity-link:hover .entity-highlight {
filter: brightness(1.1);
transform: translateY(-1px);
}
.entity-chip {
font-size: 0.72rem;
font-weight: 700;
text-transform: uppercase;
}
.empty-state {
margin: 0;
border-radius: 8px;
padding: 0.8rem 1rem;
background: var(--empty-bg);
color: var(--page-text);
}
.gradio-container a {
color: var(--link-color);
}
.gradio-container table {
color: var(--page-text);
}
[data-testid="block-label"] {
color: var(--page-text) !important;
font-size: 0.84rem !important;
font-weight: 600 !important;
letter-spacing: 0.01em;
}
[data-testid="textbox"],
[data-testid="dropdown"],
[data-testid="textbox"] > label,
[data-testid="dropdown"] > label,
[data-testid="dataframe"] {
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
[data-testid="textbox"] textarea,
[data-testid="textbox"] input,
[data-testid="dropdown"] button {
background: var(--field-bg) !important;
color: var(--page-text) !important;
border: 1px solid var(--field-border) !important;
border-radius: 8px !important;
box-shadow: var(--field-shadow) !important;
}
[data-testid="textbox"] textarea,
[data-testid="textbox"] input {
padding: 0.8rem 0.9rem !important;
}
[data-testid="dropdown"] button {
min-height: 3rem !important;
}
[data-testid="textbox"] textarea:focus,
[data-testid="textbox"] input:focus,
[data-testid="dropdown"] button:focus,
[data-testid="dropdown"] button[aria-expanded="true"] {
border-color: var(--accent-text) !important;
box-shadow: var(--focus-ring) !important;
}
[data-testid="dropdown-options"] {
background: var(--panel-bg) !important;
border: 1px solid var(--field-border) !important;
border-radius: 8px !important;
box-shadow: 0 8px 24px rgba(76, 93, 108, 0.12) !important;
}
[data-testid="dropdown-options"] [role="option"] {
color: var(--page-text) !important;
}
[data-testid="dropdown-options"] [aria-selected="true"] {
background: var(--empty-bg) !important;
}
button.primary,
button.lg.primary {
background: #53adbc !important;
border: 1px solid #53adbc !important;
color: #ffffff !important;
border-radius: 8px !important;
box-shadow: none !important;
}
button.secondary,
button.lg.secondary {
background: var(--secondary-button-bg) !important;
border: 1px solid var(--secondary-button-border) !important;
color: var(--page-text) !important;
border-radius: 8px !important;
box-shadow: none !important;
}
button.primary:hover,
button.secondary:hover {
filter: brightness(0.98);
}
button.primary:focus,
button.secondary:focus {
box-shadow: var(--focus-ring) !important;
}
[data-testid="dataframe"] {
overflow: hidden !important;
border: 1px solid var(--field-border) !important;
border-radius: 8px !important;
background: var(--panel-bg) !important;
}
[data-testid="dataframe"] table {
background: var(--panel-bg) !important;
}
[data-testid="dataframe"] thead th {
background: var(--table-header-bg) !important;
color: var(--page-text) !important;
border-bottom: 1px solid var(--field-border) !important;
font-weight: 600 !important;
}
[data-testid="dataframe"] tbody td {
color: var(--page-text) !important;
background: var(--panel-bg) !important;
border-color: var(--card-border) !important;
}
[data-testid="dataframe"] tbody tr:nth-child(even) td {
background: var(--table-row-alt) !important;
}
[data-testid="markdown"] p,
.gr-markdown p {
color: var(--muted-text) !important;
}
"""
@dataclass
class EntityPrediction:
    """One predicted entity span together with its optional top grounding.

    Declared as a dataclass so that instances can be built with keyword
    arguments (as run_ner_inference does) and so the three grounding fields
    default to empty strings when no grounding match is available.
    """

    # Entity group label reported by the NER pipeline.
    label: str
    # Prediction score as a float; rendered as a percentage in the table.
    score: float
    # Start and end character offsets of the span within the analyzed text.
    start: int
    end: int
    # Surface text of the span, i.e. the analyzed text sliced at start:end.
    text: str
    # Database namespace, identifier and entry name of the first grounding
    # match; all three stay empty when the prediction has no grounding.
    ground_db: str = ""
    ground_id: str = ""
    ground_name: str = ""
def get_hf_api() -> HfApi:
    """Build a Hugging Face Hub API client using its default configuration."""
    client = HfApi()
    return client
def get_collection_model_names() -> list[str]:
    """List the model repository ids contained in the configured collection.

    Returns:
        The ``item_id`` of every collection item whose ``item_type`` is
        ``"model"``, in collection order.

    Raises:
        ValueError: If the collection contains no model items.
    """
    collection = get_hf_api().get_collection(MODEL_COLLECTION_SLUG)
    model_names: list[str] = []
    for entry in collection.items:
        # Collections can hold other item types; only model repos are wanted.
        if entry.item_type == "model":
            model_names.append(entry.item_id)
    if model_names:
        return model_names
    raise ValueError(f"No model repos found in Hugging Face collection '{MODEL_COLLECTION_SLUG}'.")
@lru_cache(maxsize=2)
def get_ner_pipeline(model_name: str, revision: str | None):
    """Load (or reuse) the grounding NER pipeline for a model revision.

    Loading weights and a tokenizer is by far the most expensive step of a
    request, so the result is memoized per ``(model_name, revision)``. The
    cache is deliberately small because every entry keeps a full model in
    memory.

    Args:
        model_name: Hub repository id of the token-classification model.
        revision: Branch or tag to load; ``None`` is forwarded unchanged to
            ``from_pretrained``.

    Returns:
        A ``GildaNERPipeline`` configured with simple span aggregation.
    """
    # Logged only on a cache miss, i.e. when a model is actually loaded.
    LOGGER.info("Loading NER pipeline for model=%s revision=%s", model_name, revision)
    model = AutoModelForTokenClassification.from_pretrained(model_name, revision=revision)
    tokenizer = AutoTokenizer.from_pretrained(model_name, revision=revision)
    return GildaNERPipeline(
        model=model,
        tokenizer=tokenizer,
        aggregation_strategy=AggregationStrategy.SIMPLE,
    )
def get_model_revisions_data(model_name: str) -> list[tuple[str, str]]:
    """Collect the branches and tags available for a model repository.

    Args:
        model_name: Hub repository id to inspect.

    Returns:
        ``(name, kind)`` pairs with branches first (kind ``"branch"``) and
        tags after them (kind ``"tag"``). When the repository reports
        neither, a single ``(DEFAULT_REVISION, "branch")`` entry is returned.
    """
    refs = get_hf_api().list_repo_refs(model_name, repo_type="model")
    branch_rows = [(branch.name, "branch") for branch in refs.branches]
    tag_rows = [(tag.name, "tag") for tag in refs.tags]
    revisions = branch_rows + tag_rows
    if not revisions:
        return [(DEFAULT_REVISION, "branch")]
    return revisions
def run_ner_inference(text: str, model_name: str, revision: str | None) -> list[EntityPrediction]:
    """Run the NER pipeline over ``text`` and convert its output to records.

    Args:
        text: Text to annotate; span offsets refer to this string.
        model_name: Hub repository id of the model to use.
        revision: Model revision passed on to get_ner_pipeline.

    Returns:
        One EntityPrediction per pipeline prediction, in pipeline order. The
        grounding fields are taken from the first entry of the prediction's
        ``"grounding"`` list and are empty strings when there is none.
    """
    ner_pipeline = get_ner_pipeline(model_name, revision)
    records: list[EntityPrediction] = []
    for prediction in ner_pipeline(text):
        groundings = prediction.get("grounding", [])
        best = groundings[0] if groundings else None

        label = prediction["entity_group"]
        score = float(prediction["score"])
        start = int(prediction["start"])
        end = int(prediction["end"])
        # Slice with the raw offsets so the surface text matches the pipeline span.
        surface = text[prediction["start"] : prediction["end"]]

        if best:
            ground_db = best.term.db
            ground_id = best.term.id
            ground_name = best.term.entry_name
        else:
            ground_db = ""
            ground_id = ""
            ground_name = ""

        records.append(
            EntityPrediction(
                label=label,
                score=score,
                start=start,
                end=end,
                text=surface,
                ground_db=ground_db,
                ground_id=ground_id,
                ground_name=ground_name,
            )
        )
    return records
| _DB_URL_TEMPLATES: dict[str, str] = { | |
| "HGNC": "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:{id}", | |
| "UP": "https://www.uniprot.org/uniprot/{id}", | |
| "CHEBI": "https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:{id}", | |
| "GO": "https://amigo.geneontology.org/amigo/term/GO:{id}", | |
| "MESH": "https://meshb.nlm.nih.gov/record/ui?ui={id}", | |
| "HP": "https://hpo.jax.org/app/browse/term/HP:{id}", | |
| "DOID": "https://www.disease-ontology.org/term/DOID:{id}", | |
| "NCBIGENE": "https://www.ncbi.nlm.nih.gov/gene/{id}", | |
| "taxonomy": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id={id}", | |
| } | |
| def _ground_url(db: str, entity_id: str) -> str: | |
| template = _DB_URL_TEMPLATES.get(db, "") | |
| return template.format(id=entity_id) if template else "" | |
def render_highlighted_html(text: str, entities: list[EntityPrediction]) -> str:
    """Render ``text`` as an HTML card with predicted entities highlighted.

    Every piece of text and every attribute value is HTML-escaped. Each
    entity becomes a ``<mark>`` with a label chip; grounded entities also get
    a ``data-tooltip`` of the form ``DB:ID`` and, when a URL template exists
    for the database, are wrapped in a link that opens in a new tab.

    Entities are processed in offset order, and a span that starts before
    the end of the previously rendered span is skipped, so unordered or
    overlapping predictions cannot duplicate parts of the text.

    Args:
        text: The analyzed text that the entity offsets refer to.
        entities: Predicted spans to highlight.

    Returns:
        An HTML fragment wrapped in a ``result-card`` div. Empty ``text``
        yields a prompt message; an empty entity list yields the escaped
        text without highlights.
    """
    if not text:
        return '<div class="result-card"><p class="empty-state">Enter text to annotate.</p></div>'
    if not entities:
        return (
            '<div class="result-card">'
            f'<p class="result-text">{escape(text)}</p>'
            "</div>"
        )

    label_colors: dict[str, str] = {}
    fragments: list[str] = []
    cursor = 0
    for entity in sorted(entities, key=lambda item: (item.start, item.end)):
        if entity.start < cursor:
            # Overlaps the span rendered just before it; emitting it would
            # repeat characters that are already in the output.
            continue

        # A label keeps the color it was given on first use; colors cycle
        # once there are more labels than palette entries.
        label_color = label_colors.setdefault(
            entity.label,
            LABEL_COLORS[len(label_colors) % len(LABEL_COLORS)],
        )
        if cursor < entity.start:
            fragments.append(escape(text[cursor : entity.start]))

        entity_text = escape(text[entity.start : entity.end])
        entity_label = escape(entity.label)

        tooltip_attr = ""
        link_open = ""
        link_close = ""
        if entity.ground_db and entity.ground_id:
            tooltip_val = escape(f"{entity.ground_db}:{entity.ground_id}")
            tooltip_attr = f' data-tooltip="{tooltip_val}"'
            url = _ground_url(entity.ground_db, entity.ground_id)
            if url:
                link_open = f'<a href="{escape(url)}" target="_blank" rel="noopener noreferrer" class="entity-link">'
                link_close = "</a>"

        fragments.append(
            link_open
            + '<mark class="entity-highlight" '
            f'style="background-color: {label_color};"'
            f"{tooltip_attr}>"
            f"{entity_text}"
            f'<span class="entity-chip">{entity_label}</span>'
            "</mark>"
            + link_close
        )
        cursor = entity.end

    if cursor < len(text):
        fragments.append(escape(text[cursor:]))
    return (
        '<div class="result-card">'
        f'<p class="result-text">{"".join(fragments)}</p>'
        "</div>"
    )
def render_entity_table(entities: list[EntityPrediction]) -> list[list[str]]:
    """Convert predictions into string rows for the results table.

    Args:
        entities: Predictions to tabulate.

    Returns:
        One row per entity, in input order, with the columns label, text,
        start, end, score (percentage with one decimal), grounding database,
        grounding id and grounding name.
    """
    rows: list[list[str]] = []
    for item in entities:
        confidence = f"{item.score * 100:.1f}%"
        rows.append(
            [
                item.label,
                item.text,
                str(item.start),
                str(item.end),
                confidence,
                item.ground_db,
                item.ground_id,
                item.ground_name,
            ]
        )
    return rows
def load_model_options_for_ui() -> tuple[gr.Dropdown, str]:
    """Build the model dropdown update from the configured collection.

    Returns:
        A ``(dropdown, selected_model)`` pair. The selection is
        DEFAULT_MODEL_NAME when the collection contains it, otherwise the
        first model listed.

    Raises:
        gr.Error: If the collection's model names cannot be fetched.
    """
    try:
        model_names = get_collection_model_names()
    except Exception as exc:
        raise gr.Error(f"Unable to fetch models for '{MODEL_COLLECTION_SLUG}': {exc}") from exc

    if DEFAULT_MODEL_NAME in model_names:
        model_value = DEFAULT_MODEL_NAME
    else:
        model_value = model_names[0]

    dropdown = gr.Dropdown(choices=model_names, value=model_value)
    return dropdown, model_value
def load_controls_for_ui() -> tuple[gr.Dropdown, gr.Dropdown, str, str]:
    """Populate both dropdowns, the status line and the code snippet.

    Returns:
        ``(model_dropdown, revision_dropdown, status, pipeline_snippet)``,
        where the last three come from loading the revisions of the selected
        model with DEFAULT_REVISION as the preferred choice.
    """
    model_dropdown, selected_model = load_model_options_for_ui()
    revision_controls = load_revisions_for_ui(selected_model, DEFAULT_REVISION)
    return (model_dropdown, *revision_controls)
def load_revisions_for_ui(model_name: str | None, selected_revision: str | None) -> tuple[gr.Dropdown, str, str]:
    """Refresh the revision dropdown for the chosen model.

    Args:
        model_name: Model chosen in the UI; surrounding whitespace is ignored.
        selected_revision: Revision currently selected, kept if the model
            offers it.

    Returns:
        ``(revision_dropdown, status_message, pipeline_snippet)``. The
        selected revision is, in order of preference, ``selected_revision``,
        DEFAULT_REVISION, or the first revision listed.

    Raises:
        gr.Error: If the model name is blank or the revisions cannot be
            fetched.
    """
    trimmed_model_name = (model_name or "").strip()
    if not trimmed_model_name:
        raise gr.Error("Model name is required.")

    try:
        revisions = get_model_revisions_data(trimmed_model_name)
    except Exception as exc:
        raise gr.Error(f"Unable to fetch revisions for '{trimmed_model_name}': {exc}") from exc

    revision_choices = []
    revision_names = []
    for name, kind in revisions:
        # Shown as "name (kind)" while the plain name is the submitted value.
        revision_choices.append((f"{name} ({kind})", name))
        revision_names.append(name)

    for candidate in (selected_revision, DEFAULT_REVISION):
        if candidate in revision_names:
            revision_value = candidate
            break
    else:
        revision_value = revision_names[0]

    dropdown = gr.Dropdown(choices=revision_choices, value=revision_value)
    status_message = f"Loaded {len(revision_choices)} revision(s) for `{trimmed_model_name}`."
    snippet = render_pipeline_snippet(trimmed_model_name, revision_value)
    return dropdown, status_message, snippet
def run_ner_for_ui(text: str, model_name: str | None, revision: str | None) -> tuple[str, list[list[str]], str]:
    """Validate the UI inputs, run NER and build the three UI outputs.

    Args:
        text: Text from the input box; surrounding whitespace is stripped
            before inference, so offsets refer to the stripped text.
        model_name: Model chosen in the UI.
        revision: Revision chosen in the UI; DEFAULT_REVISION is used when it
            is empty or ``None``.

    Returns:
        ``(highlighted_html, table_rows, status_message)``.

    Raises:
        gr.Error: If the model name or text is blank, or if inference fails
            (the original exception is chained as the cause).
    """
    trimmed_model_name = (model_name or "").strip()
    # Treat a missing text payload like blank input so the user sees the
    # validation message rather than an AttributeError from None.strip().
    trimmed_text = (text or "").strip()
    if not trimmed_model_name:
        raise gr.Error("Model name is required.")
    if not trimmed_text:
        raise gr.Error("Input text is required.")

    # Resolve the fallback once so inference and the status line always agree.
    active_revision = revision or DEFAULT_REVISION
    try:
        entities = run_ner_inference(
            text=trimmed_text,
            model_name=trimmed_model_name,
            revision=active_revision,
        )
    except Exception as exc:
        # Keep the traceback in the server log; the UI only shows str(exc).
        LOGGER.exception(
            "NER inference failed for model=%s revision=%s",
            trimmed_model_name,
            active_revision,
        )
        raise gr.Error(str(exc)) from exc

    return (
        render_highlighted_html(trimmed_text, entities),
        render_entity_table(entities),
        f"Found {len(entities)} entity span(s) using `{trimmed_model_name}` at revision `{active_revision}`.",
    )
def select_exemplar_text(exemplar_text: str) -> str:
    """Return the chosen exemplar text unchanged.

    Used as the exemplar dropdown's change handler, whose output is the
    input textbox.
    """
    selected_text = exemplar_text
    return selected_text
def render_pipeline_snippet(model_name: str | None, revision: str | None) -> str:
    """Produce a copy-pasteable Python example for the selected model.

    The example mirrors how this module builds its pipeline. Its print loop
    only reads grounding attributes when a grounding match exists, so it
    runs on predictions without grounding instead of failing on ``None``.

    Args:
        model_name: Model to show; blank or ``None`` falls back to
            DEFAULT_MODEL_NAME.
        revision: Revision to show; empty or ``None`` falls back to
            DEFAULT_REVISION.

    Returns:
        Python source code as a string.
    """
    active_model_name = (model_name or "").strip() or DEFAULT_MODEL_NAME
    active_revision = revision or DEFAULT_REVISION
    return f"""from transformers import AutoModelForTokenClassification, AutoTokenizer
from transformers.pipelines.token_classification import AggregationStrategy
from gilda_pipeline import GildaNERPipeline
model_name = "{active_model_name}"
revision = "{active_revision}"
ner = GildaNERPipeline(
    model=AutoModelForTokenClassification.from_pretrained(model_name, revision=revision),
    tokenizer=AutoTokenizer.from_pretrained(model_name, revision=revision),
    aggregation_strategy=AggregationStrategy.SIMPLE,
)
text = "TP53 R248W and EGFR c.2573T>G were detected in the sample."
for prediction in ner(text):
    top = prediction["grounding"][0] if prediction["grounding"] else None
    grounding = (top.term.db, top.term.id, top.term.entry_name) if top else ("—",)
    print(prediction["entity_group"], prediction["word"], *grounding)
"""
def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks app: layout, widgets and event wiring.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from statement order because the original indentation was not
    # available — confirm which widgets belong to the inner row and to each
    # column.
    with gr.Blocks(title="INDRA BERT Variant NER Explorer", theme=GRADIO_THEME, css=GRADIO_CSS) as demo:
        # Static page header.
        gr.HTML(
            """
<section class="hero">
<p class="eyebrow">INDRA BERT Variant NER</p>
<h1 class="hero-title">Extract variant mentions from biomedical text with INDRA BERT models.</h1>
<p class="hero-copy">
Choose a model from the gyorilab/indra-bert collection, pick a Hub revision,
and test how each checkpoint tags proteins, DNA variants, rsIDs, and copy-number spans.
</p>
</section>
"""
        )
        with gr.Row(equal_height=False):
            # Input panel: model/revision selection, sample text and code example.
            with gr.Column(scale=4, elem_classes=["panel"]):
                with gr.Row():
                    # Both dropdowns start empty; demo.load fills them below.
                    model_name = gr.Dropdown(
                        label="Model name",
                        choices=[],
                        value=None,
                        allow_custom_value=False,
                    )
                    revision = gr.Dropdown(
                        label="Revision",
                        choices=[],
                        value=None,
                        allow_custom_value=False,
                    )
                exemplar = gr.Dropdown(
                    label="Exemplar text",
                    choices=EXEMPLAR_OPTIONS,
                    value=DEFAULT_TEXT,
                    allow_custom_value=False,
                    info="Choose a preloaded sample. Each option includes a short description.",
                )
                text = gr.Textbox(
                    label="Input text",
                    value=DEFAULT_TEXT,
                    lines=10,
                    placeholder="Paste a sentence or paragraph to annotate.",
                )
                run_button = gr.Button("Run NER", variant="primary")
                pipeline_snippet = gr.Code(
                    label="Python pipeline example",
                    value=render_pipeline_snippet(DEFAULT_MODEL_NAME, DEFAULT_REVISION),
                    language="python",
                    interactive=False,
                    lines=12,
                )
            # Output panel: status line, highlighted text and entity table.
            with gr.Column(scale=5, elem_classes=["panel"]):
                status = gr.Markdown("Loading available revisions...")
                highlighted = gr.HTML(
                    '<div class="result-card"><p class="empty-state">Run NER to see highlighted predictions.</p></div>',
                    label="Highlighted text",
                )
                # Column order matches the rows built by render_entity_table.
                entity_table = gr.Dataframe(
                    headers=["Label", "Text", "Start", "End", "Score", "DB", "ID", "Name"],
                    datatype=["str", "str", "str", "str", "str", "str", "str", "str"],
                    row_count=(0, "dynamic"),
                    col_count=(8, "fixed"),
                    interactive=False,
                    label="Predicted entities",
                )
        # Changing the model reloads its revisions and refreshes the snippet.
        model_name.change(
            fn=load_revisions_for_ui,
            inputs=[model_name, revision],
            outputs=[revision, status, pipeline_snippet],
            api_name=False,
        )
        # Changing the revision only regenerates the code example.
        revision.change(
            fn=render_pipeline_snippet,
            inputs=[model_name, revision],
            outputs=[pipeline_snippet],
            api_name=False,
        )
        # Picking an exemplar copies its text into the input box.
        exemplar.change(
            fn=select_exemplar_text,
            inputs=[exemplar],
            outputs=[text],
            api_name=False,
        )
        # The button and the textbox submit event trigger the same inference handler.
        run_button.click(
            fn=run_ner_for_ui,
            inputs=[text, model_name, revision],
            outputs=[highlighted, entity_table, status],
            api_name=False,
        )
        text.submit(
            fn=run_ner_for_ui,
            inputs=[text, model_name, revision],
            outputs=[highlighted, entity_table, status],
            api_name=False,
        )
        # On page load, populate both dropdowns, the status line and the snippet.
        demo.load(
            fn=load_controls_for_ui,
            outputs=[model_name, revision, status, pipeline_snippet],
            api_name=False,
        )
    return demo
# Module-level Blocks instance, constructed when the module is imported.
demo = build_demo()


def main() -> None:
    """Launch the demo server, listening on 0.0.0.0 port 7860."""
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)


if __name__ == "__main__":
    main()