# Gradio demo app: multilingual zero-shot NER with SauerkrautLM-GLiNER.
from typing import Dict, Union
import sys
from gliner import GLiNER
import gradio as gr
import os
import sys
# Hugging Face access token, read from the HF_KEY environment variable
# (None when the variable is not set).
hf_token = os.environ.get("HF_KEY")

# Load the checkpoint once at import time; `model` is the object returned by
# calling .eval() on the loaded GLiNER instance and is shared by every request.
model = GLiNER.from_pretrained(
    "VAGOsolutions/SauerkrautLM-GLiNER",
    token=hf_token,
)
model = model.eval()
# Demo inputs offered in the UI. Each resulting row is
# [text, comma-separated labels, threshold, nested_ner]; every example is run
# with the nested-NER flag switched off, so only the first three fields vary.
examples = [
    [sample_text, sample_labels, sample_threshold, False]
    for sample_text, sample_labels, sample_threshold in (
        # English Example - Sports
        (
            "Cristiano Ronaldo dos Santos Aveiro (born 5 February 1985) is a Portuguese professional footballer who plays as a forward for and captains both Saudi Pro League club Al Nassr and the Portugal national team. He has won five Ballon d'Or awards, a record three UEFA Men's Player of the Year Awards, and four European Golden Shoes. He has won 33 trophies in his career, including seven league titles, five UEFA Champions Leagues, the UEFA European Championship and the UEFA Nations League.",
            "person, award, date, competitions, teams",
            0.8,
        ),
        # German Example - Politics
        (
            "Angela Merkel war von 2005 bis 2021 Bundeskanzlerin der Bundesrepublik Deutschland. Sie wurde am 17. Juli 1954 in Hamburg geboren und war Mitglied der CDU. Unter ihrer Führung bewältigte Deutschland mehrere Krisen, darunter die Finanzkrise 2008 und die Flüchtlingskrise 2015.",
            "person, position, location, date, organization, event",
            0.8,
        ),
        # French Example - Business
        (
            "Emmanuel Macron est président de la République française depuis mai 2017. Avant sa carrière politique, il a travaillé chez Rothschild & Cie Banque. Il a été ministre de l'Économie sous François Hollande de 2014 à 2016.",
            "person, position, location, date, company, predecessor",
            0.8,
        ),
        # Italian Example - Science
        (
            "Rita Levi-Montalcini ha vinto il Premio Nobel per la Medicina nel 1986 per la scoperta del fattore di crescita nervoso (NGF). Nata a Torino il 22 aprile 1909, ha lavorato presso l'Università di Washington a St. Louis per molti anni.",
            "person, award, date, discovery, location, organization",
            0.8,
        ),
        # Spanish Example - PII Detection
        (
            "El paciente Juan García Martínez, nacido el 15/03/1978, fue admitido en el Hospital Universitario de Madrid. Su correo electrónico es juan.garcia@email.com y su número de teléfono es +34 912 345 678. Reside en Calle Mayor 45, 28013 Madrid.",
            "person, date, organization, email, phone, address",
            0.75,
        ),
    )
]
def ner(
    text, labels: str, threshold: float, nested_ner: bool
) -> Dict[str, Union[str, list]]:
    """Extract entities from ``text`` and shape them for the highlighted-text output.

    Args:
        text: Input text to search for entities.
        labels: Comma-separated entity type names, e.g. ``"person, date"``.
            Whitespace around each name is stripped and empty names are
            dropped, so ``"person, date,"`` yields ``["person", "date"]``.
        threshold: Forwarded unchanged as ``threshold`` to
            ``model.predict_entities``.
        nested_ner: When true, prediction is requested with ``flat_ner=False``;
            otherwise with ``flat_ner=True``.

    Returns:
        A dict with the key ``"text"`` (the input text) and the key
        ``"entities"`` (a list of dicts with ``entity``, ``word``, ``start``,
        ``end`` and ``score``). The list is empty, and the model is not
        called, when the text is blank or no usable label remains.
    """
    # Splitting on "," alone would leave the space from "person, date" glued to
    # every label after the first, and turn a trailing comma into an empty label.
    label_names = [part.strip() for part in (labels or "").split(",")]
    label_names = [name for name in label_names if name]

    # Nothing to predict: skip the model instead of feeding it empty input.
    if not text or not text.strip() or not label_names:
        return {"text": text or "", "entities": []}

    predictions = model.predict_entities(
        text, label_names, flat_ner=not nested_ner, threshold=threshold
    )
    return {
        "text": text,
        "entities": [
            {
                "entity": prediction["label"],
                "word": prediction["text"],
                "start": prediction["start"],
                "end": prediction["end"],
                # Fixed placeholder kept from the original implementation; the
                # model's own confidence is not forwarded to the UI.
                "score": 0,
            }
            for prediction in predictions
        ],
    }
# Landing-page text shown at the top of the demo. The lines stay flush-left so
# the string content handed to gr.Markdown is exactly what it was before.
_DESCRIPTION_MD = """
# SauerkrautLM-GLiNER: Multilingual Zero-Shot Named Entity Recognition
SauerkrautLM-GLiNER is a multilingual GLiNER-style model for **zero-shot named entity recognition (NER)** based on the `jhu-clsp/mmBERT-base` backbone (a ModernBERT-style multilingual encoder).
### Key Features
- **Multilingual Support**: Trained jointly on **English, German, French, Italian, and Spanish**
- **Zero-Shot Entity Recognition**: Identify any entity type without requiring retraining - just provide your custom label list!
- **21k+ Entity Types**: Trained on roughly 21k distinct entity types across multiple domains
- **Superior Performance**: Achieves **+23.02 F1 points** over gliner_multi-v2.1 on multilingual benchmarks
- **General-Purpose**: Works for broad-domain extraction, PII detection, and specialized taxonomies
### Performance Highlights
- **CrossNER + Multilingual Domains**: 55.34 F1 average (vs. 32.32 for gliner_multi-v2.1)
- **PII Detection**: 44.94 F1 average across 5 languages
- **Real-Time Performance**: Fast inference suitable for production applications
### Useful Links
- **Model Page**: [SauerkrautLM-GLiNER](https://huggingface.co/VAGOsolutions/SauerkrautLM-GLiNER) on Hugging Face
- **Demo Space**: [Live Demo](https://huggingface.co/spaces/VAGOsolutions/mmbert_GLiNER_DEMO)
- **Benchmark Dataset**: [gliner-benchmark-multilingual](https://huggingface.co/datasets/VAGOsolutions/gliner-benchmark-multilingual)
"""

with gr.Blocks(title="SauerkrautLM-GLiNER") as demo:
    gr.Markdown(_DESCRIPTION_MD)

    # The text and label boxes start out filled with the first example.
    input_text = gr.Textbox(
        value=examples[0][0],
        label="Text input",
        placeholder="Enter your text here in any of the supported languages (EN, DE, FR, IT, ES)",
        lines=6,
    )

    # Labels, threshold and the nested-NER toggle share one row.
    with gr.Row() as row:
        labels = gr.Textbox(
            value=examples[0][1],
            label="Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            minimum=0,
            maximum=1,
            value=0.8,
            step=0.01,
            label="Threshold",
            info="Lower the threshold to increase how many entities get predicted. Recommended: 0.8 for general NER, 0.75 for PII detection.",
            scale=1,
        )
        nested_ner = gr.Checkbox(
            value=False,
            label="Nested NER",
            info="Allow for nested NER?",
            scale=0,
        )

    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")

    # Every entry point below feeds the same four components into `ner`, in the
    # same order as the columns of each `examples` row.
    ner_inputs = [input_text, labels, threshold, nested_ner]

    examples_component = gr.Examples(
        examples=examples,
        inputs=ner_inputs,
        outputs=output,
        fn=ner,
        cache_examples=True,
        label="Examples",
    )

    # Submitting: pressing Enter in either textbox, releasing the slider,
    # clicking the button or toggling the checkbox all re-run the prediction.
    for _register in (
        input_text.submit,
        labels.submit,
        threshold.release,
        submit_btn.click,
        nested_ner.change,
    ):
        _register(fn=ner, inputs=ner_inputs, outputs=output)
# Enable request queuing on the app, then start the server with debug=True.
# (The stray trailing "|" after the launch call was not valid Python and has
# been removed.)
demo.queue()
demo.launch(debug=True)