DavidGF committed on
Commit
d3fc638
·
verified ·
1 Parent(s): 9e59dc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -30
app.py CHANGED
@@ -6,35 +6,42 @@ import os
6
  import sys
7
 
8
  hf_token = os.getenv("HF_KEY")
9
- model = GLiNER.from_pretrained("VAGOsolutions/mmbert_GLiNER", token=hf_token).eval()
10
 
11
 
12
  examples = [
13
- # English Example
14
  [
15
- "[EN] On March 15, 2024, Dr. Sarah Johnson from Boston General Hospital diagnosed patient Michael Thompson, born on June 10, 1980 in New York, with pneumonia. The patient resides at 123 Main Street, Boston, and was prescribed a 7-day course of Amoxicillin 500mg.",
16
- "date, doctor, hospital, diagnosis, patient, date of birth, place of birth, address, prescription, duration, medication",
17
- 0.75,
18
  False,
19
  ],
20
- # German Example
21
  [
22
- "[DE] Die Firma TechSolutions GmbH, mit Sitz in der Musterstraße 45, Berlin, hat am 20.04.2024 einen Vertrag über 50.000€ mit der Innovations AG abgeschlossen. Der Geschäftsführer Max Müller und die Projektleiterin Anna Schmidt werden das Projekt bis zum 30.06.2024 abschließen.",
23
- "firma, adresse, datum, vertrag, betrag, partner, geschäftsführer, projektleiter, deadline",
24
- 0.75,
25
  False,
26
  ],
27
- # Italian Example
28
  [
29
- "[IT] La ditta Giardini Belli S.p.A., con partita IVA 01234567890, ha emesso una fattura di 500€ per la consulenza giuridica fornita dall'avvocato Giulia Bianchi, il cui studio si trova in Piazza del Duomo, Milano, dal giorno 01/04/2024.",
30
- "organizzazione, partita iva, importo, avvocato, indirizzo, data",
31
- 0.75,
32
  False,
33
  ],
34
- # Spanish Example
35
  [
36
- "[ES] El 12/05/2024, el Banco Nacional de España aprobó un préstamo hipotecario de 300.000€ a favor de Carlos García, empleado en la empresa Tecnología Avanzada S.L., con sede en Calle Gran Vía 25, Madrid. El préstamo tiene un plazo de amortización de 25 años y está garantizado por la propiedad ubicada en Avenida de la Paz 10, Barcelona.",
37
- "fecha, banco, tipo de préstamo, importe, beneficiario, empleador, dirección empresa, plazo, garantía, dirección propiedad",
 
 
 
 
 
 
 
38
  0.75,
39
  False,
40
  ],
@@ -60,30 +67,36 @@ def ner(
60
  ],
61
  }
62
 
63
- with gr.Blocks(title="GLiNER-base") as demo:
64
  gr.Markdown(
65
  """
66
- # GLiNER Multilingual Named Entity Recognition Model
 
 
67
 
68
  ### Key Features
69
 
70
- - **Multilingual Support**: The model supports multiple languages including English, German, Italian, Spanish, and more
71
- - **Zero-Shot Entity Recognition**: Identify any entity type without requiring retraining
72
- - **Efficient Architecture**: Lightweight and suitable for resource-constrained environments
73
- - **Real-Time Performance**: Fast inference suitable for production applications
 
74
 
75
- ### Performance
76
 
77
- - **Inference Time**: Optimized for both CPU and GPU environments, delivering fast and efficient entity recognition
78
- - **Accuracy**: High precision and recall across multiple languages and entity types
 
79
 
80
- ### Useful Link
81
- - **Model Page**: [mmbert_GLiNER](https://huggingface.co/VAGOsolutions/mmbert_GLiNER) on Hugging Face
 
 
82
  """
83
  )
84
 
85
  input_text = gr.Textbox(
86
- value=examples[0][0], label="Text input", placeholder="Enter your text here (optional: prefix with [EN], [DE], [IT], [ES] for language tag)", lines=6
87
  )
88
  with gr.Row() as row:
89
  labels = gr.Textbox(
@@ -95,10 +108,10 @@ with gr.Blocks(title="GLiNER-base") as demo:
95
  threshold = gr.Slider(
96
  0,
97
  1,
98
- value=0.75,
99
  step=0.01,
100
  label="Threshold",
101
- info="Lower the threshold to increase how many entities get predicted.",
102
  scale=1,
103
  )
104
  nested_ner = gr.Checkbox(
 
6
  import sys
7
 
8
  hf_token = os.getenv("HF_KEY")
9
+ model = GLiNER.from_pretrained("VAGOsolutions/SauerkrautLM-GLiNER", token=hf_token).eval()
10
 
11
 
12
  examples = [
13
+ # English Example - Sports
14
  [
15
+ "Cristiano Ronaldo dos Santos Aveiro (born 5 February 1985) is a Portuguese professional footballer who plays as a forward for and captains both Saudi Pro League club Al Nassr and the Portugal national team. He has won five Ballon d'Or awards, a record three UEFA Men's Player of the Year Awards, and four European Golden Shoes. He has won 33 trophies in his career, including seven league titles, five UEFA Champions Leagues, the UEFA European Championship and the UEFA Nations League.",
16
+ "person, award, date, competitions, teams",
17
+ 0.8,
18
  False,
19
  ],
20
+ # German Example - Politics
21
  [
22
+ "Angela Merkel war von 2005 bis 2021 Bundeskanzlerin der Bundesrepublik Deutschland. Sie wurde am 17. Juli 1954 in Hamburg geboren und war Mitglied der CDU. Unter ihrer Führung bewältigte Deutschland mehrere Krisen, darunter die Finanzkrise 2008 und die Flüchtlingskrise 2015.",
23
+ "person, position, location, date, organization, event",
24
+ 0.8,
25
  False,
26
  ],
27
+ # French Example - Politics & Business
28
  [
29
+ "Emmanuel Macron est président de la République française depuis mai 2017. Avant sa carrière politique, il a travaillé chez Rothschild & Cie Banque. Il a été ministre de l'Économie sous François Hollande de 2014 à 2016.",
30
+ "person, position, location, date, company, predecessor",
31
+ 0.8,
32
  False,
33
  ],
34
+ # Italian Example - Science
35
  [
36
+ "Rita Levi-Montalcini ha vinto il Premio Nobel per la Medicina nel 1986 per la scoperta del fattore di crescita nervoso (NGF). Nata a Torino il 22 aprile 1909, ha lavorato presso l'Università di Washington a St. Louis per molti anni.",
37
+ "person, award, date, discovery, location, organization",
38
+ 0.8,
39
+ False,
40
+ ],
41
+ # Spanish Example - PII Detection
42
+ [
43
+ "El paciente Juan García Martínez, nacido el 15/03/1978, fue admitido en el Hospital Universitario de Madrid. Su correo electrónico es juan.garcia@email.com y su número de teléfono es +34 912 345 678. Reside en Calle Mayor 45, 28013 Madrid.",
44
+ "person, date, organization, email, phone, address",
45
  0.75,
46
  False,
47
  ],
 
67
  ],
68
  }
69
 
70
+ with gr.Blocks(title="SauerkrautLM-GLiNER") as demo:
71
  gr.Markdown(
72
  """
73
+ # SauerkrautLM-GLiNER: Multilingual Zero-Shot Named Entity Recognition
74
+
75
+ SauerkrautLM-GLiNER is a multilingual GLiNER-style model for **zero-shot named entity recognition (NER)** based on the `jhu-clsp/mmBERT-base` backbone (a ModernBERT-style multilingual encoder).
76
 
77
  ### Key Features
78
 
79
+ - **Multilingual Support**: Trained jointly on **English, German, French, Italian, and Spanish**
80
+ - **Zero-Shot Entity Recognition**: Identify any entity type without requiring retraining - just provide your custom label list!
81
+ - **21k+ Entity Types**: Trained on roughly 21k distinct entity types across multiple domains
82
+ - **Superior Performance**: Achieves **+23.02 F1 points** over gliner_multi-v2.1 on multilingual benchmarks
83
+ - **General-Purpose**: Works for broad-domain extraction, PII detection, and specialized taxonomies
84
 
85
+ ### Performance Highlights
86
 
87
+ - **CrossNER + Multilingual Domains**: 55.34 F1 average (vs. 32.32 for gliner_multi-v2.1)
88
+ - **PII Detection**: 44.94 F1 average across 5 languages
89
+ - **Real-Time Performance**: Fast inference suitable for production applications
90
 
91
+ ### Useful Links
92
+ - **Model Page**: [SauerkrautLM-GLiNER](https://huggingface.co/VAGOsolutions/SauerkrautLM-GLiNER) on Hugging Face
93
+ - **Demo Space**: [Live Demo](https://huggingface.co/spaces/VAGOsolutions/mmbert_GLiNER_DEMO)
94
+ - **Benchmark Dataset**: [gliner-benchmark-multilingual](https://huggingface.co/datasets/VAGOsolutions/gliner-benchmark-multilingual)
95
  """
96
  )
97
 
98
  input_text = gr.Textbox(
99
+ value=examples[0][0], label="Text input", placeholder="Enter your text here in any of the supported languages (EN, DE, FR, IT, ES)", lines=6
100
  )
101
  with gr.Row() as row:
102
  labels = gr.Textbox(
 
108
  threshold = gr.Slider(
109
  0,
110
  1,
111
+ value=0.8,
112
  step=0.01,
113
  label="Threshold",
114
+ info="Lower the threshold to increase how many entities get predicted. Recommended: 0.8 for general NER, 0.75 for PII detection.",
115
  scale=1,
116
  )
117
  nested_ner = gr.Checkbox(