BaoLocTown committed on
Commit
bb44c75
·
1 Parent(s): a66b5e8

[ADD] Model dropdown logic

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +33 -8
  3. app_old.py +168 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: GLiNER-Multi-PII
3
  emoji: 💻
4
  colorFrom: pink
5
  colorTo: blue
 
1
  ---
2
+ title: GLiNER-VN
3
  emoji: 💻
4
  colorFrom: pink
5
  colorTo: blue
app.py CHANGED
@@ -2,7 +2,22 @@ from typing import Dict, Union
2
  from gliner import GLiNER
3
  import gradio as gr
4
 
5
- model = GLiNER.from_pretrained("BaoLocTown/gliner-vn-demo")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  examples = [
8
  [
@@ -33,9 +48,10 @@ examples = [
33
 
34
 
35
  def ner(
36
- text, labels: str, threshold: float, nested_ner: bool
37
  ) -> Dict[str, Union[str, int, float]]:
38
  labels = labels.split(",")
 
39
  return {
40
  "text": text,
41
  "entities": [
@@ -112,6 +128,12 @@ harilala.rasoanaivo@telma.mg => email
112
  """
113
  )
114
 
 
 
 
 
 
 
115
  input_text = gr.Textbox(
116
  value=examples[0][0], label="Text input", placeholder="Enter your text here"
117
  )
@@ -142,26 +164,29 @@ harilala.rasoanaivo@telma.mg => email
142
  examples = gr.Examples(
143
  examples,
144
  fn=ner,
145
- inputs=[input_text, labels, threshold, nested_ner],
146
  outputs=output,
147
  cache_examples=True,
148
  )
149
 
150
  # Submitting
151
  input_text.submit(
152
- fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
153
  )
154
  labels.submit(
155
- fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
156
  )
157
  threshold.release(
158
- fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
159
  )
160
  submit_btn.click(
161
- fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
162
  )
163
  nested_ner.change(
164
- fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
 
 
 
165
  )
166
 
167
  demo.queue()
 
2
  from gliner import GLiNER
3
  import gradio as gr
4
 
5
+ # Available models
6
+ AVAILABLE_MODELS = {
7
+ "BaoLocTown/gliner-vn-demo": "BaoLocTown/gliner-vn-demo",
8
+ "nvidia/gliner-PII": "nvidia/gliner-PII",
9
+ "urchade/gliner_multi_pii-v1": "urchade/gliner_multi_pii-v1",
10
+ "BaoLocTown/gliner-bi-large-v1.0-tuned-v3-50k": "BaoLocTown/gliner-bi-large-v1.0-tuned-v3-50k",
11
+ }
12
+
13
+ # Cache for loaded models
14
+ model_cache = {}
15
+
16
+ def get_model(model_name: str) -> GLiNER:
17
+ """Get model from cache or load it if not cached."""
18
+ if model_name not in model_cache:
19
+ model_cache[model_name] = GLiNER.from_pretrained(model_name)
20
+ return model_cache[model_name]
21
 
22
  examples = [
23
  [
 
48
 
49
 
50
  def ner(
51
+ text, labels: str, threshold: float, nested_ner: bool, model_name: str
52
  ) -> Dict[str, Union[str, int, float]]:
53
  labels = labels.split(",")
54
+ model = get_model(model_name)
55
  return {
56
  "text": text,
57
  "entities": [
 
128
  """
129
  )
130
 
131
+ model_dropdown = gr.Dropdown(
132
+ choices=list(AVAILABLE_MODELS.keys()),
133
+ value="BaoLocTown/gliner-vn-demo",
134
+ label="Model",
135
+ info="Select the GLiNER model to use",
136
+ )
137
  input_text = gr.Textbox(
138
  value=examples[0][0], label="Text input", placeholder="Enter your text here"
139
  )
 
164
  examples = gr.Examples(
165
  examples,
166
  fn=ner,
167
+ inputs=[input_text, labels, threshold, nested_ner, model_dropdown],
168
  outputs=output,
169
  cache_examples=True,
170
  )
171
 
172
  # Submitting
173
  input_text.submit(
174
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner, model_dropdown], outputs=output
175
  )
176
  labels.submit(
177
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner, model_dropdown], outputs=output
178
  )
179
  threshold.release(
180
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner, model_dropdown], outputs=output
181
  )
182
  submit_btn.click(
183
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner, model_dropdown], outputs=output
184
  )
185
  nested_ner.change(
186
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner, model_dropdown], outputs=output
187
+ )
188
+ model_dropdown.change(
189
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner, model_dropdown], outputs=output
190
  )
191
 
192
  demo.queue()
app_old.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Union
2
+ from gliner import GLiNER
3
+ import gradio as gr
4
+
5
+ model = GLiNER.from_pretrained("BaoLocTown/gliner-vn-demo")
6
+
7
+ examples = [
8
+ [
9
+ "Công ty TNHH XYZ tuyển dụng vị trí **Nhân viên Kinh doanh** với yêu cầu tốt nghiệp đại học, kỹ năng giao tiếp tốt và đam mê xây dựng mối quan hệ khách hàng. Quyền lợi hấp dẫn, lương cạnh tranh, thưởng theo doanh số và cơ hội thăng tiến. Liên hệ: **0903 123 456** hoặc **[tuyendung@xyz.com](mailto:tuyendung@xyz.com)**.",
10
+ "person, company, phone, job title",
11
+ 0.5,
12
+ False,
13
+ ],
14
+ [
15
+ "Số hộ chiếu của ông Phạm Hùng Cường là A1234567 và ông đang cư trú tại địa chỉ 123 Đường Láng, Hà Nội. Ông cũng sở hữu số tài khoản ngân hàng 9876543210 tại Ngân hàng VietinBank.",
16
+ "person, company, phone, job title, location, email, credit card number, date of birth, passport number, bank number",
17
+ 0.3,
18
+ False
19
+ ],
20
+ [
21
+ "Mã số bảo hiểm y tế của ông Trần Quang Minh là <b>987654321</b>, và số điện thoại của ông là 0987-654-321. Ông đang điều trị bệnh tại bệnh viện <i>Chợ Rẫy</i>, TP.HCM.",
22
+ "person, company, phone, job title, location, email, credit card number, date of birth, passport number, bank number, student id number, organization, social security number, health insurance id number",
23
+ 0.3,
24
+ False
25
+ ],
26
+ [
27
+ "Chị **Hoàng Thị Lan**, mã số sinh viên **20237865**, đã đăng ký tham gia khóa học “Data Science” tại **Đại học FPT**, và đã cung cấp số tài khoản ngân hàng ***1234567890123456*** tại **Vietcombank**. Cô cũng đã thanh toán qua số thẻ tín dụng ***4321-8765-1234-5678*** và cung cấp số hộ chiếu <b>C1234567</b>. Chị Hoàng Thị Lan sinh ngày <i>15/08/1998</i> và có email là ***hoangthilan123@fpt.edu.vn***. Cô cũng đã cung cấp số điện thoại ***0909123456*** để nhận thông báo.",
28
+ "person, company, phone, job title, location, email, credit card number, date of birth, passport number, bank number, student id number, organization, social security number, health insurance id number",
29
+ 0.3,
30
+ False
31
+ ]
32
+ ]
33
+
34
+
35
+ def ner(
36
+ text, labels: str, threshold: float, nested_ner: bool
37
+ ) -> Dict[str, Union[str, int, float]]:
38
+ labels = labels.split(",")
39
+ return {
40
+ "text": text,
41
+ "entities": [
42
+ {
43
+ "entity": entity["label"],
44
+ "word": entity["text"],
45
+ "start": entity["start"],
46
+ "end": entity["end"],
47
+ "score": 0,
48
+ }
49
+ for entity in model.predict_entities(
50
+ text, labels, flat_ner=not nested_ner, threshold=threshold
51
+ )
52
+ ],
53
+ }
54
+
55
+
56
+ with gr.Blocks(title="GLiNER-M-v2.1") as demo:
57
+ gr.Markdown(
58
+ """
59
+ # GLiNER-PII (Personally Identifiable Information extraction)
60
+
61
+ GLiNER is a Named Entity Recognition (NER) model capable of identifying any entity type using a bidirectional transformer encoder (BERT-like). It provides a practical alternative to traditional NER models, which are limited to predefined entities, and Large Language Models (LLMs) that, despite their flexibility, are costly and large for resource-constrained scenarios.
62
+
63
+ The model has been trained by fine-tuning urchade/gliner_multi-v2.1 on the urchade/synthetic-pii-ner-mistral-v1 dataset.
64
+
65
+ ## Links
66
+
67
+ * Model: https://huggingface.co/urchade/gliner_multi_pii-v1
68
+ * All GLiNER models: https://huggingface.co/models?library=gliner
69
+ * Paper: https://arxiv.org/abs/2311.08526
70
+ * Repository: https://github.com/urchade/GLiNER
71
+ """
72
+ )
73
+ with gr.Accordion("How to run this model locally", open=False):
74
+ gr.Markdown(
75
+ """
76
+ ## Installation
77
+ To use this model, you must install the GLiNER Python library:
78
+ ```
79
+ !pip install gliner
80
+ ```
81
+
82
+ ## Usage
83
+ Once you've downloaded the GLiNER library, you can import the GLiNER class. You can then load this model using `GLiNER.from_pretrained` and predict entities with `predict_entities`.
84
+ """
85
+ )
86
+ gr.Code(
87
+ '''
88
+ from gliner import GLiNER
89
+
90
+ model = GLiNER.from_pretrained("urchade/gliner_multi_pii-v1")
91
+
92
+ text = """
93
+ Harilala Rasoanaivo, un homme d'affaires local d'Antananarivo, a enregistré une nouvelle société nommée "Rasoanaivo Enterprises" au Lot II M 92 Antohomadinika. Son numéro est le +261 32 22 345 67, et son adresse électronique est harilala.rasoanaivo@telma.mg. Il a fourni son numéro de sécu 501-02-1234 pour l'enregistrement.
94
+ """
95
+
96
+ labels = ["work", "booking number", "personally identifiable information", "driver licence", "person", "book", "full address", "company", "actor", "character", "email", "passport number", "Social Security Number", "phone number"]
97
+ entities = model.predict_entities(text, labels)
98
+
99
+ for entity in entities:
100
+ print(entity["text"], "=>", entity["label"])
101
+ ''',
102
+ language="python",
103
+ )
104
+ gr.Code(
105
+ """
106
+ Harilala Rasoanaivo => person
107
+ Rasoanaivo Enterprises => company
108
+ Lot II M 92 Antohomadinika => full address
109
+ +261 32 22 345 67 => phone number
110
+ harilala.rasoanaivo@telma.mg => email
111
+ 501-02-1234 => Social Security Number
112
+ """
113
+ )
114
+
115
+ input_text = gr.Textbox(
116
+ value=examples[0][0], label="Text input", placeholder="Enter your text here"
117
+ )
118
+ with gr.Row() as row:
119
+ labels = gr.Textbox(
120
+ value=examples[0][1],
121
+ label="Labels",
122
+ placeholder="Enter your labels here (comma separated)",
123
+ scale=2,
124
+ )
125
+ threshold = gr.Slider(
126
+ 0,
127
+ 1,
128
+ value=0.3,
129
+ step=0.01,
130
+ label="Threshold",
131
+ info="Lower the threshold to increase how many entities get predicted.",
132
+ scale=1,
133
+ )
134
+ nested_ner = gr.Checkbox(
135
+ value=examples[0][2],
136
+ label="Nested NER",
137
+ info="Allow for nested NER?",
138
+ scale=0,
139
+ )
140
+ output = gr.HighlightedText(label="Predicted Entities")
141
+ submit_btn = gr.Button("Submit")
142
+ examples = gr.Examples(
143
+ examples,
144
+ fn=ner,
145
+ inputs=[input_text, labels, threshold, nested_ner],
146
+ outputs=output,
147
+ cache_examples=True,
148
+ )
149
+
150
+ # Submitting
151
+ input_text.submit(
152
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
153
+ )
154
+ labels.submit(
155
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
156
+ )
157
+ threshold.release(
158
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
159
+ )
160
+ submit_btn.click(
161
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
162
+ )
163
+ nested_ner.change(
164
+ fn=ner, inputs=[input_text, labels, threshold, nested_ner], outputs=output
165
+ )
166
+
167
+ demo.queue()
168
+ demo.launch(debug=True)