Spaces:
Running
Running
| from typing import Dict, Union | |
| from gliner import GLiNER | |
| import gradio as gr | |
| import os | |
# Load the Gravitee PII-detection GLiNER checkpoint once, at import time; the
# `ner` callback below reuses this single module-level instance.
model = GLiNER.from_pretrained(
    "gravitee-io/gliner-pii-detection",
    # Hub token comes from the environment; this is None when
    # HUGGINGFACE_TOKEN is not set.
    token=os.environ.get("HUGGINGFACE_TOKEN"),
    load_tokenizer=True,
    # Use the ONNX export shipped in the repository as "model.onnx".
    load_onnx_model=True,
    onnx_model_file="model.onnx",
)
# Example rows offered in the UI. Each row is
# [text, comma-separated labels, threshold, nested_ner]; every example shares
# the same threshold (0.5) and nested-NER flag (False), so only the text and
# label string are spelled out per entry.
examples = [
    [sample_text, sample_labels, 0.5, False]
    for sample_text, sample_labels in (
        (
            "Jana Kowalczyk's driver license number is PL-DL-55443322 and she resides at 78 Ulica Nowowiejska, Wrocław. Her contact email is jana.k@example.com.",
            "name, driver_license_number, street_address, email",
        ),
        (
            "Nguyen Van Long from Hanoi logs in from the IP 10.0.0.5 and uses the API key: 12ab34cd56ef78gh90ij. His company is VietNet Global.",
            "name, street_address, ipv4, api_key, company",
        ),
        (
            "Sarah Johnson made a transaction using the credit card 379354508162306 and the CVV 834. Her IBAN is GB29 NWBK 6016 1331 9268 19.",
            "name, credit_card_number, credit_card_security_code, iban",
        ),
        (
            "Employee Thomas Becker has the ID DE-EMP-44991 and joined DataFlux GmbH on 2021-12-01. His internal email is t.becker@dataflux.de.",
            "name, employee_id, company, date, email",
        ),
        (
            "Laura Rossi lives at Via Roma 101, Milano. Her social security number is IT-9988776655 and she was born on 1982-07-14.",
            "name, street_address, ssn, date_of_birth",
        ),
        (
            "Omar El-Zein uses the SWIFT code BOFAUS3N and his bank routing number is 026009593. He lives near 12 Al-Azhar Street, Cairo.",
            "name, swift_bic_code, bank_routing_number, street_address",
        ),
        (
            "Chen Wei's employee badge shows ID EMP-CN-8899. He signed the contract on 2023-03-20 at 10:15 AM using the password Dragon@123.",
            "name, employee_id, date, time, password",
        ),
        (
            "Fatoumata Diarra, born 1994-04-04, lives at 45 Avenue de la Liberté, Bamako. Her BBAN is ML2930012345678901234567890.",
            "name, date_of_birth, street_address, bban",
        ),
        (
            "Daniel Evans has a passport number K01234567 and a permanent address at 500 Pine Street, Seattle. His contact number is +1-206-555-0199.",
            "name, passport_number, street_address, phone_number",
        ),
        (
            "Alejandro Torres created his customer account on 2024-08-30 using email ale.torres@correo.mx and ID CUST-MX-1122.",
            "name, date, email, customer_id",
        ),
    )
]
def ner(
    text, labels: str, threshold: float, nested_ner: bool
) -> Dict[str, Union[str, list]]:
    """Predict entities in ``text`` and shape them for the highlighted output.

    Args:
        text: The text to scan.
        labels: Comma-separated entity labels, e.g. ``"name, email"``.
            Whitespace around each label is ignored and empty items are
            dropped.
        threshold: Forwarded to ``model.predict_entities`` as ``threshold``.
        nested_ner: When true, the model is called with ``flat_ner=False``;
            otherwise with ``flat_ner=True``.

    Returns:
        A dict with the original ``"text"`` and an ``"entities"`` list whose
        items carry ``entity`` (label), ``word`` (matched text), ``start``,
        ``end`` and ``score``.
    """
    # The UI examples separate labels with ", ", so a bare split(",") would
    # hand the model labels with a leading space (" email"). Strip them and
    # skip empty items such as the one produced by a trailing comma.
    label_list = [part.strip() for part in labels.split(",")]
    label_list = [label for label in label_list if label]

    # Nothing to predict without text or labels; skip the model call.
    if not text or not label_list:
        return {"text": text, "entities": []}

    predictions = model.predict_entities(
        text, label_list, flat_ner=not nested_ner, threshold=threshold
    )
    return {
        "text": text,
        "entities": [
            {
                "entity": prediction["label"],
                "word": prediction["text"],
                "start": prediction["start"],
                "end": prediction["end"],
                # NOTE(review): the score is deliberately left at the constant
                # 0 used so far; the model's own confidence is not forwarded.
                # Confirm whether the output component needs it.
                "score": 0,
            }
            for prediction in predictions
        ],
    }
# Build the demo page: intro text, a collapsible "run locally" guide, the input
# controls, the highlighted output, the example rows and the event wiring.
with gr.Blocks(title="GLiNER-M-v2.1") as demo:
    gr.Markdown(
        """
# Gravitee PII (Personally Identifiable Information extraction)
GLiNER is a Named Entity Recognition (NER) model capable of identifying any entity type using a bidirectional transformer encoder (BERT-like). It provides a practical alternative to traditional NER models, which are limited to predefined entities, and Large Language Models (LLMs) that, despite their flexibility, are costly and large for resource-constrained scenarios.
"""
    )
    with gr.Accordion("How to run this model locally", open=False):
        gr.Markdown(
            """
## Installation
To use this model, you must install the GLiNER Python library:
```
!pip install gliner
```
## Usage
Once you've downloaded the GLiNER library, you can import the GLiNER class. You can then load this model using `GLiNER.from_pretrained` and predict entities with `predict_entities`.
"""
        )
        # The sample includes its own import so it can be copied and run as-is.
        gr.Code(
            """
from gliner import GLiNER

model = GLiNER.from_pretrained(
    "gravitee-io/gliner-pii-detection",
    load_onnx_model=True,
    load_tokenizer=True, onnx_model_file="model.onnx"
)
text = '''
Hey, just a quick update. I talked to David yesterday.
He sent over the files from his private email (david.doe@example.com), and we should be careful with his SSN: 123-45-6789.
Also, please don't push the GitHub repo until we remove the API key: ghp_abcdEfgh1234567890.
He mentioned his new address is 123 Maple Street in New York.
His PC adress is 192.168.1.100.
'''
labels = ["name",
          "email",
          "ssn",
          "api_key",
          "street_address",
          "date",
          "ipv4"]
entities = model.predict_entities(text, labels)
for entity in entities:
    print(entity["text"], "=>", entity["label"], "=>", entity["score"])
""",
            language="python",
        )

    # Input controls, pre-filled from the first example row
    # ([text, labels, threshold, nested_ner]).
    input_text = gr.Textbox(
        value=examples[0][0], label="Text input", placeholder="Enter your text here"
    )
    with gr.Row():
        labels = gr.Textbox(
            value=examples[0][1],
            label="Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=0.3,
            step=0.01,
            label="Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )
        nested_ner = gr.Checkbox(
            # Index 3 is the nested-NER flag; index 2 is the threshold, which
            # would wrongly turn the checkbox on.
            value=examples[0][3],
            label="Nested NER",
            info="Allow for nested NER?",
            scale=0,
        )
    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Submit")

    # The examples and every trigger below run the same prediction with the
    # same inputs and output, so the wiring is defined once.
    _predict_wiring = dict(
        fn=ner,
        inputs=[input_text, labels, threshold, nested_ner],
        outputs=output,
    )

    # Bound to its own name so the module-level `examples` list is not
    # overwritten by the component.
    examples_panel = gr.Examples(examples, cache_examples=True, **_predict_wiring)

    # Submitting: pressing Enter in either textbox, releasing the slider,
    # clicking the button or toggling the checkbox all refresh the output.
    for _register in (
        input_text.submit,
        labels.submit,
        threshold.release,
        submit_btn.click,
        nested_ner.change,
    ):
        _register(**_predict_wiring)
# Enable request queuing, then start the server with debug output enabled.
demo.queue().launch(debug=True)