| | import gradio as gr |
| | from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification |
| |
|
| | |
| | |
# Hugging Face Hub identifier of the checkpoint used for PII detection.
MODEL_ID = "Negative-Star-Innovators/MiniLM-L6-finetuned-pii-detection"

# Load the tokenizer and the model once, at import time, so that every call to
# the pipeline below reuses the same objects.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# Token-classification pipeline shared by the whole module. The "simple"
# aggregation strategy is requested so results come back as grouped entities
# (the redaction code reads their 'entity_group', 'start' and 'end' fields).
pii_pipeline = pipeline(
    task="token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
| |
|
| | |
def redact_pii(text):
    """Replace every PII span detected in *text* with a ``[REDACTED <LABEL>]`` tag.

    Args:
        text: Raw user input. ``None`` is accepted and treated like empty
            input, since a UI textbox may hand over ``None`` instead of ``""``.

    Returns:
        ``""`` when *text* is ``None``, empty or whitespace-only; *text*
        unchanged when the pipeline reports no entities; otherwise *text* with
        each detected span replaced by ``[REDACTED <LABEL>]``, where
        ``<LABEL>`` is the upper-cased ``entity_group`` of that span.
    """
    if text is None or not text.strip():
        return ""

    entities = pii_pipeline(text)
    if not entities:
        return text

    # Walk the spans left to right and assemble the output from slices of the
    # ORIGINAL text, so offsets never shift and the result is built in one
    # pass instead of re-copying the whole string once per entity.
    pieces = []
    cursor = 0  # index in `text` up to which output has already been produced
    ordered = sorted(entities, key=lambda item: (item["start"], item["end"]))
    for entity in ordered:
        start, end = entity["start"], entity["end"]

        if start < cursor:
            # Span overlaps one that was already redacted. Emitting a second
            # tag would slice into the middle of the previous replacement, so
            # just extend the redacted region to cover this span as well.
            cursor = max(cursor, end)
            continue

        pieces.append(text[cursor:start])
        pieces.append(f"[REDACTED {entity['entity_group'].upper()}]")
        cursor = end

    pieces.append(text[cursor:])
    return "".join(pieces)
| |
|
| | |
| | |
# Gradio UI: a single multi-line textbox in, the redacted text out.
demo = gr.Interface(
    fn=redact_pii,
    inputs=gr.Textbox(
        lines=5,
        label="Input Text",
        placeholder="Paste text containing sensitive data (names, emails, routing numbers) here...",
    ),
    outputs=gr.Textbox(
        lines=5,
        label="Redacted Output",
    ),
    title="🛡️ Secure PII Redaction Playground",
    description=(
        "Test our highly efficient (90MB) PII detection model that is capable of running locally on your device. "
        "It quickly scrubs Personally Identifiable Information entirely on CPU, making it perfect "
        "for sanitizing data before sending it to third-party cloud LLMs and other parties."
    ),
    # Contact blurb passed as the interface's `article` text.
    article=(
        "📧 **Please reach out if you have a question or feedback. We also do custom projects, consulting, freelance and collaboration:** [thieves@negativestarinnovators.com](mailto:thieves@negativestarinnovators.com)"
    ),
    # Each inner list holds the value for the single input textbox.
    examples=[
        ["John Doe's routing number is 123456789 and his email is john.doe@email.com."],
        ["Please update the shipping address for Jane Smith to 123 Secure Lane. Her phone number is 555-0198."],
        ["The patient, Michael Johnson, was born on 10/12/1985. His SSN is 000-11-2222."],
    ],
    flagging_mode="never",
)


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()