Spaces:
Sleeping
Sleeping
| import re | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline | |
# Hugging Face checkpoint that provides both the tokenizer and the NER weights.
NER_MODEL_ID = "dslim/bert-base-NER"

# Load the pretrained tokenizer and token-classification model once, at import
# time, so every redaction request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(NER_MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(NER_MODEL_ID)

# Shared NER pipeline used by the redaction function; "simple" aggregation is
# requested so results are reported as grouped entities.
ner_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
| # --- Core Redaction Function --- | |
def redact_text(input_text, custom_pattern=None, pattern_type=None):
    """Redact named entities and regex matches from *input_text*.

    The text is first passed through the module-level ``ner_pipeline`` and
    every entity span it reports is replaced with ``[REDACTED]``.  A regular
    expression is then applied to the result and each match is replaced with
    the same marker.

    Args:
        input_text: Text to redact (str).
        custom_pattern: Optional regular expression (str).  It is ignored
            when ``pattern_type`` names one of the built-in presets, because
            the preset overwrites it.
        pattern_type: Label of a built-in preset (date or 16-digit number).
            Any other value, including ``"None"`` and ``None``, leaves
            ``custom_pattern`` in effect.

    Returns:
        The redacted text, or ``""`` when ``input_text`` is empty or ``None``.

    Raises:
        re.error: If ``custom_pattern`` is not a valid regular expression.
    """
    # Nothing to redact: avoid sending empty/None input to the NER model.
    if not input_text:
        return ""

    redacted_text = input_text

    # Apply model-based NER redaction.  Entities are processed from the
    # right-most span to the left-most so that the character offsets of the
    # spans still to be replaced are not shifted by earlier replacements.
    ner_results = ner_pipeline(input_text)
    for ent in sorted(ner_results, key=lambda x: x["start"], reverse=True):
        redacted_text = (
            redacted_text[: ent["start"]] + "[REDACTED]" + redacted_text[ent["end"] :]
        )

    # A selected preset takes precedence over any user-supplied pattern.
    if pattern_type == "Date (YYYY-MM-DD or DD/MM/YYYY)":
        custom_pattern = r"\b\d{4}-\d{2}-\d{2}\b|\b\d{2}/\d{2}/\d{4}\b"
    elif pattern_type == "16-digit Number (e.g., Credit Card)":
        custom_pattern = r"\b\d{16}\b"

    # NOTE: the pattern runs on the already NER-redacted text, so it can also
    # match inside "[REDACTED]" markers inserted above.
    if custom_pattern:
        redacted_text = re.sub(custom_pattern, "[REDACTED]", redacted_text)

    return redacted_text
# --- Gradio Interface ---
# Layout: a text input row, a row of regex options, a button, and the output.
# NOTE(review): the original indentation was lost; the dropdown and the custom
# pattern box are assumed to share one row -- confirm against the deployed app.
with gr.Blocks(title="PII Redactor") as demo:
    gr.Markdown("## 🔒 PII Redactor using BERT NER + Custom Regex")

    with gr.Row():
        text_input = gr.Textbox(
            label="Input Text",
            placeholder="Enter text containing PII (e.g. names, dates, locations, credit cards)...",
            lines=5,
        )

    with gr.Row():
        # Preset labels must match the strings compared inside redact_text.
        pattern_type = gr.Dropdown(
            ["None", "Date (YYYY-MM-DD or DD/MM/YYYY)", "16-digit Number (e.g., Credit Card)"],
            label="Select Regex Sample",
            value="None",
        )
        custom_pattern = gr.Textbox(
            label="Or Enter Custom Regex Pattern",
            placeholder=r"e.g. \b\d{3}-\d{2}-\d{4}\b for SSN",
        )

    redact_button = gr.Button("🔒 Redact PII")
    output_text = gr.Textbox(label="Redacted Output", lines=5)

    # Input order mirrors redact_text(input_text, custom_pattern, pattern_type).
    redact_button.click(
        fn=redact_text,
        inputs=[text_input, custom_pattern, pattern_type],
        outputs=output_text,
    )

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()