Suchinthana committed on
Commit
d4fde93
·
1 Parent(s): 058c0d4

Init commit

Browse files
Files changed (2) hide show
  1. app.py +67 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
4
+
5
# Load the pretrained NER checkpoint once at import time so every request
# reuses the same tokenizer/model pair through a single pipeline object.
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
ner_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
9
+
10
+ # --- Core Redaction Function ---
11
def redact_text(input_text, custom_pattern=None, pattern_type=None):
    """Replace detected PII in *input_text* with the ``[REDACTED]`` placeholder.

    Two detectors are combined:

    * the module-level ``ner_pipeline``, whose results supply ``"start"`` /
      ``"end"`` character offsets for each entity, and
    * an optional regular expression, taken from a preset selected via
      *pattern_type* or, failing that, from *custom_pattern*.

    Both detectors run against the original text and their spans are merged
    before substitution, so the regex can neither match inside an already
    inserted placeholder nor miss a value the NER step partially replaced.

    Args:
        input_text: Text to redact. Empty or ``None`` yields ``""``.
        custom_pattern: User-supplied regex; ignored when *pattern_type*
            names one of the built-in presets.
        pattern_type: Label of a preset pattern. Any other value (for
            example ``"None"``) leaves *custom_pattern* in effect.

    Returns:
        The text with every detected span replaced by ``[REDACTED]``.

    Raises:
        gr.Error: If the effective regex pattern does not compile.
    """
    # Nothing to redact; also avoids sending empty input to the model.
    if not input_text:
        return ""

    # A selected preset takes precedence over the free-form pattern.
    if pattern_type == "Date (YYYY-MM-DD or DD/MM/YYYY)":
        custom_pattern = r"\b\d{4}-\d{2}-\d{2}\b|\b\d{2}/\d{2}/\d{4}\b"
    elif pattern_type == "16-digit Number (e.g., Credit Card)":
        custom_pattern = r"\b\d{16}\b"

    # Validate the regex before running the (comparatively slow) model so a
    # bad pattern fails fast with a message the UI can display.
    compiled_pattern = None
    if custom_pattern:
        try:
            compiled_pattern = re.compile(custom_pattern)
        except re.error as exc:
            raise gr.Error(f"Invalid regex pattern: {exc}") from exc

    # Collect (start, end) character spans from both detectors.
    spans = [(ent["start"], ent["end"]) for ent in ner_pipeline(input_text)]
    if compiled_pattern is not None:
        spans.extend(
            match.span()
            for match in compiled_pattern.finditer(input_text)
            if match.end() > match.start()  # skip zero-width matches
        )

    # Walk the spans left to right, emitting one placeholder per group of
    # overlapping spans. Spans that merely touch stay separate.
    pieces = []
    cursor = 0
    for start, end in sorted(spans):
        if start < cursor:
            # Overlaps the span just emitted: extend its coverage only.
            cursor = max(cursor, end)
            continue
        pieces.append(input_text[cursor:start])
        pieces.append("[REDACTED]")
        cursor = end
    pieces.append(input_text[cursor:])

    return "".join(pieces)
32
+
33
+
34
+ # --- Gradio Interface ---
35
# Assemble the UI: one input box, a row with the preset/custom regex
# controls, a trigger button, and a box that shows the redacted result.
with gr.Blocks(title="PII Redactor") as demo:
    gr.Markdown("## 🔒 PII Redactor using BERT NER + Custom Regex")

    with gr.Row():
        text_input = gr.Textbox(
            lines=5,
            label="Input Text",
            placeholder="Enter text containing PII (e.g. names, dates, locations, credit cards)...",
        )

    with gr.Row():
        pattern_type = gr.Dropdown(
            choices=[
                "None",
                "Date (YYYY-MM-DD or DD/MM/YYYY)",
                "16-digit Number (e.g., Credit Card)",
            ],
            value="None",
            label="Select Regex Sample",
        )
        custom_pattern = gr.Textbox(
            placeholder=r"e.g. \b\d{3}-\d{2}-\d{4}\b for SSN",
            label="Or Enter Custom Regex Pattern",
        )

    redact_button = gr.Button("🔍 Redact PII")
    output_text = gr.Textbox(lines=5, label="Redacted Output")

    # The input order must match redact_text's positional parameters:
    # (input_text, custom_pattern, pattern_type).
    redact_button.click(
        redact_text,
        inputs=[text_input, custom_pattern, pattern_type],
        outputs=output_text,
    )

# Start the server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
transformers
torch
gradio