# pii_masking/app.py — Hugging Face Space: PII masking demo (rev 3e218a8)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import torch
# Load the model and tokenizer
model_name = "neshkatrapati/pii-mark-1"
try:
# Load tokenizer and model
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForTokenClassification.from_pretrained(model_name)
# Create NER pipeline
pipe = pipeline("text-generation", model="neshkatrapati/pii-mark-1")
def detect_pii(text):
    """Detect and redact PII in *text* using the fine-tuned generation model.

    Args:
        text: Raw user text to scan for personally identifiable information.

    Returns:
        The model's generated continuation (input with PII replaced by tags),
        or a short status/error message string.
    """
    # Validate the raw input BEFORE wrapping it in the prompt template:
    # the rendered template is never empty, so the original post-wrap check
    # could never detect a blank submission.
    if not text.strip():
        return "Please enter some text to analyze."

    # Llama-3-style chat template the checkpoint was fine-tuned on.
    prompt = f"<|begin_of_text|> <|end_of_text|> <|start_header_id|>system<|end_header_id|> You are a personal information (PII) redaction system. Your task is to identify personally identifiable information within a given text, classify the type of the PII and replace it by a tag. <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the given text : {text} - Identify personally identifiable information within this text and replace it by a relevant tag. <|eot_id|><|start_header_id|>assistant<|end_header_id|> "
    print(prompt)

    try:
        # Run generation on the fully rendered prompt.
        results = pipe(prompt, renormalize_logits=True)
        print(results)
        if not results:
            return "No PII detected in the text."
        # The pipeline echoes the prompt in `generated_text`; strip it so
        # only the newly generated (redacted) portion is returned.
        return results[0]["generated_text"][len(prompt):]
    except Exception as e:
        return f"Error processing text: {str(e)}"
# Create Gradio interface
with gr.Blocks(title="PII Detection Tool", theme=gr.themes.Soft()) as demo:
gr.Markdown("# πŸ”’ PII Detection Tool")
gr.Markdown(
"Enter text below to detect Personally Identifiable Information (PII) using the `Alonzo AI` model.")
with gr.Row():
with gr.Column(scale=1):
input_text = gr.Textbox(
label="Input Text",
placeholder="My name is Clark Maxwell, I live at 123 Main Street New York, Reach out to me at clark@maxwell.com",
lines=6,
max_lines=10
)
submit_btn = gr.Button("Detect PII", variant="primary")
clear_btn = gr.Button("Clear", variant="secondary")
with gr.Column(scale=1):
output_text = gr.Markdown(
label="Detection Results",
value="Results will appear here..."
)
# Example inputs
gr.Markdown("### πŸ“ Example Texts to Try:")
examples = [
["My name is jason statham and my email is john.doe@email.com. I live at 123 Main Street, New York."],
["Please contact maria burnham at (555) 123-4567 or mary@company.com for more information."],
["SSN: 123-45-6789, DOB: 01/15/1990, Phone: +1-800-555-0123"]
]
gr.Examples(
examples=examples,
inputs=input_text,
outputs=output_text,
fn=detect_pii,
cache_examples=True
)
# Event handlers
submit_btn.click(
fn=detect_pii,
inputs=input_text,
outputs=output_text
)
input_text.submit(
fn=detect_pii,
inputs=input_text,
outputs=output_text
)
clear_btn.click(
fn=lambda: ("", "Results will appear here..."),
outputs=[input_text, output_text]
)
# Launch the app
if __name__ == "__main__":
demo.launch(
server_name="0.0.0.0", # Allow external access
server_port=7860, # Default Gradio port
share=False, # Set to True if you want a public link
debug=True
)
except Exception as e:
print(f"Error loading model: {e}")
print("Make sure you have the required dependencies installed:")
print("pip install gradio transformers torch")
# Fallback demo if model loading fails
def fallback_detect_pii(text):
return f"Model loading failed. Please check your setup.\nError: {str(e)}"
with gr.Blocks() as demo:
gr.Markdown("# PII Detection Tool (Model Loading Failed)")
input_text = gr.Textbox(label="Input Text")
output_text = gr.Textbox(label="Output")
gr.Button("Detect PII").click(fallback_detect_pii, input_text, output_text)
if __name__ == "__main__":
demo.launch()