# pii_masking/app.py — Hugging Face Space: PII masking demo (rev 3e218a8)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import torch
# Load the model and tokenizer
model_name = "neshkatrapati/pii-mark-1"
try:
# Load tokenizer and model
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForTokenClassification.from_pretrained(model_name)
# Create NER pipeline
pipe = pipeline("text-generation", model="neshkatrapati/pii-mark-1")
def detect_pii(text):
    """Detect and redact PII in *text* using the fine-tuned generation model.

    Args:
        text: Raw user text to scan for personally identifiable information.

    Returns:
        The model's generated continuation (input with PII replaced by tags),
        or a short status/error message string.
    """
    # Validate the raw input BEFORE wrapping it in the prompt template:
    # the rendered template is never empty, so the original post-wrap check
    # could never detect a blank submission.
    if not text.strip():
        return "Please enter some text to analyze."

    # Llama-3-style chat template the checkpoint was fine-tuned on.
    prompt = f"<|begin_of_text|> <|end_of_text|> <|start_header_id|>system<|end_header_id|> You are a personal information (PII) redaction system. Your task is to identify personally identifiable information within a given text, classify the type of the PII and replace it by a tag. <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the given text : {text} - Identify personally identifiable information within this text and replace it by a relevant tag. <|eot_id|><|start_header_id|>assistant<|end_header_id|> "
    print(prompt)

    try:
        # Run generation on the fully rendered prompt.
        results = pipe(prompt, renormalize_logits=True)
        print(results)
        if not results:
            return "No PII detected in the text."
        # The pipeline echoes the prompt in `generated_text`; strip it so
        # only the newly generated (redacted) portion is returned.
        return results[0]["generated_text"][len(prompt):]
    except Exception as e:
        return f"Error processing text: {str(e)}"
# Create Gradio interface
with gr.Blocks(title="PII Detection Tool", theme=gr.themes.Soft()) as demo:
gr.Markdown("# πŸ”’ PII Detection Tool")
gr.Markdown(
"Enter text below to detect Personally Identifiable Information (PII) using the `Alonzo AI` model.")
with gr.Row():
with gr.Column(scale=1):
input_text = gr.Textbox(
label="Input Text",
placeholder="My name is Clark Maxwell, I live at 123 Main Street New York, Reach out to me at clark@maxwell.com",
lines=6,
max_lines=10
)
submit_btn = gr.Button("Detect PII", variant="primary")
clear_btn = gr.Button("Clear", variant="secondary")
with gr.Column(scale=1):
output_text = gr.Markdown(
label="Detection Results",
value="Results will appear here..."
)
# Example inputs
gr.Markdown("### πŸ“ Example Texts to Try:")
examples = [
["My name is jason statham and my email is john.doe@email.com. I live at 123 Main Street, New York."],
["Please contact maria burnham at (555) 123-4567 or mary@company.com for more information."],
["SSN: 123-45-6789, DOB: 01/15/1990, Phone: +1-800-555-0123"]
]
gr.Examples(
examples=examples,
inputs=input_text,
outputs=output_text,
fn=detect_pii,
cache_examples=True
)
# Event handlers
submit_btn.click(
fn=detect_pii,
inputs=input_text,
outputs=output_text
)
input_text.submit(
fn=detect_pii,
inputs=input_text,
outputs=output_text
)
clear_btn.click(
fn=lambda: ("", "Results will appear here..."),
outputs=[input_text, output_text]
)
# Launch the app
if __name__ == "__main__":
demo.launch(
server_name="0.0.0.0", # Allow external access
server_port=7860, # Default Gradio port
share=False, # Set to True if you want a public link
debug=True
)
except Exception as e:
print(f"Error loading model: {e}")
print("Make sure you have the required dependencies installed:")
print("pip install gradio transformers torch")
# Fallback demo if model loading fails
def fallback_detect_pii(text):
return f"Model loading failed. Please check your setup.\nError: {str(e)}"
with gr.Blocks() as demo:
gr.Markdown("# PII Detection Tool (Model Loading Failed)")
input_text = gr.Textbox(label="Input Text")
output_text = gr.Textbox(label="Output")
gr.Button("Detect PII").click(fallback_detect_pii, input_text, output_text)
if __name__ == "__main__":
demo.launch()