|
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
import torch
|
|
|
import gradio as gr
|
|
|
|
|
|
|
|
|
# Hub repository id shared by the model weights and the tokenizer.
MODEL_ID = "betterdataai/PII_DETECTION_MODEL"

# Use the GPU when torch reports CUDA as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): trust_remote_code=True allows transformers to run Python code
# shipped inside the model repository; confirm the repository is trusted (or
# pin a revision) before deploying.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
model = model.to(device)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
|
|
|
|
|
|
# PII labels offered to the model; they are joined with newlines and
# substituted for ``{classes}`` in the prompt below.
classes_list = [
    '<pin>',
    '<api_key>',
    '<bank_routing_number>',
    '<bban>',
    '<company>',
    '<credit_card_number>',
    '<credit_card_security_code>',
    '<customer_id>',
    '<date>',
    '<date_of_birth>',
    '<date_time>',
    '<driver_license_number>',
    '<email>',
    '<employee_id>',
    '<first_name>',
    '<iban>',
    '<ipv4>',
    '<ipv6>',
    '<last_name>',
    '<local_latlng>',
    '<name>',
    '<passport_number>',
    '<password>',
    '<phone_number>',
    '<social_security_number>',
    '<street_address>',
    '<swift_bic_code>',
    '<time>',
    '<user_name>',
]

# Prompt skeleton with two ``str.format`` fields: ``{classes}`` and ``{text}``.
# It ends with the line "The PII data are:" followed by a newline, which
# detect_pii() uses as the marker for locating the answer in the decoded output.
# NOTE(review): the wording (including the "responisble" spelling) is kept
# exactly as found because the string is sent to the model verbatim -- confirm
# against the model's documented prompt before editing it.
prompt_template = """You are an AI assistant who is responisble for identifying Personal Identifiable information (PII). You will be given a passage of text and you have to \
identify the PII data present in the passage. You should only identify the data based on the classes provided and not make up any class on your own.

```PII Classes```
{classes}

The given text is:
{text}

The PII data are:
"""
|
|
|
|
|
|
def detect_pii(user_input_text):
    """Ask the PII model which entities appear in ``user_input_text``.

    The text is substituted into ``prompt_template`` together with the labels
    from ``classes_list`` (one per line). The prompt is tokenized, moved to
    ``device`` and passed to ``model.generate`` with at most 250 new tokens.
    The decoded output is then searched for the ``The PII data are:`` marker
    line, and everything after its last occurrence is returned.

    Args:
        user_input_text: Passage to scan for PII.

    Returns:
        A string, one of:

        * the text that follows the last marker line in the decoded output;
        * ``"Please enter some text to analyze."`` when the input is ``None``,
          empty or whitespace-only (the model is not called);
        * ``"Could not parse model output."`` when the marker is absent;
        * ``"An error occurred: <message>"`` when any model step raises.
    """
    # Blank input: there is nothing to analyse, so skip the model call.
    if user_input_text is None or not str(user_input_text).strip():
        return "Please enter some text to analyze."

    marker = "The PII data are:\n"

    try:
        prompt = prompt_template.format(
            classes="\n".join(classes_list),
            text=user_input_text,
        )
        encoded = tokenizer(prompt, return_tensors="pt").to(device)
        generated = model.generate(**encoded, max_new_tokens=250)
        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as exc:
        # This function backs a UI callback: report the failure as text
        # instead of letting the exception propagate.
        return f"An error occurred: {str(exc)}"

    # The marker is the tail of the prompt; the answer is taken to be whatever
    # follows its LAST occurrence, so marker-like text inside the user's
    # passage does not cut the result short.
    _, found, answer = decoded.rpartition(marker)
    if not found:
        return "Could not parse model output."
    return answer
|
|
|
|
|
|
|
|
|
# Widgets for the demo page: a five-line input box and a plain output box.
pii_text_input = gr.Textbox(lines=5, label="Enter Text Here")
pii_text_output = gr.Textbox(label="Detected PII")

# Single-function Gradio app: submitted text goes to detect_pii() and the
# returned string is shown in the output box.
iface = gr.Interface(
    fn=detect_pii,
    inputs=pii_text_input,
    outputs=pii_text_output,
    title="PII Detection Model",
    description="This app uses 'betterdataai/PII_DETECTION_MODEL' to find PII in text.",
)

# Launch the app when the script is executed (runs at module level).
iface.launch()
|
|
|
|