YAML Metadata Warning: empty or missing YAML metadata in the repo card.
Check out the documentation for more information.
💡 Example Inference Code
You can try this PII Masking model directly with the following script:
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch

# ----------------------------
# Load model & tokenizer
# ----------------------------
model_name = "traromal/AIccel_entity_masker_Gemma3_270m"
print(f"Loading model: {model_name}")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    # fp16 only pays off on GPU; stay in fp32 for CPU inference.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
# Bug fix: the original print's f-string was split across two lines by a
# garbled (mojibake) emoji, producing a syntax error — rejoined here.
print(f"Model loaded on {device}")
# ----------------------------
# System prompt
# ----------------------------
# Sent as the "system" turn of every chat request in mask_pii(); the
# fine-tuned model is prompted to emit tagged replacements plus an
# entity report. The literal text is part of runtime behavior — do not
# reword it without re-validating model output.
SYSTEM_PROMPT = """You are a global data privacy expert.
Identify and mask all PII (Personally Identifiable Information) in text.
Replace each with an appropriate tag like [NAME], [AADHAR_NUMBER], [PHONE], etc.
Also list detected entities with their type and sensitivity level."""
def mask_pii(text, stream=False):
    """Mask PII in *text* with the chat model and return its response.

    Args:
        text: Raw input string that may contain PII.
        stream: If True, tokens are printed live to stdout via
            ``TextStreamer`` while generation runs.

    Returns:
        The decoded model turn (masked text plus the entity report),
        which is also printed to stdout.
    """
    messages = [
        {'role': 'system', 'content': SYSTEM_PROMPT},
        {'role': 'user', 'content': f'Mask all sensitive PII in:\n\n"{text}"'}
    ]
    chat = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(chat, return_tensors="pt").to(device)

    # Sampling settings shared by both branches (previously duplicated).
    gen_kwargs = dict(
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    if stream:
        # Bug fix: the original discarded generate()'s return value here,
        # so `outputs` below raised NameError whenever stream=True.
        # generate() still returns the full sequence while streaming.
        streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        outputs = model.generate(**inputs, streamer=streamer, **gen_kwargs)
    else:
        outputs = model.generate(**inputs, **gen_kwargs)

    response = tokenizer.decode(outputs[0], skip_special_tokens=False)
    # Strip the prompt echo: keep only the model turn of the Gemma chat format.
    if "<start_of_turn>model" in response:
        response = response.split("<start_of_turn>model")[-1].replace("<end_of_turn>", "").strip()
    print(response)
    return response
# ----------------------------
# Quick examples
# ----------------------------
# Demo inputs: Indian IDs/phone, medical record, banking details, US PII.
# NOTE(review): some literals contain mojibake from the original paste
# (e.g. the currency symbol) — preserved as-is to keep behavior identical.
examples = [
    "My name is Rajesh Kumar and my Aadhar number is 1234-5678-9012. Contact me at +91-9876543210.",
    "Patient Priya Sharma, Blood Group: B+, UHID: MH2023-12345, DOB: 15/08/1990.",
    "Please transfer βΉ50,000 to account 123456789012 (IFSC: HDFC0001234). UPI ID: amit.kumar@paytm.",
    "John Smith, SSN: 123-45-6789, email: john.smith@gmail.com",
]

for sample in examples:
    print("\nπ§© Original:", sample)
    print("π Masked:")
    mask_pii(sample, stream=True)
    print("=" * 80)
# ----------------------------
# Interactive mode
# ----------------------------
# Keep masking stdin input until the user types an exit keyword.
while (user_text := input("\nπ Enter text to mask (or 'exit'): ").strip()).lower() not in {"exit", "quit", "q"}:
    mask_pii(user_text, stream=True)
- Downloads last month
- -
Inference Providers NEW
This model isn't deployed by any Inference Provider. π Ask for provider support