# privacy-moderation-large / sample-usage.py
# PL-RnD's picture
# feat: Update README and include sample-usage.py script
# cb16128
# raw
# history blame
# 1.22 kB
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np
import pandas as pd
# Sample usage: classify a few example texts with a sequence-classification
# model and print the predicted label for each one.

# Load the tokenizer and the classification model by name.
model_name = "PL-RnD/privacy-moderation-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Example texts to classify.
texts = [
    "Here is my credit card number: 1234-5678-9012-3456",
    "This is a regular message without sensitive information.",
    "For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
    "Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
]

# Tokenize the whole batch at once. Padding is required so that texts of
# different lengths can be stacked into a single tensor; truncation keeps
# over-long inputs within the tokenizer's maximum length.
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

# Run the forward pass without gradient tracking: this is inference only, so
# building the autograd graph would just waste memory.
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

# Pick the highest-scoring class index for each text.
predictions = torch.argmax(logits, dim=-1)

# Convert class indices to human-readable labels.
# NOTE(review): assumes class index 0 is "non-violation" and 1 is "violation";
# confirm against the model's config (id2label) before relying on it.
labels = ["non-violation", "violation"]
# .tolist() yields plain Python ints, which index the label list directly.
predicted_labels = [labels[class_idx] for class_idx in predictions.tolist()]

# Display results as a two-column table (text, predicted label).
df = pd.DataFrame({"text": texts, "label": predicted_labels})
print(df)