from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import pandas as pd

# Demo: classify texts for privacy-policy violations with a fine-tuned
# sequence-classification model from the Hugging Face Hub.
model_name = "PL-RnD/privacy-moderation-large-4bit"
# Decide device: prefer GPU when available, else fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Move model to device (if it was already loaded with a device_map this is a no-op)
model.to(device)
# Explicitly switch to inference mode (disables dropout etc.) so results
# are deterministic regardless of how the checkpoint was saved.
model.eval()

# Example texts to classify.
texts = [
    "Here is my credit card number: 1234-5678-9012-3456",
    "This is a regular message without sensitive information.",
    "For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
    "Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
]
# Tokenize the batch and move every tensor to the model's device.
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
inputs = {k: v.to(device) for k, v in inputs.items()}

# Run inference without building autograd state.
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits
predictions = torch.argmax(logits, dim=-1)

# Prefer the label names shipped in the model config so the printed labels
# cannot drift from the checkpoint. Fall back to the hard-coded binary
# scheme when the config only carries generic "LABEL_i" placeholders.
id2label = getattr(model.config, "id2label", None) or {}
if id2label and not all(str(v).startswith("LABEL_") for v in id2label.values()):
    labels = [id2label[i] for i in sorted(id2label)]
else:
    labels = ["non-violation", "violation"]

# Ensure predictions are on CPU before indexing into the Python list.
predicted_labels = [labels[pred] for pred in predictions.cpu().tolist()]

# Display results as a two-column table.
df = pd.DataFrame({"text": texts, "label": predicted_labels})
print(df)