"""Classify texts for privacy violations with a fine-tuned sequence classifier.

Loads the PL-RnD privacy-moderation model, scores a batch of example texts,
and prints a DataFrame mapping each text to its predicted label.
"""

import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_NAME = "PL-RnD/privacy-moderation-large-4bit"

# Class-id -> human-readable label, indexed by the argmax of the logits.
# NOTE(review): assumes class 0 == "non-violation" and 1 == "violation" in
# the model's head — confirm against model.config.id2label.
LABELS = ["non-violation", "violation"]


def load_model(model_name: str = MODEL_NAME):
    """Load tokenizer and model, placing the model on GPU when available.

    Args:
        model_name: Hugging Face model identifier.

    Returns:
        A ``(tokenizer, model, device)`` triple; the model is in eval mode.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    # Move model to device (if it was already loaded with a device_map this
    # is a no-op).
    model.to(device)
    # from_pretrained defaults to eval mode, but be explicit for inference.
    model.eval()
    return tokenizer, model, device


def classify(texts, tokenizer, model, device):
    """Return the predicted label (from LABELS) for each text in *texts*.

    Args:
        texts: List of input strings to score as one padded batch.
        tokenizer: Tokenizer matching the model.
        model: Sequence-classification model already on *device*.
        device: torch.device the inputs should be moved to.

    Returns:
        List of label strings, one per input text, in order.
    """
    # Tokenize the whole batch at once and move the tensors to the device.
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only: disable autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits

    predictions = torch.argmax(logits, dim=-1)
    # Move predictions to CPU before converting to Python ints for indexing.
    return [LABELS[i] for i in predictions.cpu().tolist()]


def main():
    """Run the example batch and print a text/label table."""
    texts = [
        "Here is my credit card number: 1234-5678-9012-3456",
        "This is a regular message without sensitive information.",
        "For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
        "Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
    ]

    tokenizer, model, device = load_model()
    predicted_labels = classify(texts, tokenizer, model, device)

    # Display results
    df = pd.DataFrame({"text": texts, "label": predicted_labels})
    print(df)


if __name__ == "__main__":
    main()