"""Classify example texts with the ``PL-RnD/privacy-moderation-large`` model.

The script loads the tokenizer and sequence-classification checkpoint named
by ``model_name``, runs a small batch of sample messages through the model,
and prints a table pairing every text with its predicted label.
"""

from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import numpy as np  # not used in this section; kept in case other code relies on it
import pandas as pd

# Load the model and tokenizer (fetched from the Hugging Face Hub or the
# local cache on first use).
model_name = "PL-RnD/privacy-moderation-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Example texts
texts = [
    "Here is my credit card number: 1234-5678-9012-3456",
    "This is a regular message without sensitive information.",
    "For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
    "Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
]

# Tokenize the whole batch in one call. Padding brings the sequences to a
# common length so they stack into one tensor; truncation is enabled so
# over-long inputs are cut rather than rejected.
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

# Get model predictions. Gradients are not needed for inference, so the
# forward pass runs under no_grad.
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits
# The highest-scoring class index along the last dimension is the prediction.
predictions = torch.argmax(logits, dim=-1)

# Convert predictions to labels; the list position is the class index.
# NOTE(review): assumes class 0 is "non-violation" and class 1 is
# "violation" -- confirm against model.config.id2label.
labels = ["non-violation", "violation"]
# tolist() yields plain Python ints, which index the label list directly.
predicted_labels = [labels[pred] for pred in predictions.tolist()]

# Display results
df = pd.DataFrame({"text": texts, "label": predicted_labels})
print(df)