feat: Update README and include sample-usage.py script

cb16128 5 months ago

1.22 kB

	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import torch
	import numpy as np
	import pandas as pd

	# Sample usage: classify a few example texts with the privacy-moderation
	# sequence classifier and print the predicted label for each one.

	# Load the tokenizer and the classification model for the checkpoint.
	model_name = "PL-RnD/privacy-moderation-large"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForSequenceClassification.from_pretrained(model_name)

	# Example texts to classify.
	texts = [
	    "Here is my credit card number: 1234-5678-9012-3456",
	    "This is a regular message without sensitive information.",
	    "For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
	    "Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
	]

	# Tokenize all texts as a single batch of PyTorch tensors. Padding lets
	# texts of different lengths share one batch; truncation is enabled so
	# over-long inputs are cut instead of being passed through at full length.
	inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

	# Inference only: run the forward pass with gradient tracking disabled.
	# The model call must be indented under the `with` statement; otherwise
	# the script fails with an IndentationError before running anything.
	with torch.no_grad():
	    outputs = model(**inputs)

	# Take the index of the highest logit along the last dimension.
	logits = outputs.logits
	predictions = torch.argmax(logits, dim=-1)

	# Convert predicted class indices to human-readable labels.
	# NOTE(review): assumes index 0 is "non-violation" and index 1 is
	# "violation" -- confirm against model.config.id2label.
	labels = ["non-violation", "violation"]
	# .tolist() yields plain Python ints, which index the label list directly.
	predicted_labels = [labels[class_idx] for class_idx in predictions.tolist()]

	# Display each text next to its predicted label.
	df = pd.DataFrame({"text": texts, "label": predicted_labels})
	print(df)