fix: Update script to handle device moves
Browse files- README.md +11 -3
- sample_script.py +38 -0
README.md
CHANGED
|
@@ -44,13 +44,18 @@ You can use this model for text classification tasks related to privacy moderati
|
|
| 44 |
```python
|
| 45 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 46 |
import torch
|
| 47 |
-
import numpy as np
|
| 48 |
import pandas as pd
|
| 49 |
|
| 50 |
# Load the model and tokenizer
|
| 51 |
model_name = "PL-RnD/privacy-moderation-small-4bit"
|
|
|
|
|
|
|
|
|
|
| 52 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 53 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
|
|
|
|
|
|
|
| 54 |
# Example text
|
| 55 |
texts = [
|
| 56 |
"Here is my credit card number: 1234-5678-9012-3456",
|
|
@@ -58,8 +63,10 @@ texts = [
|
|
| 58 |
"For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
|
| 59 |
"Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
|
| 60 |
]
|
| 61 |
-
# Tokenize the input
|
| 62 |
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
|
|
|
|
|
|
|
| 63 |
# Get model predictions
|
| 64 |
with torch.no_grad():
|
| 65 |
outputs = model(**inputs)
|
|
@@ -68,7 +75,8 @@ logits = outputs.logits
|
|
| 68 |
predictions = torch.argmax(logits, dim=-1)
|
| 69 |
# Convert predictions to labels
|
| 70 |
labels = ["non-violation", "violation"]
|
| 71 |
-
|
|
|
|
| 72 |
# Display results
|
| 73 |
df = pd.DataFrame({"text": texts, "label": predicted_labels})
|
| 74 |
print(df)
|
|
|
|
| 44 |
```python
|
| 45 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 46 |
import torch
|
|
|
|
| 47 |
import pandas as pd
|
| 48 |
|
| 49 |
# Load the model and tokenizer
|
| 50 |
model_name = "PL-RnD/privacy-moderation-small-4bit"
|
| 51 |
+
# Decide device
|
| 52 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 53 |
+
|
| 54 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 55 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
| 56 |
+
# Move model to device (skip this step if the model was loaded with a device_map — accelerate already places it, and calling .to() then raises an error)
|
| 57 |
+
model.to(device)
|
| 58 |
+
|
| 59 |
# Example text
|
| 60 |
texts = [
|
| 61 |
"Here is my credit card number: 1234-5678-9012-3456",
|
|
|
|
| 63 |
"For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
|
| 64 |
"Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
|
| 65 |
]
|
| 66 |
+
# Tokenize the input and move to device
|
| 67 |
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
|
| 68 |
+
inputs = {k: v.to(device) for k, v in inputs.items()}
|
| 69 |
+
|
| 70 |
# Get model predictions
|
| 71 |
with torch.no_grad():
|
| 72 |
outputs = model(**inputs)
|
|
|
|
| 75 |
predictions = torch.argmax(logits, dim=-1)
|
| 76 |
# Convert predictions to labels
|
| 77 |
labels = ["non-violation", "violation"]
|
| 78 |
+
# Ensure predictions on CPU before converting
|
| 79 |
+
predicted_labels = [labels[pred] for pred in predictions.cpu().tolist()]
|
| 80 |
# Display results
|
| 81 |
df = pd.DataFrame({"text": texts, "label": predicted_labels})
|
| 82 |
print(df)
|
sample_script.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 2 |
+
import torch
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
# Load the model and tokenizer
|
| 6 |
+
model_name = "PL-RnD/privacy-moderation-small-4bit"
|
| 7 |
+
# Decide device
|
| 8 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 9 |
+
|
| 10 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 11 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
| 12 |
+
# Move model to device (if it was already loaded with a device_map this is a no-op)
|
| 13 |
+
model.to(device)
|
| 14 |
+
|
| 15 |
+
# Example text
|
| 16 |
+
texts = [
|
| 17 |
+
"Here is my credit card number: 1234-5678-9012-3456",
|
| 18 |
+
"This is a regular message without sensitive information.",
|
| 19 |
+
"For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
|
| 20 |
+
"Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
|
| 21 |
+
]
|
| 22 |
+
# Tokenize the input and move to device
|
| 23 |
+
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
|
| 24 |
+
inputs = {k: v.to(device) for k, v in inputs.items()}
|
| 25 |
+
|
| 26 |
+
# Get model predictions
|
| 27 |
+
with torch.no_grad():
|
| 28 |
+
outputs = model(**inputs)
|
| 29 |
+
|
| 30 |
+
logits = outputs.logits
|
| 31 |
+
predictions = torch.argmax(logits, dim=-1)
|
| 32 |
+
# Convert predictions to labels
|
| 33 |
+
labels = ["non-violation", "violation"]
|
| 34 |
+
# Ensure predictions on CPU before converting
|
| 35 |
+
predicted_labels = [labels[pred] for pred in predictions.cpu().tolist()]
|
| 36 |
+
# Display results
|
| 37 |
+
df = pd.DataFrame({"text": texts, "label": predicted_labels})
|
| 38 |
+
print(df)
|