fix: Update script to handle device moves
Browse files- README.md +11 -3
- sample_script.py +38 -0
README.md
CHANGED
|
@@ -44,13 +44,18 @@ You can use this model for text classification tasks related to privacy moderati
|
|
| 44 |
```python
|
| 45 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 46 |
import torch
|
| 47 |
-
import numpy as np
|
| 48 |
import pandas as pd
|
| 49 |
|
| 50 |
# Load the model and tokenizer
|
| 51 |
model_name = "PL-RnD/privacy-moderation-small-4bit"
|
|
|
|
|
|
|
|
|
|
| 52 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 53 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
|
|
|
|
|
|
|
| 54 |
# Example text
|
| 55 |
texts = [
|
| 56 |
"Here is my credit card number: 1234-5678-9012-3456",
|
|
@@ -58,8 +63,10 @@ texts = [
|
|
| 58 |
"For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
|
| 59 |
"Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
|
| 60 |
]
|
| 61 |
-
# Tokenize the input
|
| 62 |
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
|
|
|
|
|
|
|
| 63 |
# Get model predictions
|
| 64 |
with torch.no_grad():
|
| 65 |
outputs = model(**inputs)
|
|
@@ -68,7 +75,8 @@ logits = outputs.logits
|
|
| 68 |
predictions = torch.argmax(logits, dim=-1)
|
| 69 |
# Convert predictions to labels
|
| 70 |
labels = ["non-violation", "violation"]
|
| 71 |
-
|
|
|
|
| 72 |
# Display results
|
| 73 |
df = pd.DataFrame({"text": texts, "label": predicted_labels})
|
| 74 |
print(df)
|
|
|
|
| 44 |
```python
|
| 45 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 46 |
import torch
|
|
|
|
| 47 |
import pandas as pd
|
| 48 |
|
| 49 |
# Load the model and tokenizer
|
| 50 |
model_name = "PL-RnD/privacy-moderation-small-4bit"
|
| 51 |
+
# Decide device
|
| 52 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 53 |
+
|
| 54 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 55 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
| 56 |
+
# Move model to device (skip this step if the model was loaded with a device_map — accelerate already places it, and calling .to() then raises an error)
|
| 57 |
+
model.to(device)
|
| 58 |
+
|
| 59 |
# Example text
|
| 60 |
texts = [
|
| 61 |
"Here is my credit card number: 1234-5678-9012-3456",
|
|
|
|
| 63 |
"For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
|
| 64 |
"Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
|
| 65 |
]
|
| 66 |
+
# Tokenize the input and move to device
|
| 67 |
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
|
| 68 |
+
inputs = {k: v.to(device) for k, v in inputs.items()}
|
| 69 |
+
|
| 70 |
# Get model predictions
|
| 71 |
with torch.no_grad():
|
| 72 |
outputs = model(**inputs)
|
|
|
|
| 75 |
predictions = torch.argmax(logits, dim=-1)
|
| 76 |
# Convert predictions to labels
|
| 77 |
labels = ["non-violation", "violation"]
|
| 78 |
+
# Ensure predictions on CPU before converting
|
| 79 |
+
predicted_labels = [labels[pred] for pred in predictions.cpu().tolist()]
|
| 80 |
# Display results
|
| 81 |
df = pd.DataFrame({"text": texts, "label": predicted_labels})
|
| 82 |
print(df)
|
sample_script.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 2 |
+
import torch
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
# Load the model and tokenizer
|
| 6 |
+
model_name = "PL-RnD/privacy-moderation-small-4bit"
|
| 7 |
+
# Decide device
|
| 8 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 9 |
+
|
| 10 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 11 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
| 12 |
+
# Move model to device (if it was already loaded with a device_map this is a no-op)
|
| 13 |
+
model.to(device)
|
| 14 |
+
|
| 15 |
+
# Example text
|
| 16 |
+
texts = [
|
| 17 |
+
"Here is my credit card number: 1234-5678-9012-3456",
|
| 18 |
+
"This is a regular message without sensitive information.",
|
| 19 |
+
"For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
|
| 20 |
+
"Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
|
| 21 |
+
]
|
| 22 |
+
# Tokenize the input and move to device
|
| 23 |
+
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
|
| 24 |
+
inputs = {k: v.to(device) for k, v in inputs.items()}
|
| 25 |
+
|
| 26 |
+
# Get model predictions
|
| 27 |
+
with torch.no_grad():
|
| 28 |
+
outputs = model(**inputs)
|
| 29 |
+
|
| 30 |
+
logits = outputs.logits
|
| 31 |
+
predictions = torch.argmax(logits, dim=-1)
|
| 32 |
+
# Convert predictions to labels
|
| 33 |
+
labels = ["non-violation", "violation"]
|
| 34 |
+
# Ensure predictions on CPU before converting
|
| 35 |
+
predicted_labels = [labels[pred] for pred in predictions.cpu().tolist()]
|
| 36 |
+
# Display results
|
| 37 |
+
df = pd.DataFrame({"text": texts, "label": predicted_labels})
|
| 38 |
+
print(df)
|