PL-RnD committed on
Commit
4893e5a
·
1 Parent(s): fe35353

fix: Update script to handle device moves

Browse files
Files changed (2) hide show
  1. README.md +11 -3
  2. sample_script.py +38 -0
README.md CHANGED
@@ -44,13 +44,18 @@ You can use this model for text classification tasks related to privacy moderati
44
  ```python
45
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
46
  import torch
47
- import numpy as np
48
  import pandas as pd
49
 
50
  # Load the model and tokenizer
51
  model_name = "PL-RnD/privacy-moderation-small-4bit"
 
 
 
52
  tokenizer = AutoTokenizer.from_pretrained(model_name)
53
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
 
 
54
  # Example text
55
  texts = [
56
  "Here is my credit card number: 1234-5678-9012-3456",
@@ -58,8 +63,10 @@ texts = [
58
  "For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
59
  "Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
60
  ]
61
- # Tokenize the input
62
  inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
 
 
63
  # Get model predictions
64
  with torch.no_grad():
65
  outputs = model(**inputs)
@@ -68,7 +75,8 @@ logits = outputs.logits
68
  predictions = torch.argmax(logits, dim=-1)
69
  # Convert predictions to labels
70
  labels = ["non-violation", "violation"]
71
- predicted_labels = [labels[pred] for pred in predictions.numpy()]
 
72
  # Display results
73
  df = pd.DataFrame({"text": texts, "label": predicted_labels})
74
  print(df)
 
44
  ```python
45
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
46
  import torch
 
47
  import pandas as pd
48
 
49
  # Load the model and tokenizer
50
  model_name = "PL-RnD/privacy-moderation-small-4bit"
51
+ # Decide device
52
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
53
+
54
  tokenizer = AutoTokenizer.from_pretrained(model_name)
55
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
56
+ # Move model to device (if it was already loaded with a device_map this is a no-op)
57
+ model.to(device)
58
+
59
  # Example text
60
  texts = [
61
  "Here is my credit card number: 1234-5678-9012-3456",
 
63
  "For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
64
  "Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
65
  ]
66
+ # Tokenize the input and move to device
67
  inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
68
+ inputs = {k: v.to(device) for k, v in inputs.items()}
69
+
70
  # Get model predictions
71
  with torch.no_grad():
72
  outputs = model(**inputs)
 
75
  predictions = torch.argmax(logits, dim=-1)
76
  # Convert predictions to labels
77
  labels = ["non-violation", "violation"]
78
+ # Ensure predictions on CPU before converting
79
+ predicted_labels = [labels[pred] for pred in predictions.cpu().tolist()]
80
  # Display results
81
  df = pd.DataFrame({"text": texts, "label": predicted_labels})
82
  print(df)
sample_script.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
2
+ import torch
3
+ import pandas as pd
4
+
5
+ # Load the model and tokenizer
6
+ model_name = "PL-RnD/privacy-moderation-small-4bit"
7
+ # Decide device
8
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9
+
10
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
11
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
12
+ # Move model to device (if it was already loaded with a device_map this is a no-op)
13
+ model.to(device)
14
+
15
+ # Example text
16
+ texts = [
17
+ "Here is my credit card number: 1234-5678-9012-3456",
18
+ "This is a regular message without sensitive information.",
19
+ "For homeowners insurance, select deductibles from $500 to $2,500. Higher deductibles lower premiums.",
20
+ "Solidarity: My enrollment includes my kid's braces at $4,000 total—family strained. Push for orthodontic expansions. Email blast to reps starting now.",
21
+ ]
22
+ # Tokenize the input and move to device
23
+ inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
24
+ inputs = {k: v.to(device) for k, v in inputs.items()}
25
+
26
+ # Get model predictions
27
+ with torch.no_grad():
28
+ outputs = model(**inputs)
29
+
30
+ logits = outputs.logits
31
+ predictions = torch.argmax(logits, dim=-1)
32
+ # Convert predictions to labels
33
+ labels = ["non-violation", "violation"]
34
+ # Ensure predictions on CPU before converting
35
+ predicted_labels = [labels[pred] for pred in predictions.cpu().tolist()]
36
+ # Display results
37
+ df = pd.DataFrame({"text": texts, "label": predicted_labels})
38
+ print(df)