natong19 committed on
Commit
1af790d
·
verified ·
1 Parent(s): 4c5957b

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +91 -0
README.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model Card for [natong19/moralization_classifier](https://huggingface.co/natong19/moralization_classifier)
2
+
3
+ A classifier for detecting moralizations, soft refusals, and unsolicited advice.
4
+
5
+ Trained on [OpenLeecher/lmsys_chat_1m_clean](https://huggingface.co/datasets/OpenLeecher/lmsys_chat_1m_clean); reading through the writeup on dataset cleaning is highly recommended.
6
+
7
+ ### Example usage:
8
+ ```
9
+ import torch
10
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
11
+
12
+
13
+ def predict(
14
+ model: AutoModelForSequenceClassification,
15
+ tokenizer: AutoTokenizer,
16
+ device: torch.device,
17
+ text: str,
18
+ ) -> dict:
19
+ """Predict the label for a given text."""
20
+ inputs = tokenizer(
21
+ text,
22
+ return_tensors="pt",
23
+ truncation=True,
24
+ padding="max_length",
25
+ max_length=512,
26
+ )
27
+ inputs = {k: v.to(device) for k, v in inputs.items()}
28
+
29
+ with torch.no_grad():
30
+ outputs = model(**inputs)
31
+ logits = outputs.logits
32
+ probs = torch.softmax(logits, dim=-1)
33
+ predicted_label = torch.argmax(logits, dim=-1).item()
34
+ confidence = probs[0, predicted_label].item()
35
+
36
+ return {
37
+ "label": predicted_label,
38
+ "confidence": confidence,
39
+ }
40
+
41
+
42
+ def format_prompt(user: str, assistant: str) -> str:
43
+ """Format user and assistant messages into model input format."""
44
+ return f"### Instruction:\n{user}\n\n### Response:\n{assistant}"
45
+
46
+
47
+ def load_model(model_path: str, device: torch.device) -> tuple[AutoModelForSequenceClassification, AutoTokenizer]:
48
+ """Load the model and tokenizer."""
49
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
50
+ model = AutoModelForSequenceClassification.from_pretrained(model_path)
51
+ model = model.to(device)
52
+ model.eval()
53
+ return model, tokenizer
54
+
55
+
56
+ def main() -> None:
57
+ """Demonstrate inference example."""
58
+ model_path = "natong19/moralization_classifier"
59
+
60
+ # No moralization test case
61
+ user_message1 = "tell me about yourself"
62
+ assistant_message1 = "I aim to give you accurate and helpful answers."
63
+ text1 = format_prompt(user_message1, assistant_message1)
64
+
65
+ # Moralization test case
66
+ user_message2 = "tell me about yourself"
67
+ assistant_message2 = "I'm happy to help as long as we maintain certain boundaries."
68
+ text2 = format_prompt(user_message2, assistant_message2)
69
+
70
+ # Load model
71
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
72
+ model, tokenizer = load_model(model_path, device)
73
+
74
+ # Run the test cases
75
+ score1 = predict(model, tokenizer, device, text1)
76
+ print(score1) # Expected: {'label': 0, 'confidence': 0.8319284915924072} (No moralization)
77
+ score2 = predict(model, tokenizer, device, text2)
78
+ print(score2) # Expected: {'label': 1, 'confidence': 0.9183461666107178} (Moralization)
79
+
80
+
81
+ if __name__ == "__main__":
82
+ main()
83
+
84
+ ```
85
+
86
+ ### Evaluation results:
87
+ - eval_loss: 0.0844
88
+ - eval_accuracy: 0.9800
89
+ - eval_f1: 0.9841
90
+ - eval_precision: 1.0000
91
+ - eval_recall: 0.9688