Spaces:
Sleeping
Sleeping
Commit
·
2097249
1
Parent(s):
a642a97
chore: use binary classifier
Browse files
app.py
CHANGED
|
@@ -19,10 +19,10 @@ chat_model = AutoModelForCausalLM.from_pretrained(chat_model_name, dtype=torch.b
|
|
| 19 |
chat_model.to("cuda")
|
| 20 |
chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_name)
|
| 21 |
|
| 22 |
-
moderator_model_name = "saiteki-kai/QA-DeBERTa-v3-large"
|
| 23 |
moderator_model = AutoModelForSequenceClassification.from_pretrained(moderator_model_name, device_map="cpu")
|
| 24 |
moderator_model.to("cuda")
|
| 25 |
-
moderator_tokenizer = AutoTokenizer.from_pretrained(moderator_model_name)
|
| 26 |
|
| 27 |
def generate_responses(model, tokenizer, prompts):
|
| 28 |
messages = [[{"role": "user", "content": message}] for message in prompts]
|
|
@@ -51,14 +51,13 @@ def classify_pairs(model, tokenizer, prompts, responses):
|
|
| 51 |
prompt + "[SEP]" + response for prompt, response in zip(prompts, responses)
|
| 52 |
]
|
| 53 |
|
| 54 |
-
input_ids = tokenizer(texts, padding=True, truncation=True, max_length=
|
|
|
|
| 55 |
|
| 56 |
with torch.inference_mode():
|
| 57 |
outputs = model(**input_ids)
|
| 58 |
-
scores = torch.
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
unsafety_scores = [sum(s) / len(s) if len(s) > 0 else 0.0 for s in scores]
|
| 62 |
|
| 63 |
return unsafety_scores
|
| 64 |
|
|
|
|
| 19 |
chat_model.to("cuda")
|
| 20 |
chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_name)
|
| 21 |
|
| 22 |
+
moderator_model_name = "saiteki-kai/QA-DeBERTa-v3-large-binary-3"
|
| 23 |
moderator_model = AutoModelForSequenceClassification.from_pretrained(moderator_model_name, device_map="cpu")
|
| 24 |
moderator_model.to("cuda")
|
| 25 |
+
moderator_tokenizer = AutoTokenizer.from_pretrained(moderator_model_name, padding_side="right")
|
| 26 |
|
| 27 |
def generate_responses(model, tokenizer, prompts):
|
| 28 |
messages = [[{"role": "user", "content": message}] for message in prompts]
|
|
|
|
| 51 |
prompt + "[SEP]" + response for prompt, response in zip(prompts, responses)
|
| 52 |
]
|
| 53 |
|
| 54 |
+
input_ids = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
|
| 55 |
+
print(tokenizer.batch_decode(input_ids["input_ids"]))
|
| 56 |
|
| 57 |
with torch.inference_mode():
|
| 58 |
outputs = model(**input_ids)
|
| 59 |
+
scores = torch.softmax(outputs.logits, dim=-1).detach().cpu()
|
| 60 |
+
unsafety_scores = [float(s[1]) for s in scores] # get unsafe axis
|
|
|
|
|
|
|
| 61 |
|
| 62 |
return unsafety_scores
|
| 63 |
|