import torch
import spaces
import logging
import gradio as gr
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoModelForSequenceClassification,
)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Chat model: generates a response for every submitted prompt.
# ZeroGPU pattern: load on CPU, then move to CUDA (the `spaces` package
# attaches the GPU when a decorated function runs).
chat_model_name = "sapienzanlp/Minerva-7B-instruct-v1.0"
chat_model = AutoModelForCausalLM.from_pretrained(chat_model_name, dtype=torch.bfloat16, device_map="cpu")
chat_model.to("cuda")
# Decoder-only models need left padding for batched generation.
chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_name, padding_side="left")

# Moderator model: binary classifier that scores each (prompt, response) pair.
moderator_model_name = "saiteki-kai/QA-DeBERTa-v3-large-binary-3"
moderator_model = AutoModelForSequenceClassification.from_pretrained(moderator_model_name, device_map="cpu")
moderator_model.to("cuda")
moderator_tokenizer = AutoTokenizer.from_pretrained(moderator_model_name, padding_side="right")

def generate_responses(model, tokenizer, prompts):
    # Wrap each prompt in the chat format the instruct model expects.
    messages = [[{"role": "user", "content": message}] for message in prompts]
    texts = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        generated_ids = model.generate(
            **model_inputs,
            do_sample=False,  # greedy decoding (temperature would be ignored)
            repetition_penalty=1.1,
            max_new_tokens=512,
        )
    # With left padding every row is [pad..., prompt, generated], so the new
    # tokens start right after the common padded prompt length.
    prompt_length = model_inputs["input_ids"].shape[1]
    generated_ids = generated_ids[:, prompt_length:]
    responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return responses
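
# Hypothetical sanity check (assumes the models above are loaded):
#   generate_responses(chat_model, chat_tokenizer, ["What is the capital of Italy?"])
# returns a one-element list with the model's decoded answer.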

def classify_pairs(model, tokenizer, prompts, responses):
    # The moderator scores each pair encoded as a single "prompt[SEP]response" sequence.
    texts = [
        prompt + "[SEP]" + response for prompt, response in zip(prompts, responses)
    ]
    inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt").to(model.device)
    logger.debug(tokenizer.batch_decode(inputs["input_ids"]))
    with torch.inference_mode():
        outputs = model(**inputs)
    scores = torch.softmax(outputs.logits, dim=-1).detach().cpu()
    unsafety_scores = [float(s[1]) for s in scores]  # probability of the "unsafe" class (index 1)
    return unsafety_scores
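
# For illustration only (hypothetical output): a benign pair such as
#   classify_pairs(moderator_model, moderator_tokenizer, ["Hi"], ["Hello!"])
# yields a list like [0.01], one unsafety probability per pair.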

@spaces.GPU(duration=120)
def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, str | float]]:
    # Runs with a GPU attached for at most 120 seconds per call (ZeroGPU).
    logger.info("Processing submission with %d prompts", len(submission))
    ids = [s["id"] for s in submission]
    prompts = [s["prompt"] for s in submission]
    responses = generate_responses(chat_model, chat_tokenizer, prompts)
    logger.info(responses)
    scores = classify_pairs(moderator_model, moderator_tokenizer, prompts, responses)
    logger.info(scores)
    outputs = [
        {"id": id, "prompt": prompt, "response": response, "score": score, "model": chat_model_name, "team_id": team_id}
        for id, prompt, response, score in zip(ids, prompts, responses, scores)
    ]
    return outputs

with gr.Blocks() as demo:
    gr.Markdown("Welcome")
    # Expose `generate` as an API endpoint (/scores) rather than a UI component.
    gr.api(generate, api_name="scores", batch=False)

demo.launch()
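
# A client could call the endpoint roughly like this (sketch; SPACE_ID is a
# hypothetical placeholder for this Space's "user/space" identifier):
#
#   from gradio_client import Client
#   client = Client("SPACE_ID")
#   result = client.predict(
#       submission=[{"id": "0", "prompt": "Hello!"}],
#       team_id="my-team",
#       api_name="/scores",
#   )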