import gradio as gr
import spaces
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoModelForSequenceClassification,
)

# Chat model used to generate responses.
chat_model_name = "sapienzanlp/Minerva-7B-instruct-v1.0"
chat_model = AutoModelForCausalLM.from_pretrained(
    chat_model_name, torch_dtype=torch.bfloat16, device_map="auto"
)
chat_tokenizer = AutoTokenizer.from_pretrained(chat_model_name)
# Decoder-only models need left padding for batched generation, and the
# tokenizer may ship without a pad token.
chat_tokenizer.padding_side = "left"
if chat_tokenizer.pad_token is None:
    chat_tokenizer.pad_token = chat_tokenizer.eos_token

# Moderation model used to score each prompt/response pair.
moderator_model_name = "saiteki-kai/QA-DeBERTa-v3-large"
moderator_model = AutoModelForSequenceClassification.from_pretrained(
    moderator_model_name, device_map="auto"
)
moderator_tokenizer = AutoTokenizer.from_pretrained(moderator_model_name)

def generate_responses(model, tokenizer, prompts):
    # Build one single-turn conversation per prompt and render it with the
    # model's chat template.
    messages = [[{"role": "user", "content": message}] for message in prompts]
    texts = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    with torch.inference_mode():
        model_inputs = tokenizer(texts, padding=True, return_tensors="pt").to(model.device)
        generated_ids = model.generate(
            **model_inputs,
            do_sample=False,  # greedy decoding, so no temperature is needed
            repetition_penalty=1.0,
            max_new_tokens=512,
        )
        # generate() returns the (left-padded) prompt followed by the new
        # tokens, so strip the full input width from every sequence.
        generated_ids = generated_ids[:, model_inputs.input_ids.shape[1]:]
    responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return responses

def classify_pairs(model, tokenizer, prompts, responses):
    # The moderator scores each (prompt, response) pair as a single sequence.
    texts = [
        prompt + "[SEP]" + response for prompt, response in zip(prompts, responses)
    ]
    with torch.inference_mode():
        inputs = tokenizer(
            texts, padding=True, truncation=True, max_length=512, return_tensors="pt"
        ).to(model.device)
        outputs = model(**inputs)
    # NOTE: assumes a multi-label moderation head; the highest per-category
    # probability is reported as the overall score for each pair.
    return torch.sigmoid(outputs.logits).max(dim=-1).values.tolist()
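
# If per-category scores are needed instead of a single number, the head's
# label names can be read from the model config (a sketch, assuming the
# checkpoint ships an id2label mapping):
#
#   probs = torch.sigmoid(outputs.logits)
#   per_category = [
#       {moderator_model.config.id2label[i]: p.item() for i, p in enumerate(row)}
#       for row in probs
#   ]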

@spaces.GPU()
def generate(prompts: list[str]) -> list[dict[str, str | float]]:
    responses = generate_responses(chat_model, chat_tokenizer, prompts)
    scores = classify_pairs(moderator_model, moderator_tokenizer, prompts, responses)
    return [
        {"prompt": prompt, "response": response, "score": score}
        for prompt, response, score in zip(prompts, responses, scores)
    ]

with gr.Blocks() as demo:
    gr.Markdown("Welcome")
    # Expose generate() as a programmatic API endpoint (no UI components).
    gr.api(generate, api_name="scores", batch=False)

demo.queue()
demo.launch()
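
# A minimal client-side sketch for the endpoint registered above (assumes the
# Space is reachable at <space-url> and that the gradio_client package is
# installed; everything other than the "/scores" endpoint name is illustrative):
#
#   from gradio_client import Client
#
#   client = Client("<space-url>")
#   results = client.predict(["Hello, who are you?"], api_name="/scores")
#   # -> [{"prompt": "...", "response": "...", "score": ...}]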