import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

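# Load the generator model in bfloat16; device_map="auto" lets Accelerate place it on the available GPU.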
model_name = "sapienzanlp/Minerva-7B-instruct-v1.0"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

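# Text-classification pipeline that scores the text it is given (a QA moderation-style classifier).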
classifier = pipeline("text-classification", model="saiteki-kai/QA-DeBERTa-v3-large-threshold-v2")

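# On a Hugging Face ZeroGPU Space, @spaces.GPU allocates a GPU for the call, for at most `duration` seconds.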
@spaces.GPU(duration=120)
def generate(message):
    messages = [
        {"role": "system", "content": "You are a helpful assistant named Zurich, a 14 billion parameter large language model fine-tuned and trained by Ruben Roy. You were trained on the GammaCorpus v2 dataset, a dataset of structured and filtered multi-turn conversations, which was also made by Ruben Roy."}, # Attribution to Qwen is not included to prevent hallucinations.
        {"role": "user", "content": message}
    ]
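    # Render the conversation with the model's chat template and append the generation prompt.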
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # Greedy decoding: temperature is ignored when do_sample=False (and passing
    # temperature=0 only triggers a warning), so it is omitted here.
    generated_ids = model.generate(
        **model_inputs,
        do_sample=False,
        repetition_penalty=1.0,
        max_new_tokens=512,
    )
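    # Drop the prompt tokens so only the newly generated tokens are decoded.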
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Return the response together with the classifier's scores for the templated prompt.
    return response, classifier(text)


# The function returns two values, so the interface needs two output components.
demo = gr.Interface(fn=generate, inputs=gr.Text(), outputs=[gr.Text(label="Response"), gr.JSON(label="Classification")])
demo.launch()
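
# A minimal sketch of calling the running app from another process with gradio_client,
# assuming the default local URL and the default "/predict" endpoint name:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   response, scores = client.predict("Hello!", api_name="/predict")
#   print(response, scores)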