# Hugging Face Space source listing (Space status when captured: Sleeping; file size: 3,005 bytes).
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
from sentence_transformers import SentenceTransformer, util
# Load the models once, at module import time.
model_name = "deepset/roberta-base-squad2"

# Extractive question-answering pipeline; model and tokenizer share one checkpoint.
qa_pipeline = pipeline(
    "question-answering",
    model=model_name,
    tokenizer=model_name,
)

# Client for the chat model.
chat_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Sentence-embedding model used to compare the two answers.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
class MultiModelQA:
    """Answer a question with a QA pipeline and a chat model, and compare the answers.

    The three collaborators are injected through the constructor, so the class
    itself loads no model and performs no I/O until one of its methods is called.
    """

    def __init__(self, qa_pipeline, chat_client, embed_model):
        """Store the collaborators used by the other methods.

        Args:
            qa_pipeline: Callable invoked with a ``{'question', 'context'}``
                dict; its result is indexed with ``'answer'``.
            chat_client: Object exposing ``chat_completion(messages,
                max_tokens=..., stream=True, temperature=..., top_p=...)``
                that yields streamed chunks.
            embed_model: Object exposing ``encode(text, convert_to_tensor=True)``.
        """
        self.qa_pipeline = qa_pipeline
        self.chat_client = chat_client
        self.embed_model = embed_model

    def answer_with_qa_model(self, question, context):
        """Return the ``'answer'`` field the QA pipeline produces for the pair.

        Args:
            question: Question text.
            context: Text handed to the pipeline as the context.
        """
        return self.qa_pipeline({'question': question, 'context': context})['answer']

    def answer_with_chat_model(self, question, system_message, max_tokens, temperature, top_p):
        """Stream a chat completion and return the concatenated text.

        Args:
            question: Sent as the ``user`` message.
            system_message: Sent as the ``system`` message.
            max_tokens: Forwarded unchanged to ``chat_completion``.
            temperature: Forwarded unchanged to ``chat_completion``.
            top_p: Forwarded unchanged to ``chat_completion``.

        Returns:
            The text of all streamed chunks joined in arrival order; an empty
            string when no chunk carried text.
        """
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": question},
        ]
        stream = self.chat_client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        pieces = []
        for chunk in stream:
            # A streamed chunk is not guaranteed to carry text: ``choices`` may
            # be empty or ``delta.content`` may be None. Skip those chunks
            # instead of failing on ``str + None`` or an empty-list index.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token:
                pieces.append(token)
        return "".join(pieces)

    def comparar_semanticamente(self, resp1, resp2):
        """Return the cosine similarity of the two texts' embeddings as a Python number.

        Both texts are encoded with ``embed_model`` (``convert_to_tensor=True``)
        and compared with ``util.cos_sim``; ``.item()`` unwraps the result.
        """
        emb1 = self.embed_model.encode(resp1, convert_to_tensor=True)
        emb2 = self.embed_model.encode(resp2, convert_to_tensor=True)
        return util.cos_sim(emb1, emb2).item()
# Shared instance wired to the module-level models.
multiqa = MultiModelQA(
    qa_pipeline=qa_pipeline,
    chat_client=chat_client,
    embed_model=embed_model,
)
def responder_e_comparar(question, context, system_message, max_tokens, temperature, top_p):
    """Answer the question with both models and report how similar the answers are.

    Args:
        question: Question text; also sent to the chat model as the user message.
        context: Context text given to the QA model.
        system_message: System message for the chat model.
        max_tokens: Forwarded to the chat model.
        temperature: Forwarded to the chat model.
        top_p: Forwarded to the chat model.

    Returns:
        A Markdown string with the QA answer, the chat answer and their cosine
        similarity formatted as a percentage. When the question or the context
        is blank, a short Markdown notice is returned instead and no model is
        called.
    """
    # NOTE(review): the question-answering pipeline is expected to reject an
    # empty question or context with ValueError; validating here turns that
    # into a readable message and avoids a needless chat-model request.
    if not (question or "").strip() or not (context or "").strip():
        return "Informe uma pergunta e um contexto para comparar as respostas."

    qa_resp = multiqa.answer_with_qa_model(question, context)
    chat_resp = multiqa.answer_with_chat_model(question, system_message, max_tokens, temperature, top_p)
    similaridade = multiqa.comparar_semanticamente(qa_resp, chat_resp)
    result = f"""### Resposta do modelo QA:
{qa_resp}
### Resposta do modelo Chat:
{chat_resp}
### Similaridade semântica (coseno): {similaridade:.2%}
"""
    return result
# Gradio interface. The input components are listed in the same order as the
# parameters of responder_e_comparar: question, context, system message,
# max tokens, temperature, top-p.
demo = gr.Interface(
    title="Comparador de Respostas de Modelos",
    description="Compara as respostas de um modelo de QA e um modelo de chat (Zephyr-7B) e calcula a similaridade semântica entre elas.",
    fn=responder_e_comparar,
    inputs=[
        gr.Textbox(label="Pergunta"),
        gr.Textbox(label="Contexto"),
        gr.Textbox(label="Mensagem do sistema", value="Você é um assistente útil."),
        gr.Slider(label="Máximo de tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperatura", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
    ],
    # The function returns Markdown text, so render it as such.
    outputs=gr.Markdown(),
)


# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|