Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline | |
| from sentence_transformers import SentenceTransformer, util | |
# Load the models once at import time so every request reuses them.
# Extractive QA: the same checkpoint provides both model and tokenizer.
model_name = "deepset/roberta-base-squad2"
qa_pipeline = pipeline(
    task="question-answering",
    model=model_name,
    tokenizer=model_name,
)
# Remote chat model, reached through the Hugging Face inference client.
chat_client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
# Sentence-embedding model used to compare the two answers.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
class MultiModelQA:
    """Answer a question with two different models and compare the answers.

    The three collaborators are injected so the class itself holds no
    model-loading logic:

    * ``qa_pipeline`` -- callable taking ``{'question', 'context'}`` and
      returning a mapping with an ``'answer'`` key.
    * ``chat_client`` -- object exposing ``chat_completion(messages, ...)``
      that yields streamed chunks when called with ``stream=True``.
    * ``embed_model`` -- object exposing ``encode(text, convert_to_tensor=True)``.
    """

    def __init__(self, qa_pipeline, chat_client, embed_model):
        self.qa_pipeline = qa_pipeline
        self.chat_client = chat_client
        self.embed_model = embed_model

    def answer_with_qa_model(self, question, context):
        """Return the ``'answer'`` field the QA pipeline produces for the pair."""
        result = self.qa_pipeline({'question': question, 'context': context})
        return result['answer']

    def answer_with_chat_model(self, question, system_message, max_tokens, temperature, top_p):
        """Stream a chat completion and return the concatenated text.

        Args:
            question: Sent as the user message.
            system_message: Sent as the system message.
            max_tokens, temperature, top_p: Forwarded unchanged to
                ``chat_client.chat_completion``.

        Returns:
            The full response text; an empty string if no chunk carried text.
        """
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": question},
        ]
        stream = self.chat_client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        pieces = []
        for chunk in stream:
            # A streamed chunk may carry no choices at all, or a delta whose
            # content is None (e.g. role-only or final chunks). Appending
            # those blindly would raise IndexError / TypeError mid-stream.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token:
                pieces.append(token)
        return "".join(pieces)

    def comparar_semanticamente(self, resp1, resp2):
        """Return the cosine similarity of the two texts' embeddings as a float."""
        emb1 = self.embed_model.encode(resp1, convert_to_tensor=True)
        emb2 = self.embed_model.encode(resp2, convert_to_tensor=True)
        return util.cos_sim(emb1, emb2).item()
# Shared instance wired to the module-level models; used by the Gradio callback.
multiqa = MultiModelQA(
    qa_pipeline=qa_pipeline,
    chat_client=chat_client,
    embed_model=embed_model,
)
def responder_e_comparar(question, context, system_message, max_tokens, temperature, top_p):
    """Answer with both models and report their semantic similarity.

    Args:
        question: The user's question.
        context: Passage the extractive QA model answers from.
        system_message, max_tokens, temperature, top_p: Forwarded to the
            chat model call.

    Returns:
        A Markdown string with the QA answer, the chat answer and the cosine
        similarity formatted as a percentage. If the question or the context
        is blank, a short Markdown notice is returned instead.
    """
    # Text inputs can arrive empty; the QA pipeline is documented to reject an
    # empty question/context with an exception, which would surface as an
    # opaque error in the UI. Answer with a readable message instead.
    if not (question or "").strip() or not (context or "").strip():
        return "Por favor, informe a pergunta e o contexto."

    qa_resp = multiqa.answer_with_qa_model(question, context)
    chat_resp = multiqa.answer_with_chat_model(
        question, system_message, max_tokens, temperature, top_p
    )
    similaridade = multiqa.comparar_semanticamente(qa_resp, chat_resp)

    # Headings must start at column 0 to render as Markdown headings, so the
    # text is assembled explicitly rather than from an indented literal.
    return (
        "### Resposta do modelo QA:\n"
        f"{qa_resp}\n"
        "### Resposta do modelo Chat:\n"
        f"{chat_resp}\n"
        f"### Similaridade semântica (coseno): {similaridade:.2%}\n"
    )
# Gradio interface: the components below map, in order, onto the parameters
# of responder_e_comparar.
input_components = [
    gr.Textbox(label="Pergunta"),
    gr.Textbox(label="Contexto"),
    gr.Textbox(value="Você é um assistente útil.", label="Mensagem do sistema"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Máximo de tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperatura"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
]

demo = gr.Interface(
    fn=responder_e_comparar,
    inputs=input_components,
    outputs=gr.Markdown(),
    title="Comparador de Respostas de Modelos",
    description=(
        "Compara as respostas de um modelo de QA e um modelo de chat (Zephyr-7B) e calcula a similaridade semântica entre elas."
    ),
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()