import gradio as gr
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Load models
model_name = "deepset/roberta-base-squad2"
qa_pipeline = pipeline("question-answering", model=model_name, tokenizer=model_name)
chat_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
embed_model = SentenceTransformer("all-MiniLM-L6-v2")


class MultiModelQA:
    def __init__(self, qa_pipeline, chat_client, embed_model):
        self.qa_pipeline = qa_pipeline
        self.chat_client = chat_client
        self.embed_model = embed_model

    def answer_with_qa_model(self, question, context):
        # Extractive QA: the answer is a span copied out of the given context.
        return self.qa_pipeline({"question": question, "context": context})["answer"]

    def answer_with_chat_model(self, question, system_message, max_tokens, temperature, top_p):
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": question},
        ]
        response = ""
        # Stream the chat completion and accumulate the generated tokens.
        for msg in self.chat_client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = msg.choices[0].delta.content
            if token:  # the final stream chunk may carry no content
                response += token
        return response

    def comparar_semanticamente(self, resp1, resp2):
        # Cosine similarity between the sentence embeddings of the two answers.
        emb1 = self.embed_model.encode(resp1, convert_to_tensor=True)
        emb2 = self.embed_model.encode(resp2, convert_to_tensor=True)
        similarity = util.cos_sim(emb1, emb2).item()
        return similarity


multiqa = MultiModelQA(qa_pipeline, chat_client, embed_model)


def responder_e_comparar(question, context, system_message, max_tokens, temperature, top_p):
    qa_resp = multiqa.answer_with_qa_model(question, context)
    chat_resp = multiqa.answer_with_chat_model(question, system_message, max_tokens, temperature, top_p)
    similaridade = multiqa.comparar_semanticamente(qa_resp, chat_resp)
    result = f"""### QA model answer:
{qa_resp}

### Chat model answer:
{chat_resp}

### Semantic similarity (cosine):
{similaridade:.2%}
"""
    return result


# Gradio interface
demo = gr.Interface(
    fn=responder_e_comparar,
    inputs=[
        gr.Textbox(label="Question"),
        gr.Textbox(label="Context"),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    outputs=gr.Markdown(),
    title="Model Answer Comparator",
    description="Compares the answers of an extractive QA model and a chat model (Zephyr-7B) and computes the semantic similarity between them.",
)

if __name__ == "__main__":
    demo.launch()
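
# Example (a minimal sketch, not part of the app): calling the comparator
# directly, without launching the Gradio UI. Assumes the models above loaded
# successfully and the Zephyr endpoint is reachable via the Inference API.
# The sample question/context below are illustrative, not from the original app.
#
# resultado = responder_e_comparar(
#     question="Where does Sarah live?",
#     context="My name is Sarah and I live in London.",
#     system_message="You are a helpful assistant.",
#     max_tokens=128,
#     temperature=0.7,
#     top_p=0.95,
# )
# print(resultado)  # Markdown with both answers and their cosine similarity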