Files changed (1) hide show
  1. app.py +42 -59
app.py CHANGED
@@ -3,80 +3,63 @@ from huggingface_hub import InferenceClient
3
  from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
4
  from sentence_transformers import SentenceTransformer, util
5
 
6
- # Carregar modelos
7
  model_name = "deepset/roberta-base-squad2"
8
- qa_pipeline = pipeline('question-answering', model=model_name, tokenizer=model_name)
9
- chat_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
10
- embed_model = SentenceTransformer('all-MiniLM-L6-v2')
11
 
 
12
 
13
- class MultiModelQA:
14
- def __init__(self, qa_pipeline, chat_client, embed_model):
15
- self.qa_pipeline = qa_pipeline
16
- self.chat_client = chat_client
17
- self.embed_model = embed_model
18
 
19
- def answer_with_qa_model(self, question, context):
20
- return self.qa_pipeline({'question': question, 'context': context})['answer']
21
 
22
- def answer_with_chat_model(self, question, system_message, max_tokens, temperature, top_p):
23
- messages = [
24
- {"role": "system", "content": system_message},
25
- {"role": "user", "content": question}
26
- ]
27
- response = ""
28
- for msg in self.chat_client.chat_completion(
29
- messages,
30
- max_tokens=max_tokens,
31
- stream=True,
32
- temperature=temperature,
33
- top_p=top_p,
34
- ):
35
- token = msg.choices[0].delta.content
36
- response += token
37
- return response
38
 
39
- def comparar_semanticamente(self, resp1, resp2):
40
- emb1 = self.embed_model.encode(resp1, convert_to_tensor=True)
41
- emb2 = self.embed_model.encode(resp2, convert_to_tensor=True)
42
- similarity = util.cos_sim(emb1, emb2).item()
43
- return similarity
44
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- multiqa = MultiModelQA(qa_pipeline, chat_client, embed_model)
47
 
 
 
 
 
 
48
 
49
- def responder_e_comparar(question, context, system_message, max_tokens, temperature, top_p):
50
- qa_resp = multiqa.answer_with_qa_model(question, context)
51
- chat_resp = multiqa.answer_with_chat_model(question, system_message, max_tokens, temperature, top_p)
52
- similaridade = multiqa.comparar_semanticamente(qa_resp, chat_resp)
53
 
54
- result = f"""### Resposta do modelo QA:
55
- {qa_resp}
 
 
56
 
57
- ### Resposta do modelo Chat:
58
- {chat_resp}
59
-
60
- ### Similaridade semântica (coseno): {similaridade:.2%}
61
- """
62
- return result
63
 
64
 
65
  # Interface Gradio
66
- demo = gr.Interface(
67
- fn=responder_e_comparar,
68
- inputs=[
69
- gr.Textbox(label="Pergunta"),
70
- gr.Textbox(label="Contexto"),
71
- gr.Textbox(value="Você é um assistente útil.", label="Mensagem do sistema"),
72
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Máximo de tokens"),
73
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperatura"),
74
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
75
- ],
76
- outputs=gr.Markdown(),
77
- title="Comparador de Respostas de Modelos",
78
- description="Compara as respostas de um modelo de QA e um modelo de chat (Zephyr-7B) e calcula a similaridade semântica entre elas."
79
- )
80
 
81
  if __name__ == "__main__":
82
  demo.launch()
 
3
  from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
4
  from sentence_transformers import SentenceTransformer, util
5
 
6
# Models: extractive QA pipeline, hosted chat client, and sentence embedder.

# Extractive question-answering model — answers are spans copied from the context.
model_name = "deepset/roberta-base-squad2"
qa_pipeline = pipeline("question-answering", model=model_name, tokenizer=model_name)

# Remote Inference API client for the Zephyr-7B chat model (no local weights).
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Model for semantic comparison (cosine similarity) between the two answers.
similarity_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 
 
14
 
 
 
15
 
16
def get_qa_pipeline_answer(question, context):
    """Extract an answer span for *question* from *context* with the QA pipeline.

    Returns only the answer text (the pipeline's ``'answer'`` field).
    """
    result = qa_pipeline({"question": question, "context": context})
    return result["answer"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
 
 
 
 
 
19
 
20
def get_zephyr_answer(question, context):
    """Ask the remote Zephyr-7B chat model *question*, grounded in *context*.

    Sends a two-message chat (system + user) through the Inference API and
    returns the assistant's reply with surrounding whitespace stripped.
    """
    user_prompt = f"Context: {context}\n\nQuestion: {question}"
    chat = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_prompt},
    ]
    # Non-streaming completion; sampling parameters are fixed for the demo.
    reply = client.chat_completion(
        chat,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )
    return reply.choices[0].message.content.strip()
32
 
 
33
 
34
def compare_answers(answer1, answer2):
    """Return the cosine similarity of the two answers, rounded to 3 decimals."""
    embeddings = [
        similarity_model.encode(text, convert_to_tensor=True)
        for text in (answer1, answer2)
    ]
    score = util.cos_sim(embeddings[0], embeddings[1]).item()
    return round(score, 3)
39
 
 
 
 
 
40
 
41
def respond(question, context):
    """Run both models on (question, context) and format a combined report.

    Returns one string holding the extractive answer, the chat answer, and
    their semantic-similarity score, separated by blank lines.
    """
    qa_answer = get_qa_pipeline_answer(question, context)
    chat_answer = get_zephyr_answer(question, context)
    score = compare_answers(qa_answer, chat_answer)

    sections = [
        f"📘 Roberta-base-squad2:\n{qa_answer}",
        f"🧠 Zephyr-7b:\n{chat_answer}",
        f"🔍 Similaridade Semântica: **{score}**",
    ]
    return "\n\n".join(sections)
 
51
 
52
 
53
# Gradio UI: two text inputs, one trigger button, one combined-output textbox.
with gr.Blocks() as demo:
    gr.Markdown("# 🔎 Perguntas com dois modelos\nCompare duas respostas e veja a similaridade.")
    with gr.Row():
        question_box = gr.Textbox(label="Pergunta")
        context_box = gr.Textbox(label="Contexto")
    run_button = gr.Button("Obter Respostas")
    result_box = gr.Textbox(label="Respostas e Similaridade")

    # Wire the button to the two-model comparison pipeline.
    run_button.click(respond, inputs=[question_box, context_box], outputs=result_box)
 
 
 
 
 
63
 
64
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()