File size: 3,260 Bytes
39b42ed
35cb222
39b42ed
3756719
 
 
97201c8
 
 
3756719
39b42ed
35cb222
 
 
 
39b42ed
 
 
 
 
 
 
 
 
 
 
 
 
97201c8
7a3f8ea
39b42ed
 
7a3f8ea
39b42ed
 
97201c8
 
 
 
 
 
39b42ed
 
97201c8
 
 
 
 
 
 
 
 
 
 
 
39b42ed
0ef31de
97201c8
 
0ef31de
 
97201c8
 
39b42ed
0ef31de
 
 
35cb222
0ef31de
 
 
35cb222
0ef31de
35cb222
7a3f8ea
 
35cb222
39b42ed
 
 
 
 
 
0ef31de
39b42ed
7a3f8ea
39b42ed
 
35cb222
97201c8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
import os
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import HuggingFaceHub

# --- STEP 0: ENSURE THE 'data' FOLDER EXISTS ---
# exist_ok=True creates the directory atomically if missing, avoiding the
# check-then-create (TOCTOU) race of os.path.exists() + os.makedirs().
os.makedirs("data", exist_ok=True)

# --- STEP 1: LOAD THE DOCUMENTS ---
# Every PDF under ./data is loaded as page-level documents.
documents = PyPDFDirectoryLoader("data").load()

# --- STEP 2: SPLIT THE TEXT INTO CHUNKS ---
# 1000-char chunks with 150-char overlap keep retrieved context coherent
# across chunk boundaries.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = splitter.split_documents(documents)

# --- STEP 3: CREATE EMBEDDINGS AND THE VECTOR STORE ---
# Multilingual sentence-transformer: the corpus and queries are in Portuguese.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)
db = FAISS.from_documents(docs, embeddings)

# --- STEP 4: CONFIGURE THE LANGUAGE MODEL (LLM) ---
# Low temperature favours factual, deterministic answers for contract Q&A.
llm = HuggingFaceHub(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.2, "max_new_tokens": 1024},
)

# --- STEP 5: BUILD THE RETRIEVAL CHAIN (NEW API) ---
# System prompt (Portuguese, user-facing): a DETRAN-RJ contract-analysis
# assistant that must answer from the retrieved {context} only.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Você é um assistente especializado em análise de contratos do DETRAN-RJ. "
            "Use o contexto fornecido para responder à pergunta de forma precisa e concisa. "
            "Se você não souber a resposta, diga que não sabe. "
            "Contexto: {context}",
        ),
        ("human", "{input}"),
    ]
)

# Stuff the top-3 retrieved chunks into the prompt and hand them to the LLM.
combine_chain = create_stuff_documents_chain(llm, prompt)
retriever = db.as_retriever(search_kwargs={"k": 3})
qa_chain = create_retrieval_chain(retriever, combine_chain)

# --- PASSO 6: CRIAR A INTERFACE COM O GRADIO ---
def process_query(query):
    """Run *query* through the retrieval chain and return the answer text.

    Acts as the UI error boundary: any failure (network, model, retrieval)
    is caught, logged to stdout, and surfaced to the user as a message
    instead of crashing the Gradio app.
    """
    try:
        result = qa_chain.invoke({"input": query})
        return result.get('answer', 'Não foi possível encontrar uma resposta.')
    except Exception as e:
        print(f"Erro: {e}")
        return f"Ocorreu um erro ao processar sua pergunta: {str(e)}"

# --- STEP 6: BUILD THE GRADIO INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Assistente de Análise do Contrato DETRAN-RJ")
    gr.Markdown("Faça uma pergunta sobre os documentos do contrato e o assistente buscará a resposta nos arquivos.")

    with gr.Row():
        question_box = gr.Textbox(
            label="Qual a sua pergunta?",
            placeholder="Ex: Qual o valor total do contrato?",
            scale=4,
        )
        send_button = gr.Button("Enviar", variant="primary", scale=1)

    answer_box = gr.Markdown(label="Resposta")

    # Both clicking the button and pressing Enter in the textbox run a query.
    for trigger in (send_button.click, question_box.submit):
        trigger(fn=process_query, inputs=question_box, outputs=answer_box)

    # Canned questions the user can click to pre-fill the textbox.
    gr.Examples(
        examples=[
            "Qual o objeto do contrato?",
            "Qual o valor total do contrato?",
            "Quem é o gestor do contrato?",
            "Qual o prazo de vigência do contrato?",
            "Qual a empresa contratada?",
        ],
        inputs=question_box,
    )

demo.launch()