File size: 4,536 Bytes
5d91cef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be91656
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import gradio as gr
from typing import List
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.groq import Groq
from llama_index.core.memory import ChatSummaryMemoryBuffer
import chromadb
from tempfile import TemporaryDirectory
from PyPDF2 import PdfReader
from corretor import corrigir_texto  # <<< Correção importada aqui
import platform

# Embedding wrapper exposing the callable interface used for ChromaDB
class ChromaEmbeddingWrapper:
    """Adapt a LlamaIndex ``HuggingFaceEmbedding`` to a plain callable.

    An instance is handed to ChromaDB as the collection's
    ``embedding_function``: calling it with a list of texts returns one
    embedding vector per text.

    Attributes:
        model: The underlying ``HuggingFaceEmbedding`` instance.
    """

    def __init__(self, model_name: str):
        """Load the Hugging Face embedding model identified by ``model_name``."""
        self.model = HuggingFaceEmbedding(model_name=model_name)

    def __call__(self, input: List[str]) -> List[List[float]]:
        """Embed every text in ``input``.

        Args:
            input: Texts to embed. The parameter keeps the name ``input``
                (shadowing the builtin) because that is the name this wrapper
                has always exposed to ChromaDB -- presumably required by
                ChromaDB's embedding-function protocol; confirm before renaming.

        Returns:
            A list with one embedding (list of floats) per input text, in the
            same order. An empty input yields an empty list.
        """
        texts = list(input)
        if not texts:
            return []
        # LlamaIndex embedding models expose batch embedding as
        # `get_text_embedding_batch`; `embed_documents` is the LangChain name
        # and does not exist on HuggingFaceEmbedding.
        return self.model.get_text_embedding_batch(texts)

# Initialise the embedding model ONCE and share it.
# `embed_model_chroma` is the ChromaDB-facing wrapper; `embed_model` is the very
# same HuggingFaceEmbedding instance, used directly by LlamaIndex. Building two
# separate instances would load the same model weights into memory twice.
embed_model_chroma = ChromaEmbeddingWrapper(model_name='intfloat/multilingual-e5-large')
embed_model = embed_model_chroma.model

# Initialise ChromaDB

# Choose the on-disk location by operating system: a folder next to the
# working directory on Windows, a directory under /tmp everywhere else.
chroma_path = "./chroma_db" if platform.system() == "Windows" else "/tmp/chroma_db"

chroma_client = chromadb.PersistentClient(path=chroma_path)

# Open the collection holding the document chunks, creating it on first use
collection_name = 'documentos_bitdoglab'
chroma_collection = chroma_client.get_or_create_collection(
    embedding_function=embed_model_chroma,
    name=collection_name,
)

# Expose the Chroma collection to LlamaIndex as its vector store
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Initialise the Groq LLM.
# The API key is read exclusively from the GROQ_API_KEY environment variable.
# A credential must never be hard-coded in source; the key that was previously
# embedded here as a fallback should be treated as compromised and revoked.
Groq_api = os.environ.get("GROQ_API_KEY")
if not Groq_api:
    # Fail fast with an actionable message instead of failing later on the
    # first chat request.
    raise RuntimeError(
        "Defina a variável de ambiente GROQ_API_KEY antes de iniciar o chatbot."
    )
llms = Groq(model='llama3-70b-8192', api_key=Groq_api)

# Global state; both stay None until carregar_pdf_inicial() succeeds
document_index = None
chat_engine = None

# One-time loading of the PDF knowledge base
def _indexar_pdf(pdf_path):
    """Extract the text of ``pdf_path``, chunk it and index it in the vector store.

    Returns the ``VectorStoreIndex`` built over the new chunks.
    Raises ``ValueError`` when no text can be extracted from the PDF.
    """
    reader = PdfReader(pdf_path)
    # extract_text() may return None for a page; treat that as empty text.
    text = "".join(page.extract_text() or "" for page in reader.pages)
    if not text.strip():
        raise ValueError(f"nenhum texto extraído de {pdf_path}")

    # SimpleDirectoryReader reads from a directory, so the extracted text is
    # staged as a single .txt file inside a throw-away folder.
    with TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, "temp.txt"), "w", encoding="utf-8") as f:
            f.write(text)
        docs = SimpleDirectoryReader(input_dir=tmpdir).load_data()

    node_parser = SentenceSplitter(chunk_size=1200, chunk_overlap=150)
    nodes = node_parser.get_nodes_from_documents(docs, show_progress=True)

    return VectorStoreIndex(nodes, storage_context=storage_context, embed_model=embed_model)


def carregar_pdf_inicial(pdf_path: str = "BitDogLab_info_v2.pdf") -> None:
    """Prepare the global document index and chat engine.

    If the persistent Chroma collection already contains entries, the index is
    rebuilt on top of the stored vectors instead of embedding the PDF again;
    re-indexing on every start would append duplicate chunks to the same
    collection. To force a fresh index (e.g. after the PDF changed), delete the
    Chroma directory before starting.

    Args:
        pdf_path: Path of the PDF to index when the collection is empty.

    Side effects:
        Sets the module globals ``document_index`` and ``chat_engine``. Any
        exception is caught and printed, in which case ``chat_engine`` keeps
        its previous value.
    """
    global document_index, chat_engine

    try:
        if chroma_collection.count() > 0:
            document_index = VectorStoreIndex.from_vector_store(
                vector_store, embed_model=embed_model
            )
            print("Índice existente reutilizado.")
        else:
            document_index = _indexar_pdf(pdf_path)
            print("PDF carregado com sucesso.")

        memory = ChatSummaryMemoryBuffer(llm=llms, token_limit=256)

        chat_engine = document_index.as_chat_engine(
            chat_mode='context',
            llm=llms,
            memory=memory,
            system_prompt='''Você é especialista na placa BitDog Lab e sua função é ajudar os usuários nas dúvidas e informações sobre a placa e como criar códigos.'''
        )

    except Exception as e:
        # Start-up boundary: report the failure and let the UI come up; the
        # chat handler reports "not ready" while chat_engine is None.
        print(f"Erro ao carregar PDF: {e}")

# Chat handler; the model's reply is post-processed by corrigir_texto
def converse_com_bot(message, chat_history):
    """Send ``message`` to the chat engine and record the exchange.

    Args:
        message: Text typed by the user.
        chat_history: List of ``{"role": ..., "content": ...}`` dicts holding
            the conversation so far, or ``None`` for a new conversation.

    Returns:
        A ``(textbox_value, chat_history)`` tuple. ``textbox_value`` is ``""``
        after a normal turn (clearing the input box) or an error message when
        the chat engine has not been initialised.
    """
    if chat_history is None:
        chat_history = []

    # An empty or whitespace-only submission has nothing to answer: skip the
    # LLM call and leave the conversation untouched.
    if not message or not message.strip():
        return "", chat_history

    if chat_engine is None:
        return "Erro: o bot ainda não está pronto.", chat_history

    response = chat_engine.chat(message)
    # Run the raw answer through corrigir_texto (imported from `corretor`)
    # before showing it.
    resposta_corrigida = corrigir_texto(response.response)

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": resposta_corrigida})

    return "", chat_history

# Reset the conversation
def resetar_chat():
    """Reset the chat engine, when one exists, and return an empty history.

    Returns:
        An empty list, used to clear the chat display.
    """
    engine = chat_engine
    if not engine:
        return []
    engine.reset()
    return []

# Load the PDF once at start-up, before the UI is built and served
carregar_pdf_inicial()

# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("# 🤖  Chatbot BitDog Lab - Seu assistente para esclarecer dúvidas")

    chatbot = gr.Chatbot(label="Conversa", type="messages")
    msg = gr.Textbox(label='Digite a sua mensagem')
    limpar = gr.Button('Limpar')

    # Submitting the textbox runs the chat handler, which returns the new
    # textbox value and the updated message history.
    msg.submit(converse_com_bot, [msg, chatbot], [msg, chatbot])
    # The "Limpar" button resets the chat engine and empties the chat display.
    limpar.click(resetar_chat, None, chatbot, queue=False)

# Launch after the Blocks context has closed, so the layout and event wiring
# are complete before the server starts. Listens on all interfaces, port 7860;
# share=True additionally requests a public Gradio share link.
app.launch(server_name="0.0.0.0", server_port=7860, share=True)