import os
import shutil
import gradio as gr
from typing import List
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.groq import Groq
from llama_index.core.memory import ChatSummaryMemoryBuffer
import chromadb
from tempfile import TemporaryDirectory
from PyPDF2 import PdfReader
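# Third-party packages assumed from the imports above: gradio, chromadb, PyPDF2,
# llama-index-core, llama-index-embeddings-huggingface,
# llama-index-vector-stores-chroma and llama-index-llms-groq.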

# Embedding wrapper compatible with ChromaDB's EmbeddingFunction interface
class ChromaEmbeddingWrapper:
    def __init__(self, model_name: str):
        self.model = HuggingFaceEmbedding(model_name=model_name)

    def __call__(self, input: List[str]) -> List[List[float]]:
        # HuggingFaceEmbedding has no embed_documents; batch-embed via the LlamaIndex API
        return self.model.get_text_embedding_batch(input)

# Initialize the embedding models (one used by LlamaIndex, one wrapped for ChromaDB)
embed_model = HuggingFaceEmbedding(model_name='intfloat/multilingual-e5-large')
embed_model_chroma = ChromaEmbeddingWrapper(model_name='intfloat/multilingual-e5-large')

# Initialize the persistent ChromaDB client and collection
chroma_client = chromadb.PersistentClient(path='./chroma_db')
collection_name = 'documentos_serenatto'
chroma_collection = chroma_client.get_or_create_collection(
    name=collection_name,
    embedding_function=embed_model_chroma
)

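# Expose the Chroma collection to LlamaIndex; the StorageContext tells the index
# where to persist and query embeddings.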
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Initialize the Groq LLM; the API key must be provided via the GROQ_API_KEY environment variable
groq_api_key = os.environ.get("GROQ_API_KEY")
llms = Groq(model='llama3-70b-8192', api_key=groq_api_key)

# Global state shared between the upload and chat callbacks
document_index = None
chat_engine = None

# PDF processing: extract the text, index it, and build the chat engine
def process_pdf(file):
    global document_index, chat_engine

    try:
        with TemporaryDirectory() as tmpdir:
            pdf_path = os.path.join(tmpdir, "upload.pdf")
            shutil.copy(file.name, pdf_path)

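            # Extract the raw text page by page; extract_text() may return None
            # for image-only pages, hence the fallback to an empty string.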
            text = ""
            reader = PdfReader(pdf_path)
            for page in reader.pages:
                text += page.extract_text() or ""

            # Write the extracted text to a .txt file and load only that file,
            # so the original PDF is not read and indexed a second time
            txt_path = os.path.join(tmpdir, "temp.txt")
            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(text)

            documentos = SimpleDirectoryReader(input_files=[txt_path])
            docs = documentos.load_data()

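            # Split the loaded documents into chunks (chunk_size is counted in tokens).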
            node_parser = SentenceSplitter(chunk_size=1200)
            nodes = node_parser.get_nodes_from_documents(docs, show_progress=True)

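            # Build the vector index on top of the Chroma-backed storage context.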
            document_index = VectorStoreIndex(nodes, storage_context=storage_context, embed_model=embed_model)

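            # Conversation memory that summarizes older turns once the history
            # exceeds the token limit.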
            memory = ChatSummaryMemoryBuffer.from_defaults(llm=llms, token_limit=256)

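            # 'context' chat mode retrieves the most relevant chunks for each message
            # and injects them into the model's context alongside the system prompt.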
            chat_engine = document_index.as_chat_engine(
                chat_mode='context',
                llm=llms,
                memory=memory,
                system_prompt='''Você é especialista em cafés da loja Serenatto, uma loja online que vende grãos de café
                torrados. Sua função é tirar dúvidas de forma simpática e natural sobre os grãos disponíveis.'''
            )

            return "PDF carregado com sucesso! Agora você pode conversar com o bot."

    except Exception as e:
        return f"Erro ao processar PDF: {e}"

# Chat callback using Gradio's "messages"-style history
def converse_com_bot(message, chat_history):
    global chat_engine

    if chat_engine is None:
        return "Por favor, envie um PDF primeiro.", chat_history

    response = chat_engine.chat(message)

    if chat_history is None:
        chat_history = []

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": response.response})

    return "", chat_history

# Reset the conversation (engine memory and UI history)
def resetar_chat():
    global chat_engine
    if chat_engine:
        chat_engine.reset()
    return []

# Gradio interface with PDF upload
with gr.Blocks() as app:
    gr.Markdown("# Chatbot da Serenatto - Especialista em Cafés")

    with gr.Row():
        upload = gr.File(label="📄 Envie seu PDF")
        upload_button = gr.Button("Carregar PDF")

    output_status = gr.Textbox(label="Status", interactive=False)

    chatbot = gr.Chatbot(label="Conversa", type="messages")
    msg = gr.Textbox(label='Digite a sua mensagem')
    limpar = gr.Button('Limpar')

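    # Event wiring: loading a PDF also resets any previous conversation;
    # submitting a message sends it to the chat engine and clears the textbox.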
    upload_button.click(process_pdf, inputs=upload, outputs=output_status).then(
        resetar_chat, None, chatbot
    )
    msg.submit(converse_com_bot, [msg, chatbot], [msg, chatbot])
    limpar.click(resetar_chat, None, chatbot, queue=False)

app.launch(debug=True)