# NOTE(review): removed extraction artifacts (file-size line, commit hash,
# and a stripped line-number row) — they were not valid Python source.
import os
import shutil
import gradio as gr
from typing import List
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.groq import Groq
from llama_index.core.memory import ChatSummaryMemoryBuffer
import chromadb
from tempfile import TemporaryDirectory
from PyPDF2 import PdfReader
# Embedding wrapper compatible with ChromaDB's EmbeddingFunction protocol
# (a callable taking a list of strings and returning a list of vectors).
class ChromaEmbeddingWrapper:
    """Adapt a llama-index HuggingFaceEmbedding to ChromaDB's callable API."""

    def __init__(self, model_name: str):
        # Loads the HF model once; reused for every embedding call.
        self.model = HuggingFaceEmbedding(model_name=model_name)

    def __call__(self, input: List[str]) -> List[List[float]]:
        # BUG FIX: llama-index's HuggingFaceEmbedding has no
        # `embed_documents` method (that is LangChain's interface);
        # the batch text-embedding method is `get_text_embedding_batch`.
        return self.model.get_text_embedding_batch(input)
# Initialize embedding models: one for llama-index ingestion/query, and a
# ChromaDB-compatible wrapper around the same multilingual model.
embed_model = HuggingFaceEmbedding(model_name='intfloat/multilingual-e5-large')
embed_model_chroma = ChromaEmbeddingWrapper(model_name='intfloat/multilingual-e5-large')
# Initialize ChromaDB with on-disk persistence; the collection is created on
# first run and reused afterwards.
chroma_client = chromadb.PersistentClient(path='./chroma_db')
collection_name = 'documentos_serenatto'
chroma_collection = chroma_client.get_or_create_collection(
    name=collection_name,
    embedding_function=embed_model_chroma
)
# Bridge the Chroma collection into llama-index's storage abstraction so the
# VectorStoreIndex built later persists its vectors there.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Initialize the Groq LLM.
# SECURITY FIX: the API key must come from the environment — the previous
# version hard-coded a live `gsk_...` credential in source (it should be
# revoked). `Groq_api` will be None if GROQ_API_KEY is unset, which makes
# the Groq client fail fast with a clear authentication error.
Groq_api = os.environ.get("GROQ_API_KEY")
llms = Groq(model='llama3-70b-8192', api_key=Groq_api)

# Global state shared between the upload handler and the chat handlers.
document_index = None  # VectorStoreIndex built from the uploaded PDF
chat_engine = None     # context chat engine; stays None until a PDF is processed
# PDF processing: extract text, index it, and build the chat engine.
def process_pdf(file):
    """Extract text from an uploaded PDF and (re)build the global index/engine.

    Args:
        file: Gradio file object; ``file.name`` is the path of the upload.

    Returns:
        A status string for the UI (success message or error description).
    """
    global document_index, chat_engine
    try:
        with TemporaryDirectory() as tmpdir:
            pdf_path = os.path.join(tmpdir, "upload.pdf")
            shutil.copy(file.name, pdf_path)

            # Extract plain text page by page; extract_text() may return
            # None for image-only pages, hence the `or ""` guard.
            reader = PdfReader(pdf_path)
            text = "".join(page.extract_text() or "" for page in reader.pages)

            txt_path = os.path.join(tmpdir, "temp.txt")
            with open(txt_path, "w", encoding="utf-8") as f:
                f.write(text)

            # BUG FIX: load ONLY the extracted text file. The previous
            # `input_dir=tmpdir` also ingested upload.pdf, so every chunk of
            # the document entered the index twice.
            docs = SimpleDirectoryReader(input_files=[txt_path]).load_data()

            node_parser = SentenceSplitter(chunk_size=1200)
            nodes = node_parser.get_nodes_from_documents(docs, show_progress=True)
            document_index = VectorStoreIndex(
                nodes, storage_context=storage_context, embed_model=embed_model
            )

            # Small summary buffer keeps conversation context within limits.
            memory = ChatSummaryMemoryBuffer(llm=llms, token_limit=256)
            chat_engine = document_index.as_chat_engine(
                chat_mode='context',
                llm=llms,
                memory=memory,
                system_prompt='''Voce é especialista em cafes da loja Serenatto, uma loja online que vende graos de cafe
torrados, sua funçao é tirar duvidas de forma simpatica e natural sobre os graos disponiveis.'''
            )
        return "PDF carregado com sucesso! Agora você pode conversar com o bot."
    except Exception as e:
        # Best-effort: surface the failure to the UI instead of crashing Gradio.
        return f"Erro ao processar PDF: {e}"
# Chat handler using the messages-style history format.
def converse_com_bot(message, chat_history):
    """Send one user message to the chat engine and extend the history.

    Returns a pair ``(textbox_value, history)``: the textbox is cleared on
    success; if no PDF has been indexed yet, a prompt string is shown instead.
    """
    global chat_engine
    if chat_engine is None:
        return "Por favor, envie um PDF primeiro.", chat_history

    answer = chat_engine.chat(message)
    history = chat_history if chat_history is not None else []
    history.extend((
        {"role": "user", "content": message},
        {"role": "assistant", "content": answer.response},
    ))
    return "", history
# Conversation reset handler.
def resetar_chat():
    """Wipe the chat engine's memory (if one exists) and clear the chatbot UI."""
    global chat_engine
    engine = chat_engine
    if engine:
        engine.reset()
    # An empty list empties the Chatbot component.
    return []
# Gradio interface with PDF upload, chat, and reset controls.
with gr.Blocks() as app:
    gr.Markdown("# Chatbot da Serenatto - Especialista em Cafés")
    with gr.Row():
        upload = gr.File(label="📄 Envie seu PDF")
        upload_button = gr.Button("Carregar PDF")
    # Read-only status line fed by process_pdf's return value.
    output_status = gr.Textbox(label="Status", interactive=False)
    chatbot = gr.Chatbot(label="Conversa", type="messages")
    msg = gr.Textbox(label='Digite a sua mensagem')
    limpar = gr.Button('Limpar')
    # After a successful (or failed) upload, also reset the conversation so
    # the history never mixes answers from two different documents.
    upload_button.click(process_pdf, inputs=upload, outputs=output_status).then(
        resetar_chat, None, chatbot
    )
    # Enter in the textbox sends the message; the handler clears the textbox.
    msg.submit(converse_com_bot, [msg, chatbot], [msg, chatbot])
    # queue=False so the clear action is immediate, bypassing the event queue.
    limpar.click(resetar_chat, None, chatbot, queue=False)
app.launch(debug=True)