Spaces:
Sleeping
Sleeping
| import os | |
| import glob | |
| import logging | |
| from dotenv import load_dotenv | |
| import gradio as gr | |
| from langchain_community.document_loaders import DirectoryLoader, TextLoader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain_openai import OpenAIEmbeddings, ChatOpenAI | |
| from langchain_chroma import Chroma | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain.prompts import PromptTemplate | |
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables from a local .env file (python-dotenv)
load_dotenv()

# Access API key
# NOTE(review): "OPEN_AI_APIKEY" is a non-standard name (the usual convention
# is OPENAI_API_KEY). It works here only because the key is passed explicitly
# to OpenAIEmbeddings/ChatOpenAI below — confirm the .env uses this exact name.
api_key = os.getenv("OPEN_AI_APIKEY")

MODEL = "gpt-4o-mini"  # chat model used to answer questions
db_name = "vector_db"  # on-disk directory for the persisted Chroma store
# System prompt defining Gaia's personality and role.
# Written in Portuguese — presumably because the assistant serves Bio Mundo's
# Portuguese-speaking staff. This text is sent to the model verbatim; do not
# translate or reformat it.
system_prompt = """
Você é Gaia, assistente virtual da Bio Mundo, com 26 anos. Sua personalidade é amigável, carismática e traz um toque de humor. Você ajuda funcionários e parceiros a:
- Gerir pessoas;
- Estimular um clima organizacional positivo;
- Aplicar valores éticos da empresa.
Quando o tema envolva ética, analise situações ou guie comportamentos de acordo com os seguintes princípios:
- Ética;
- Integridade;
- Conformidade com as leis e padrões de Bio Mundo;
- Foco na satisfação e proteção da marca.
Se surgir dúvida sobre condutas ou regras, explique os princípios de forma clara e educativa.
"""
# Document loading: discover knowledge-base content relative to the working
# directory — subdirectories (each becomes its own doc_type) and loose .txt
# files at the top level (tagged "root" below).
folders = [f for f in glob.glob("knowledge-base/*") if os.path.isdir(f)]
files = [f for f in glob.glob("knowledge-base/*.txt") if os.path.isfile(f)]
def add_metadata(doc, doc_type):
    """Stamp a document with the category it was loaded from.

    The document's metadata dict is updated in place with a ``doc_type``
    key; the very same document object is handed back so the call composes
    neatly inside list comprehensions.
    """
    doc.metadata.update({"doc_type": doc_type})
    return doc
# Force UTF-8 so accented Portuguese text loads correctly on any platform.
text_loader_kwargs = {
    "encoding": "utf-8"
}

documents = []

# Load .txt files sitting directly in knowledge-base/; tag them "root".
for file in files:
    loader = TextLoader(file, **text_loader_kwargs)
    docs = loader.load()
    documents.extend([add_metadata(doc, "root") for doc in docs])

# Load .txt files from subfolders (if any); each document is tagged with the
# name of the folder it came from, recursing via the "**/*.txt" glob.
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.txt", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    docs = loader.load()
    documents.extend([add_metadata(doc, doc_type) for doc in docs])
# Split documents into overlapping chunks for embedding.
# NOTE(review): chunk_size=100 characters is unusually small for RAG (typical
# values are 500-1000, and CharacterTextSplitter warns when a separator-bound
# chunk exceeds chunk_size) — confirm this is intentional.
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)

# Fail fast with a clear message before reporting stats on an empty corpus
# (previously this check ran only after the prints below).
if not chunks:
    raise ValueError("Nenhum documento encontrado em 'knowledge-base'. Adicione arquivos .txt para continuar.")

print(f"Total chunks: {len(chunks)}")
# Set comprehension instead of set([...]) — same result, no throwaway list.
doc_types = {doc.metadata["doc_type"] for doc in chunks}
print(f"Documents types found: {doc_types}")
# Embedding model used to vectorize the chunks. The key is passed explicitly
# because it is stored under a non-standard env var name (see top of file).
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=api_key)

# Log context before the (potentially slow) vectorstore build.
logger.info("Starting vectorstore creation...")
logger.info(f"Number of documents: {len(chunks)}")
logger.info(f"Working directory: {os.getcwd()}")
def create_vectorstore(chunks, embeddings, db_name):
    """Build a Chroma vector store from document chunks.

    Any existing collection under ``db_name`` is deleted first so the store
    always reflects the current knowledge base. If persisting to disk fails
    for any reason, a non-persistent in-memory store is returned instead of
    crashing the app.

    Args:
        chunks: list of LangChain Documents to embed and index.
        embeddings: embedding function handed to Chroma.
        db_name: directory path used as the Chroma persistence location.

    Returns:
        A Chroma vector store (persistent when possible, else in-memory).
    """
    try:
        if os.path.exists(db_name):
            logger.info("Deleting existing collection in %s", db_name)
            Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

        logger.info("Creating new vectorstore...")
        vectorstore = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory=db_name
        )
        logger.info("Created vectorstore with %d documents", len(chunks))
        return vectorstore
    except Exception:
        # Deliberate best-effort fallback. logger.exception records the full
        # traceback (the previous logger.error(f"...{e}") dropped it), then we
        # retry without persistence so the app can still start.
        logger.exception("Error creating vectorstore")
        logger.info("Falling back to in-memory vectorstore")
        return Chroma.from_documents(documents=chunks, embedding=embeddings)
# Build (or rebuild) the vector store from the freshly split chunks.
vectorstore = create_vectorstore(chunks, embeddings, db_name)

# Sanity report: vector count and embedding dimensionality.
# NOTE(review): `_collection` is a private langchain-chroma attribute and may
# break on upgrade — confirm no public accessor exists in the pinned version.
collection = vectorstore._collection
count = collection.count()
sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")
# Create a new chat LLM (model set by MODEL above); temperature 0.7 allows
# some creative phrasing in Gaia's persona.
llm = ChatOpenAI(temperature=0.7, model_name=MODEL, api_key=api_key)

# Conversation memory so follow-up questions keep their context.
# NOTE(review): ConversationBufferMemory is deprecated in recent LangChain
# releases — confirm against the pinned langchain version before upgrading.
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
    output_key='answer'  # the chain returns multiple keys; store only the answer
)

# The retriever is an abstraction over the vector store used during RAG;
# k=3 chunks are fetched per question.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
# Prompt used by the "condense" step of ConversationalRetrievalChain: rewrite
# a follow-up question into a standalone one using the chat history.
condense_question_prompt = PromptTemplate.from_template("""
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:""")

# Answering prompt: Gaia's persona plus the retrieved context. Note the
# doubled braces — this is an f-string, so {{context}}/{{question}} survive as
# the literal {context}/{question} placeholders PromptTemplate expects, while
# {system_prompt} is interpolated right here.
qa_prompt = PromptTemplate.from_template(f"""
{system_prompt}
Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{{context}}
Question: {{question}}
Helpful Answer:""")
# Putting it together: conversational RAG chain combining the LLM
# (gpt-4o-mini via MODEL above — the old "GPT 3.5" note was stale), the
# vector-store retriever, and the buffer memory.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    condense_question_prompt=condense_question_prompt,  # question-rewriting step
    combine_docs_chain_kwargs={"prompt": qa_prompt},    # answer-generation step
    return_source_documents=True,  # expose retrieved sources in the result dict
    verbose=True
)
def chat(question, history):
    """Gradio ChatInterface callback: answer *question* via the RAG chain.

    Args:
        question: the user's latest message.
        history: list of prior messages supplied by gr.ChatInterface
                 (empty on the very first turn).

    Returns:
        The assistant's reply as a string. On the first turn the reply is
        prefixed with Gaia's welcome text.
    """
    # Always run the chain — the previous version returned ONLY the canned
    # welcome on the first turn, silently discarding the user's question.
    result = conversation_chain.invoke({
        "question": question
    })
    answer = result["answer"]

    # First interaction: greet AND answer.
    if not history:
        welcome_message = """Olá! Meu nome é Gaia, sou sua parceira na Bio Mundo. Estou aqui para ajudar com o que você precisar! Meus principais tópicos são:
- Gestão de pessoas;
- Clima organizacional positivo;
- Valores éticos da cultura Bio Mundo.
Posso te ajudar com algo específico hoje? 😊"""
        return f"{welcome_message}\n\n{answer}"

    return answer
# Set up the Gradio interface: fullscreen layout with custom CSS, a chatbot
# pane, and a ChatInterface wired to chat() above.
with gr.Blocks(css="""
/* Make the interface full screen */
.gradio-container {
    position: absolute;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    width: 100%;
    height: 100%;
    max-width: 100%;
    max-height: 100%;
    margin: 0;
    padding: 0;
    overflow: hidden;
}
/* Remove custom styling for bot messages to use default colors */
.message.bot {
    /* Reset any custom styling */
}
/* Keep the user message styling */
.message.user {
    background-color: #4CAF50 !important;
    color: white !important;
}
/* Make the chatbot container taller */
#chatbot {
    height: calc(100vh - 200px) !important;
}
/* Center the title */
.title-container {
    text-align: center;
    margin-bottom: 1rem;
}
/* Make the title text green */
.title-text {
    color: #2E7D32;
    font-size: 2.5rem;
}
""") as demo:
    # Custom header placeholder.
    # NOTE(review): this gr.HTML renders nothing — the string is empty, and the
    # .title-container/.title-text CSS above is consequently unused. Confirm
    # whether a header was meant to go here.
    gr.HTML("""
    """)

    # Chatbot pane; second avatar image is Gaia's picture for bot messages.
    # Path "assets/Gaia.jpeg" is resolved relative to the working directory.
    chatbot = gr.Chatbot(
        placeholder="<strong>Sou Gaia, parceira da Biomundo!</strong><br>Pergunte-me qualquer coisa!",
        avatar_images=(None, "assets/Gaia.jpeg"),
        type="messages",
        elem_id="chatbot",
        height=700  # Increased height
    )

    # Chat interface wired to chat(); examples give users one-click prompts.
    chat_interface = gr.ChatInterface(
        fn=chat,
        chatbot=chatbot,
        examples=[
            "Como posso melhorar o clima organizacional na minha unidade?",
            "Qual é a política de não concorrência da Bio Mundo?",
            "Como devo reconhecer o bom trabalho da minha equipe?",
            "Um funcionário está desrespeitando outro. O que devo fazer?",
            "Quais são as métricas avaliadas no Programa Comunidade Bio+?"
        ]
    )
# Script entry point: launch the Gradio app.
# NOTE(review): share=True requests a temporary public *.gradio.live URL —
# confirm that exposing the internal knowledge base this way is intended.
if __name__ == "__main__":
    demo.launch(share=True)