# build_the_brain.py
"""Build the Chroma vector store ("the brain") from ``knowledge.txt``.

The script loads the knowledge file, splits it into overlapping chunks,
embeds every chunk with a sentence-transformer model and writes the
resulting index to ``./chroma_db``.

Run it directly::

    python build_the_brain.py
"""
import shutil
import sys
from pathlib import Path

from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings

KNOWLEDGE_FILE = 'knowledge.txt'
PERSIST_DIRECTORY = "./chroma_db"
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
CHUNK_SIZE = 1200
CHUNK_OVERLAP = 100


def main():
    """Load, split and embed the knowledge base, then save it to disk.

    Exits with a non-zero status and a message on stderr when the
    knowledge file is missing or yields no chunks to index.
    """
    print("Building the brain from knowledge.txt... This may take a few minutes on a CPU.")

    # Fail early with a readable message instead of a loader traceback.
    if not Path(KNOWLEDGE_FILE).is_file():
        sys.exit(f"Error: '{KNOWLEDGE_FILE}' was not found in the current directory.")

    # Load the knowledge base
    loader = TextLoader(KNOWLEDGE_FILE, encoding='utf-8')
    documents = loader.load()

    # Split the document into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    docs = text_splitter.split_documents(documents)
    if not docs:
        sys.exit(f"Error: '{KNOWLEDGE_FILE}' produced no text chunks; nothing to index.")

    # Define the embedding function
    embedding_function = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL)

    # Start from a clean directory: building on top of an existing store
    # would add the same chunks again and duplicate every entry. This is
    # done only after loading/splitting succeeded, so an early failure
    # leaves the previous database untouched.
    if Path(PERSIST_DIRECTORY).exists():
        shutil.rmtree(PERSIST_DIRECTORY)

    # Create and save the ChromaDB database
    Chroma.from_documents(
        docs,
        embedding_function,
        persist_directory=PERSIST_DIRECTORY,
    )

    print("\n----------------------------------------------------")
    print("The brain has been built and saved successfully!")
    print("You can now run the main application with: streamlit run app.py")
    print("----------------------------------------------------")


if __name__ == "__main__":
    main()