# conversational-time-machine / build_the_brain.py
# Deepanshu7284's picture
# Initial deployment of Churchill AI
# 250d7f4
# build_the_brain.py
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
# Announce the start of the build; the message itself warns that a CPU run
# can take a few minutes.
print("Building the brain from knowledge.txt... This may take a few minutes on a CPU.")

# Step 1: read knowledge.txt (decoded as UTF-8) into LangChain documents.
loader = TextLoader("knowledge.txt", encoding="utf-8")
documents = loader.load()

# Step 2: cut the loaded documents into overlapping chunks
# (chunk_size=1200, chunk_overlap=100).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1200,
    chunk_overlap=100,
)
docs = text_splitter.split_documents(documents)

# Step 3: pick the sentence-transformer model used to embed every chunk.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Step 4: build the Chroma vector store from the chunks, pointing it at the
# ./chroma_db directory for persistence.
db = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
    persist_directory="./chroma_db",
)

# Closing banner: a single print call with a newline separator emits exactly
# the same text as four consecutive prints.
print(
    "\n----------------------------------------------------",
    "The brain has been built and saved successfully!",
    "You can now run the main application with: streamlit run app.py",
    "----------------------------------------------------",
    sep="\n",
)