Spaces:

Deepanshu7284
/

conversational-time-machine

Sleeping

conversational-time-machine / build_the_brain.py

Initial deployment of Churchill AI

250d7f4 6 months ago

1.12 kB

	# build_the_brain.py
	from langchain.document_loaders import TextLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.vectorstores import Chroma
	from langchain.embeddings import SentenceTransformerEmbeddings

	# Tell the user the build has started before any work is done.
	print("Building the brain from knowledge.txt... This may take a few minutes on a CPU.")

	# Step 1: read knowledge.txt (decoded as UTF-8) into LangChain documents.
	loader = TextLoader("knowledge.txt", encoding="utf-8")
	documents = loader.load()

	# Step 2: break the loaded documents into overlapping chunks
	# (chunk_size=1200, chunk_overlap=100).
	text_splitter = RecursiveCharacterTextSplitter(
		chunk_overlap=100,
		chunk_size=1200,
	)
	docs = text_splitter.split_documents(documents)

	# Step 3: embedding function built on the "all-MiniLM-L6-v2" model.
	embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

	# Step 4: embed the chunks into a Chroma store whose persist directory
	# is ./chroma_db.
	db = Chroma.from_documents(
		documents=docs,
		embedding=embedding_function,
		persist_directory="./chroma_db",
	)

	# Closing banner: four lines written with a single print call, one per line.
	closing_banner = (
		"\n----------------------------------------------------",
		"The brain has been built and saved successfully!",
		"You can now run the main application with: streamlit run app.py",
		"----------------------------------------------------",
	)
	print(*closing_banner, sep="\n")