import os

from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma


def update_vector_db(data_folder: str = "new_data", db_path: str = "chroma_store") -> None:
    """Embed every ``.txt`` file in *data_folder* and add it to a Chroma store.

    Each file is read as UTF-8, split into chunks of 500 characters with a
    50-character overlap, embedded with the ``all-MiniLM-L6-v2`` model and
    added to the Chroma store located at *db_path*.

    The call is a no-op when the folder holds no ``.txt`` files or the files
    produce no chunks: the embedding model is not loaded and the store is
    not opened.

    Args:
        data_folder: Directory scanned (non-recursively) for ``.txt`` files.
        db_path: Persistence directory handed to Chroma.

    Raises:
        FileNotFoundError: If *data_folder* does not exist.
        NotADirectoryError: If *data_folder* is not a directory.
    """
    # Scan the folder before doing anything expensive so that a bad path fails
    # fast. Sorting makes ingestion order reproducible, since os.listdir()
    # returns entries in arbitrary order.
    txt_paths = sorted(
        os.path.join(data_folder, name)
        for name in os.listdir(data_folder)
        if name.endswith(".txt")
    )
    # A directory whose name happens to end in ".txt" is not a document.
    txt_paths = [path for path in txt_paths if os.path.isfile(path)]
    if not txt_paths:
        return

    docs = []
    for path in txt_paths:
        docs.extend(TextLoader(path, encoding="utf-8").load())

    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)
    if not chunks:
        # NOTE(review): some Chroma versions appear to reject an empty batch;
        # skipping the write avoids depending on that behavior.
        return

    embed = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    db = Chroma(persist_directory=db_path, embedding_function=embed)
    db.add_documents(chunks)
    # NOTE(review): newer Chroma releases reportedly persist automatically and
    # deprecate this call -- confirm against the pinned version before removing.
    db.persist()