Spaces:
Sleeping
Sleeping
File size: 1,092 Bytes
bf6dbfa 0643073 bf6dbfa | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from rag.embeddings import get_embeddings
def build_vectorstore(
    filepath: str = "data/knowledge_base.md",
    *,
    chunk_size: int = 100,
    chunk_overlap: int = 20,
):
    """
    Loads the knowledge base, splits it, and builds a FAISS vector store.

    Args:
        filepath: Path to the knowledge-base text file.
        chunk_size: Maximum chunk length handed to the text splitter
            (the splitter's default length function is used).
        chunk_overlap: Overlap between consecutive chunks, in the same
            unit as ``chunk_size``.

    Returns:
        The FAISS vector store built from the embedded chunks.

    Raises:
        FileNotFoundError: If ``filepath`` is not an existing regular file.
        ValueError: If the file yields no text chunks to index.
    """
    # isfile() rather than exists(): a directory at this path should be
    # rejected here with a clear message instead of failing inside the loader.
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f"Knowledge base not found at {filepath}")

    # Pin the encoding; otherwise decoding follows the platform locale and a
    # UTF-8 knowledge base can be mis-decoded (or fail to load) on Windows.
    loader = TextLoader(filepath, encoding="utf-8")
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", " ", ""],
    )
    splits = text_splitter.split_documents(docs)

    # An empty or whitespace-only file produces no chunks; fail with an
    # explicit message rather than an opaque error from the index builder.
    if not splits:
        raise ValueError(f"Knowledge base at {filepath} produced no text chunks")

    embeddings = get_embeddings()
    return FAISS.from_documents(splits, embeddings)
# Module-level cache holding the vector store once it has been built.
_vectorstore = None


def get_vectorstore(filepath: str = "data/knowledge_base.md"):
    """
    Returns the shared vector store, building it on the first call.

    ``filepath`` is only consulted when nothing has been cached yet; once a
    store exists, later calls return it unchanged regardless of the path
    they pass.
    """
    global _vectorstore

    # Fast path: a store was already built by an earlier call.
    if _vectorstore is not None:
        return _vectorstore

    _vectorstore = build_vectorstore(filepath)
    return _vectorstore
|