File size: 1,092 Bytes
bf6dbfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0643073
bf6dbfa
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from rag.embeddings import get_embeddings

def build_vectorstore(
    filepath: str = "data/knowledge_base.md",
    *,
    chunk_size: int = 100,
    chunk_overlap: int = 20,
):
    """Load the knowledge base, split it into chunks, and build a FAISS store.

    Args:
        filepath: Path to the knowledge-base text/markdown file.
        chunk_size: Maximum chunk size handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the
            text splitter.

    Returns:
        The FAISS vector store built from the embedded chunks.

    Raises:
        FileNotFoundError: If ``filepath`` is not an existing regular file.
        ValueError: If the knowledge base yields no text chunks (for
            example, an empty file).
    """
    # isfile (not exists) so that a directory path is rejected here with a
    # clear message instead of failing later inside the loader.
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f"Knowledge base not found at {filepath}")

    # Decode explicitly as UTF-8 so loading does not depend on the platform's
    # default locale encoding.
    loader = TextLoader(filepath, encoding="utf-8")
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", " ", ""],
    )
    splits = text_splitter.split_documents(docs)

    # Fail early with an actionable message rather than handing an empty
    # document list to the vector store.
    if not splits:
        raise ValueError(
            f"Knowledge base at {filepath} produced no text chunks"
        )

    embeddings = get_embeddings()
    return FAISS.from_documents(splits, embeddings)


# Module-level cache holding the vector store once it has been built.
_vectorstore = None


def get_vectorstore(filepath: str = "data/knowledge_base.md"):
    """Return the cached vector store, building it on first use.

    The store is built via ``build_vectorstore(filepath)`` only while the
    module-level cache is still ``None``. Once a store has been cached,
    ``filepath`` is ignored and the existing store is returned as-is.

    Args:
        filepath: Knowledge-base path used only when the store has not been
            built yet.

    Returns:
        The cached vector store.
    """
    global _vectorstore
    if _vectorstore is not None:
        return _vectorstore
    _vectorstore = build_vectorstore(filepath)
    return _vectorstore