File size: 585 Bytes
8255e91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from embedding import get_embedding
from vector import VectorStore
from chunk import SimpleTextChunker
from parse import PDFTextExtractor

def build_knowledge_base(pdf_folder):
    """Run the extract -> chunk -> embed -> store pipeline and return the store.

    Steps, in order:
      1. ``PDFTextExtractor(pdf_folder).extract_all_pdfs()`` yields the documents.
      2. ``SimpleTextChunker().process_documents(...)`` turns them into chunks.
      3. A fresh ``VectorStore`` is created.
      4. ``get_embedding`` is called once per chunk on its ``"content"`` entry.
      5. The embeddings and the chunks are handed to ``VectorStore.add`` together.

    Args:
        pdf_folder: Forwarded unchanged to ``PDFTextExtractor``
            (presumably a directory containing PDF files -- confirm against
            that class).

    Returns:
        The ``VectorStore`` instance after ``add`` has been called on it.
    """
    pdf_reader = PDFTextExtractor(pdf_folder)
    docs = pdf_reader.extract_all_pdfs()

    splitter = SimpleTextChunker()
    chunks = splitter.process_documents(docs)

    # The store is created before any embedding is computed, as in the
    # original call order.
    knowledge_base = VectorStore()

    # One embedding per chunk, kept in the same order as ``chunks`` so the two
    # sequences passed to ``add`` line up index by index.
    vectors = []
    for piece in chunks:
        vectors.append(get_embedding(piece["content"]))

    knowledge_base.add(vectors, chunks)

    print(f"✅ Knowledge base built with {len(chunks)} chunks.")
    return knowledge_base