Spaces:
No application file
No application file
File size: 585 Bytes
8255e91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
from embedding import get_embedding
from vector import VectorStore
from chunk import SimpleTextChunker
from parse import PDFTextExtractor
def build_knowledge_base(pdf_folder):
    """Run the extract -> chunk -> embed -> store pipeline and return the store.

    Steps, in order:
      1. ``PDFTextExtractor(pdf_folder).extract_all_pdfs()`` produces the
         documents.
      2. ``SimpleTextChunker().process_documents(...)`` turns them into chunks.
      3. A fresh ``VectorStore`` is created.
      4. ``get_embedding`` is called once per chunk on ``chunk["content"]``.
      5. The embeddings and the chunks are handed to ``store.add`` together.

    Args:
        pdf_folder: Passed straight to ``PDFTextExtractor``; presumably the
            path of a directory containing PDF files (confirm against
            ``parse.PDFTextExtractor``).

    Returns:
        The ``VectorStore`` instance after ``add`` has been called on it.
    """
    documents = PDFTextExtractor(pdf_folder).extract_all_pdfs()
    all_chunks = SimpleTextChunker().process_documents(documents)

    # The store is created before embedding starts, matching the original
    # ordering of constructor calls relative to the embedding requests.
    store = VectorStore()

    # One embedding per chunk, kept in the same order as ``all_chunks`` so the
    # two sequences line up positionally when passed to ``store.add``.
    embeddings = []
    for chunk in all_chunks:
        embeddings.append(get_embedding(chunk["content"]))

    store.add(embeddings, all_chunks)
    print(f"✅ Knowledge base built with {len(all_chunks)} chunks.")
    return store
|