ZKnowledgeAgent / app /services /document_loader.py
mghfuran's picture
deploy: initial clean deployment with lfs tracked database
c1afa55
Raw
History Blame Contribute Delete
395 Bytes
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
def load_pdf(file_path, *, chunk_size: int = 500, chunk_overlap: int = 100):
    """Load a PDF and split its contents into overlapping chunks.

    The file is read with ``PyPDFLoader`` and the resulting documents are
    split with ``RecursiveCharacterTextSplitter``.

    Args:
        file_path: Location of the PDF, passed unchanged to ``PyPDFLoader``.
        chunk_size: Maximum chunk size handed to the splitter. Defaults to
            500, the value this function previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter. Defaults to 100, the previously hard-coded value.

    Returns:
        Whatever ``splitter.split_documents`` produces for the loaded
        documents (presumably a list of LangChain ``Document`` chunks --
        confirm against the installed LangChain version).
    """
    loader = PyPDFLoader(file_path)
    documents = loader.load()

    # Sizes are configurable so callers can tune chunking per use case
    # without editing this module; the defaults preserve the old behavior.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)