Spaces:
Paused
Paused
File size: 350 Bytes
e792350 |
1 2 3 4 5 6 7 8 9 10 11 |
# loader.py
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
def load_and_split_pdf(pdf_path):
loader = PyPDFLoader(pdf_path)
docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
return splitter.split_documents(docs)
|