File size: 507 Bytes
cdfb101 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
from langchain_community.document_loaders import PyPDFLoader
class Parser:
    """Extract the text of a PDF, one entry per loaded page, via PyPDFLoader."""

    def __init__(self):
        """Create a parser; it takes no configuration and keeps no state."""

    def parse(self, file_path):
        """Load the PDF at ``file_path`` and return its text page by page.

        Args:
            file_path: Location of the PDF; passed unchanged to
                ``PyPDFLoader``.

        Returns:
            A dict with two keys:

            * ``"content"``: list with the ``page_content`` string of every
              document the loader returned, in loader order.
            * ``"no_of_pages"``: the number of entries in ``"content"``
              (``0`` when the loader returns nothing).
        """
        loader = PyPDFLoader(file_path)
        # Use load() instead of load_and_split(): the latter passes the
        # documents through a text splitter, so its result is a list of
        # chunks whose length need not match the PDF's page count. Indexing
        # that list with range(total_pages) could raise IndexError or
        # silently drop / misalign text.
        # NOTE(review): relies on PyPDFLoader.load() yielding one document
        # per page (its documented default behaviour) - confirm for the
        # installed langchain_community version.
        pages = loader.load()
        content = [page.page_content for page in pages]
        return {
            "content": content,
            # Derived from what was actually loaded so the count always
            # agrees with "content" and does not depend on a metadata key.
            "no_of_pages": len(content),
        }
|