File size: 507 Bytes
cdfb101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from langchain_community.document_loaders import PyPDFLoader

class Parser:
    """Extract per-page text from a PDF file using ``PyPDFLoader``."""

    def parse(self, file_path) -> dict:
        """Load a PDF and return its text, one entry per page.

        Args:
            file_path: Location of the PDF; passed unchanged to
                ``PyPDFLoader``.

        Returns:
            A dict with two keys:
              * ``"content"``: list of page text strings, in page order.
              * ``"no_of_pages"``: number of entries in ``"content"``
                (``0`` when the loader yields no documents).
        """
        loader = PyPDFLoader(file_path)
        # Use load() rather than load_and_split(): the latter runs the
        # documents through a text splitter, so the returned list holds
        # chunks whose count need not match the PDF's page count. Indexing
        # that list by page number could raise IndexError or drop text.
        pages = loader.load()
        content = [page.page_content for page in pages]

        return {
            "content": content,
            # Derived from the loaded documents so it always agrees with
            # ``content`` and does not depend on a "total_pages" metadata
            # key being present.
            "no_of_pages": len(content),
        }