File size: 1,005 Bytes
4225666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from pathlib import Path
from datetime import datetime
import uuid
from typing import Optional, List


class document_loader:
    """Convenience wrapper that hands one path to LangChain document loaders.

    The stored ``filepath`` is passed unchanged to whichever loader a method
    constructs; ``glob`` is only consulted by ``load_multiple``.
    """

    def __init__(self, filepath: Path, glob: str = "*.pdf"):
        """Remember the target path and the glob used for directory loads.

        Args:
            filepath: Path forwarded as-is to the underlying loader.
            glob: Pattern forwarded to ``DirectoryLoader`` by
                ``load_multiple``.
        """
        self.filepath = filepath
        self.glob = glob
        # Exposed as an attribute only; the methods below name PyPDFLoader
        # directly and do not read this value.
        self.loader = PyPDFLoader

    # loading services
    def load(self):
        """Return ``PyPDFLoader(self.filepath).load()``."""
        return PyPDFLoader(self.filepath).load()

    def load_md(self):
        """Return ``UnstructuredMarkdownLoader(self.filepath).load()``."""
        markdown_loader = UnstructuredMarkdownLoader(self.filepath)
        return markdown_loader.load()

    def lazy_load(self):
        """Return ``PyPDFLoader(self.filepath).lazy_load()``."""
        return PyPDFLoader(self.filepath).lazy_load()

    def load_multiple(self):
        """Load through a ``DirectoryLoader`` rooted at ``self.filepath``.

        The directory loader is configured with ``self.glob`` as its glob
        pattern and ``PyPDFLoader`` as its per-file loader class; the result
        of its ``load()`` call is returned.
        """
        return DirectoryLoader(
            self.filepath,
            glob=self.glob,
            loader_cls=PyPDFLoader,
        ).load()