from datetime import datetime
from pathlib import Path
from typing import Iterator, List, Optional
import uuid

from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_core.documents import Document


class document_loader:
    """Thin wrapper around LangChain loaders for PDF and Markdown sources.

    The instance stores a path and a glob pattern; each ``load*`` method
    builds a fresh LangChain loader from them and delegates to it.

    Paths are handed to the underlying loaders as plain strings, since
    not every ``langchain_community`` release is known to accept
    ``pathlib.Path`` objects (NOTE(review): confirm against the pinned
    version).
    """

    def __init__(self, filepath: Path, glob: str = "*.pdf"):
        """Remember the source location and the directory glob pattern.

        Args:
            filepath: File to load, or the directory to scan when
                ``load_multiple`` is used.
            glob: Pattern passed to ``DirectoryLoader`` by
                ``load_multiple``.
        """
        self.filepath = filepath
        self.glob = glob
        # Not read by any method in this class; kept because external
        # code may rely on the attribute being present.
        self.loader = PyPDFLoader

    # loading services
    def load(self) -> List[Document]:
        """Load ``filepath`` with ``PyPDFLoader`` and return its result."""
        doc_loader = PyPDFLoader(str(self.filepath))
        return doc_loader.load()

    def load_md(self) -> List[Document]:
        """Load ``filepath`` with ``UnstructuredMarkdownLoader``."""
        return UnstructuredMarkdownLoader(str(self.filepath)).load()

    def lazy_load(self) -> Iterator[Document]:
        """Return ``PyPDFLoader``'s lazy iterator over ``filepath``."""
        doc_loader = PyPDFLoader(str(self.filepath))
        return doc_loader.lazy_load()

    def load_multiple(self) -> List[Document]:
        """Load every file under ``filepath`` that matches ``glob``.

        ``filepath`` is treated as a directory; each matching file is
        read with ``PyPDFLoader`` via ``DirectoryLoader``.
        """
        doc_loader = DirectoryLoader(
            str(self.filepath),
            glob=self.glob,
            loader_cls=PyPDFLoader,
        )
        return doc_loader.load()