# vgecbot/old/services/document_loader.py
# harsh-dev -- docker deployment -- 4225666
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from pathlib import Path
class document_loader:
    """Convenience wrapper around LangChain document loaders for one path.

    An instance remembers a path, a glob pattern and the loader class used
    for PDF content (``self.loader``, ``PyPDFLoader`` by default).  Every
    PDF-oriented method builds its loader from ``self.loader`` so the
    attribute is the single place that decides which loader class is used.

    The lowercase class name is kept as-is because it is the name callers
    import.
    """

    def __init__(self, filepath, glob: str = "*.pdf"):
        """Record the target path and the directory glob.

        Args:
            filepath: Passed unchanged to the underlying loader.  It is used
                as a file path by ``load``, ``load_md`` and ``lazy_load`` and
                as a directory path by ``load_multiple``.
            glob: Pattern forwarded to ``DirectoryLoader`` by
                ``load_multiple``.
        """
        self.filepath = filepath
        self.glob = glob
        # Loader class used by load(), lazy_load() and load_multiple().
        self.loader = PyPDFLoader

    def _new_loader(self):
        """Instantiate ``self.loader`` for ``self.filepath``."""
        return self.loader(self.filepath)

    def load(self) -> "list[Document]":
        """Return whatever ``self.loader(self.filepath).load()`` yields."""
        return self._new_loader().load()

    def load_md(self) -> "list[Document]":
        """Load ``self.filepath`` with ``UnstructuredMarkdownLoader``."""
        return UnstructuredMarkdownLoader(self.filepath).load()

    def lazy_load(self):
        """Return the result of ``lazy_load()`` on a fresh ``self.loader``."""
        return self._new_loader().lazy_load()

    def load_multiple(self) -> "list[Document]":
        """Load every file under ``self.filepath`` that matches ``self.glob``.

        ``self.filepath`` is handed to ``DirectoryLoader`` as the directory,
        and each matching file is read with ``self.loader``.
        """
        directory_loader = DirectoryLoader(
            self.filepath,
            glob=self.glob,
            loader_cls=self.loader,
        )
        return directory_loader.load()