# vgecbot/app/services/document_loader.py
# Uploaded by harsh-dev; commit "docker deployment" (4225666).
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from pathlib import Path
from datetime import datetime
import uuid
from typing import Optional, List
class document_loader:
    """Convenience wrapper around LangChain document loaders for one path.

    ``filepath`` is either a single file (``load``, ``lazy_load``,
    ``load_md``) or a directory (``load_multiple``).

    Attributes:
        filepath: Location handed to the underlying loaders.
        glob: Pattern ``load_multiple`` passes to ``DirectoryLoader``.
        loader: Loader class used for PDF loading. Defaults to
            ``PyPDFLoader``; reassign it on the instance to swap in
            another class with the same constructor/``load`` interface.
    """

    def __init__(self, filepath: Path, glob: str = "*.pdf"):
        """Store the target path and the glob used for directory loads.

        Args:
            filepath: File or directory to load from.
            glob: File-name pattern used only by ``load_multiple``.
        """
        self.filepath = filepath
        self.glob = glob
        self.loader = PyPDFLoader

    def _pdf_loader(self):
        """Build a loader for ``self.filepath`` using ``self.loader``."""
        # Coerce to str: the path is annotated as ``Path`` here, while
        # loader constructors are commonly annotated as taking ``str``.
        return self.loader(str(self.filepath))

    # loading services
    def load(self) -> "List[Document]":
        """Eagerly load ``self.filepath`` and return the loader's result."""
        return self._pdf_loader().load()

    def load_md(self) -> "List[Document]":
        """Load ``self.filepath`` with ``UnstructuredMarkdownLoader``."""
        # The path is passed through unchanged, exactly as before.
        return UnstructuredMarkdownLoader(self.filepath).load()

    def lazy_load(self):
        """Return the loader's lazy iterator instead of a full list."""
        return self._pdf_loader().lazy_load()

    def load_multiple(self) -> "List[Document]":
        """Load every file under ``self.filepath`` that matches ``self.glob``.

        Each matching file is read with ``self.loader`` via
        ``DirectoryLoader``.
        """
        directory_loader = DirectoryLoader(
            str(self.filepath),
            glob=self.glob,
            loader_cls=self.loader,
        )
        return directory_loader.load()