Spaces:
Sleeping
Sleeping
| from langchain_core.documents import Document | |
| from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader | |
| from langchain_community.document_loaders import UnstructuredMarkdownLoader | |
| from pathlib import Path | |
| from datetime import datetime | |
| import uuid | |
| from typing import Optional, List | |
class document_loader:
    """Thin wrapper around LangChain document loaders for one path.

    ``filepath`` is handed unchanged to the underlying loader: a single
    file for ``load`` / ``lazy_load`` / ``load_md``, or a directory for
    ``load_multiple``. ``glob`` is only used by ``load_multiple``.

    The PDF loader class is stored on the instance as ``loader`` and every
    PDF-oriented method goes through that attribute, so replacing it on an
    instance consistently changes which loader is used.
    """

    def __init__(self, filepath: Path, glob: str = "*.pdf"):
        """Store the target path, the directory glob and the PDF loader class.

        Args:
            filepath: File or directory path passed to the loaders.
            glob: Pattern forwarded to ``DirectoryLoader`` by
                ``load_multiple``.
        """
        self.filepath = filepath
        self.glob = glob
        # Loader *class* (not an instance); instantiated on each call below.
        self.loader = PyPDFLoader

    # loading services
    def load(self) -> "List[Document]":
        """Instantiate ``self.loader`` on ``filepath`` and return its ``load()`` result."""
        return self.loader(self.filepath).load()

    def load_md(self) -> "List[Document]":
        """Load ``filepath`` with ``UnstructuredMarkdownLoader`` and return the result."""
        return UnstructuredMarkdownLoader(self.filepath).load()

    def lazy_load(self):
        """Instantiate ``self.loader`` on ``filepath`` and return its ``lazy_load()`` result."""
        return self.loader(self.filepath).lazy_load()

    def load_multiple(self) -> "List[Document]":
        """Load the files under ``filepath`` that match ``glob``.

        A ``DirectoryLoader`` is built with ``self.loader`` as its
        ``loader_cls`` and its ``load()`` result is returned.
        """
        directory_loader = DirectoryLoader(
            self.filepath,
            glob=self.glob,
            loader_cls=self.loader,
        )
        return directory_loader.load()