Spaces:
No application file
No application file
| from xml.dom.minidom import Document | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.document_loaders.pdf import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
class DocumentLoader:
    """Load PDF files from a directory and split them into chunks.

    Each call to ``load_pdf`` reads one PDF with ``PyPDFLoader``, records the
    loaded documents on the instance, and returns the result of running them
    through a ``RecursiveCharacterTextSplitter``.
    """

    def __init__(
        self,
        *,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        documents_dir: str = "documents",
    ):
        """Create the text splitter and the empty bookkeeping state.

        All parameters are keyword-only and default to the values that were
        previously hard-coded, so ``DocumentLoader()`` behaves as before.

        Args:
            chunk_size: Forwarded as ``chunk_size`` to
                ``RecursiveCharacterTextSplitter``.
            chunk_overlap: Forwarded as ``chunk_overlap`` to
                ``RecursiveCharacterTextSplitter``.
            documents_dir: Directory prefix that ``load_pdf`` prepends to the
                file name it is given.
        """
        # Loader used by the most recent load_pdf call; None until then.
        self.pdf_loader = None
        # One entry per load_pdf call. Each entry is the whole list returned
        # by the loader, so this is a list of lists, not a flat list.
        self.pdfs = []
        # Directory prefix joined with "/" to the name passed to load_pdf.
        self.documents_dir = documents_dir
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

    def load_pdf(self, pdf_name) -> list[Document]:
        """Read ``<documents_dir>/<pdf_name>`` and return its split chunks.

        Side effects: replaces ``self.pdf_loader`` with the loader for this
        file, appends the loaded (unsplit) documents to ``self.pdfs``, and
        prints a confirmation line to stdout.

        Args:
            pdf_name: File name of the PDF, relative to ``documents_dir``.

        Returns:
            Whatever ``self.text_splitter.split_documents`` returns for the
            loaded documents.

        NOTE(review): the ``Document`` name in the return annotation is
        whatever the module imports under that name; the value returned here
        comes from the LangChain text splitter. The ``# type: ignore`` below
        suggests the two do not match -- confirm which ``Document`` the
        module-level import should refer to.
        """
        # Same path shape as before: directory prefix + "/" + file name.
        self.pdf_loader = PyPDFLoader(file_path=f"{self.documents_dir}/{pdf_name}")
        docs = self.pdf_loader.load()
        # Keep the raw documents so callers can inspect what was loaded.
        self.pdfs.append(docs)
        print(f"{pdf_name} has been read successfully")
        return self.text_splitter.split_documents(docs)  # type: ignore