Spaces:
Runtime error
Runtime error
| from pathlib import Path | |
| from langchain.document_loaders import ( | |
| PDFMinerLoader, | |
| PyMuPDFLoader, | |
| UnstructuredPDFLoader, | |
| ) | |
| def test_unstructured_pdf_loader() -> None: | |
| """Test unstructured loader.""" | |
| file_path = Path(__file__).parent.parent / "examples/hello.pdf" | |
| loader = UnstructuredPDFLoader(str(file_path)) | |
| docs = loader.load() | |
| assert len(docs) == 1 | |
| def test_pdfminer_loader() -> None: | |
| """Test PDFMiner loader.""" | |
| file_path = Path(__file__).parent.parent / "examples/hello.pdf" | |
| loader = PDFMinerLoader(str(file_path)) | |
| docs = loader.load() | |
| assert len(docs) == 1 | |
| file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf" | |
| loader = PDFMinerLoader(str(file_path)) | |
| docs = loader.load() | |
| assert len(docs) == 1 | |
| def test_pymupdf_loader() -> None: | |
| """Test PyMuPDF loader.""" | |
| file_path = Path(__file__).parent.parent / "examples/hello.pdf" | |
| loader = PyMuPDFLoader(str(file_path)) | |
| docs = loader.load() | |
| assert len(docs) == 1 | |
| file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf" | |
| loader = PyMuPDFLoader(str(file_path)) | |
| docs = loader.load() | |
| assert len(docs) == 16 | |
| assert loader.web_path is None | |
| web_path = "https://people.sc.fsu.edu/~jpeterson/hello_world.pdf" | |
| loader = PyMuPDFLoader(web_path) | |
| docs = loader.load() | |
| assert loader.web_path == web_path | |
| assert loader.file_path != web_path | |
| assert len(docs) == 1 | |