Spaces:
Runtime error
Runtime error
| import pytest | |
| from deepengineer.common_path import DATA_DIR | |
| from deepengineer.webcrawler.async_crawl import ( | |
| crawl4ai_extract_markdown_of_url_async, | |
| download_pdf_async, | |
| download_pdf_or_arxiv_pdf_async, | |
| ) | |
| from deepengineer.webcrawler.testing import ARXIV_URL, URL_PDF, URL_WIKIPEDIA | |
async def test_crawl4ai_extract_markdown_of_url_async():
    """Crawl ``URL_WIKIPEDIA`` and check the extracted markdown.

    The result must be a ``str`` and must contain the phrase
    "Graphite-moderated reactor".

    NOTE(review): this is a coroutine test, so it needs an async-capable
    pytest plugin/configuration, which is not visible in this file.
    """
    page_markdown = await crawl4ai_extract_markdown_of_url_async(URL_WIKIPEDIA)

    # Type first, then content: the substring check assumes a string result.
    assert isinstance(page_markdown, str)
    assert "Graphite-moderated reactor" in page_markdown
async def test_download_pdf_async():
    """Download ``URL_PDF`` to a scratch file and verify where it lands.

    The test removes any existing ``DATA_DIR / "temp.pdf"``, confirms the
    path is really absent, awaits ``download_pdf_async`` and then asserts
    that the returned value equals the requested path and that the file
    now exists.

    NOTE(review): the scratch file is left in ``DATA_DIR`` afterwards and
    the same path is used by ``test_arxiv_download_pdf_async`` -- confirm
    the tests are never run in parallel.
    """
    output_path = DATA_DIR / "temp.pdf"

    # Start from a clean slate so a file left behind by an earlier run
    # cannot make the final existence check pass on its own.
    output_path.unlink(missing_ok=True)
    assert not output_path.exists()

    pdf_path = await download_pdf_async(URL_PDF, output_path=output_path)

    assert pdf_path == output_path
    assert output_path.exists()
async def test_arxiv_download_pdf_async():
    """Fetch ``ARXIV_URL`` via ``download_pdf_or_arxiv_pdf_async`` into a scratch file.

    Any existing ``DATA_DIR / "temp.pdf"`` is removed first and its absence
    is asserted. After the download the returned value must equal the
    requested path and the file must exist.
    """
    target = DATA_DIR / "temp.pdf"

    # Clear leftovers from previous runs and verify the precondition.
    target.unlink(missing_ok=True)
    assert not target.exists()

    downloaded = await download_pdf_or_arxiv_pdf_async(
        ARXIV_URL,
        output_path=target,
    )

    assert downloaded == target
    assert target.exists()