| import os |
| import sys |
| import pytest |
| import asyncio |
| import base64 |
| from PIL import Image |
| import io |
|
|
| |
# Make the repository root importable so `crawl4ai` resolves when this test
# file is executed directly (i.e. without installing the package first).
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)
|
|
| from crawl4ai.async_webcrawler import AsyncWebCrawler |
|
|
@pytest.mark.asyncio
async def test_basic_screenshot():
    """Capture a screenshot of a simple static page and verify it is a valid PNG."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
            url="https://example.com",
            bypass_cache=True,
            screenshot=True,
        )

        assert result.success
        assert result.screenshot is not None

        # The crawler returns the screenshot as a base64 string; round-trip
        # it through Pillow to confirm it decodes to real PNG image data.
        decoded = base64.b64decode(result.screenshot)
        img = Image.open(io.BytesIO(decoded))
        assert img.format == "PNG"
|
|
@pytest.mark.asyncio
async def test_screenshot_with_wait_for():
    """Screenshot a page after waiting for a CSS selector to appear."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
            url="https://www.youtube.com",
            bypass_cache=True,
            screenshot=True,
            # Block until the main content container exists before capturing.
            wait_for="css:#content",
        )

        assert result.success
        assert result.screenshot is not None

        # Decode the base64 payload and verify it parses as a PNG image.
        img = Image.open(io.BytesIO(base64.b64decode(result.screenshot)))
        assert img.format == "PNG"
| |
| |
| |
|
|
@pytest.mark.asyncio
async def test_screenshot_with_js_wait_for():
    """Screenshot a page after waiting on a JavaScript predicate."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Wait until the nav logo element is present in the DOM.
        js_condition = "js:() => document.querySelector('#nav-logo-sprites') !== null"

        result = await crawler.arun(
            url="https://www.amazon.com",
            bypass_cache=True,
            screenshot=True,
            wait_for=js_condition,
        )

        assert result.success
        assert result.screenshot is not None

        # The screenshot must decode from base64 into a valid PNG.
        img = Image.open(io.BytesIO(base64.b64decode(result.screenshot)))
        assert img.format == "PNG"
|
|
@pytest.mark.asyncio
async def test_screenshot_without_wait_for():
    """Screenshot a dynamic page with no explicit wait condition."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
            url="https://www.nytimes.com",
            bypass_cache=True,
            screenshot=True,
        )

        assert result.success
        assert result.screenshot is not None

        # Even without waiting, the capture should still be a decodable PNG.
        img = Image.open(io.BytesIO(base64.b64decode(result.screenshot)))
        assert img.format == "PNG"
|
|
@pytest.mark.asyncio
async def test_screenshot_comparison():
    """Compare screenshots captured with and without a wait_for condition."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.reddit.com"
        selector = "css:#SHORTCUT_FOCUSABLE_DIV"

        # First capture: no explicit wait condition.
        plain = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
        )

        # Second capture: wait for the main content container before shooting.
        waited = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
            wait_for=selector,
        )

        assert plain.success and waited.success
        assert plain.screenshot is not None
        assert waited.screenshot is not None

        img_plain = Image.open(io.BytesIO(base64.b64decode(plain.screenshot)))
        img_waited = Image.open(io.BytesIO(base64.b64decode(waited.screenshot)))

        # NOTE(review): assumes waiting for content never shrinks the rendered
        # page — this depends on the live site's layout and may be flaky.
        assert img_waited.size[0] >= img_plain.size[0]
        assert img_waited.size[1] >= img_plain.size[1]
|
|
| |
# Allow running this module directly; delegates to pytest's CLI runner
# with verbose output for just this file.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])