import os
import sys
import asyncio
import base64
import io

import pytest
from PIL import Image

# Make the repository root importable when this file is executed directly;
# the crawl4ai import below relies on this path tweak.
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from crawl4ai.async_webcrawler import AsyncWebCrawler
| |
|
@pytest.mark.asyncio
async def test_basic_screenshot():
    """Capture a screenshot of a simple page and verify it decodes to a PNG."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
            url="https://example.com",
            bypass_cache=True,
            screenshot=True,
        )

        assert result.success
        assert result.screenshot is not None

        # The screenshot comes back base64-encoded; it must decode to a PNG.
        decoded = base64.b64decode(result.screenshot)
        assert Image.open(io.BytesIO(decoded)).format == "PNG"
| |
|
@pytest.mark.asyncio
async def test_screenshot_with_wait_for():
    """Capture a screenshot after waiting for a CSS selector to appear."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
            url="https://www.youtube.com",
            bypass_cache=True,
            screenshot=True,
            wait_for="css:#content",
        )

        assert result.success
        assert result.screenshot is not None

        # Decode the base64 payload and confirm it is a valid PNG image.
        png_bytes = base64.b64decode(result.screenshot)
        assert Image.open(io.BytesIO(png_bytes)).format == "PNG"
| | |
| | |
| | |
| |
|
@pytest.mark.asyncio
async def test_screenshot_with_js_wait_for():
    """Capture a screenshot after a JavaScript wait condition becomes true."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
            url="https://www.amazon.com",
            bypass_cache=True,
            screenshot=True,
            wait_for="js:() => document.querySelector('#nav-logo-sprites') !== null",
        )

        assert result.success
        assert result.screenshot is not None

        # Decode the base64 payload and confirm it is a valid PNG image.
        png_bytes = base64.b64decode(result.screenshot)
        assert Image.open(io.BytesIO(png_bytes)).format == "PNG"
| |
|
@pytest.mark.asyncio
async def test_screenshot_without_wait_for():
    """Capture a screenshot of a complex page with no explicit wait condition."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
            url="https://www.nytimes.com",
            bypass_cache=True,
            screenshot=True,
        )

        assert result.success
        assert result.screenshot is not None

        # Decode the base64 payload and confirm it is a valid PNG image.
        png_bytes = base64.b64decode(result.screenshot)
        assert Image.open(io.BytesIO(png_bytes)).format == "PNG"
| |
|
@pytest.mark.asyncio
async def test_screenshot_comparison():
    """Compare screenshots taken with and without a wait_for condition."""
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.reddit.com"
        wait_for = "css:#SHORTCUT_FOCUSABLE_DIV"

        # First capture: no explicit wait condition.
        result_without_wait = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
        )

        # Second capture: wait for the content container to exist first.
        result_with_wait = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
            wait_for=wait_for,
        )

        assert result_without_wait.success and result_with_wait.success
        assert result_without_wait.screenshot is not None
        assert result_with_wait.screenshot is not None

        image_without_wait = Image.open(io.BytesIO(base64.b64decode(result_without_wait.screenshot)))
        image_with_wait = Image.open(io.BytesIO(base64.b64decode(result_with_wait.screenshot)))

        # The "with wait" capture is expected to be at least as large in both
        # dimensions, on the theory that waiting lets more content render.
        # NOTE(review): this is a heuristic against a live site and may be
        # flaky — confirm it holds reliably for the targeted page.
        width_with, height_with = image_with_wait.size
        width_without, height_without = image_without_wait.size
        assert width_with >= width_without
        assert height_with >= height_without
| |
|
| | |
if __name__ == "__main__":
    # Propagate pytest's exit status so callers/CI see failures; the original
    # discarded pytest.main()'s return code and always exited with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))