import os
import sys
import asyncio
import shutil
from typing import List
import tempfile
import time

# Make the repository root importable when this script is executed directly
# (the tests directory sits one level below the package root).
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from crawl4ai.async_webcrawler import AsyncWebCrawler
|
class TestDownloads:
    """End-to-end checks for AsyncWebCrawler's file-download handling.

    Each test crawls https://www.python.org/downloads/ and clicks installer
    links via injected JavaScript, then records a pass/fail line in
    ``self.results`` (and echoes it to stdout). All downloaded files land in
    a throwaway temp directory which ``run_all_tests()`` removes at the end.
    """

    def __init__(self):
        # Isolated scratch area so test downloads never touch real user dirs.
        self.temp_dir = tempfile.mkdtemp(prefix="crawl4ai_test_")
        self.download_dir = os.path.join(self.temp_dir, "downloads")
        os.makedirs(self.download_dir, exist_ok=True)
        # One formatted result line per executed test case.
        self.results: List[str] = []

    def cleanup(self):
        """Delete the temp tree; tolerate files a browser may still hold open."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def log_result(self, test_name: str, success: bool, message: str = ""):
        """Append a ✅/❌ line for *test_name* to ``self.results`` and print it."""
        result = f"{'✅' if success else '❌'} {test_name}: {message}"
        self.results.append(result)
        print(result)

    async def test_basic_download(self):
        """Test basic file download functionality"""
        try:
            async with AsyncWebCrawler(
                accept_downloads=True,
                downloads_path=self.download_dir,
                verbose=True,
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="""
                    // Click first download link
                    const downloadLink = document.querySelector('a[href$=".exe"]');
                    if (downloadLink) downloadLink.click();
                    """,
                )

                success = result.downloaded_files is not None and len(result.downloaded_files) > 0
                self.log_result(
                    "Basic Download",
                    success,
                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded",
                )
        except Exception as e:
            self.log_result("Basic Download", False, str(e))

    async def test_persistent_context_download(self):
        """Test downloads with persistent context"""
        try:
            # Persistent contexts need a dedicated user-data directory.
            user_data_dir = os.path.join(self.temp_dir, "user_data")
            os.makedirs(user_data_dir, exist_ok=True)

            async with AsyncWebCrawler(
                accept_downloads=True,
                downloads_path=self.download_dir,
                use_persistent_context=True,
                user_data_dir=user_data_dir,
                verbose=True,
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="""
                    const downloadLink = document.querySelector('a[href$=".exe"]');
                    if (downloadLink) downloadLink.click();
                    """,
                )

                success = result.downloaded_files is not None and len(result.downloaded_files) > 0
                self.log_result(
                    "Persistent Context Download",
                    success,
                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded",
                )
        except Exception as e:
            self.log_result("Persistent Context Download", False, str(e))

    async def test_multiple_downloads(self):
        """Test multiple simultaneous downloads"""
        try:
            async with AsyncWebCrawler(
                accept_downloads=True,
                downloads_path=self.download_dir,
                verbose=True,
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="""
                    // Click multiple download links
                    const downloadLinks = document.querySelectorAll('a[href$=".exe"]');
                    downloadLinks.forEach(link => link.click());
                    """,
                )

                # "Multiple" means strictly more than one file arrived.
                success = result.downloaded_files is not None and len(result.downloaded_files) > 1
                self.log_result(
                    "Multiple Downloads",
                    success,
                    f"Downloaded {len(result.downloaded_files or [])} files" if success else "Not enough files downloaded",
                )
        except Exception as e:
            self.log_result("Multiple Downloads", False, str(e))

    async def test_different_browsers(self):
        """Test downloads across different browser types"""
        browsers = ["chromium", "firefox", "webkit"]

        # Each browser engine gets its own crawler instance; a failure in one
        # engine is logged but does not stop the remaining engines.
        for browser_type in browsers:
            try:
                async with AsyncWebCrawler(
                    accept_downloads=True,
                    downloads_path=self.download_dir,
                    browser_type=browser_type,
                    verbose=True,
                ) as crawler:
                    result = await crawler.arun(
                        url="https://www.python.org/downloads/",
                        js_code="""
                        const downloadLink = document.querySelector('a[href$=".exe"]');
                        if (downloadLink) downloadLink.click();
                        """,
                    )

                    success = result.downloaded_files is not None and len(result.downloaded_files) > 0
                    self.log_result(
                        f"{browser_type.title()} Download",
                        success,
                        f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded",
                    )
            except Exception as e:
                self.log_result(f"{browser_type.title()} Download", False, str(e))

    async def test_edge_cases(self):
        """Test various edge cases"""

        # Case 1: no downloads_path given — crawler should fall back to its
        # default download location.
        try:
            async with AsyncWebCrawler(
                accept_downloads=True,
                verbose=True,
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="document.querySelector('a[href$=\".exe\"]').click()",
                )
                self.log_result(
                    "Default Download Path",
                    True,
                    f"Downloaded to default path: {result.downloaded_files[0] if result.downloaded_files else 'None'}",
                )
        except Exception as e:
            self.log_result("Default Download Path", False, str(e))

        # Case 2: nonexistent downloads_path — an exception here is the
        # EXPECTED (passing) outcome.
        try:
            async with AsyncWebCrawler(
                accept_downloads=True,
                downloads_path="/invalid/path/that/doesnt/exist",
                verbose=True,
            ) as crawler:
                await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="document.querySelector('a[href$=\".exe\"]').click()",
                )
                self.log_result("Invalid Download Path", False, "Should have raised an error")
        except Exception:
            self.log_result("Invalid Download Path", True, "Correctly handled invalid path")

        # Case 3: downloads disabled — clicking a download link must not
        # produce any downloaded_files.
        try:
            async with AsyncWebCrawler(
                accept_downloads=False,
                verbose=True,
            ) as crawler:
                result = await crawler.arun(
                    url="https://www.python.org/downloads/",
                    js_code="document.querySelector('a[href$=\".exe\"]').click()",
                )
                success = result.downloaded_files is None
                self.log_result(
                    "Disabled Downloads",
                    success,
                    "Correctly ignored downloads" if success else "Unexpectedly downloaded files",
                )
        except Exception as e:
            self.log_result("Disabled Downloads", False, str(e))

    async def run_all_tests(self):
        """Run all test cases"""
        print("\n🧪 Running Download Tests...\n")

        test_methods = [
            self.test_basic_download,
            self.test_persistent_context_download,
            self.test_multiple_downloads,
            self.test_different_browsers,
            self.test_edge_cases,
        ]

        for test in test_methods:
            print(f"\n📝 Running {test.__doc__}...")
            await test()
            # Brief pause so in-flight downloads from one test don't bleed
            # into the next test's file count.
            await asyncio.sleep(2)

        print("\n📊 Test Results Summary:")
        for result in self.results:
            print(result)

        successes = len([r for r in self.results if '✅' in r])
        total = len(self.results)
        print(f"\nTotal: {successes}/{total} tests passed")

        self.cleanup()
| |
|
async def main():
    """Build the download-test harness and run every test case."""
    tester = TestDownloads()
    await tester.run_all_tests()


if __name__ == "__main__":
    asyncio.run(main())