| import asyncio |
| from crawl4ai import * |
|
|
async def main():
    """Crawl one page and print the first 500 characters of its raw markdown.

    Starts a headless browser through ``AsyncWebCrawler``, fetches
    ``https://www.helloworld.org`` with ``CacheMode.BYPASS``, and converts
    the page to markdown using ``DefaultMarkdownGenerator`` with a
    ``PruningContentFilter``.

    If the crawl reports failure, or yields no markdown object, a short
    diagnostic is printed instead of raising ``AttributeError`` on the
    missing result.
    """
    browser_config = BrowserConfig(headless=True, verbose=True)

    # The run configuration does not depend on the live crawler, so it is
    # built before the browser is launched.
    crawler_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        markdown_generator=DefaultMarkdownGenerator(
            content_filter=PruningContentFilter(
                threshold=0.48,
                threshold_type="fixed",
                min_word_threshold=0,
            )
        ),
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            url="https://www.helloworld.org",
            config=crawler_config,
        )

        # A failed crawl carries no markdown; report the error rather than
        # crashing on attribute access below.
        if not result.success:
            print(f"Crawl failed: {result.error_message}")
            return

        markdown = result.markdown_v2
        if markdown is None:
            print("Crawl succeeded but produced no markdown.")
            return

        print(markdown.raw_markdown[:500])
|
|
if __name__ == "__main__":
    # Script entry point: create the coroutine for the crawl, then hand it
    # to asyncio, which runs it to completion.
    entry_coroutine = main()
    asyncio.run(entry_coroutine)