"""Example client script for the crawl endpoint at https://crawl4ai.com/crawl.

The script posts one crawl request (page plus screenshot), prints the keys
of the first result and writes the returned screenshot to ``screenshot.png``.
The remaining ``data`` dictionaries are alternative request payloads; this
script only assigns them and never sends them. To try one, post it with
``requests.post`` in the same way as the first request.
"""

import base64
import os

import requests

# Upper bound (seconds) for the HTTP call so the script cannot hang forever.
# NOTE(review): crawls that render a screenshot may be slow; tune as needed.
REQUEST_TIMEOUT_SECONDS = 120

# --- Basic request: crawl one URL and ask for a screenshot -----------------
data = {
    "urls": ["https://www.nbcnews.com/business"],
    "screenshot": True,
}

response = requests.post(
    "https://crawl4ai.com/crawl",
    json=data,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Fail with a clear HTTP error instead of a KeyError/JSON error further down.
response.raise_for_status()
result = response.json()['results'][0]
print(result.keys())

# The screenshot arrives base64-encoded; decode it and store the raw bytes.
with open("screenshot.png", "wb") as f:
    f.write(base64.b64decode(result['screenshot']))

# --- Alternative payload: adds a CSS selector -------------------------------
# Presumably restricts the crawl to elements matching the selector; confirm
# against the service documentation.
data = {
    "urls": [
        "https://www.nbcnews.com/business"
    ],
    "css_selector": "article",
    "screenshot": True,
}

# --- Alternative payload: adds a JavaScript snippet -------------------------
# The snippet looks for the first <button> whose text contains 'Load More'
# and clicks it when one is found.
data = {
    "urls": [
        "https://www.nbcnews.com/business"
    ],
    "screenshot": True,
    'js': ["""
    const loadMoreButton = Array.from(document.querySelectorAll('button')).
    find(button => button.textContent.includes('Load More'));
    loadMoreButton && loadMoreButton.click();
    """]
}

# --- Alternative payload: "CosineStrategy" extraction -----------------------
# Presumably filters extracted content by similarity to the given phrase;
# confirm against the service documentation.
data = {
    "urls": [
        "https://www.nbcnews.com/business"
    ],
    "extraction_strategy": "CosineStrategy",
    "extraction_strategy_args": {
        "semantic_filter": "inflation rent prices"
    },
}

# --- Alternative payload: "LLMExtractionStrategy" extraction ----------------
# The API token is read from the GROQ_API_KEY environment variable; it is
# None (sent as JSON null) when that variable is not set.
data = {
    "urls": [
        "https://www.nbcnews.com/business"
    ],
    "extraction_strategy": "LLMExtractionStrategy",
    "extraction_strategy_args": {
        "provider": "groq/llama3-8b-8192",
        "api_token": os.environ.get("GROQ_API_KEY"),
        "instruction": """I am interested in only financial news,
        and translate them in French."""
    },
}