Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -42,7 +42,7 @@ async def get_data(url: str):
|
|
| 42 |
async def scrape_website(url):
|
| 43 |
async with async_playwright() as p:
|
| 44 |
# Use WebKit rather than Chromium; no automatic fallback to another browser is visible here
|
| 45 |
-
browser = await p.webkit.launch(headless=
|
| 46 |
|
| 47 |
# Create a new browser context with a realistic user-agent
|
| 48 |
context = await browser.new_context(
|
|
@@ -62,9 +62,17 @@ async def scrape_website(url):
|
|
| 62 |
# Block images, media, stylesheets, fonts, and XHR requests to speed up page load (note: aborting XHR can break dynamically loaded content)
|
| 63 |
await page.route("**/*", lambda route: route.abort() if route.request.resource_type in ["image", "media", "stylesheet", "font", "xhr"] else route.continue_())
|
| 64 |
|
| 65 |
-
# Navigate to the page with an extended timeout and alternate loading strategy
|
| 66 |
-
await page.goto(url, wait_until='domcontentloaded', timeout=60000)
|
| 67 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
# Get the title of the page
|
| 69 |
title = await page.title()
|
| 70 |
|
|
|
|
| 42 |
async def scrape_website(url):
|
| 43 |
async with async_playwright() as p:
|
| 44 |
# Use WebKit rather than Chromium; no automatic fallback to another browser is visible here
|
| 45 |
+
browser = await p.webkit.launch(headless=True) # Switch to WebKit
|
| 46 |
|
| 47 |
# Create a new browser context with a realistic user-agent
|
| 48 |
context = await browser.new_context(
|
|
|
|
| 62 |
# Block images, media, stylesheets, fonts, and XHR requests to speed up page load (note: aborting XHR can break dynamically loaded content)
|
| 63 |
await page.route("**/*", lambda route: route.abort() if route.request.resource_type in ["image", "media", "stylesheet", "font", "xhr"] else route.continue_())
|
| 64 |
|
|
|
|
|
|
|
| 65 |
try:
|
| 66 |
+
# Introduce a slight delay to mimic human behavior
|
| 67 |
+
await asyncio.sleep(random.uniform(1, 3))
|
| 68 |
+
|
| 69 |
+
# Navigate to the page with an extended timeout
|
| 70 |
+
await page.goto(url, wait_until='domcontentloaded', timeout=60000)
|
| 71 |
+
|
| 72 |
+
# Simulate human behavior by scrolling and moving the mouse
|
| 73 |
+
await page.mouse.move(random.uniform(0, 100), random.uniform(0, 100))
|
| 74 |
+
await page.mouse.wheel(0, random.uniform(200, 400))
|
| 75 |
+
await asyncio.sleep(random.uniform(1, 3)) # Random delay
|
| 76 |
# Get the title of the page
|
| 77 |
title = await page.title()
|
| 78 |
|