Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
main.py
CHANGED
|
@@ -51,10 +51,27 @@ def clean_html(html_content: str):
|
|
| 51 |
|
| 52 |
async def scrape_with_playwright(url: str):
|
| 53 |
async with async_playwright() as p:
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
context = await browser.new_context(
|
| 56 |
-
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
|
|
|
|
|
|
|
|
| 57 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
page = await context.new_page()
|
| 59 |
|
| 60 |
try:
|
|
|
|
| 51 |
|
| 52 |
async def scrape_with_playwright(url: str):
|
| 53 |
async with async_playwright() as p:
|
| 54 |
+
# Launch with arguments to hide automation
|
| 55 |
+
browser = await p.chromium.launch(
|
| 56 |
+
headless=True,
|
| 57 |
+
args=["--disable-blink-features=AutomationControlled"]
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
# Use a modern User-Agent and realistic viewport
|
| 61 |
context = await browser.new_context(
|
| 62 |
+
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
|
| 63 |
+
viewport={"width": 1920, "height": 1080},
|
| 64 |
+
locale="en-US",
|
| 65 |
+
timezone_id="America/New_York"
|
| 66 |
)
|
| 67 |
+
|
| 68 |
+
# Add init script to further hide webdriver property
|
| 69 |
+
await context.add_init_script("""
|
| 70 |
+
Object.defineProperty(navigator, 'webdriver', {
|
| 71 |
+
get: () => undefined
|
| 72 |
+
});
|
| 73 |
+
""")
|
| 74 |
+
|
| 75 |
page = await context.new_page()
|
| 76 |
|
| 77 |
try:
|