Almaatla committed on
Commit
74d8962
·
verified ·
1 Parent(s): b1b78f4

Upload 4 files

Browse files
Files changed (2) hide show
  1. main.py +4 -7
  2. requirements.txt +1 -0
main.py CHANGED
@@ -4,6 +4,7 @@ nest_asyncio.apply()
4
  from fastapi import FastAPI, HTTPException
5
  from pydantic import BaseModel, HttpUrl
6
  from playwright.async_api import async_playwright
 
7
  from bs4 import BeautifulSoup, Comment
8
  import re
9
  import asyncio
@@ -65,15 +66,11 @@ async def scrape_with_playwright(url: str):
65
  timezone_id="America/New_York"
66
  )
67
 
68
- # Add init script to further hide webdriver property
69
- await context.add_init_script("""
70
- Object.defineProperty(navigator, 'webdriver', {
71
- get: () => undefined
72
- });
73
- """)
74
-
75
  page = await context.new_page()
76
 
 
 
 
77
  try:
78
  # Go to URL and wait for network to be idle (load complete)
79
  await page.goto(url, wait_until="networkidle", timeout=30000)
 
4
  from fastapi import FastAPI, HTTPException
5
  from pydantic import BaseModel, HttpUrl
6
  from playwright.async_api import async_playwright
7
+ from playwright_stealth import stealth_async
8
  from bs4 import BeautifulSoup, Comment
9
  import re
10
  import asyncio
 
66
  timezone_id="America/New_York"
67
  )
68
 
 
 
 
 
 
 
 
69
  page = await context.new_page()
70
 
71
+ # Apply stealth to the page
72
+ await stealth_async(page)
73
+
74
  try:
75
  # Go to URL and wait for network to be idle (load complete)
76
  await page.goto(url, wait_until="networkidle", timeout=30000)
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  fastapi
2
  uvicorn
3
  playwright
 
4
  nest_asyncio
5
  beautifulsoup4
6
  lxml
 
1
  fastapi
2
  uvicorn
3
  playwright
4
+ playwright-stealth
5
  nest_asyncio
6
  beautifulsoup4
7
  lxml