Almaatla committed on
Commit
b1b78f4
·
verified ·
1 Parent(s): 6fe80f2

Upload 4 files

Browse files
Files changed (1) hide show
  1. main.py +19 -2
main.py CHANGED
@@ -51,10 +51,27 @@ def clean_html(html_content: str):
51
 
52
  async def scrape_with_playwright(url: str):
53
  async with async_playwright() as p:
54
- browser = await p.chromium.launch(headless=True)
 
 
 
 
 
 
55
  context = await browser.new_context(
56
- user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
 
 
 
57
  )
 
 
 
 
 
 
 
 
58
  page = await context.new_page()
59
 
60
  try:
 
51
 
52
  async def scrape_with_playwright(url: str):
53
  async with async_playwright() as p:
54
+ # Launch with arguments to hide automation
55
+ browser = await p.chromium.launch(
56
+ headless=True,
57
+ args=["--disable-blink-features=AutomationControlled"]
58
+ )
59
+
60
+ # Use a modern User-Agent and realistic viewport
61
  context = await browser.new_context(
62
+ user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
63
+ viewport={"width": 1920, "height": 1080},
64
+ locale="en-US",
65
+ timezone_id="America/New_York"
66
  )
67
+
68
+ # Add init script to further hide webdriver property
69
+ await context.add_init_script("""
70
+ Object.defineProperty(navigator, 'webdriver', {
71
+ get: () => undefined
72
+ });
73
+ """)
74
+
75
  page = await context.new_page()
76
 
77
  try: