Spaces:
Build error
Build error
Commit ·
b7ad199
1
Parent(s): 04f322e
feat: try adding an init script
Browse files - app/util/browser_agent.py +33 -2
app/util/browser_agent.py
CHANGED
|
@@ -46,10 +46,22 @@ class BrowserAgent:
|
|
| 46 |
self.stealth_manager = Stealth().use_async(async_playwright())
|
| 47 |
self.p = await self.stealth_manager.__aenter__()
|
| 48 |
|
| 49 |
-
self.browser = await self.p.chromium.launch(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
self.context = await self.browser.new_context(
|
| 51 |
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
| 52 |
-
viewport={'width': 1920, 'height': 1080}
|
|
|
|
|
|
|
| 53 |
)
|
| 54 |
print("🚀 Browser agent initialized with Stealth API.")
|
| 55 |
return self
|
|
@@ -86,6 +98,25 @@ class BrowserAgent:
|
|
| 86 |
self.link_map[url] = LinkNode(href=url, overview=overview, parent=parent_url, child=[], depth=depth)
|
| 87 |
|
| 88 |
page = await self.context.new_page()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
try:
|
| 90 |
content, soup, is_pdf = await self._get_page_content(page, url)
|
| 91 |
|
|
|
|
| 46 |
self.stealth_manager = Stealth().use_async(async_playwright())
|
| 47 |
self.p = await self.stealth_manager.__aenter__()
|
| 48 |
|
| 49 |
+
self.browser = await self.p.chromium.launch(
|
| 50 |
+
headless=True,
|
| 51 |
+
args=[
|
| 52 |
+
'--disable-blink-features=AutomationControlled',
|
| 53 |
+
'--disable-dev-shm-usage',
|
| 54 |
+
'--no-sandbox',
|
| 55 |
+
'--disable-setuid-sandbox',
|
| 56 |
+
'--disable-web-security',
|
| 57 |
+
'--disable-features=IsolateOrigins,site-per-process'
|
| 58 |
+
]
|
| 59 |
+
)
|
| 60 |
self.context = await self.browser.new_context(
|
| 61 |
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
| 62 |
+
viewport={'width': 1920, 'height': 1080},
|
| 63 |
+
locale='en-US',
|
| 64 |
+
timezone_id='America/New_York'
|
| 65 |
)
|
| 66 |
print("🚀 Browser agent initialized with Stealth API.")
|
| 67 |
return self
|
|
|
|
| 98 |
self.link_map[url] = LinkNode(href=url, overview=overview, parent=parent_url, child=[], depth=depth)
|
| 99 |
|
| 100 |
page = await self.context.new_page()
|
| 101 |
+
await page.add_init_script("""
|
| 102 |
+
// Override the navigator.webdriver property
|
| 103 |
+
Object.defineProperty(navigator, 'webdriver', {
|
| 104 |
+
get: () => undefined
|
| 105 |
+
});
|
| 106 |
+
|
| 107 |
+
// Override chrome property
|
| 108 |
+
window.chrome = {
|
| 109 |
+
runtime: {}
|
| 110 |
+
};
|
| 111 |
+
|
| 112 |
+
// Override permissions
|
| 113 |
+
const originalQuery = window.navigator.permissions.query;
|
| 114 |
+
window.navigator.permissions.query = (parameters) => (
|
| 115 |
+
parameters.name === 'notifications' ?
|
| 116 |
+
Promise.resolve({ state: Notification.permission }) :
|
| 117 |
+
originalQuery(parameters)
|
| 118 |
+
);
|
| 119 |
+
""")
|
| 120 |
try:
|
| 121 |
content, soup, is_pdf = await self._get_page_content(page, url)
|
| 122 |
|