| | """ |
| | 浏览器自动化获取 reCAPTCHA token |
| | 使用 Playwright 访问页面并执行 reCAPTCHA 验证 |
| | """ |
| | import asyncio |
| | import time |
| | import re |
| | from typing import Optional, Dict |
| |
|
| | from ..core.logger import debug_logger |
| |
|
| | |
| | try: |
| | from playwright.async_api import async_playwright, Browser, BrowserContext |
| | PLAYWRIGHT_AVAILABLE = True |
| | except ImportError: |
| | PLAYWRIGHT_AVAILABLE = False |
| |
|
| |
|
| | def parse_proxy_url(proxy_url: str) -> Optional[Dict[str, str]]: |
| | """解析代理URL,分离协议、主机、端口、认证信息 |
| | |
| | Args: |
| | proxy_url: 代理URL,格式:protocol://[username:password@]host:port |
| | |
| | Returns: |
| | 代理配置字典,包含server、username、password(如果有认证) |
| | """ |
| | proxy_pattern = r'^(socks5|http|https)://(?:([^:]+):([^@]+)@)?([^:]+):(\d+)$' |
| | match = re.match(proxy_pattern, proxy_url) |
| |
|
| | if match: |
| | protocol, username, password, host, port = match.groups() |
| | proxy_config = {'server': f'{protocol}://{host}:{port}'} |
| |
|
| | if username and password: |
| | proxy_config['username'] = username |
| | proxy_config['password'] = password |
| |
|
| | return proxy_config |
| | return None |
| |
|
| |
|
| | def validate_browser_proxy_url(proxy_url: str) -> tuple[bool, str]: |
| | """验证浏览器代理URL格式(仅支持HTTP和无认证SOCKS5) |
| | |
| | Args: |
| | proxy_url: 代理URL |
| | |
| | Returns: |
| | (是否有效, 错误信息) |
| | """ |
| | if not proxy_url or not proxy_url.strip(): |
| | return True, "" |
| |
|
| | proxy_url = proxy_url.strip() |
| | parsed = parse_proxy_url(proxy_url) |
| |
|
| | if not parsed: |
| | return False, "代理URL格式错误,正确格式:http://host:port 或 socks5://host:port" |
| |
|
| | |
| | has_auth = 'username' in parsed |
| |
|
| | |
| | protocol = parsed['server'].split('://')[0] |
| |
|
| | |
| | if protocol == 'socks5' and has_auth: |
| | return False, "浏览器不支持带认证的SOCKS5代理,请使用HTTP代理或移除SOCKS5认证" |
| |
|
| | |
| | if protocol in ['http', 'https']: |
| | return True, "" |
| |
|
| | |
| | if protocol == 'socks5' and not has_auth: |
| | return True, "" |
| |
|
| | return False, f"不支持的代理协议:{protocol}" |
| |
|
| |
|
| | class BrowserCaptchaService: |
| | """浏览器自动化获取 reCAPTCHA token(单例模式)""" |
| |
|
| | _instance: Optional['BrowserCaptchaService'] = None |
| | _lock = asyncio.Lock() |
| |
|
| | def __init__(self, db=None): |
| | """初始化服务(始终使用无头模式)""" |
| | self.headless = True |
| | self.playwright = None |
| | self.browser: Optional[Browser] = None |
| | self._initialized = False |
| | self.website_key = "6LdsFiUsAAAAAIjVDZcuLhaHiDn5nnHVXVRQGeMV" |
| | self.db = db |
| |
|
| | @classmethod |
| | async def get_instance(cls, db=None) -> 'BrowserCaptchaService': |
| | """获取单例实例""" |
| | if cls._instance is None: |
| | async with cls._lock: |
| | if cls._instance is None: |
| | cls._instance = cls(db) |
| | await cls._instance.initialize() |
| | return cls._instance |
| |
|
| | async def initialize(self): |
| | """初始化浏览器(启动一次)""" |
| | if self._initialized: |
| | return |
| |
|
| | try: |
| | |
| | if not PLAYWRIGHT_AVAILABLE: |
| | debug_logger.log_error("[BrowserCaptcha] ❌ Playwright 不可用,请使用 YesCaptcha 服务") |
| | raise ImportError("Playwright 未安装,请使用 YesCaptcha 服务") |
| |
|
| | |
| | proxy_url = None |
| | if self.db: |
| | captcha_config = await self.db.get_captcha_config() |
| | if captcha_config.browser_proxy_enabled and captcha_config.browser_proxy_url: |
| | proxy_url = captcha_config.browser_proxy_url |
| |
|
| | debug_logger.log_info(f"[BrowserCaptcha] 正在启动浏览器... (proxy={proxy_url or 'None'})") |
| | self.playwright = await async_playwright().start() |
| |
|
| | |
| | launch_options = { |
| | 'headless': self.headless, |
| | 'args': [ |
| | '--disable-blink-features=AutomationControlled', |
| | '--disable-dev-shm-usage', |
| | '--no-sandbox', |
| | '--disable-setuid-sandbox' |
| | ] |
| | } |
| |
|
| | |
| | if proxy_url: |
| | proxy_config = parse_proxy_url(proxy_url) |
| | if proxy_config: |
| | launch_options['proxy'] = proxy_config |
| | auth_info = "auth=yes" if 'username' in proxy_config else "auth=no" |
| | debug_logger.log_info(f"[BrowserCaptcha] 代理配置: {proxy_config['server']} ({auth_info})") |
| | else: |
| | debug_logger.log_warning(f"[BrowserCaptcha] 代理URL格式错误: {proxy_url}") |
| |
|
| | self.browser = await self.playwright.chromium.launch(**launch_options) |
| | self._initialized = True |
| | debug_logger.log_info(f"[BrowserCaptcha] ✅ 浏览器已启动 (headless={self.headless}, proxy={proxy_url or 'None'})") |
| | except Exception as e: |
| | debug_logger.log_error(f"[BrowserCaptcha] ❌ 浏览器启动失败: {str(e)}") |
| | raise |
| |
|
| | async def get_token(self, project_id: str) -> Optional[str]: |
| | """获取 reCAPTCHA token |
| | |
| | Args: |
| | project_id: Flow项目ID |
| | |
| | Returns: |
| | reCAPTCHA token字符串,如果获取失败返回None |
| | """ |
| | if not self._initialized: |
| | await self.initialize() |
| |
|
| | start_time = time.time() |
| | context = None |
| |
|
| | try: |
| | |
| | context = await self.browser.new_context( |
| | viewport={'width': 1920, 'height': 1080}, |
| | user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', |
| | locale='en-US', |
| | timezone_id='America/New_York' |
| | ) |
| | page = await context.new_page() |
| |
|
| | website_url = f"https://labs.google/fx/tools/flow/project/{project_id}" |
| |
|
| | debug_logger.log_info(f"[BrowserCaptcha] 访问页面: {website_url}") |
| |
|
| | |
| | try: |
| | await page.goto(website_url, wait_until="domcontentloaded", timeout=30000) |
| | except Exception as e: |
| | debug_logger.log_warning(f"[BrowserCaptcha] 页面加载超时或失败: {str(e)}") |
| |
|
| | |
| | debug_logger.log_info("[BrowserCaptcha] 检查并加载 reCAPTCHA v3 脚本...") |
| | script_loaded = await page.evaluate(""" |
| | () => { |
| | if (window.grecaptcha && typeof window.grecaptcha.execute === 'function') { |
| | return true; |
| | } |
| | return false; |
| | } |
| | """) |
| |
|
| | if not script_loaded: |
| | |
| | debug_logger.log_info("[BrowserCaptcha] 注入 reCAPTCHA v3 脚本...") |
| | await page.evaluate(f""" |
| | () => {{ |
| | return new Promise((resolve) => {{ |
| | const script = document.createElement('script'); |
| | script.src = 'https://www.google.com/recaptcha/api.js?render={self.website_key}'; |
| | script.async = true; |
| | script.defer = true; |
| | script.onload = () => resolve(true); |
| | script.onerror = () => resolve(false); |
| | document.head.appendChild(script); |
| | }}); |
| | }} |
| | """) |
| |
|
| | |
| | debug_logger.log_info("[BrowserCaptcha] 等待reCAPTCHA初始化...") |
| | for i in range(20): |
| | grecaptcha_ready = await page.evaluate(""" |
| | () => { |
| | return window.grecaptcha && |
| | typeof window.grecaptcha.execute === 'function'; |
| | } |
| | """) |
| | if grecaptcha_ready: |
| | debug_logger.log_info(f"[BrowserCaptcha] reCAPTCHA 已准备好(等待了 {i*0.5} 秒)") |
| | break |
| | await asyncio.sleep(0.5) |
| | else: |
| | debug_logger.log_warning("[BrowserCaptcha] reCAPTCHA 初始化超时,继续尝试执行...") |
| |
|
| | |
| | await page.wait_for_timeout(1000) |
| |
|
| | |
| | debug_logger.log_info("[BrowserCaptcha] 执行reCAPTCHA验证...") |
| | token = await page.evaluate(""" |
| | async (websiteKey) => { |
| | try { |
| | if (!window.grecaptcha) { |
| | console.error('[BrowserCaptcha] window.grecaptcha 不存在'); |
| | return null; |
| | } |
| | |
| | if (typeof window.grecaptcha.execute !== 'function') { |
| | console.error('[BrowserCaptcha] window.grecaptcha.execute 不是函数'); |
| | return null; |
| | } |
| | |
| | // 确保grecaptcha已准备好 |
| | await new Promise((resolve, reject) => { |
| | const timeout = setTimeout(() => { |
| | reject(new Error('reCAPTCHA加载超时')); |
| | }, 15000); |
| | |
| | if (window.grecaptcha && window.grecaptcha.ready) { |
| | window.grecaptcha.ready(() => { |
| | clearTimeout(timeout); |
| | resolve(); |
| | }); |
| | } else { |
| | clearTimeout(timeout); |
| | resolve(); |
| | } |
| | }); |
| | |
| | // 执行reCAPTCHA v3 |
| | const token = await window.grecaptcha.execute(websiteKey, { |
| | action: 'FLOW_GENERATION' |
| | }); |
| | |
| | return token; |
| | } catch (error) { |
| | console.error('[BrowserCaptcha] reCAPTCHA执行错误:', error); |
| | return null; |
| | } |
| | } |
| | """, self.website_key) |
| |
|
| | duration_ms = (time.time() - start_time) * 1000 |
| |
|
| | if token: |
| | debug_logger.log_info(f"[BrowserCaptcha] ✅ Token获取成功(耗时 {duration_ms:.0f}ms)") |
| | return token |
| | else: |
| | debug_logger.log_error("[BrowserCaptcha] Token获取失败(返回null)") |
| | return None |
| |
|
| | except Exception as e: |
| | debug_logger.log_error(f"[BrowserCaptcha] 获取token异常: {str(e)}") |
| | return None |
| | finally: |
| | |
| | if context: |
| | try: |
| | await context.close() |
| | except: |
| | pass |
| |
|
| | async def close(self): |
| | """关闭浏览器""" |
| | try: |
| | if self.browser: |
| | try: |
| | await self.browser.close() |
| | except Exception as e: |
| | |
| | if "Connection closed" not in str(e): |
| | debug_logger.log_warning(f"[BrowserCaptcha] 关闭浏览器时出现异常: {str(e)}") |
| | finally: |
| | self.browser = None |
| |
|
| | if self.playwright: |
| | try: |
| | await self.playwright.stop() |
| | except Exception: |
| | pass |
| | finally: |
| | self.playwright = None |
| |
|
| | self._initialized = False |
| | debug_logger.log_info("[BrowserCaptcha] 浏览器已关闭") |
| | except Exception as e: |
| | debug_logger.log_error(f"[BrowserCaptcha] 关闭浏览器异常: {str(e)}") |
| |
|