| import asyncio |
| import time |
| import re |
| import os |
| from typing import Optional, Dict |
|
|
| from ..core.logger import debug_logger |
|
|
| |
# Playwright is an optional dependency. Record whether it could be imported;
# BrowserCaptchaService.initialize() checks this flag before launching a browser.
try:
    from playwright.async_api import async_playwright, BrowserContext, Page
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
else:
    PLAYWRIGHT_AVAILABLE = True
|
|
| |
# Accepted form: scheme://[username:password@]host:port[/]
# - the username may contain '@' (e-mail style logins) but not ':'
# - the password is matched greedily, so it extends to the LAST '@' and may
#   itself contain '@' or ':'
# - the host may not contain ':', '@' or '/'
_PROXY_URL_RE = re.compile(
    r'(socks5|http|https)://'
    r'(?:([^:]+):(.+)@)?'
    r'([^:@/]+):(\d+)/?'
)


def parse_proxy_url(proxy_url: str) -> Optional[Dict[str, str]]:
    """Split a proxy URL into a server address and optional credentials.

    Surrounding whitespace and a single trailing ``/`` are ignored.

    Args:
        proxy_url: Proxy address such as ``http://host:8080`` or
            ``socks5://user:secret@host:1080``. Supported schemes are
            ``socks5``, ``http`` and ``https``.

    Returns:
        A dict with a ``server`` key (``scheme://host:port``) plus
        ``username`` and ``password`` keys when both were supplied, or
        ``None`` when the value is empty or does not match the accepted form.
    """
    if not proxy_url:
        return None

    match = _PROXY_URL_RE.fullmatch(proxy_url.strip())
    if match is None:
        return None

    protocol, username, password, host, port = match.groups()
    proxy_config = {'server': f'{protocol}://{host}:{port}'}
    if username and password:
        proxy_config['username'] = username
        proxy_config['password'] = password
    return proxy_config
|
|
class BrowserCaptchaService:
    """Obtain reCAPTCHA tokens through a persistent, headed Chromium profile.

    One Playwright persistent context is launched on first use and kept open.
    Each token request opens a short-lived tab inside that context, so the
    tabs share whatever state the profile under ``user_data_dir`` holds.
    """

    # Process-wide singleton and the lock guarding its creation.
    _instance: Optional['BrowserCaptchaService'] = None
    _lock = asyncio.Lock()

    # JavaScript snippets evaluated in the page. The site key is passed as an
    # argument instead of being spliced into the source text.
    _JS_HAS_GRECAPTCHA = "() => { return !!(window.grecaptcha && window.grecaptcha.execute); }"
    _JS_INJECT_SCRIPT = """
        (siteKey) => {
            const script = document.createElement('script');
            script.src = 'https://www.google.com/recaptcha/api.js?render=' + siteKey;
            script.async = true; script.defer = true;
            document.head.appendChild(script);
        }
    """
    _JS_EXECUTE = """
        async (siteKey) => {
            try {
                return await window.grecaptcha.execute(siteKey, { action: 'FLOW_GENERATION' });
            } catch (e) { return null; }
        }
    """

    def __init__(self, db=None):
        """Set up the service state without starting a browser.

        Args:
            db: Optional storage object. When given, ``initialize`` awaits its
                ``get_captcha_config()`` to look up the browser proxy settings.
        """
        self.headless = False
        self.playwright = None
        self.context: Optional[BrowserContext] = None
        self._initialized = False
        self.website_key = "6LdsFiUsAAAAAIjVDZcuLhaHiDn5nnHVXVRQGeMV"
        self.db = db
        # Browser profile directory, resolved against the current working directory.
        self.user_data_dir = os.path.join(os.getcwd(), "browser_data")
        # Serializes initialize()/close() so the profile is never launched twice.
        self._init_lock = asyncio.Lock()

    @classmethod
    async def get_instance(cls, db=None) -> 'BrowserCaptchaService':
        """Return the shared service instance, creating it on first call.

        Args:
            db: Passed to the constructor only when the instance is created;
                ignored on later calls.
        """
        if cls._instance is None:
            async with cls._lock:
                # Re-check: another task may have created it while we waited.
                if cls._instance is None:
                    cls._instance = cls(db)
        return cls._instance

    async def _configured_proxy_url(self) -> Optional[str]:
        """Return the proxy URL enabled in the stored captcha config, if any."""
        if not self.db:
            return None
        captcha_config = await self.db.get_captcha_config()
        if captcha_config.browser_proxy_enabled and captcha_config.browser_proxy_url:
            return captcha_config.browser_proxy_url
        return None

    async def _release_resources(self) -> list:
        """Close the context and stop Playwright, resetting state in all cases.

        Returns:
            The exceptions raised while shutting down (empty on success).
        """
        context, self.context = self.context, None
        driver, self.playwright = self.playwright, None
        self._initialized = False

        errors = []
        # Each step is attempted independently so a failing context.close()
        # cannot prevent the Playwright driver from being stopped.
        if context is not None:
            try:
                await context.close()
            except Exception as e:
                errors.append(e)
        if driver is not None:
            try:
                await driver.stop()
            except Exception as e:
                errors.append(e)
        return errors

    async def initialize(self):
        """Launch the persistent browser context if it is not running yet.

        Raises:
            ImportError: Playwright is not installed.
            Exception: Any error raised while reading the config or launching
                the browser is logged and re-raised after cleaning up.
        """
        if self._initialized and self.context:
            return

        async with self._init_lock:
            # Re-check: a concurrent caller may have finished the launch.
            if self._initialized and self.context:
                return

            try:
                if not PLAYWRIGHT_AVAILABLE:
                    debug_logger.log_error("[BrowserCaptcha] ❌ Playwright 不可用,请使用 YesCaptcha 服务")
                    raise ImportError("Playwright 未安装,请使用 YesCaptcha 服务")

                proxy_url = await self._configured_proxy_url()

                debug_logger.log_info(f"[BrowserCaptcha] 正在启动浏览器 (用户数据目录: {self.user_data_dir})...")
                self.playwright = await async_playwright().start()

                launch_options = {
                    'headless': self.headless,
                    'user_data_dir': self.user_data_dir,
                    'viewport': {'width': 1280, 'height': 720},
                    'args': [
                        '--disable-blink-features=AutomationControlled',
                        '--disable-infobars',
                        '--no-sandbox',
                        '--disable-setuid-sandbox',
                    ],
                }

                if proxy_url:
                    proxy_config = parse_proxy_url(proxy_url)
                    if proxy_config:
                        launch_options['proxy'] = proxy_config
                        debug_logger.log_info(f"[BrowserCaptcha] 使用代理: {proxy_config['server']}")
                    else:
                        # The raw URL is not logged because it may carry credentials.
                        debug_logger.log_warning("[BrowserCaptcha] 代理地址格式无效,已忽略代理设置")

                self.context = await self.playwright.chromium.launch_persistent_context(**launch_options)
                self.context.set_default_timeout(30000)

                self._initialized = True
                debug_logger.log_info(f"[BrowserCaptcha] ✅ 浏览器已启动 (Profile: {self.user_data_dir})")
            except Exception as e:
                debug_logger.log_error(f"[BrowserCaptcha] ❌ 浏览器启动失败: {str(e)}")
                # Do not leave a half-started driver or context behind.
                await self._release_resources()
                raise

    async def get_token(self, project_id: str) -> Optional[str]:
        """Open the project page in a new tab and request a reCAPTCHA token.

        Args:
            project_id: Identifier inserted into the labs.google project URL.

        Returns:
            The token string, or ``None`` when the token could not be obtained
            or an error occurred after the browser was started.

        Raises:
            Exception: Errors from ``initialize`` propagate to the caller.
        """
        if not self._initialized or not self.context:
            await self.initialize()

        page: Optional[Page] = None
        try:
            # The tab is created in the shared persistent context.
            page = await self.context.new_page()

            website_url = f"https://labs.google/fx/tools/flow/project/{project_id}"
            debug_logger.log_info(f"[BrowserCaptcha] 访问页面: {website_url}")

            # A navigation error is only logged; the token attempt continues.
            try:
                await page.goto(website_url, wait_until="domcontentloaded")
            except Exception as e:
                debug_logger.log_warning(f"[BrowserCaptcha] 页面加载警告: {str(e)}")

            script_loaded = await page.evaluate(self._JS_HAS_GRECAPTCHA)
            if not script_loaded:
                await page.evaluate(self._JS_INJECT_SCRIPT, self.website_key)
                # Give the injected script time to load before calling it.
                await page.wait_for_timeout(2000)

            token = await page.evaluate(self._JS_EXECUTE, self.website_key)

            if token:
                debug_logger.log_info(f"[BrowserCaptcha] ✅ Token获取成功")
                return token

            debug_logger.log_error("[BrowserCaptcha] Token获取失败")
            return None
        except Exception as e:
            debug_logger.log_error(f"[BrowserCaptcha] 异常: {str(e)}")
            return None
        finally:
            # Close only the tab; the context stays open for later requests.
            if page:
                try:
                    await page.close()
                except Exception as e:
                    debug_logger.log_warning(f"[BrowserCaptcha] 页面关闭失败: {str(e)}")

    async def close(self):
        """Shut the browser down completely and reset the service state.

        Shutdown errors are logged rather than raised.
        """
        async with self._init_lock:
            errors = await self._release_resources()

        if errors:
            for error in errors:
                debug_logger.log_error(f"[BrowserCaptcha] 关闭异常: {str(error)}")
        else:
            debug_logger.log_info("[BrowserCaptcha] 浏览器服务已关闭")

    async def open_login_window(self):
        """Open the Google sign-in page in a tab that is left open for manual login."""
        await self.initialize()
        page = await self.context.new_page()
        await page.goto("https://accounts.google.com/")
        print("请在打开的浏览器中登录账号。登录完成后,无需关闭浏览器,脚本下次运行时会自动使用此状态。")