feat(browser-captcha): support multi-project resident tabs for reCAPTCHA
- Add ResidentTabInfo class to manage per-project resident tab information
- Replace single resident tab with dictionary mapping project_id to tab info
- Implement automatic resident tab creation for each project_id on demand
- Add asyncio.Lock protection (coroutine-safe, not thread-safe) for resident tab operations
- Update stop_resident_mode to support closing specific project tabs
- Add _create_resident_tab method for project-specific tab creation
- Add _close_resident_tab method for proper tab cleanup
- Maintain backward compatibility with existing single resident mode properties
- Update status query methods to support multiple resident tabs
- Clear resident tabs dictionary when browser closes
src/services/browser_captcha_personal.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""
|
| 2 |
浏览器自动化获取 reCAPTCHA token
|
| 3 |
使用 nodriver (undetected-chromedriver 继任者) 实现反检测浏览器
|
| 4 |
-
支持常驻模式:
|
| 5 |
"""
|
| 6 |
import asyncio
|
| 7 |
import time
|
|
@@ -13,11 +13,20 @@ import nodriver as uc
|
|
| 13 |
from ..core.logger import debug_logger
|
| 14 |
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
class BrowserCaptchaService:
|
| 17 |
"""浏览器自动化获取 reCAPTCHA token(nodriver 有头模式)
|
| 18 |
|
| 19 |
支持两种模式:
|
| 20 |
-
1. 常驻模式 (Resident Mode): 保持
|
| 21 |
2. 传统模式 (Legacy Mode): 每次请求创建新标签页 (fallback)
|
| 22 |
"""
|
| 23 |
|
|
@@ -34,11 +43,15 @@ class BrowserCaptchaService:
|
|
| 34 |
# 持久化 profile 目录
|
| 35 |
self.user_data_dir = os.path.join(os.getcwd(), "browser_data")
|
| 36 |
|
| 37 |
-
# 常驻模式相关属性
|
| 38 |
-
self.
|
| 39 |
-
self.
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
@classmethod
|
| 44 |
async def get_instance(cls, db=None) -> 'BrowserCaptchaService':
|
|
@@ -153,14 +166,34 @@ class BrowserCaptchaService:
|
|
| 153 |
self._running = True
|
| 154 |
debug_logger.log_info(f"[BrowserCaptcha] ✅ 常驻模式已启动 (project: {project_id})")
|
| 155 |
|
| 156 |
-
async def stop_resident_mode(self):
|
| 157 |
-
"""停止常驻模式
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
if not self._running:
|
| 159 |
return
|
| 160 |
|
| 161 |
self._running = False
|
| 162 |
-
|
| 163 |
-
# 关闭常驻标签页
|
| 164 |
if self.resident_tab:
|
| 165 |
try:
|
| 166 |
await self.resident_tab.close()
|
|
@@ -170,8 +203,6 @@ class BrowserCaptchaService:
|
|
| 170 |
|
| 171 |
self.resident_project_id = None
|
| 172 |
self._recaptcha_ready = False
|
| 173 |
-
|
| 174 |
-
debug_logger.log_info("[BrowserCaptcha] 常驻模式已停止")
|
| 175 |
|
| 176 |
async def _wait_for_recaptcha(self, tab) -> bool:
|
| 177 |
"""等待 reCAPTCHA 加载
|
|
@@ -283,33 +314,141 @@ class BrowserCaptchaService:
|
|
| 283 |
async def get_token(self, project_id: str) -> Optional[str]:
|
| 284 |
"""获取 reCAPTCHA token
|
| 285 |
|
| 286 |
-
常驻模式:
|
| 287 |
-
|
| 288 |
-
|
| 289 |
Args:
|
| 290 |
project_id: Flow项目ID
|
| 291 |
|
| 292 |
Returns:
|
| 293 |
reCAPTCHA token字符串,如果获取失败返回None
|
| 294 |
"""
|
| 295 |
-
#
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
duration_ms = (time.time() - start_time) * 1000
|
| 302 |
if token:
|
| 303 |
debug_logger.log_info(f"[BrowserCaptcha] ✅ Token生成成功(耗时 {duration_ms:.0f}ms)")
|
| 304 |
return token
|
| 305 |
else:
|
| 306 |
-
debug_logger.log_warning("[BrowserCaptcha] 常驻
|
| 307 |
-
|
| 308 |
-
debug_logger.log_warning("[BrowserCaptcha] 常驻标签页
|
| 309 |
-
|
| 310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
return await self._get_token_legacy(project_id)
|
| 312 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
async def _get_token_legacy(self, project_id: str) -> Optional[str]:
|
| 314 |
"""传统模式获取 reCAPTCHA token(每次创建新标签页)
|
| 315 |
|
|
@@ -377,7 +516,7 @@ class BrowserCaptchaService:
|
|
| 377 |
|
| 378 |
async def close(self):
|
| 379 |
"""关闭浏览器"""
|
| 380 |
-
# 先停止常驻模式
|
| 381 |
await self.stop_resident_mode()
|
| 382 |
|
| 383 |
try:
|
|
@@ -390,6 +529,7 @@ class BrowserCaptchaService:
|
|
| 390 |
self.browser = None
|
| 391 |
|
| 392 |
self._initialized = False
|
|
|
|
| 393 |
debug_logger.log_info("[BrowserCaptcha] 浏览器已关闭")
|
| 394 |
except Exception as e:
|
| 395 |
debug_logger.log_error(f"[BrowserCaptcha] 关闭浏览器异常: {str(e)}")
|
|
@@ -404,13 +544,19 @@ class BrowserCaptchaService:
|
|
| 404 |
# ========== 状态查询 ==========
|
| 405 |
|
| 406 |
def is_resident_mode_active(self) -> bool:
|
| 407 |
-
"""检查
|
| 408 |
-
return self._running
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
-
def
|
| 411 |
-
"""获取当前
|
| 412 |
-
return self.
|
| 413 |
|
| 414 |
def get_resident_project_id(self) -> Optional[str]:
|
| 415 |
-
"""获取当前常驻的 project_id"""
|
|
|
|
|
|
|
| 416 |
return self.resident_project_id
|
|
|
|
| 1 |
"""
|
| 2 |
浏览器自动化获取 reCAPTCHA token
|
| 3 |
使用 nodriver (undetected-chromedriver 继任者) 实现反检测浏览器
|
| 4 |
+
支持常驻模式:为每个 project_id 自动创建常驻标签页,即时生成 token
|
| 5 |
"""
|
| 6 |
import asyncio
|
| 7 |
import time
|
|
|
|
| 13 |
from ..core.logger import debug_logger
|
| 14 |
|
| 15 |
|
| 16 |
+
class ResidentTabInfo:
    """Bookkeeping record for one resident (long-lived) browser tab.

    One instance is kept per ``project_id`` so that a reCAPTCHA token can be
    generated on an already-loaded page instead of opening a new tab.

    Attributes:
        tab: The browser tab object this record wraps (opaque to this class).
        project_id: Project the tab was opened for.
        recaptcha_ready: ``False`` on construction; set to ``True`` by the
            creator once reCAPTCHA has finished loading in the tab.
        created_at: ``time.time()`` timestamp taken at construction.
    """

    def __init__(self, tab, project_id: str) -> None:
        self.tab = tab
        self.project_id: str = project_id
        self.recaptcha_ready: bool = False
        self.created_at: float = time.time()

    def __repr__(self) -> str:
        # The tab object is deliberately left out: its repr is owned by the
        # browser driver and may be large or unavailable after the tab closes.
        return (
            f"{type(self).__name__}(project_id={self.project_id!r}, "
            f"recaptcha_ready={self.recaptcha_ready!r}, "
            f"created_at={self.created_at:.3f})"
        )
|
| 23 |
+
|
| 24 |
+
|
| 25 |
class BrowserCaptchaService:
|
| 26 |
"""浏览器自动化获取 reCAPTCHA token(nodriver 有头模式)
|
| 27 |
|
| 28 |
支持两种模式:
|
| 29 |
+
1. 常驻模式 (Resident Mode): 为每个 project_id 保持常驻标签页,即时生成 token
|
| 30 |
2. 传统模式 (Legacy Mode): 每次请求创建新标签页 (fallback)
|
| 31 |
"""
|
| 32 |
|
|
|
|
| 43 |
# 持久化 profile 目录
|
| 44 |
self.user_data_dir = os.path.join(os.getcwd(), "browser_data")
|
| 45 |
|
| 46 |
+
# 常驻模式相关属性 (支持多 project_id)
|
| 47 |
+
self._resident_tabs: dict[str, 'ResidentTabInfo'] = {} # project_id -> 常驻标签页信息
|
| 48 |
+
self._resident_lock = asyncio.Lock() # 保护常驻标签页操作
|
| 49 |
+
|
| 50 |
+
# 兼容旧 API(保留 single resident 属性作为别名)
|
| 51 |
+
self.resident_project_id: Optional[str] = None # 向后兼容
|
| 52 |
+
self.resident_tab = None # 向后兼容
|
| 53 |
+
self._running = False # 向后兼容
|
| 54 |
+
self._recaptcha_ready = False # 向后兼容
|
| 55 |
|
| 56 |
@classmethod
|
| 57 |
async def get_instance(cls, db=None) -> 'BrowserCaptchaService':
|
|
|
|
| 166 |
self._running = True
|
| 167 |
debug_logger.log_info(f"[BrowserCaptcha] ✅ 常驻模式已启动 (project: {project_id})")
|
| 168 |
|
| 169 |
+
async def stop_resident_mode(self, project_id: Optional[str] = None):
|
| 170 |
+
"""停止常驻模式
|
| 171 |
+
|
| 172 |
+
Args:
|
| 173 |
+
project_id: 指定要关闭的 project_id,如果为 None 则关闭所有常驻标签页
|
| 174 |
+
"""
|
| 175 |
+
async with self._resident_lock:
|
| 176 |
+
if project_id:
|
| 177 |
+
# 关闭指定的常驻标签页
|
| 178 |
+
await self._close_resident_tab(project_id)
|
| 179 |
+
debug_logger.log_info(f"[BrowserCaptcha] 已关闭 project_id={project_id} 的常驻模式")
|
| 180 |
+
else:
|
| 181 |
+
# 关闭所有常驻标签页
|
| 182 |
+
project_ids = list(self._resident_tabs.keys())
|
| 183 |
+
for pid in project_ids:
|
| 184 |
+
resident_info = self._resident_tabs.pop(pid, None)
|
| 185 |
+
if resident_info and resident_info.tab:
|
| 186 |
+
try:
|
| 187 |
+
await resident_info.tab.close()
|
| 188 |
+
except Exception:
|
| 189 |
+
pass
|
| 190 |
+
debug_logger.log_info(f"[BrowserCaptcha] 已关闭所有常驻标签页 (共 {len(project_ids)} 个)")
|
| 191 |
+
|
| 192 |
+
# 向后兼容:清理旧属性
|
| 193 |
if not self._running:
|
| 194 |
return
|
| 195 |
|
| 196 |
self._running = False
|
|
|
|
|
|
|
| 197 |
if self.resident_tab:
|
| 198 |
try:
|
| 199 |
await self.resident_tab.close()
|
|
|
|
| 203 |
|
| 204 |
self.resident_project_id = None
|
| 205 |
self._recaptcha_ready = False
|
|
|
|
|
|
|
| 206 |
|
| 207 |
async def _wait_for_recaptcha(self, tab) -> bool:
|
| 208 |
"""等待 reCAPTCHA 加载
|
|
|
|
| 314 |
async def get_token(self, project_id: str) -> Optional[str]:
    """Return a reCAPTCHA token for ``project_id``, preferring its resident tab.

    Automatic resident mode: when no resident tab exists for ``project_id``
    one is created and kept. If the resident tab cannot produce a token it is
    closed and rebuilt once; if that also fails the legacy per-request path
    is used.

    Args:
        project_id: Flow project ID.

    Returns:
        The reCAPTCHA token string, or ``None`` when token generation failed.
    """
    # Make sure the browser is initialized before any tab is touched.
    await self.initialize()

    # Look up (or lazily create) the resident tab under the lock so that two
    # concurrent requests do not both create a tab for the same project.
    async with self._resident_lock:
        resident_info = self._resident_tabs.get(project_id)

        if resident_info is None:
            debug_logger.log_info(f"[BrowserCaptcha] project_id={project_id} 没有常驻标签页,正在创建...")
            resident_info = await self._create_resident_tab(project_id)
            if resident_info is not None:
                self._resident_tabs[project_id] = resident_info
                debug_logger.log_info(f"[BrowserCaptcha] ✅ 已为 project_id={project_id} 创建常驻标签页 (当前共 {len(self._resident_tabs)} 个)")

    if resident_info is None:
        # Run the legacy fallback only after the lock is released; holding the
        # lock across it would stall token requests for every other project.
        debug_logger.log_warning(f"[BrowserCaptcha] 无法为 project_id={project_id} 创建常驻标签页,fallback 到传统模式")
        return await self._get_token_legacy(project_id)

    # Fast path: generate the token on the already-loaded resident tab.
    if resident_info.recaptcha_ready and resident_info.tab:
        start_time = time.time()
        debug_logger.log_info(f"[BrowserCaptcha] 从常驻标签页即时生成 token (project: {project_id})...")
        try:
            token = await self._execute_recaptcha_on_tab(resident_info.tab)
            duration_ms = (time.time() - start_time) * 1000
            if token:
                debug_logger.log_info(f"[BrowserCaptcha] ✅ Token生成成功(耗时 {duration_ms:.0f}ms)")
                return token
            debug_logger.log_warning(f"[BrowserCaptcha] 常驻标签页生成失败 (project: {project_id}),尝试重建...")
        except Exception as e:
            debug_logger.log_warning(f"[BrowserCaptcha] 常驻标签页异常: {e},尝试重建...")

    # The resident tab is unusable: close it and rebuild once.
    async with self._resident_lock:
        await self._close_resident_tab(project_id)
        resident_info = await self._create_resident_tab(project_id)
        if resident_info:
            self._resident_tabs[project_id] = resident_info
            # Try immediately on the rebuilt tab.
            try:
                token = await self._execute_recaptcha_on_tab(resident_info.tab)
                if token:
                    debug_logger.log_info("[BrowserCaptcha] ✅ 重建后 Token生成成功")
                    return token
            except Exception as e:
                # Still best-effort (we fall through to the legacy path), but
                # record the failure instead of discarding it silently.
                debug_logger.log_warning(f"[BrowserCaptcha] 重建后常驻标签页异常: {e}")

    # Final fallback: legacy mode (a new tab for this request).
    debug_logger.log_warning(f"[BrowserCaptcha] 所有常驻方式失败,fallback 到传统模式 (project: {project_id})")
    return await self._get_token_legacy(project_id)
|
| 375 |
|
| 376 |
+
async def _create_resident_tab(self, project_id: str) -> Optional[ResidentTabInfo]:
    """Open and prepare a resident tab for ``project_id``.

    Opens the project's Flow page in a new tab, polls ``document.readyState``
    once per second (up to 15 attempts) until it is ``"complete"``, then
    waits for reCAPTCHA via ``_wait_for_recaptcha``.

    Args:
        project_id: Project ID.

    Returns:
        A ``ResidentTabInfo`` with ``recaptcha_ready`` set to ``True``, or
        ``None`` when the tab could not be created or prepared.
    """

    async def _discard(stale_tab) -> None:
        """Best-effort close of a tab that will not be kept."""
        try:
            await stale_tab.close()
        except Exception as close_err:
            # ``Exception`` rather than a bare ``except`` so that task
            # cancellation (a ``BaseException``) is not swallowed here.
            debug_logger.log_warning(f"[BrowserCaptcha] 关闭标签页时异常: {close_err}")

    tab = None
    try:
        website_url = f"https://labs.google/fx/tools/flow/project/{project_id}"
        debug_logger.log_info(f"[BrowserCaptcha] 为 project_id={project_id} 创建常驻标签页,访问: {website_url}")

        # Open the page in a new tab.
        tab = await self.browser.get(website_url, new_tab=True)

        # Wait for the page to finish loading.
        page_loaded = False
        for retry in range(15):
            try:
                await asyncio.sleep(1)
                ready_state = await tab.evaluate("document.readyState")
                if ready_state == "complete":
                    page_loaded = True
                    break
            except ConnectionRefusedError as e:
                # The connection to the tab is gone; no close is attempted,
                # matching the original behavior for this path.
                debug_logger.log_warning(f"[BrowserCaptcha] 标签页连接丢失: {e}")
                return None
            except Exception as e:
                debug_logger.log_warning(f"[BrowserCaptcha] 等待页面异常: {e},重试 {retry + 1}/15...")
                await asyncio.sleep(1)

        if not page_loaded:
            debug_logger.log_error(f"[BrowserCaptcha] 页面加载超时 (project: {project_id})")
            await _discard(tab)
            return None

        # Wait for reCAPTCHA to load in the tab.
        recaptcha_ready = await self._wait_for_recaptcha(tab)

        if not recaptcha_ready:
            debug_logger.log_error(f"[BrowserCaptcha] reCAPTCHA 加载失败 (project: {project_id})")
            await _discard(tab)
            return None

        # Build the resident record for the prepared tab.
        resident_info = ResidentTabInfo(tab, project_id)
        resident_info.recaptcha_ready = True

        debug_logger.log_info(f"[BrowserCaptcha] ✅ 常驻标签页创建成功 (project: {project_id})")
        return resident_info

    except Exception as e:
        debug_logger.log_error(f"[BrowserCaptcha] 创建常驻标签页异常: {e}")
        # Do not leak a tab that was opened before the failure.
        if tab is not None:
            await _discard(tab)
        return None
|
| 437 |
+
|
| 438 |
+
async def _close_resident_tab(self, project_id: str):
    """Remove and close the resident tab registered for ``project_id``.

    The entry is popped from ``self._resident_tabs`` first, so it is gone
    from the registry even if closing the tab fails. Nothing happens when no
    entry (or no tab) exists for the project.

    Args:
        project_id: Project ID.
    """
    entry = self._resident_tabs.pop(project_id, None)
    if not entry or not entry.tab:
        return

    try:
        await entry.tab.close()
        debug_logger.log_info(f"[BrowserCaptcha] 已关闭 project_id={project_id} 的常驻标签页")
    except Exception as close_err:
        # Closing is best-effort: report the problem and carry on.
        debug_logger.log_warning(f"[BrowserCaptcha] 关闭标签页时异常: {close_err}")
|
| 451 |
+
|
| 452 |
async def _get_token_legacy(self, project_id: str) -> Optional[str]:
|
| 453 |
"""传统模式获取 reCAPTCHA token(每次创建新标签页)
|
| 454 |
|
|
|
|
| 516 |
|
| 517 |
async def close(self):
|
| 518 |
"""关闭浏览器"""
|
| 519 |
+
# 先停止所有常驻模式(关闭所有常驻标签页)
|
| 520 |
await self.stop_resident_mode()
|
| 521 |
|
| 522 |
try:
|
|
|
|
| 529 |
self.browser = None
|
| 530 |
|
| 531 |
self._initialized = False
|
| 532 |
+
self._resident_tabs.clear() # 确保清空常驻字典
|
| 533 |
debug_logger.log_info("[BrowserCaptcha] 浏览器已关闭")
|
| 534 |
except Exception as e:
|
| 535 |
debug_logger.log_error(f"[BrowserCaptcha] 关闭浏览器异常: {str(e)}")
|
|
|
|
| 544 |
# ========== 状态查询 ==========
|
| 545 |
|
| 546 |
def is_resident_mode_active(self) -> bool:
    """Report whether any resident tab is registered or the legacy flag is set."""
    if self._resident_tabs:
        return True
    # No per-project tabs: fall back to the backward-compatible flag.
    return self._running
|
| 549 |
+
|
| 550 |
+
def get_resident_count(self) -> int:
    """Return how many resident tabs are currently registered."""
    registered = self._resident_tabs
    return len(registered)
|
| 553 |
|
| 554 |
+
def get_resident_project_ids(self) -> list[str]:
    """Return a new list of every project_id that has a resident tab."""
    # Iterating a dict yields its keys; unpacking builds a fresh list.
    return [*self._resident_tabs]
|
| 557 |
|
| 558 |
def get_resident_project_id(self) -> Optional[str]:
    """Return one resident project_id (backward-compatible accessor).

    Yields the first key of ``self._resident_tabs`` when any tab is
    registered; otherwise the legacy ``self.resident_project_id`` attribute.
    """
    for first_project_id in self._resident_tabs:
        return first_project_id
    return self.resident_project_id
|