netlops committed on
Commit
9291512
·
1 Parent(s): 6d28e78

feat(browser-captcha): support multi-project resident tabs for reCAPTCHA

Browse files

- Add ResidentTabInfo class to manage per-project resident tab information
- Replace single resident tab with dictionary mapping project_id to tab info
- Implement automatic resident tab creation for each project_id on demand
- Add asyncio.Lock protection for concurrent (coroutine-level) resident tab operations
- Update stop_resident_mode to support closing specific project tabs
- Add _create_resident_tab method for project-specific tab creation
- Add _close_resident_tab method for proper tab cleanup
- Maintain backward compatibility with existing single resident mode properties
- Update status query methods to support multiple resident tabs
- Clear resident tabs dictionary when browser closes

src/services/browser_captcha_personal.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
  浏览器自动化获取 reCAPTCHA token
3
  使用 nodriver (undetected-chromedriver 继任者) 实现反检测浏览器
4
- 支持常驻模式:基于单一 project_id 保持常驻标签页,即时生成 token
5
  """
6
  import asyncio
7
  import time
@@ -13,11 +13,20 @@ import nodriver as uc
13
  from ..core.logger import debug_logger
14
 
15
 
 
 
 
 
 
 
 
 
 
16
  class BrowserCaptchaService:
17
  """浏览器自动化获取 reCAPTCHA token(nodriver 有头模式)
18
 
19
  支持两种模式:
20
- 1. 常驻模式 (Resident Mode): 保持一个常驻标签页,即时生成 token
21
  2. 传统模式 (Legacy Mode): 每次请求创建新标签页 (fallback)
22
  """
23
 
@@ -34,11 +43,15 @@ class BrowserCaptchaService:
34
  # 持久化 profile 目录
35
  self.user_data_dir = os.path.join(os.getcwd(), "browser_data")
36
 
37
- # 常驻模式相关属性
38
- self.resident_project_id: Optional[str] = None # 常驻 project_id
39
- self.resident_tab = None # 常驻标签页
40
- self._running = False # 常驻模式运行状态
41
- self._recaptcha_ready = False # reCAPTCHA 是否已加载
 
 
 
 
42
 
43
  @classmethod
44
  async def get_instance(cls, db=None) -> 'BrowserCaptchaService':
@@ -153,14 +166,34 @@ class BrowserCaptchaService:
153
  self._running = True
154
  debug_logger.log_info(f"[BrowserCaptcha] ✅ 常驻模式已启动 (project: {project_id})")
155
 
156
- async def stop_resident_mode(self):
157
- """停止常驻模式"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  if not self._running:
159
  return
160
 
161
  self._running = False
162
-
163
- # 关闭常驻标签页
164
  if self.resident_tab:
165
  try:
166
  await self.resident_tab.close()
@@ -170,8 +203,6 @@ class BrowserCaptchaService:
170
 
171
  self.resident_project_id = None
172
  self._recaptcha_ready = False
173
-
174
- debug_logger.log_info("[BrowserCaptcha] 常驻模式已停止")
175
 
176
  async def _wait_for_recaptcha(self, tab) -> bool:
177
  """等待 reCAPTCHA 加载
@@ -283,33 +314,141 @@ class BrowserCaptchaService:
283
  async def get_token(self, project_id: str) -> Optional[str]:
284
  """获取 reCAPTCHA token
285
 
286
- 常驻模式:直接从常驻标签页即时生成 token
287
- 传统模式:每次创建新标签页 (fallback)
288
-
289
  Args:
290
  project_id: Flow项目ID
291
 
292
  Returns:
293
  reCAPTCHA token字符串,如果获取失败返回None
294
  """
295
- # 如果是常驻模式且 project_id 匹配,直接从常驻标签页生成
296
- if self._running and self.resident_project_id == project_id:
297
- if self._recaptcha_ready and self.resident_tab:
298
- start_time = time.time()
299
- debug_logger.log_info("[BrowserCaptcha] 从常驻标签页即时生成 token...")
300
- token = await self._execute_recaptcha_on_tab(self.resident_tab)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  duration_ms = (time.time() - start_time) * 1000
302
  if token:
303
  debug_logger.log_info(f"[BrowserCaptcha] ✅ Token生成成功(耗时 {duration_ms:.0f}ms)")
304
  return token
305
  else:
306
- debug_logger.log_warning("[BrowserCaptcha] 常驻模式生成失败,fallback到传统模式")
307
- else:
308
- debug_logger.log_warning("[BrowserCaptcha] 常驻标签页未就绪fallback到传统模式")
309
-
310
- # Fallback: 使用传统模式
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  return await self._get_token_legacy(project_id)
312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  async def _get_token_legacy(self, project_id: str) -> Optional[str]:
314
  """传统模式获取 reCAPTCHA token(每次创建新标签页)
315
 
@@ -377,7 +516,7 @@ class BrowserCaptchaService:
377
 
378
  async def close(self):
379
  """关闭浏览器"""
380
- # 先停止常驻模式
381
  await self.stop_resident_mode()
382
 
383
  try:
@@ -390,6 +529,7 @@ class BrowserCaptchaService:
390
  self.browser = None
391
 
392
  self._initialized = False
 
393
  debug_logger.log_info("[BrowserCaptcha] 浏览器已关闭")
394
  except Exception as e:
395
  debug_logger.log_error(f"[BrowserCaptcha] 关闭浏览器异常: {str(e)}")
@@ -404,13 +544,19 @@ class BrowserCaptchaService:
404
  # ========== 状态查询 ==========
405
 
406
  def is_resident_mode_active(self) -> bool:
407
- """检查常驻模式是否激活"""
408
- return self._running
 
 
 
 
409
 
410
- def get_queue_size(self) -> int:
411
- """获取当前缓存队大小"""
412
- return self.token_queue.qsize()
413
 
414
  def get_resident_project_id(self) -> Optional[str]:
415
- """获取当前常驻的 project_id"""
 
 
416
  return self.resident_project_id
 
1
  """
2
  浏览器自动化获取 reCAPTCHA token
3
  使用 nodriver (undetected-chromedriver 继任者) 实现反检测浏览器
4
+ 支持常驻模式:为每个 project_id 自动创建常驻标签页,即时生成 token
5
  """
6
  import asyncio
7
  import time
 
13
  from ..core.logger import debug_logger
14
 
15
 
16
class ResidentTabInfo:
    """Bookkeeping record for one resident (long-lived) browser tab."""

    def __init__(self, tab, project_id: str):
        # Wall-clock creation time, as returned by time.time().
        self.created_at = time.time()
        # Starts out False; the owner sets it to True once reCAPTCHA is loaded.
        self.recaptcha_ready = False
        # Project this tab is dedicated to.
        self.project_id = project_id
        # The browser tab object kept open for this project.
        self.tab = tab
23
+
24
+
25
  class BrowserCaptchaService:
26
  """浏览器自动化获取 reCAPTCHA token(nodriver 有头模式)
27
 
28
  支持两种模式:
29
+ 1. 常驻模式 (Resident Mode): 为每个 project_id 保持常驻标签页,即时生成 token
30
  2. 传统模式 (Legacy Mode): 每次请求创建新标签页 (fallback)
31
  """
32
 
 
43
  # 持久化 profile 目录
44
  self.user_data_dir = os.path.join(os.getcwd(), "browser_data")
45
 
46
+ # 常驻模式相关属性 (支持多 project_id)
47
+ self._resident_tabs: dict[str, 'ResidentTabInfo'] = {} # project_id -> 常驻标签页信息
48
+ self._resident_lock = asyncio.Lock() # 保护常驻标签页操作
49
+
50
+ # 兼容旧 API(保留 single resident 属性作为别名)
51
+ self.resident_project_id: Optional[str] = None # 向后兼容
52
+ self.resident_tab = None # 向后兼容
53
+ self._running = False # 向后兼容
54
+ self._recaptcha_ready = False # 向后兼容
55
 
56
  @classmethod
57
  async def get_instance(cls, db=None) -> 'BrowserCaptchaService':
 
166
  self._running = True
167
  debug_logger.log_info(f"[BrowserCaptcha] ✅ 常驻模式已启动 (project: {project_id})")
168
 
169
+ async def stop_resident_mode(self, project_id: Optional[str] = None):
170
+ """停止常驻模式
171
+
172
+ Args:
173
+ project_id: 指定要关闭的 project_id,如果为 None 则关闭所有常驻标签页
174
+ """
175
+ async with self._resident_lock:
176
+ if project_id:
177
+ # 关闭指定的常驻标签页
178
+ await self._close_resident_tab(project_id)
179
+ debug_logger.log_info(f"[BrowserCaptcha] 已关闭 project_id={project_id} 的常驻模式")
180
+ else:
181
+ # 关闭所有常驻标签页
182
+ project_ids = list(self._resident_tabs.keys())
183
+ for pid in project_ids:
184
+ resident_info = self._resident_tabs.pop(pid, None)
185
+ if resident_info and resident_info.tab:
186
+ try:
187
+ await resident_info.tab.close()
188
+ except Exception:
189
+ pass
190
+ debug_logger.log_info(f"[BrowserCaptcha] 已关闭所有常驻标签页 (共 {len(project_ids)} 个)")
191
+
192
+ # 向后兼容:清理旧属性
193
  if not self._running:
194
  return
195
 
196
  self._running = False
 
 
197
  if self.resident_tab:
198
  try:
199
  await self.resident_tab.close()
 
203
 
204
  self.resident_project_id = None
205
  self._recaptcha_ready = False
 
 
206
 
207
  async def _wait_for_recaptcha(self, tab) -> bool:
208
  """等待 reCAPTCHA 加载
 
314
  async def get_token(self, project_id: str) -> Optional[str]:
315
  """获取 reCAPTCHA token
316
 
317
+ 自动常驻模式:如果该 project_id 没有常驻标签页,则自动创建并常驻
318
+
 
319
  Args:
320
  project_id: Flow项目ID
321
 
322
  Returns:
323
  reCAPTCHA token字符串,如果获取失败返回None
324
  """
325
+ # 确保浏览器已初始化
326
+ await self.initialize()
327
+
328
+ # 尝试从常驻标签页获取 token
329
+ async with self._resident_lock:
330
+ resident_info = self._resident_tabs.get(project_id)
331
+
332
+ # 如果该 project_id 没有常驻标签页,则自动创建
333
+ if resident_info is None:
334
+ debug_logger.log_info(f"[BrowserCaptcha] project_id={project_id} 没有常驻标签页,正在创建...")
335
+ resident_info = await self._create_resident_tab(project_id)
336
+ if resident_info is None:
337
+ debug_logger.log_warning(f"[BrowserCaptcha] 无法为 project_id={project_id} 创建常驻标签页,fallback 到传统模式")
338
+ return await self._get_token_legacy(project_id)
339
+ self._resident_tabs[project_id] = resident_info
340
+ debug_logger.log_info(f"[BrowserCaptcha] ✅ 已为 project_id={project_id} 创建常驻标签页 (当前共 {len(self._resident_tabs)} 个)")
341
+
342
+ # 使用常驻标签页生成 token
343
+ if resident_info and resident_info.recaptcha_ready and resident_info.tab:
344
+ start_time = time.time()
345
+ debug_logger.log_info(f"[BrowserCaptcha] 从常驻标签页即时生成 token (project: {project_id})...")
346
+ try:
347
+ token = await self._execute_recaptcha_on_tab(resident_info.tab)
348
  duration_ms = (time.time() - start_time) * 1000
349
  if token:
350
  debug_logger.log_info(f"[BrowserCaptcha] ✅ Token生成成功(耗时 {duration_ms:.0f}ms)")
351
  return token
352
  else:
353
+ debug_logger.log_warning(f"[BrowserCaptcha] 常驻标签页生成失败 (project: {project_id})尝试重建...")
354
+ except Exception as e:
355
+ debug_logger.log_warning(f"[BrowserCaptcha] 常驻标签页异常: {e}尝试重建...")
356
+
357
+ # 常驻标签页失效,尝试重建
358
+ async with self._resident_lock:
359
+ await self._close_resident_tab(project_id)
360
+ resident_info = await self._create_resident_tab(project_id)
361
+ if resident_info:
362
+ self._resident_tabs[project_id] = resident_info
363
+ # 重建后立即尝试生成
364
+ try:
365
+ token = await self._execute_recaptcha_on_tab(resident_info.tab)
366
+ if token:
367
+ debug_logger.log_info(f"[BrowserCaptcha] ✅ 重建后 Token生成成功")
368
+ return token
369
+ except Exception:
370
+ pass
371
+
372
+ # 最终 Fallback: 使用传统模式
373
+ debug_logger.log_warning(f"[BrowserCaptcha] 所有常驻方式失败,fallback 到传统模式 (project: {project_id})")
374
  return await self._get_token_legacy(project_id)
375
 
376
+ async def _create_resident_tab(self, project_id: str) -> Optional[ResidentTabInfo]:
377
+ """为指定 project_id 创建常驻标签页
378
+
379
+ Args:
380
+ project_id: 项目 ID
381
+
382
+ Returns:
383
+ ResidentTabInfo 对象,或 None(创建失败)
384
+ """
385
+ try:
386
+ website_url = f"https://labs.google/fx/tools/flow/project/{project_id}"
387
+ debug_logger.log_info(f"[BrowserCaptcha] 为 project_id={project_id} 创建常驻标签页,访问: {website_url}")
388
+
389
+ # 创建新标签页
390
+ tab = await self.browser.get(website_url, new_tab=True)
391
+
392
+ # 等待页面加载完成
393
+ page_loaded = False
394
+ for retry in range(15):
395
+ try:
396
+ await asyncio.sleep(1)
397
+ ready_state = await tab.evaluate("document.readyState")
398
+ if ready_state == "complete":
399
+ page_loaded = True
400
+ break
401
+ except ConnectionRefusedError as e:
402
+ debug_logger.log_warning(f"[BrowserCaptcha] 标签页连接丢失: {e}")
403
+ return None
404
+ except Exception as e:
405
+ debug_logger.log_warning(f"[BrowserCaptcha] 等待页面异常: {e},重试 {retry + 1}/15...")
406
+ await asyncio.sleep(1)
407
+
408
+ if not page_loaded:
409
+ debug_logger.log_error(f"[BrowserCaptcha] 页面加载超时 (project: {project_id})")
410
+ try:
411
+ await tab.close()
412
+ except:
413
+ pass
414
+ return None
415
+
416
+ # 等待 reCAPTCHA 加载
417
+ recaptcha_ready = await self._wait_for_recaptcha(tab)
418
+
419
+ if not recaptcha_ready:
420
+ debug_logger.log_error(f"[BrowserCaptcha] reCAPTCHA 加载失败 (project: {project_id})")
421
+ try:
422
+ await tab.close()
423
+ except:
424
+ pass
425
+ return None
426
+
427
+ # 创建常驻信息对象
428
+ resident_info = ResidentTabInfo(tab, project_id)
429
+ resident_info.recaptcha_ready = True
430
+
431
+ debug_logger.log_info(f"[BrowserCaptcha] ✅ 常驻标签页创建成功 (project: {project_id})")
432
+ return resident_info
433
+
434
+ except Exception as e:
435
+ debug_logger.log_error(f"[BrowserCaptcha] 创建常驻标签页异常: {e}")
436
+ return None
437
+
438
+ async def _close_resident_tab(self, project_id: str):
439
+ """关闭指定 project_id 的常驻标签页
440
+
441
+ Args:
442
+ project_id: 项目 ID
443
+ """
444
+ resident_info = self._resident_tabs.pop(project_id, None)
445
+ if resident_info and resident_info.tab:
446
+ try:
447
+ await resident_info.tab.close()
448
+ debug_logger.log_info(f"[BrowserCaptcha] 已关闭 project_id={project_id} 的常驻标签页")
449
+ except Exception as e:
450
+ debug_logger.log_warning(f"[BrowserCaptcha] 关闭标签页时异常: {e}")
451
+
452
  async def _get_token_legacy(self, project_id: str) -> Optional[str]:
453
  """传统模式获取 reCAPTCHA token(每次创建新标签页)
454
 
 
516
 
517
  async def close(self):
518
  """关闭浏览器"""
519
+ # 先停止所有常驻模式(关闭所有常驻标签页)
520
  await self.stop_resident_mode()
521
 
522
  try:
 
529
  self.browser = None
530
 
531
  self._initialized = False
532
+ self._resident_tabs.clear() # 确保清空常驻字典
533
  debug_logger.log_info("[BrowserCaptcha] 浏览器已关闭")
534
  except Exception as e:
535
  debug_logger.log_error(f"[BrowserCaptcha] 关闭浏览器异常: {str(e)}")
 
544
  # ========== 状态查询 ==========
545
 
546
  def is_resident_mode_active(self) -> bool:
547
+ """检查是否有任何常驻标签页激活"""
548
+ return len(self._resident_tabs) > 0 or self._running
549
+
550
+ def get_resident_count(self) -> int:
551
+ """获取当前常驻标签页数量"""
552
+ return len(self._resident_tabs)
553
 
554
+ def get_resident_project_ids(self) -> list[str]:
555
+ """获取所有当前常驻的 project_id """
556
+ return list(self._resident_tabs.keys())
557
 
558
  def get_resident_project_id(self) -> Optional[str]:
559
+ """获取当前常驻的 project_id(向后兼容,返回第一个)"""
560
+ if self._resident_tabs:
561
+ return next(iter(self._resident_tabs.keys()))
562
  return self.resident_project_id