netlops committed on
Commit
91dcd8f
·
1 Parent(s): 9291512

feat(i2v): enhance image-to-video documentation and single/dual frame support

Browse files

- Update README.md to clarify single vs dual frame image support with
automatic model_key selection
- Add detailed explanation of automatic adaptation system for
different image counts
- Include clear distinction between single frame mode (1 image) and
dual frame mode (2 images) with proper model selection

fix(browser-captcha): increase page load timeout from 15s to 60s

- Extend retry range from 15 to 60 attempts for page loading
- Update debug logging to reflect new timeout values
- Improve reliability of captcha service by allowing more time for
page load completion

fix(api): correct I2V API endpoint URL for single frame generation

- Change URL from batchAsyncGenerateVideoStartAndEndImage to
batchAsyncGenerateVideoStartImage for single frame scenarios

fix(model-config): correct model_key for I2V single frame mode

- Fix model_key of the veo_3_1_i2v_s_fast_fl_landscape entry: drop the
misplaced 'landscape_' segment so it reads veo_3_1_i2v_s_fast_fl_ultra_relaxed
- Implement automatic model_key transformation for single frame mode
by replacing '_fl_' with '_' in model keys
- Add debug logging for model key transformation process

README.md CHANGED
@@ -117,7 +117,11 @@ python main.py
117
  | `veo_2_0_t2v_landscape` | 文生视频 | 横屏 |
118
 
119
  #### 首尾帧模型 (I2V - Image to Video)
120
- 📸 **支持1-2张图片:首尾帧**
 
 
 
 
121
 
122
  | 模型名称 | 说明| 尺寸 |
123
  |---------|---------|--------|
 
117
  | `veo_2_0_t2v_landscape` | 文生视频 | 横屏 |
118
 
119
  #### 首尾帧模型 (I2V - Image to Video)
120
+ 📸 **支持1-2张图片:1张作为首帧,2张作为首尾帧**
121
+
122
+ > 💡 **自动适配**:系统会根据图片数量自动选择对应的 model_key
123
+ > - **单帧模式**(1张图):使用首帧生成视频
124
+ > - **双帧模式**(2张图):使用首帧+尾帧生成过渡视频
125
 
126
  | 模型名称 | 说明| 尺寸 |
127
  |---------|---------|--------|
src/services/browser_captcha_personal.py CHANGED
@@ -131,11 +131,11 @@ class BrowserCaptchaService:
131
 
132
  # 等待页面加载完成(带重试机制)
133
  page_loaded = False
134
- for retry in range(15):
135
  try:
136
  await asyncio.sleep(1)
137
  ready_state = await self.resident_tab.evaluate("document.readyState")
138
- debug_logger.log_info(f"[BrowserCaptcha] 页面状态: {ready_state} (重试 {retry + 1}/15)")
139
  if ready_state == "complete":
140
  page_loaded = True
141
  break
@@ -391,7 +391,7 @@ class BrowserCaptchaService:
391
 
392
  # 等待页面加载完成
393
  page_loaded = False
394
- for retry in range(15):
395
  try:
396
  await asyncio.sleep(1)
397
  ready_state = await tab.evaluate("document.readyState")
@@ -402,7 +402,7 @@ class BrowserCaptchaService:
402
  debug_logger.log_warning(f"[BrowserCaptcha] 标签页连接丢失: {e}")
403
  return None
404
  except Exception as e:
405
- debug_logger.log_warning(f"[BrowserCaptcha] 等待页面异常: {e},重试 {retry + 1}/15...")
406
  await asyncio.sleep(1)
407
 
408
  if not page_loaded:
 
131
 
132
  # 等待页面加载完成(带重试机制)
133
  page_loaded = False
134
+ for retry in range(60):
135
  try:
136
  await asyncio.sleep(1)
137
  ready_state = await self.resident_tab.evaluate("document.readyState")
138
+ debug_logger.log_info(f"[BrowserCaptcha] 页面状态: {ready_state} (重试 {retry + 1}/60)")
139
  if ready_state == "complete":
140
  page_loaded = True
141
  break
 
391
 
392
  # 等待页面加载完成
393
  page_loaded = False
394
+ for retry in range(60):
395
  try:
396
  await asyncio.sleep(1)
397
  ready_state = await tab.evaluate("document.readyState")
 
402
  debug_logger.log_warning(f"[BrowserCaptcha] 标签页连接丢失: {e}")
403
  return None
404
  except Exception as e:
405
+ debug_logger.log_warning(f"[BrowserCaptcha] 等待页面异常: {e},重试 {retry + 1}/60...")
406
  await asyncio.sleep(1)
407
 
408
  if not page_loaded:
src/services/flow_client.py CHANGED
@@ -572,7 +572,7 @@ class FlowClient:
572
  Returns:
573
  同 generate_video_text
574
  """
575
- url = f"{self.api_base_url}/video:batchAsyncGenerateVideoStartAndEndImage"
576
 
577
  # 获取 reCAPTCHA token
578
  recaptcha_token = await self._get_recaptcha_token(project_id) or ""
 
572
  Returns:
573
  同 generate_video_text
574
  """
575
+ url = f"{self.api_base_url}/video:batchAsyncGenerateVideoStartImage"
576
 
577
  # 获取 reCAPTCHA token
578
  recaptcha_token = await self._get_recaptcha_token(project_id) or ""
src/services/generation_handler.py CHANGED
@@ -118,7 +118,7 @@ MODEL_CONFIG = {
118
  "veo_3_1_i2v_s_fast_fl_landscape": {
119
  "type": "video",
120
  "video_type": "i2v",
121
- "model_key": "veo_3_1_i2v_s_fast_landscape_fl_ultra_relaxed",
122
  "aspect_ratio": "VIDEO_ASPECT_RATIO_LANDSCAPE",
123
  "supports_images": True,
124
  "min_images": 1,
@@ -594,12 +594,16 @@ class GenerationHandler:
594
  user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"
595
  )
596
  else:
597
- # 只有首帧
 
 
 
 
598
  result = await self.flow_client.generate_video_start_image(
599
  at=token.at,
600
  project_id=project_id,
601
  prompt=prompt,
602
- model_key=model_config["model_key"],
603
  aspect_ratio=model_config["aspect_ratio"],
604
  start_media_id=start_media_id,
605
  user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"
 
118
  "veo_3_1_i2v_s_fast_fl_landscape": {
119
  "type": "video",
120
  "video_type": "i2v",
121
+ "model_key": "veo_3_1_i2v_s_fast_fl_ultra_relaxed",
122
  "aspect_ratio": "VIDEO_ASPECT_RATIO_LANDSCAPE",
123
  "supports_images": True,
124
  "min_images": 1,
 
594
  user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"
595
  )
596
  else:
597
+ # 只有首帧 - 需要将 model_key 中的 _fl_ 替换为 _
598
+ # 例如: veo_3_1_i2v_s_fast_fl_ultra_relaxed -> veo_3_1_i2v_s_fast_ultra_relaxed
599
+ # veo_3_1_i2v_s_fast_portrait_fl_ultra_relaxed -> veo_3_1_i2v_s_fast_portrait_ultra_relaxed
600
+ actual_model_key = model_config["model_key"].replace("_fl_", "_")
601
+ debug_logger.log_info(f"[I2V] 单帧模式,model_key: {model_config['model_key']} -> {actual_model_key}")
602
  result = await self.flow_client.generate_video_start_image(
603
  at=token.at,
604
  project_id=project_id,
605
  prompt=prompt,
606
+ model_key=actual_model_key,
607
  aspect_ratio=model_config["aspect_ratio"],
608
  start_media_id=start_media_id,
609
  user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"