feat(i2v): enhance image-to-video documentation and single/dual frame support
Browse files
- Update README.md to clarify single-frame vs dual-frame image support with
automatic model_key selection
- Add detailed explanation of automatic adaptation system for
different image counts
- Include clear distinction between single frame mode (1 image) and
dual frame mode (2 images) with proper model selection
fix(browser-captcha): increase page load timeout from 15s to 60s
- Extend retry range from 15 to 60 attempts for page loading
- Update debug logging to reflect new timeout values
- Improve reliability of captcha service by allowing more time for
page load completion
fix(api): correct I2V API endpoint URL for single frame generation
- Change URL from batchAsyncGenerateVideoStartAndEndImage to
batchAsyncGenerateVideoStartImage for single frame scenarios
fix(model-config): correct model_key for I2V single frame mode
- Fix typo in the model_key of the veo_3_1_i2v_s_fast_fl_landscape
entry: remove the duplicate 'fl_'
- Implement automatic model_key transformation for single frame mode
by replacing '_fl_' with '_' in model keys
- Add debug logging for model key transformation process
|
@@ -117,7 +117,11 @@ python main.py
|
|
| 117 |
| `veo_2_0_t2v_landscape` | 文生视频 | 横屏 |
|
| 118 |
|
| 119 |
#### 首尾帧模型 (I2V - Image to Video)
|
| 120 |
-
📸 **支持1-2张图片:首尾帧**
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
| 模型名称 | 说明| 尺寸 |
|
| 123 |
|---------|---------|--------|
|
|
|
|
| 117 |
| `veo_2_0_t2v_landscape` | 文生视频 | 横屏 |
|
| 118 |
|
| 119 |
#### 首尾帧模型 (I2V - Image to Video)
|
| 120 |
+
📸 **支持1-2张图片:1张作为首帧,2张作为首尾帧**
|
| 121 |
+
|
| 122 |
+
> 💡 **自动适配**:系统会根据图片数量自动选择对应的 model_key
|
| 123 |
+
> - **单帧模式**(1张图):使用首帧生成视频
|
| 124 |
+
> - **双帧模式**(2张图):使用首帧+尾帧生成过渡视频
|
| 125 |
|
| 126 |
| 模型名称 | 说明| 尺寸 |
|
| 127 |
|---------|---------|--------|
|
|
@@ -131,11 +131,11 @@ class BrowserCaptchaService:
|
|
| 131 |
|
| 132 |
# 等待页面加载完成(带重试机制)
|
| 133 |
page_loaded = False
|
| 134 |
-
for retry in range(15):
|
| 135 |
try:
|
| 136 |
await asyncio.sleep(1)
|
| 137 |
ready_state = await self.resident_tab.evaluate("document.readyState")
|
| 138 |
-
debug_logger.log_info(f"[BrowserCaptcha] 页面状态: {ready_state} (重试 {retry + 1}/15)")
|
| 139 |
if ready_state == "complete":
|
| 140 |
page_loaded = True
|
| 141 |
break
|
|
@@ -391,7 +391,7 @@ class BrowserCaptchaService:
|
|
| 391 |
|
| 392 |
# 等待页面加载完成
|
| 393 |
page_loaded = False
|
| 394 |
-
for retry in range(15):
|
| 395 |
try:
|
| 396 |
await asyncio.sleep(1)
|
| 397 |
ready_state = await tab.evaluate("document.readyState")
|
|
@@ -402,7 +402,7 @@ class BrowserCaptchaService:
|
|
| 402 |
debug_logger.log_warning(f"[BrowserCaptcha] 标签页连接丢失: {e}")
|
| 403 |
return None
|
| 404 |
except Exception as e:
|
| 405 |
-
debug_logger.log_warning(f"[BrowserCaptcha] 等待页面异常: {e},重试 {retry + 1}/15...")
|
| 406 |
await asyncio.sleep(1)
|
| 407 |
|
| 408 |
if not page_loaded:
|
|
|
|
| 131 |
|
| 132 |
# 等待页面加载完成(带重试机制)
|
| 133 |
page_loaded = False
|
| 134 |
+
for retry in range(60):
|
| 135 |
try:
|
| 136 |
await asyncio.sleep(1)
|
| 137 |
ready_state = await self.resident_tab.evaluate("document.readyState")
|
| 138 |
+
debug_logger.log_info(f"[BrowserCaptcha] 页面状态: {ready_state} (重试 {retry + 1}/60)")
|
| 139 |
if ready_state == "complete":
|
| 140 |
page_loaded = True
|
| 141 |
break
|
|
|
|
| 391 |
|
| 392 |
# 等待页面加载完成
|
| 393 |
page_loaded = False
|
| 394 |
+
for retry in range(60):
|
| 395 |
try:
|
| 396 |
await asyncio.sleep(1)
|
| 397 |
ready_state = await tab.evaluate("document.readyState")
|
|
|
|
| 402 |
debug_logger.log_warning(f"[BrowserCaptcha] 标签页连接丢失: {e}")
|
| 403 |
return None
|
| 404 |
except Exception as e:
|
| 405 |
+
debug_logger.log_warning(f"[BrowserCaptcha] 等待页面异常: {e},重试 {retry + 1}/60...")
|
| 406 |
await asyncio.sleep(1)
|
| 407 |
|
| 408 |
if not page_loaded:
|
|
@@ -572,7 +572,7 @@ class FlowClient:
|
|
| 572 |
Returns:
|
| 573 |
同 generate_video_text
|
| 574 |
"""
|
| 575 |
-
url = f"{self.api_base_url}/video:batchAsyncGenerateVideoStartAndEndImage"
|
| 576 |
|
| 577 |
# 获取 reCAPTCHA token
|
| 578 |
recaptcha_token = await self._get_recaptcha_token(project_id) or ""
|
|
|
|
| 572 |
Returns:
|
| 573 |
同 generate_video_text
|
| 574 |
"""
|
| 575 |
+
url = f"{self.api_base_url}/video:batchAsyncGenerateVideoStartImage"
|
| 576 |
|
| 577 |
# 获取 reCAPTCHA token
|
| 578 |
recaptcha_token = await self._get_recaptcha_token(project_id) or ""
|
|
@@ -118,7 +118,7 @@ MODEL_CONFIG = {
|
|
| 118 |
"veo_3_1_i2v_s_fast_fl_landscape": {
|
| 119 |
"type": "video",
|
| 120 |
"video_type": "i2v",
|
| 121 |
-
"model_key": "
|
| 122 |
"aspect_ratio": "VIDEO_ASPECT_RATIO_LANDSCAPE",
|
| 123 |
"supports_images": True,
|
| 124 |
"min_images": 1,
|
|
@@ -594,12 +594,16 @@ class GenerationHandler:
|
|
| 594 |
user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"
|
| 595 |
)
|
| 596 |
else:
|
| 597 |
-
# 只有首帧
|
|
|
|
|
|
|
|
|
|
|
|
|
| 598 |
result = await self.flow_client.generate_video_start_image(
|
| 599 |
at=token.at,
|
| 600 |
project_id=project_id,
|
| 601 |
prompt=prompt,
|
| 602 |
-
model_key=
|
| 603 |
aspect_ratio=model_config["aspect_ratio"],
|
| 604 |
start_media_id=start_media_id,
|
| 605 |
user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"
|
|
|
|
| 118 |
"veo_3_1_i2v_s_fast_fl_landscape": {
|
| 119 |
"type": "video",
|
| 120 |
"video_type": "i2v",
|
| 121 |
+
"model_key": "veo_3_1_i2v_s_fast_fl_ultra_relaxed",
|
| 122 |
"aspect_ratio": "VIDEO_ASPECT_RATIO_LANDSCAPE",
|
| 123 |
"supports_images": True,
|
| 124 |
"min_images": 1,
|
|
|
|
| 594 |
user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"
|
| 595 |
)
|
| 596 |
else:
|
| 597 |
+
# 只有首帧 - 需要将 model_key 中的 _fl_ 替换为 _
|
| 598 |
+
# 例如: veo_3_1_i2v_s_fast_fl_ultra_relaxed -> veo_3_1_i2v_s_fast_ultra_relaxed
|
| 599 |
+
# veo_3_1_i2v_s_fast_portrait_fl_ultra_relaxed -> veo_3_1_i2v_s_fast_portrait_ultra_relaxed
|
| 600 |
+
actual_model_key = model_config["model_key"].replace("_fl_", "_")
|
| 601 |
+
debug_logger.log_info(f"[I2V] 单帧模式,model_key: {model_config['model_key']} -> {actual_model_key}")
|
| 602 |
result = await self.flow_client.generate_video_start_image(
|
| 603 |
at=token.at,
|
| 604 |
project_id=project_id,
|
| 605 |
prompt=prompt,
|
| 606 |
+
model_key=actual_model_key,
|
| 607 |
aspect_ratio=model_config["aspect_ratio"],
|
| 608 |
start_media_id=start_media_id,
|
| 609 |
user_paygate_tier=token.user_paygate_tier or "PAYGATE_TIER_ONE"
|