#!/usr/bin/env python3
"""
Standalone Video Creator
Automated video generation using the extend-video flow with one-time input.
No frontend required - everything runs from this single script.
"""
import os
import sys
import time
import json
import asyncio
import httpx
from pathlib import Path
from typing import List, Dict, Any, Optional
from datetime import datetime
from dotenv import load_dotenv
# Import utilities from the existing codebase
from utils.prompt_generator import (
VeoInputs,
generate_segments_payload,
split_script_into_segments
)
# Try importing replicate (optional)
try:
import replicate
REPLICATE_AVAILABLE = True
except ImportError:
REPLICATE_AVAILABLE = False
# Load environment variables
load_dotenv('.env.local')
# Configuration
KIE_API_BASE = "https://api.kie.ai"
BACKEND_BASE = f"http://localhost:{os.getenv('SERVER_PORT', 4000)}"
MAX_RETRIES = 3
POLLING_INTERVAL = 10 # seconds
MAX_WAIT_TIME = 600 # 10 minutes per video
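# The tunables above can optionally be overridden from the environment.
# A minimal sketch - the env var names POLLING_INTERVAL and MAX_WAIT_TIME are
# assumptions for illustration, not part of the original configuration:
POLLING_INTERVAL = int(os.getenv('POLLING_INTERVAL', POLLING_INTERVAL))
MAX_WAIT_TIME = int(os.getenv('MAX_WAIT_TIME', MAX_WAIT_TIME))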
class Colors:
"""Terminal colors for better output"""
HEADER = '\033[95m'
BLUE = '\033[94m'
CYAN = '\033[96m'
GREEN = '\033[92m'
YELLOW = '\033[93m'
RED = '\033[91m'
END = '\033[0m'
BOLD = '\033[1m'
def print_status(message: str, color: str = Colors.CYAN):
"""Print colored status message"""
print(f"{color}{message}{Colors.END}")
def print_success(message: str):
"""Print success message"""
print(f"{Colors.GREEN}{message}{Colors.END}")
def print_error(message: str):
"""Print error message"""
print(f"{Colors.RED}{message}{Colors.END}")
def print_header(message: str):
"""Print header message"""
print(f"\n{Colors.BOLD}{Colors.HEADER}{'='*60}{Colors.END}")
print(f"{Colors.BOLD}{Colors.HEADER}{message}{Colors.END}")
print(f"{Colors.BOLD}{Colors.HEADER}{'='*60}{Colors.END}\n")
def get_api_key() -> str:
"""Get KIE API key from environment"""
api_key = os.getenv('KIE_API_KEY')
if not api_key:
print_error("KIE_API_KEY not found in environment!")
print("Please add KIE_API_KEY to .env.local file")
print("Get your API key at: https://kie.ai/api-key")
sys.exit(1)
return api_key
def get_openai_api_key() -> str:
"""Get OpenAI API key from environment"""
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
print_error("OPENAI_API_KEY not found in environment!")
print("Please add OPENAI_API_KEY to .env.local file")
sys.exit(1)
return api_key
def get_replicate_api_key() -> str:
"""Get Replicate API key from environment"""
api_key = os.getenv('REPLICATE_API_TOKEN')
if not api_key:
print_error("REPLICATE_API_TOKEN not found in environment!")
print("Please add REPLICATE_API_TOKEN to .env.local file")
print("Get your API token at: https://replicate.com/account/api-tokens")
sys.exit(1)
return api_key
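# The three getters above share one shape; a deduplicated sketch (require_env
# is a hypothetical helper shown for illustration, not used by the flow below):
def require_env(var_name: str, hint: str = "") -> str:
    """Return the named environment variable, or exit with a readable error."""
    value = os.getenv(var_name)
    if not value:
        print_error(f"{var_name} not found in environment!")
        print(f"Please add {var_name} to .env.local file")
        if hint:
            print(hint)
        sys.exit(1)
    return value
# Usage example:
#   api_key = require_env('KIE_API_KEY', 'Get your API key at: https://kie.ai/api-key')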
def collect_user_inputs() -> Dict[str, Any]:
"""Collect one-time inputs from user"""
print_header("VIDEO CREATION SETUP")
print("This script will generate a video using AI. You'll need to provide:")
print("1. A script/text for the video")
print("2. A reference image (character/scene)")
print("3. Video style preferences\n")
# Script input
print_status("Enter your video script (press Enter twice when done):")
script_lines = []
while True:
line = input()
if line == "" and script_lines and script_lines[-1] == "":
script_lines.pop() # Remove last empty line
break
script_lines.append(line)
script = "\n".join(script_lines).strip()
if not script:
print_error("Script cannot be empty!")
sys.exit(1)
# Image path
print_status("\nEnter path to reference image:")
image_path = input().strip()
if not os.path.exists(image_path):
print_error(f"Image file not found: {image_path}")
sys.exit(1)
# Style
print_status("\nEnter video style (default: 'clean, lifestyle UGC'):")
style = input().strip() or "clean, lifestyle UGC"
# Voice type
print_status("\nEnter voice type (Deep/Warm/Crisp/None, default: None):")
voice_type = input().strip() or "None"
# Model
print_status("\nEnter video model (default: 'veo3_fast'):")
model = input().strip() or "veo3_fast"
# Aspect ratio
print_status("\nEnter aspect ratio (16:9 or 9:16, default: '9:16'):")
aspect_ratio = input().strip() or "9:16"
# Camera style
print_status("\nEnter camera style (default: 'handheld steadicam'):")
camera_style = input().strip() or "handheld steadicam"
# Provider selection
print_status("\nSelect video generation provider:")
print(" 1. KIE API (supports extend video flow)")
print(" 2. Replicate (google/veo-3)")
provider_choice = input().strip() or "1"
if provider_choice == "2":
if not REPLICATE_AVAILABLE:
print_error("Replicate package not installed!")
print("Please install it: pip install replicate")
sys.exit(1)
provider = "replicate"
else:
provider = "kie"
# Seed for consistent lighting
print_status("\nEnter seed for consistent lighting (optional, press Enter to skip):")
seed_input = input().strip()
    try:
        seed = int(seed_input) if seed_input else None
    except ValueError:
        print_error(f"Invalid seed '{seed_input}' - seeds must be integers; continuing without one")
        seed = None
print_success("\nConfiguration complete!")
return {
'script': script,
'image_path': image_path,
'style': style,
'voice_type': voice_type,
'model': model,
'aspect_ratio': aspect_ratio,
'camera_style': camera_style,
'seed': seed,
'provider': provider
}
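def load_inputs_from_file(config_path: str) -> Dict[str, Any]:
    """
    Non-interactive alternative to collect_user_inputs() for unattended runs.
    A minimal sketch, assuming a JSON file with the same keys that
    collect_user_inputs() returns; illustrative only, not wired into main().
    """
    with open(config_path, 'r') as f:
        config = json.load(f)
    # Apply the same defaults the interactive prompts use
    config.setdefault('style', 'clean, lifestyle UGC')
    config.setdefault('voice_type', 'None')
    config.setdefault('model', 'veo3_fast')
    config.setdefault('aspect_ratio', '9:16')
    config.setdefault('camera_style', 'handheld steadicam')
    config.setdefault('seed', None)
    config.setdefault('provider', 'kie')
    if not (config.get('script') or '').strip():
        print_error("Config file must include a non-empty 'script'!")
        sys.exit(1)
    if not os.path.exists(config.get('image_path', '')):
        print_error(f"Image file not found: {config.get('image_path')}")
        sys.exit(1)
    return config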
async def generate_initial_video(
prompt: Dict[str, Any],
image_path: str,
api_key: str,
model: str = "veo3_fast",
aspect_ratio: str = "9:16",
voice_type: str = "None",
seed: Optional[int] = None
) -> str:
"""
Generate the first video segment.
Uses the backend server's callback endpoint for status updates.
"""
print_status(f"🎬 Generating initial video segment...")
# Read image file
from pathlib import Path
with open(image_path, 'rb') as f:
image_data = f.read()
# Detect image format for upload
image_ext = Path(image_path).suffix.lower()
mime_type = {
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.png': 'image/png',
'.gif': 'image/gif',
'.webp': 'image/webp'
}.get(image_ext, 'image/jpeg')
# Get public URL and callback URL
public_url = os.getenv('PUBLIC_URL', BACKEND_BASE)
callback_url = f"{public_url}/api/veo/callback"
# Upload image to backend so it's available to the /api/images/{id} endpoint
print_status(f"📷 Uploading image to backend for hosting...")
async with httpx.AsyncClient(timeout=30.0) as client:
upload_response = await client.post(
f"{BACKEND_BASE}/api/upload-image",
files={"file": ("reference_image" + image_ext, image_data, mime_type)},
)
if upload_response.status_code != 200:
raise Exception(f"Image upload failed: HTTP {upload_response.status_code} - {upload_response.text}")
upload_json = upload_response.json()
hosted_image_url = upload_json.get("url")
if not hosted_image_url:
raise Exception(f"Image upload response missing URL: {upload_json}")
print_success(f"Image hosted at: {hosted_image_url}")
async with httpx.AsyncClient(timeout=30.0) as client:
payload = {
"prompt": prompt,
"imageUrls": [hosted_image_url], # Use hosted URL
"model": model,
"aspectRatio": aspect_ratio,
"generationType": "FIRST_AND_LAST_FRAMES_2_VIDEO",
"enableTranslation": True,
"callBackUrl": callback_url
}
if seed is not None:
payload["seeds"] = seed
if voice_type and voice_type.lower() != "none":
payload["voiceType"] = voice_type
# Debug: print request payload
try:
print_status("📦 Initial generate payload:")
print(json.dumps(payload, indent=2, ensure_ascii=False))
except Exception:
# Fallback in case something isn't JSON serializable
print_status(f"📦 Initial generate payload (raw): {payload}")
response = await client.post(
f"{KIE_API_BASE}/api/v1/veo/generate",
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json=payload
)
result = response.json()
if result.get('code') != 200:
raise Exception(f"Video generation failed: {result.get('msg')}")
task_id = result['data']['taskId']
print_success(f"Initial video generation started: {task_id}")
return task_id
async def extend_video(
task_id: str,
prompt: Dict[str, Any],
api_key: str,
voice_type: str = "None",
seed: Optional[int] = None
) -> str:
"""
Extend an existing video with new prompt.
Uses the backend server's callback endpoint for status updates.
"""
print_status(f"🎬 Extending video from task: {task_id}")
# Get public URL for callback
public_url = os.getenv('PUBLIC_URL', BACKEND_BASE)
callback_url = f"{public_url}/api/veo/callback"
async with httpx.AsyncClient(timeout=30.0) as client:
payload = {
"taskId": task_id,
"prompt": prompt,
"callBackUrl": callback_url
}
if seed is not None:
payload["seeds"] = seed
if voice_type and voice_type.lower() != "none":
payload["voiceType"] = voice_type
# Debug: print request payload
try:
print_status("📦 Extend payload:")
print(json.dumps(payload, indent=2, ensure_ascii=False))
except Exception:
print_status(f"📦 Extend payload (raw): {payload}")
response = await client.post(
f"{KIE_API_BASE}/api/v1/veo/extend",
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json=payload
)
result = response.json()
if result.get('code') != 200:
raise Exception(f"Video extension failed: {result.get('msg')}")
new_task_id = result['data']['taskId']
print_success(f"Video extension started: {new_task_id}")
return new_task_id
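async def with_retries(coro_factory, label: str):
    """
    Retry transient API failures with exponential backoff. A minimal sketch
    that puts the MAX_RETRIES constant (defined above but otherwise unused)
    to work; coro_factory is a zero-argument callable returning a fresh
    coroutine per attempt. Illustrative, not wired into the flow below.
    """
    last_error: Optional[Exception] = None
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            return await coro_factory()
        except Exception as e:
            last_error = e
            print_error(f"{label} failed (attempt {attempt}/{MAX_RETRIES}): {e}")
            if attempt < MAX_RETRIES:
                await asyncio.sleep(2 ** attempt)  # 2s, 4s, ... backoff
    raise Exception(f"{label} failed after {MAX_RETRIES} attempts: {last_error}")
# Usage example (hypothetical):
#   task_id = await with_retries(lambda: extend_video(tid, prompt, key), "Video extension")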
async def wait_for_callback_result(task_id: str) -> str:
"""
Wait for callback result from backend server via SSE.
Connects to the SSE endpoint and listens for real-time callback events.
"""
print_status(f"⏳ Listening for video completion via SSE: {task_id}")
start_time = time.time()
try:
async with httpx.AsyncClient(timeout=httpx.Timeout(MAX_WAIT_TIME, connect=10.0)) as client:
async with client.stream(
'GET',
f"{BACKEND_BASE}/api/veo/events/{task_id}"
) as response:
if response.status_code != 200:
raise Exception(f"Failed to connect to SSE: HTTP {response.status_code}")
print_status(f"🔌 Connected to SSE stream")
async for line in response.aiter_lines():
# Check timeout
if time.time() - start_time > MAX_WAIT_TIME:
raise Exception(f"Video generation timed out after {MAX_WAIT_TIME}s")
# Parse SSE data
if line.startswith('data: '):
data_str = line[6:] # Remove "data: " prefix
try:
data = json.loads(data_str)
status = data.get('status')
if status == 'succeeded':
video_url = data.get('url')
if video_url:
elapsed = int(time.time() - start_time)
print_success(f"Video completed in {elapsed}s: {task_id}")
return video_url
elif status == 'failed':
error = data.get('error', 'Unknown error')
raise Exception(f"Video generation failed: {error}")
except json.JSONDecodeError:
continue # Skip invalid JSON
except httpx.TimeoutException:
raise Exception(f"Video generation timed out after {MAX_WAIT_TIME}s")
    except Exception as e:
        # Re-raise terminal errors; only fall back to polling on transport issues
        error_msg = str(e).lower()
        if "timed out" in error_msg or "generation failed" in error_msg:
            raise
        print_error(f"SSE connection error, falling back to polling: {str(e)}")
        # Fall back to simple polling if the SSE stream is unavailable
        return await poll_fallback(task_id)
async def poll_fallback(task_id: str) -> str:
"""Fallback polling method if SSE fails"""
print_status(f"⏳ Polling for video completion: {task_id}")
start_time = time.time()
async with httpx.AsyncClient(timeout=40.0) as client:
while time.time() - start_time < MAX_WAIT_TIME:
try:
response = await client.get(
f"{BACKEND_BASE}/api/veo/status/{task_id}"
)
if response.status_code == 200:
result = response.json()
status = result.get('status')
if status == 'succeeded':
video_url = result.get('url')
if video_url:
print_success(f"Video completed: {task_id}")
return video_url
elif status == 'failed':
error = result.get('error', 'Unknown error')
raise Exception(f"Video generation failed: {error}")
await asyncio.sleep(POLLING_INTERVAL)
print(f" Still processing... ({int(time.time() - start_time)}s)")
except httpx.HTTPError as e:
print_error(f"Error checking status: {str(e)}")
await asyncio.sleep(POLLING_INTERVAL)
raise Exception(f"Video generation timed out after {MAX_WAIT_TIME}s")
def convert_segment_to_text_prompt(segment: Dict[str, Any]) -> str:
"""
Convert structured JSON segment to a comprehensive text prompt for Replicate.
Replicate's Veo-3 expects a plain text prompt, not structured JSON.
"""
# Extract key information from structured segment
action_timeline = segment.get('action_timeline', {})
dialogue = action_timeline.get('dialogue', '')
character = segment.get('character_description', {})
physical = character.get('physical', '')
clothing = character.get('clothing', '')
current_state = character.get('current_state', '')
scene = segment.get('scene_continuity', {})
environment = scene.get('environment', '')
camera_position = scene.get('camera_position', '')
camera_movement = scene.get('camera_movement', '')
lighting_state = scene.get('lighting_state', '')
# Build comprehensive text prompt
prompt_parts = []
# Start with dialogue if available
if dialogue:
prompt_parts.append(f'"{dialogue}"')
# Add character description
if physical:
prompt_parts.append(f"Character: {physical}")
if clothing:
prompt_parts.append(f"Wearing: {clothing}")
if current_state:
prompt_parts.append(f"Current state: {current_state}")
# Add scene description
if environment:
prompt_parts.append(f"Scene: {environment}")
if lighting_state:
prompt_parts.append(f"Lighting: {lighting_state}")
# Add camera details
if camera_position:
prompt_parts.append(f"Camera: {camera_position}")
if camera_movement:
prompt_parts.append(f"Camera movement: {camera_movement}")
# Add synchronized actions if available
synced_actions = action_timeline.get('synchronized_actions', {})
if synced_actions:
actions_list = []
for key, value in synced_actions.items():
if value:
                # Normalize timeline keys like "f0_00" to "0-00" (strip the "f" prefix, underscores to dashes)
                time_key = key.replace('f', '').replace('_', '-') if key.startswith('f') else key
actions_list.append(f"{time_key}: {value}")
if actions_list:
prompt_parts.append(f"Actions: {'; '.join(actions_list)}")
# Add instruction to not include captions/subtitles
prompt_parts.append("Do not include any captions, subtitles, or text overlays in the video")
# Add critical instruction to avoid blur transitions at start
prompt_parts.append("The video must start immediately at 0:00 with a sharp, clear, in-focus frame. No fade-in, no blur transition, no gradual focus effect at the start. The subject must be fully visible and sharp from the very first frame.")
# Join all parts with periods
text_prompt = ". ".join(prompt_parts)
return text_prompt.strip()
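# Worked example (illustrative input, not real pipeline output):
#   convert_segment_to_text_prompt({
#       'action_timeline': {'dialogue': 'Try this at home!'},
#       'character_description': {'physical': 'woman in her 30s'},
#       'scene_continuity': {'environment': 'bright kitchen'},
#   })
# returns roughly:
#   '"Try this at home!". Character: woman in her 30s. Scene: bright kitchen.
#    Do not include any captions, ... sharp from the very first frame.'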
async def generate_video_replicate(
prompt: Dict[str, Any],
image_path: Optional[str] = None,
seed: Optional[int] = None,
aspect_ratio: str = "9:16"
) -> str:
"""
Generate video using Replicate's Veo-3 model.
Args:
prompt: Structured JSON segment (dict) - will be converted to text
image_path: Optional path to reference image
seed: Optional seed for consistency
aspect_ratio: Aspect ratio for video (e.g., "9:16", "16:9")
Returns:
Path to downloaded video file
"""
print_status(f"🎬 Generating video with Replicate (google/veo-3)...")
if not REPLICATE_AVAILABLE:
raise Exception("Replicate package not installed. Run: pip install replicate")
# Set up Replicate client
replicate_token = get_replicate_api_key()
os.environ['REPLICATE_API_TOKEN'] = replicate_token
# Stringify the JSON object and send as string
# This preserves the structured data while meeting Replicate's string requirement
prompt_string = json.dumps(prompt, ensure_ascii=False, indent=None)
# Prepare input - send stringified JSON
input_data = {
"prompt": prompt_string # Send JSON as stringified string
}
# Add aspect ratio
input_data["aspect_ratio"] = aspect_ratio
# Add seed if provided
if seed is not None:
input_data["seed"] = seed
# Add image if provided (Replicate expects a file object)
image_file = None
if image_path and os.path.exists(image_path):
image_file = open(image_path, 'rb')
input_data["image"] = image_file
# Debug: print request payload
try:
print_status("📦 Replicate input (stringified JSON):")
debug_input = {k: v for k, v in input_data.items() if k != 'image'}
if 'image' in input_data:
debug_input['image'] = f"<file: {image_path}>"
# Show first 500 chars of stringified JSON
if 'prompt' in debug_input and isinstance(debug_input['prompt'], str):
prompt_preview = debug_input['prompt'][:500] + "..." if len(debug_input['prompt']) > 500 else debug_input['prompt']
debug_input['prompt'] = prompt_preview
print(json.dumps(debug_input, indent=2, ensure_ascii=False))
print_status(f"📝 Full prompt length: {len(prompt_string)} characters (stringified JSON)")
print_status(f"📐 Aspect ratio: {aspect_ratio}")
except Exception:
print_status(f"📦 Replicate input (raw): {input_data}")
# Run Replicate model
print_status("⏳ Waiting for Replicate to generate video...")
try:
try:
output = replicate.run(
"google/veo-3",
input=input_data
)
except Exception as e:
# If aspect_ratio parameter is invalid, try alternative names
error_str = str(e).lower()
if "aspect" in error_str and ("invalid" in error_str or "unknown" in error_str):
print_status("⚠️ aspect_ratio parameter not recognized, trying alternative names...")
# Try camelCase version
if "aspect_ratio" in input_data:
input_data["aspectRatio"] = input_data.pop("aspect_ratio")
try:
output = replicate.run("google/veo-3", input=input_data)
                except Exception:
# If that fails, try "ratio"
if "aspectRatio" in input_data:
input_data["ratio"] = input_data.pop("aspectRatio")
output = replicate.run("google/veo-3", input=input_data)
# If stringified JSON fails, try converting to natural language text and retry
elif "invalid" in error_str or "expected" in error_str or "422" in error_str or "validation" in error_str:
print_status("⚠️ Stringified JSON rejected, converting to natural language text prompt and retrying...")
# Convert to text prompt
prompt_text = convert_segment_to_text_prompt(prompt)
input_data["prompt"] = prompt_text
# Debug: print text prompt
try:
print_status("📦 Replicate input (natural language text format):")
debug_input = {k: v for k, v in input_data.items() if k != 'image'}
if 'image' in input_data:
debug_input['image'] = f"<file: {image_path}>"
# Truncate prompt if too long for display
if 'prompt' in debug_input and isinstance(debug_input['prompt'], str) and len(debug_input['prompt']) > 500:
debug_input['prompt'] = debug_input['prompt'][:500] + "... (truncated)"
print(json.dumps(debug_input, indent=2, ensure_ascii=False))
print_status(f"📝 Full prompt length: {len(prompt_text)} characters")
except Exception:
print_status(f"📦 Replicate input (text format, raw): {input_data}")
# Retry with text prompt
output = replicate.run(
"google/veo-3",
input=input_data
)
else:
# Re-raise if it's a different error
raise
finally:
# Close image file if opened
if image_file:
image_file.close()
# Replicate output can be a URL string, file-like object, or object with url()/read() methods
import tempfile
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
temp_path = temp_file.name
temp_file.close()
    # Handle different output types without triplicating the download logic
    async def _download_to_temp(video_url: str) -> None:
        print_success(f"Video generated: {video_url}")
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.get(video_url)
            if response.status_code != 200:
                raise Exception(f"Failed to download video: HTTP {response.status_code}")
            with open(temp_path, 'wb') as f:
                f.write(response.content)
    if hasattr(output, 'read'):
        # File-like object - read directly
        print_status("Reading video from file-like object...")
        with open(temp_path, 'wb') as f:
            f.write(output.read())
    elif hasattr(output, 'url'):
        # URL exposed as an attribute or a method, depending on client version
        url_attr = output.url
        await _download_to_temp(url_attr() if callable(url_attr) else url_attr)
    else:
        # URL string, or something stringifiable into one
        await _download_to_temp(str(output))
print_success(f"Video saved to: {temp_path}")
return temp_path
async def download_video(url: str, output_path: str):
"""Download video from URL"""
print_status(f"📥 Downloading video to {output_path}...")
async with httpx.AsyncClient(timeout=120.0) as client:
response = await client.get(url)
if response.status_code != 200:
raise Exception(f"Failed to download video: HTTP {response.status_code}")
with open(output_path, 'wb') as f:
f.write(response.content)
print_success(f"Video downloaded: {output_path}")
async def merge_videos(
video_paths: List[str],
output_path: str,
segments: Optional[List[Dict[str, Any]]] = None,
use_whisper: bool = True,
fallback_overlap: float = 0.7
):
"""
Merge multiple videos into one using FFmpeg with Whisper-based precise trimming.
Args:
video_paths: List of video file paths to merge
output_path: Output file path
segments: Optional list of segment dicts with dialogue info for Whisper trimming
use_whisper: If True, use Whisper to find optimal trim points at speech boundaries
fallback_overlap: Fallback trim duration if Whisper fails (seconds)
"""
print_status(f"🎥 Merging {len(video_paths)} videos...")
# Try to import Whisper utilities
try:
from utils.whisper_trim import find_last_word_timestamp, is_whisper_available
WHISPER_AVAILABLE = is_whisper_available()
except ImportError:
WHISPER_AVAILABLE = False
print_status("⚠️ Whisper not available, using fallback trimming")
# Optionally trim overlap from all but the first clip
adjusted_paths = []
temp_trimmed_paths: List[str] = []
import subprocess
for idx, path in enumerate(video_paths):
if idx == 0:
adjusted_paths.append(path)
continue
# No start trimming - keep full video from beginning
# Only end trimming via Whisper is used (handled during video generation)
# Skip start trimming for all segments - use full video
adjusted_paths.append(path)
# Create a temporary file list for FFmpeg concat
list_file = "video_list.txt"
with open(list_file, 'w') as f:
for path in adjusted_paths:
f.write(f"file '{path}'\n")
try:
# Use FFmpeg to concatenate videos
cmd = [
'ffmpeg',
'-f', 'concat',
'-safe', '0',
'-i', list_file,
'-c', 'copy',
'-y',
output_path
]
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=300
)
if result.returncode != 0:
print_error(f"FFmpeg concat error: {result.stderr}")
raise Exception("Video merging failed")
print_success(f"Videos merged: {output_path}")
finally:
# Clean up temp files
if os.path.exists(list_file):
os.remove(list_file)
for tmp in temp_trimmed_paths:
if os.path.exists(tmp):
os.remove(tmp)
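def merge_videos_reencode(video_paths: List[str], output_path: str):
    """
    Fallback merge that re-encodes instead of stream-copying. A minimal sketch
    for when `-c copy` concat fails because the clips' codec parameters differ;
    the encoder settings below are assumptions - tune as needed.
    """
    import subprocess
    list_file = "video_list_reencode.txt"
    with open(list_file, 'w') as f:
        for path in video_paths:
            f.write(f"file '{path}'\n")
    try:
        cmd = [
            'ffmpeg', '-f', 'concat', '-safe', '0', '-i', list_file,
            '-c:v', 'libx264', '-preset', 'fast', '-crf', '20',
            '-c:a', 'aac',
            '-y', output_path
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
        if result.returncode != 0:
            print_error(f"FFmpeg re-encode error: {result.stderr}")
            raise Exception("Video merging (re-encode) failed")
        print_success(f"Videos merged (re-encoded): {output_path}")
    finally:
        if os.path.exists(list_file):
            os.remove(list_file)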
async def main():
"""Main execution flow"""
    start_time = time.time()  # Record start for the total-time summary below
    try:
# Banner
print_header("🎥 STANDALONE VIDEO CREATOR")
print("Automated video generation using extend video flow\n")
# Collect user inputs first (to know which provider to use)
config = collect_user_inputs()
# Check API keys based on provider
openai_api_key = get_openai_api_key()
if config['provider'] == 'kie':
kie_api_key = get_api_key()
elif config['provider'] == 'replicate':
# Replicate key will be checked when needed
pass
# Generate structured prompts using GPT-4o
print_header("GENERATING VIDEO PROMPTS")
print_status("🤖 Using GPT-4o to generate structured prompts...")
# Read reference image
with open(config['image_path'], 'rb') as f:
image_bytes = f.read()
# Create VeoInputs
veo_inputs = VeoInputs(
script=config['script'],
style=config['style'],
jsonFormat="standard",
continuationMode=True,
voiceType=config['voice_type'] if config['voice_type'] != "None" else None,
cameraStyle=config['camera_style'],
settingMode="single"
)
# Generate prompts
payload = generate_segments_payload(
inputs=veo_inputs,
image_bytes=image_bytes,
model="gpt-4o",
api_key=openai_api_key
)
# Debug: print GPT-generated segments payload
try:
print_status("🧾 Segments payload from GPT-4o:")
print(json.dumps(payload, indent=2, ensure_ascii=False))
except Exception:
print_status(f"🧾 Segments payload from GPT-4o (raw): {payload}")
segments = payload.get('segments', [])
print_success(f"Generated {len(segments)} video segments")
if not segments:
print_error("No segments generated!")
sys.exit(1)
# Generate videos
print_header("GENERATING VIDEOS")
video_paths = []
if config['provider'] == 'replicate':
# Replicate: Generate each segment independently with frame continuity
print_status("Using Replicate (google/veo-3) for video generation")
print_status("Note: Each segment uses last frame from previous trimmed segment for continuity\n")
output_dir = Path("output_videos")
output_dir.mkdir(exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Track current reference image (starts with original)
current_image_path = config['image_path']
temp_frame_paths = [] # Track temp frame files for cleanup
for i, segment in enumerate(segments, start=1):
print_status(f"\n📹 Processing segment {i}/{len(segments)}")
# Generate video with current reference image
print_status(f" Using reference image: {current_image_path if i == 1 else 'last frame from previous segment'}")
video_path = await generate_video_replicate(
prompt=segment, # Send JSON segment directly
image_path=current_image_path,
seed=config['seed'],
aspect_ratio=config['aspect_ratio']
)
# Save generated video to output directory with proper naming
segment_output = output_dir / f"segment_{i}_untrimmed_{timestamp}.mp4"
import shutil
if os.path.exists(video_path):
shutil.move(video_path, str(segment_output))
video_path = str(segment_output)
# Trim video with Whisper to get optimal cut point
# For all segments except the last, we also extract the last frame for next segment
# For the last segment, we still trim it to avoid extra length
should_extract_frame = (i < len(segments)) # Only extract frame if not last segment
if should_extract_frame:
print_status(f" Trimming segment {i} to extract last frame for next segment...")
else:
print_status(f" Trimming segment {i} (last segment) to optimal length...")
# Get dialogue from segment for Whisper
action_timeline = segment.get('action_timeline', {})
dialogue = action_timeline.get('dialogue', '')
if dialogue:
try:
from utils.whisper_trim import find_last_word_timestamp
from utils.video_processor import extract_frame
# Find optimal trim point
last_word_time = find_last_word_timestamp(
video_path=video_path,
script=dialogue,
model_size="base"
)
if last_word_time and last_word_time > 0:
trim_point = last_word_time + 0.3 # 0.3s padding
# Trim the video - rename to indicate it's trimmed
trimmed_path = str(segment_output).replace("_untrimmed_", "_trimmed_")
import subprocess
cmd_trim = [
'ffmpeg',
'-y',
'-ss', '0',
'-i', video_path,
'-t', str(trim_point),
'-c', 'copy',
trimmed_path
]
result = subprocess.run(
cmd_trim,
capture_output=True,
text=True,
timeout=300
)
if result.returncode == 0:
# Get video duration to extract last frame
from utils.video_processor import get_video_info
info = get_video_info(trimmed_path)
duration = float(info['format']['duration'])
# Extract last frame (0.1s before end to ensure we get a frame)
frame_timestamp = max(0, duration - 0.1)
frame_path = output_dir / f"frame_segment_{i}_{timestamp}.jpg"
# Extract frame only if not the last segment
if should_extract_frame:
try:
extract_frame(
video_path=trimmed_path,
timestamp=frame_timestamp,
output_path=str(frame_path)
)
# Update current_image_path for next segment
current_image_path = str(frame_path)
temp_frame_paths.append(str(frame_path))
print_success(f" ✅ Trimmed to {trim_point:.2f}s, extracted last frame for next segment")
except Exception as frame_error:
print_error(f" ⚠️ Frame extraction failed: {str(frame_error)}")
# Still use trimmed video, but extract frame from it as fallback
try:
# Try extracting from trimmed video anyway
extract_frame(
video_path=trimmed_path,
                                            timestamp=max(0, duration - 0.5),  # Try an earlier timestamp
output_path=str(frame_path)
)
current_image_path = str(frame_path)
temp_frame_paths.append(str(frame_path))
print_success(f" ✅ Trimmed to {trim_point:.2f}s (fallback frame extraction)")
                                        except Exception:
print_error(f" ⚠️ Frame extraction failed completely")
else:
print_success(f" ✅ Trimmed last segment to {trim_point:.2f}s")
# Use trimmed version for merging (keep untrimmed version)
# Both files are kept: _untrimmed_ and _trimmed_
video_path = trimmed_path
print_status(f" 📁 Kept untrimmed: {Path(segment_output).name}")
print_status(f" 📁 Created trimmed: {Path(trimmed_path).name}")
else:
print_error(f" ⚠️ Trimming failed: {result.stderr}")
print_status(f" Using full video")
# Extract frame from full video if needed
if should_extract_frame:
try:
from utils.video_processor import get_video_info, extract_frame
info = get_video_info(video_path)
duration = float(info['format']['duration'])
frame_timestamp = max(0, duration - 0.1)
frame_path = output_dir / f"frame_segment_{i}_{timestamp}.jpg"
extract_frame(
video_path=video_path,
timestamp=frame_timestamp,
output_path=str(frame_path)
)
current_image_path = str(frame_path)
temp_frame_paths.append(str(frame_path))
print_success(f" ✅ Extracted last frame from full video")
except Exception as frame_error:
print_error(f" ⚠️ Frame extraction failed: {str(frame_error)}")
else:
print_status(f" ⚠️ Could not find trim point, using full video")
# Extract frame from full video if needed
if should_extract_frame:
try:
from utils.video_processor import get_video_info, extract_frame
info = get_video_info(video_path)
duration = float(info['format']['duration'])
frame_timestamp = max(0, duration - 0.1)
frame_path = output_dir / f"frame_segment_{i}_{timestamp}.jpg"
extract_frame(
video_path=video_path,
timestamp=frame_timestamp,
output_path=str(frame_path)
)
current_image_path = str(frame_path)
temp_frame_paths.append(str(frame_path))
print_success(f" ✅ Extracted last frame from full video")
except Exception as frame_error:
print_error(f" ⚠️ Frame extraction failed: {str(frame_error)}")
except Exception as e:
print_error(f" ⚠️ Whisper trimming failed: {str(e)}")
print_status(f" Using full video, will extract last frame")
# Fallback: extract last frame from full video
try:
from utils.video_processor import get_video_info, extract_frame
info = get_video_info(video_path)
duration = float(info['format']['duration'])
frame_timestamp = max(0, duration - 0.1)
frame_path = output_dir / f"frame_segment_{i}_{timestamp}.jpg"
extract_frame(
video_path=video_path,
timestamp=frame_timestamp,
output_path=str(frame_path)
)
current_image_path = str(frame_path)
temp_frame_paths.append(str(frame_path))
print_status(f" ✅ Extracted last frame from full video")
except Exception as e:
print_error(f" ⚠️ Frame extraction failed: {str(e)}")
print_error(f" ⚠️ Next segment will use previous frame or original image")
else:
# No dialogue - still try to trim if we can, or just extract frame
if should_extract_frame:
print_status(f" No dialogue in segment {i}, extracting last frame from full video...")
try:
from utils.video_processor import get_video_info, extract_frame
info = get_video_info(video_path)
duration = float(info['format']['duration'])
frame_timestamp = max(0, duration - 0.1)
frame_path = output_dir / f"frame_segment_{i}_{timestamp}.jpg"
extract_frame(
video_path=video_path,
timestamp=frame_timestamp,
output_path=str(frame_path)
)
current_image_path = str(frame_path)
temp_frame_paths.append(str(frame_path))
print_status(f" ✅ Extracted last frame (no dialogue in segment)")
except Exception as e:
print_error(f" ⚠️ Frame extraction failed: {str(e)}")
print_error(f" ⚠️ Next segment will use previous frame or original image")
else:
print_status(f" No dialogue in last segment, using full video")
video_paths.append(video_path)
else:
# KIE API: Use extend video flow
print_status("Using KIE API for video generation with extend flow\n")
video_urls = []
task_ids = []
# Generate first video
first_prompt = segments[0]
task_id = await generate_initial_video(
prompt=first_prompt,
image_path=config['image_path'],
api_key=kie_api_key,
model=config['model'],
aspect_ratio=config['aspect_ratio'],
voice_type=config['voice_type'],
seed=config['seed']
)
task_ids.append(task_id)
# Wait for callback result
video_url = await wait_for_callback_result(task_id)
video_urls.append(video_url)
# Extend video for remaining segments
for i, segment in enumerate(segments[1:], start=2):
print_status(f"\n📹 Processing segment {i}/{len(segments)}")
# Extend from previous task
task_id = await extend_video(
task_id=task_ids[-1], # Use the last task ID
prompt=segment,
api_key=kie_api_key,
voice_type=config['voice_type'],
seed=config['seed']
)
task_ids.append(task_id)
# Wait for callback result
video_url = await wait_for_callback_result(task_id)
video_urls.append(video_url)
# Download all videos from URLs
print_header("DOWNLOADING VIDEOS")
output_dir = Path("output_videos")
output_dir.mkdir(exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
for i, url in enumerate(video_urls, start=1):
output_path = output_dir / f"segment_{i}_{timestamp}.mp4"
await download_video(url, str(output_path))
video_paths.append(str(output_path))
# For Replicate, videos are already in output directory with proper naming
# Files are named: segment_{i}_untrimmed_{timestamp}.mp4 and segment_{i}_trimmed_{timestamp}.mp4
# Both versions are kept - untrimmed and trimmed
# video_paths contains the trimmed versions which will be used for merging
# No need to rename - they're already properly named
# Merge videos
print_header("MERGING VIDEOS")
output_dir = Path("output_videos")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
final_output = output_dir / f"final_video_{timestamp}.mp4"
        # Note: merge_videos currently concatenates clips as-is. On the Replicate
        # path, Whisper-based end trimming already happened during generation; on
        # the KIE path, clips are merged untrimmed.
        skip_trimming = (config['provider'] == 'replicate')
        await merge_videos(
            video_paths,
            str(final_output),
            segments=segments,  # Reserved for future trim-point detection at merge time
            use_whisper=not skip_trimming,  # Reserved; no trimming occurs during merge
            fallback_overlap=0.7 if not skip_trimming else 0  # Reserved
        )
# Success
print_header("✨ VIDEO CREATION COMPLETE!")
print_success(f"Final video saved to: {final_output}")
print(f"\nGenerated {len(segments)} segments")
print(f"Total processing time: {time.strftime('%M:%S', time.gmtime(time.time()))}")
except KeyboardInterrupt:
print_error("\n\nVideo creation cancelled by user")
sys.exit(1)
except Exception as e:
print_error(f"\nVideo creation failed: {str(e)}")
import traceback
traceback.print_exc()
sys.exit(1)
if __name__ == "__main__":
# Run the async main function
asyncio.run(main())