topcoderkz
committed on
Commit
·
0c4ba75
1
Parent(s):
e598b7d
Refactor code, remove deepseek integration
Browse files
- batch.sh +1 -1
- src/api_clients.py +263 -197
- src/asset_selector.py +9 -7
- src/automation.py +15 -35
- src/main.py +1 -1
- src/video_renderer.py +44 -45
batch.sh
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
# Process first 5 strategies
|
| 4 |
-
for i in {0..
|
| 5 |
python src/main.py --csv content_strategies.csv --index $i --output ./outputs/videos/video_$i
|
| 6 |
done
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
# Process first 5 strategies
|
| 4 |
+
for i in {0..0}; do
|
| 5 |
python src/main.py --csv content_strategies.csv --index $i --output ./outputs/videos/video_$i
|
| 6 |
done
|
src/api_clients.py
CHANGED
|
@@ -30,15 +30,15 @@ class APIClients:
|
|
| 30 |
# RunwayML API configuration
|
| 31 |
self.runway_api_key = config.get("runwayml_api_key") or os.getenv("RUNWAYML_API_KEY")
|
| 32 |
self.runway_base_url = "https://api.dev.runwayml.com/v1"
|
| 33 |
-
|
| 34 |
# Voice profiles for different personas
|
| 35 |
self.voice_profiles = {
|
| 36 |
"female_young": "en-US-Neural2-F", # Young female voice
|
| 37 |
"female_mature": "en-US-Neural2-E", # Mature female voice
|
| 38 |
"female_casual": "en-US-Neural2-G", # Casual female voice
|
| 39 |
-
"male_young": "en-US-Neural2-D",
|
| 40 |
-
"male_mature": "en-US-Neural2-A",
|
| 41 |
-
"male_casual": "en-US-Neural2-J",
|
| 42 |
}
|
| 43 |
|
| 44 |
async def enhance_prompt(self, prompt: str) -> str:
|
|
@@ -60,7 +60,7 @@ class APIClients:
|
|
| 60 |
Return only the enhanced prompt, nothing else.
|
| 61 |
"""
|
| 62 |
|
| 63 |
-
model = genai.GenerativeModel("gemini-2.
|
| 64 |
response = model.generate_content(enhancement_instruction)
|
| 65 |
|
| 66 |
enhanced_prompt = response.text.strip()
|
|
@@ -74,39 +74,39 @@ class APIClients:
|
|
| 74 |
async def generate_image(self, prompt: str) -> Optional[str]:
|
| 75 |
"""
|
| 76 |
Generate image using Vertex AI Imagen 4 Ultra
|
| 77 |
-
|
| 78 |
Args:
|
| 79 |
prompt: Image generation prompt
|
| 80 |
-
|
| 81 |
Returns:
|
| 82 |
Local path to generated image or None
|
| 83 |
"""
|
| 84 |
try:
|
| 85 |
-
import vertexai
|
| 86 |
-
from vertexai.preview.vision_models import ImageGenerationModel
|
| 87 |
|
| 88 |
-
logger.info(f"π¨ Generating image with Imagen 4 Ultra: {prompt[:200]}...")
|
| 89 |
|
| 90 |
-
vertexai.init(project=self.config.get("gcp_project_id"), location="us-central1")
|
| 91 |
|
| 92 |
-
# Use correct Imagen 4 Ultra model name
|
| 93 |
-
model = ImageGenerationModel.from_pretrained("imagen-4.0-ultra-generate-001")
|
| 94 |
|
| 95 |
-
images = model.generate_images(
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
)
|
| 102 |
|
| 103 |
-
# Save to temp file
|
| 104 |
-
import tempfile
|
| 105 |
-
output_path = f"/tmp/hook_image_{hash(prompt)}.png"
|
| 106 |
-
images[0].save(location=output_path, include_generation_parameters=False)
|
| 107 |
-
#
|
| 108 |
-
|
| 109 |
-
return
|
| 110 |
|
| 111 |
except Exception as e:
|
| 112 |
logger.error(f"β Imagen 4 Ultra generation failed: {e}")
|
|
@@ -130,7 +130,7 @@ class APIClients:
|
|
| 130 |
|
| 131 |
Return ONLY the caption text, nothing else."""
|
| 132 |
|
| 133 |
-
model = genai.GenerativeModel("gemini-2.
|
| 134 |
response = model.generate_content(instruction)
|
| 135 |
|
| 136 |
caption = response.text.strip()
|
|
@@ -164,7 +164,7 @@ class APIClients:
|
|
| 164 |
"video_prompt": "..."
|
| 165 |
}}"""
|
| 166 |
|
| 167 |
-
model = genai.GenerativeModel("gemini-2.
|
| 168 |
response = model.generate_content(instruction)
|
| 169 |
|
| 170 |
result = json.loads(response.text.strip())
|
|
@@ -182,7 +182,7 @@ class APIClients:
|
|
| 182 |
async def generate_video(self, prompt: str, duration: int, image_url: str) -> Dict:
|
| 183 |
"""
|
| 184 |
Generate video using RunwayML gen4_turbo ($0.25 per video / 25 credits)
|
| 185 |
-
|
| 186 |
Args:
|
| 187 |
prompt: Text prompt for video generation
|
| 188 |
duration: Video duration in seconds
|
|
@@ -190,76 +190,73 @@ class APIClients:
|
|
| 190 |
"""
|
| 191 |
try:
|
| 192 |
logger.info(f"π¬ Generating video with gen4_turbo: {prompt[:100]}...")
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
# }
|
| 202 |
-
|
| 203 |
-
headers = {
|
| 204 |
-
"Authorization": f"Bearer {self.runway_api_key}",
|
| 205 |
-
"Content-Type": "application/json",
|
| 206 |
-
"X-Runway-Version": "2024-11-06",
|
| 207 |
}
|
| 208 |
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
"ratio": "1280:720"
|
| 215 |
-
}
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
|
| 264 |
except Exception as e:
|
| 265 |
logger.error(f"β Video generation error: {e}")
|
|
@@ -268,12 +265,12 @@ class APIClients:
|
|
| 268 |
async def generate_tts(self, text: str, voice_name: Optional[str] = None, duration: Optional[float] = None) -> Dict:
|
| 269 |
"""
|
| 270 |
Generate TTS audio using Google Cloud TTS
|
| 271 |
-
|
| 272 |
Args:
|
| 273 |
text: Text to convert to speech
|
| 274 |
voice_name: Voice to use (optional)
|
| 275 |
duration: Target duration in seconds (optional) - will adjust speaking rate
|
| 276 |
-
|
| 277 |
Returns:
|
| 278 |
Dict with audio_url, duration, voice, text, local_path
|
| 279 |
"""
|
|
@@ -285,37 +282,27 @@ class APIClients:
|
|
| 285 |
if not voice_name:
|
| 286 |
voice_name = self.config.get("default_voice", "en-US-Neural2-F")
|
| 287 |
|
| 288 |
-
#
|
| 289 |
-
synthesis_input = texttospeech.SynthesisInput(text=text)
|
| 290 |
language_code = "-".join(voice_name.split("-")[:2])
|
| 291 |
|
| 292 |
-
#
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
"en-US-Neural2-C", "en-US-Neural2-E", "en-US-Neural2-F",
|
| 299 |
-
"en-US-Neural2-G", "en-US-Neural2-H", "en-US-Studio-O",
|
| 300 |
-
"en-US-Standard-A" # Add other female voices if needed
|
| 301 |
-
}
|
| 302 |
-
|
| 303 |
-
# Determine gender from full voice name
|
| 304 |
-
if voice_name in male_voices:
|
| 305 |
ssml_gender = texttospeech.SsmlVoiceGender.MALE
|
| 306 |
logger.info(f"π Using MALE voice: {voice_name}")
|
| 307 |
-
elif voice_name in female_voices:
|
| 308 |
-
ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
|
| 309 |
-
logger.info(f"π Using FEMALE voice: {voice_name}")
|
| 310 |
else:
|
| 311 |
-
# Default to FEMALE for unknown voices (or you could skip ssml_gender)
|
| 312 |
ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
|
| 313 |
-
logger.
|
|
|
|
|
|
|
|
|
|
| 314 |
|
|
|
|
| 315 |
voice = texttospeech.VoiceSelectionParams(
|
| 316 |
-
language_code=language_code,
|
| 317 |
-
name=voice_name,
|
| 318 |
-
ssml_gender=ssml_gender
|
| 319 |
)
|
| 320 |
|
| 321 |
# Calculate speaking rate if duration is provided
|
|
@@ -323,54 +310,48 @@ class APIClients:
|
|
| 323 |
if duration:
|
| 324 |
# First, generate at normal rate to get baseline duration
|
| 325 |
temp_audio_config = texttospeech.AudioConfig(
|
| 326 |
-
audio_encoding=texttospeech.AudioEncoding.MP3,
|
| 327 |
-
speaking_rate=1.0,
|
| 328 |
-
pitch=0.0
|
| 329 |
)
|
| 330 |
temp_response = self.tts_client.synthesize_speech(
|
| 331 |
-
input=synthesis_input,
|
| 332 |
-
voice=voice,
|
| 333 |
-
audio_config=temp_audio_config
|
| 334 |
)
|
| 335 |
-
|
| 336 |
# Save temp file to measure duration
|
| 337 |
import tempfile
|
|
|
|
| 338 |
temp_path = f"/tmp/tts_temp_{hash(text)}.mp3"
|
| 339 |
with open(temp_path, "wb") as out:
|
| 340 |
out.write(temp_response.audio_content)
|
| 341 |
-
|
| 342 |
# Measure actual duration
|
| 343 |
try:
|
| 344 |
from mutagen.mp3 import MP3
|
|
|
|
| 345 |
audio = MP3(temp_path)
|
| 346 |
baseline_duration = audio.info.length
|
| 347 |
except ImportError:
|
| 348 |
# Estimate if mutagen not available
|
| 349 |
word_count = len(text.split())
|
| 350 |
baseline_duration = (word_count / 150) * 60
|
| 351 |
-
|
| 352 |
# Calculate required speaking rate
|
| 353 |
speaking_rate = baseline_duration / duration
|
| 354 |
speaking_rate = max(0.25, min(4.0, speaking_rate)) # Clamp to valid range
|
| 355 |
-
|
| 356 |
-
logger.info(
|
| 357 |
-
|
|
|
|
|
|
|
| 358 |
# Clean up temp file
|
| 359 |
if os.path.exists(temp_path):
|
| 360 |
os.remove(temp_path)
|
| 361 |
|
| 362 |
# Generate final audio with adjusted speaking rate
|
| 363 |
audio_config = texttospeech.AudioConfig(
|
| 364 |
-
audio_encoding=texttospeech.AudioEncoding.MP3,
|
| 365 |
-
speaking_rate=speaking_rate,
|
| 366 |
-
pitch=0.0
|
| 367 |
)
|
| 368 |
|
| 369 |
-
response = self.tts_client.synthesize_speech(
|
| 370 |
-
input=synthesis_input,
|
| 371 |
-
voice=voice,
|
| 372 |
-
audio_config=audio_config
|
| 373 |
-
)
|
| 374 |
|
| 375 |
# Save audio
|
| 376 |
audio_filename = f"tts_{hash(text)}.mp3"
|
|
@@ -382,12 +363,14 @@ class APIClients:
|
|
| 382 |
# Get actual duration
|
| 383 |
try:
|
| 384 |
from mutagen.mp3 import MP3
|
|
|
|
| 385 |
audio = MP3(audio_path)
|
| 386 |
actual_duration = audio.info.length
|
| 387 |
logger.info(f"β TTS audio duration: {actual_duration:.2f}s")
|
| 388 |
except ImportError:
|
| 389 |
try:
|
| 390 |
from pydub import AudioSegment
|
|
|
|
| 391 |
audio = AudioSegment.from_mp3(audio_path)
|
| 392 |
actual_duration = len(audio) / 1000.0
|
| 393 |
logger.info(f"β TTS audio duration: {actual_duration:.2f}s (via pydub)")
|
|
@@ -395,7 +378,13 @@ class APIClients:
|
|
| 395 |
actual_duration = duration if duration else (len(text.split()) / 150) * 60
|
| 396 |
logger.warning(f"⚠️ Estimated duration: {actual_duration:.2f}s")
|
| 397 |
|
| 398 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
audio_url = await self.store_in_gcs(audio_path, "audio")
|
| 400 |
|
| 401 |
logger.info(f"✅ TTS generated successfully: {audio_url}")
|
|
@@ -406,13 +395,73 @@ class APIClients:
|
|
| 406 |
"voice": voice_name,
|
| 407 |
"text": text,
|
| 408 |
"local_path": audio_path,
|
| 409 |
-
"speaking_rate": speaking_rate
|
| 410 |
}
|
| 411 |
|
| 412 |
except Exception as e:
|
| 413 |
logger.error(f"β Error generating TTS: {e}")
|
| 414 |
raise
|
| 415 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
async def download_file(self, url: str, filename: str) -> str:
|
| 417 |
"""Download file from URL to local temporary file"""
|
| 418 |
import aiohttp
|
|
@@ -435,61 +484,83 @@ class APIClients:
|
|
| 435 |
logger.error(f"Failed to download {url}: {e}")
|
| 436 |
raise
|
| 437 |
|
| 438 |
-
async def select_voice_for_persona(self,
|
| 439 |
-
"""
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
"en-US-Neural2-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
return selected_voice
|
| 473 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
async def upload_captions_to_gcs(self, captions_text: str, video_filename: str) -> Optional[str]:
|
| 475 |
"""
|
| 476 |
Upload captions to GCS bucket with same name as video (but .txt extension)
|
| 477 |
-
|
| 478 |
Args:
|
| 479 |
captions_text: Caption text content
|
| 480 |
video_filename: Name of the video file (e.g., "final_video_abc123.mp4")
|
| 481 |
-
|
| 482 |
Returns:
|
| 483 |
GCS signed URL of uploaded captions or None
|
| 484 |
"""
|
| 485 |
try:
|
| 486 |
# Create captions filename (replace .mp4 with .txt)
|
| 487 |
captions_filename = os.path.splitext(video_filename)[0] + ".txt"
|
| 488 |
-
|
| 489 |
logger.info(f"☁️ Uploading captions to GCS: {captions_filename}")
|
| 490 |
|
| 491 |
# Save captions to temp file
|
| 492 |
import tempfile
|
|
|
|
| 493 |
temp_path = os.path.join(tempfile.gettempdir(), captions_filename)
|
| 494 |
with open(temp_path, "w", encoding="utf-8") as f:
|
| 495 |
f.write(captions_text)
|
|
@@ -498,24 +569,21 @@ class APIClients:
|
|
| 498 |
blob_name = f"captions/{captions_filename}"
|
| 499 |
blob = self.gcs_bucket.blob(blob_name)
|
| 500 |
blob.content_type = "text/plain"
|
| 501 |
-
|
| 502 |
logger.info(f"Uploading {captions_filename} to gs://{self.gcs_bucket.name}/{blob_name}")
|
| 503 |
blob.upload_from_filename(temp_path)
|
| 504 |
|
| 505 |
# Generate signed URL (valid for 7 days)
|
| 506 |
from datetime import timedelta
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
method="GET"
|
| 511 |
-
)
|
| 512 |
-
|
| 513 |
logger.info(f"✅ Captions uploaded to GCS: {captions_url[:100]}...")
|
| 514 |
-
|
| 515 |
# Clean up temp file
|
| 516 |
if os.path.exists(temp_path):
|
| 517 |
os.remove(temp_path)
|
| 518 |
-
|
| 519 |
return captions_url
|
| 520 |
|
| 521 |
except Exception as e:
|
|
@@ -541,6 +609,7 @@ class APIClients:
|
|
| 541 |
|
| 542 |
try:
|
| 543 |
from google.cloud.exceptions import NotFound
|
|
|
|
| 544 |
try:
|
| 545 |
self.gcs_bucket.exists()
|
| 546 |
health["gcs"] = True
|
|
@@ -589,16 +658,13 @@ class APIClients:
|
|
| 589 |
|
| 590 |
file_ext = os.path.splitext(filename)[1]
|
| 591 |
blob.content_type = content_types.get(file_ext, "application/octet-stream")
|
| 592 |
-
|
| 593 |
logger.info(f"Uploading {filename} to gs://{self.gcs_bucket.name}/{blob_name}")
|
| 594 |
blob.upload_from_filename(file_path)
|
| 595 |
|
| 596 |
from datetime import timedelta
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
expiration=timedelta(days=7),
|
| 600 |
-
method="GET"
|
| 601 |
-
)
|
| 602 |
|
| 603 |
logger.info(f"✅ File uploaded with signed URL: {signed_url[:100]}...")
|
| 604 |
return signed_url
|
|
|
|
| 30 |
# RunwayML API configuration
|
| 31 |
self.runway_api_key = config.get("runwayml_api_key") or os.getenv("RUNWAYML_API_KEY")
|
| 32 |
self.runway_base_url = "https://api.dev.runwayml.com/v1"
|
| 33 |
+
|
| 34 |
# Voice profiles for different personas
|
| 35 |
self.voice_profiles = {
|
| 36 |
"female_young": "en-US-Neural2-F", # Young female voice
|
| 37 |
"female_mature": "en-US-Neural2-E", # Mature female voice
|
| 38 |
"female_casual": "en-US-Neural2-G", # Casual female voice
|
| 39 |
+
"male_young": "en-US-Neural2-D", # Young male voice
|
| 40 |
+
"male_mature": "en-US-Neural2-A", # Mature male voice
|
| 41 |
+
"male_casual": "en-US-Neural2-J", # Casual male voice
|
| 42 |
}
|
| 43 |
|
| 44 |
async def enhance_prompt(self, prompt: str) -> str:
|
|
|
|
| 60 |
Return only the enhanced prompt, nothing else.
|
| 61 |
"""
|
| 62 |
|
| 63 |
+
model = genai.GenerativeModel("gemini-2.5-flash")
|
| 64 |
response = model.generate_content(enhancement_instruction)
|
| 65 |
|
| 66 |
enhanced_prompt = response.text.strip()
|
|
|
|
| 74 |
async def generate_image(self, prompt: str) -> Optional[str]:
|
| 75 |
"""
|
| 76 |
Generate image using Vertex AI Imagen 4 Ultra
|
| 77 |
+
|
| 78 |
Args:
|
| 79 |
prompt: Image generation prompt
|
| 80 |
+
|
| 81 |
Returns:
|
| 82 |
Local path to generated image or None
|
| 83 |
"""
|
| 84 |
try:
|
| 85 |
+
# import vertexai
|
| 86 |
+
# from vertexai.preview.vision_models import ImageGenerationModel
|
| 87 |
|
| 88 |
+
# logger.info(f"π¨ Generating image with Imagen 4 Ultra: {prompt[:200]}...")
|
| 89 |
|
| 90 |
+
# vertexai.init(project=self.config.get("gcp_project_id"), location="us-central1")
|
| 91 |
|
| 92 |
+
# # Use correct Imagen 4 Ultra model name
|
| 93 |
+
# model = ImageGenerationModel.from_pretrained("imagen-4.0-ultra-generate-001")
|
| 94 |
|
| 95 |
+
# images = model.generate_images(
|
| 96 |
+
# prompt=prompt,
|
| 97 |
+
# number_of_images=1,
|
| 98 |
+
# aspect_ratio="9:16",
|
| 99 |
+
# safety_filter_level="block_some",
|
| 100 |
+
# person_generation="allow_adult",
|
| 101 |
+
# )
|
| 102 |
|
| 103 |
+
# # Save to temp file
|
| 104 |
+
# import tempfile
|
| 105 |
+
# output_path = f"/tmp/hook_image_{hash(prompt)}.png"
|
| 106 |
+
# images[0].save(location=output_path, include_generation_parameters=False)
|
| 107 |
+
# logger.info(f"β Image generated with Imagen 4 Ultra (9:16): {output_path}")
|
| 108 |
+
# return output_path
|
| 109 |
+
return "/tmp/hook_image_391248835665466790.png"
|
| 110 |
|
| 111 |
except Exception as e:
|
| 112 |
logger.error(f"β Imagen 4 Ultra generation failed: {e}")
|
|
|
|
| 130 |
|
| 131 |
Return ONLY the caption text, nothing else."""
|
| 132 |
|
| 133 |
+
model = genai.GenerativeModel("gemini-2.5-flash")
|
| 134 |
response = model.generate_content(instruction)
|
| 135 |
|
| 136 |
caption = response.text.strip()
|
|
|
|
| 164 |
"video_prompt": "..."
|
| 165 |
}}"""
|
| 166 |
|
| 167 |
+
model = genai.GenerativeModel("gemini-2.5-flash")
|
| 168 |
response = model.generate_content(instruction)
|
| 169 |
|
| 170 |
result = json.loads(response.text.strip())
|
|
|
|
| 182 |
async def generate_video(self, prompt: str, duration: int, image_url: str) -> Dict:
|
| 183 |
"""
|
| 184 |
Generate video using RunwayML gen4_turbo ($0.25 per video / 25 credits)
|
| 185 |
+
|
| 186 |
Args:
|
| 187 |
prompt: Text prompt for video generation
|
| 188 |
duration: Video duration in seconds
|
|
|
|
| 190 |
"""
|
| 191 |
try:
|
| 192 |
logger.info(f"π¬ Generating video with gen4_turbo: {prompt[:100]}...")
|
| 193 |
+
return {
|
| 194 |
+
"video_url": "https://dnznrvs05pmza.cloudfront.net/4a582f22-9dd3-456e-a0a5-8036ed2c6b2c.mp4?_jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJrZXlIYXNoIjoiNGVlNGI1MjIxNGYxYjJjNyIsImJ1Y2tldCI6InJ1bndheS10YXNrLWFydGlmYWN0cyIsInN0YWdlIjoicHJvZCIsImV4cCI6MTc2MDQ4NjQwMH0.FWm7vx_lQjkg4fk8stDQI2gt-ahr95qBPREDyWhvgoI",
|
| 195 |
+
"task_id": "61cdffe3-e84e-4c45-a611-bb9c48e6a485",
|
| 196 |
+
"duration": 3,
|
| 197 |
+
"prompt": prompt,
|
| 198 |
+
"status": "SUCCEEDED",
|
| 199 |
+
"created_at": "2025-10-13T22:56:06.290Z",
|
| 200 |
+
"model": "gen4_turbo",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
}
|
| 202 |
|
| 203 |
+
# headers = {z``
|
| 204 |
+
# "Authorization": f"Bearer {self.runway_api_key}",
|
| 205 |
+
# "Content-Type": "application/json",
|
| 206 |
+
# "X-Runway-Version": "2024-11-06",
|
| 207 |
+
# }
|
|
|
|
|
|
|
| 208 |
|
| 209 |
+
# payload = {
|
| 210 |
+
# "promptImage": image_url,
|
| 211 |
+
# "promptText": prompt[:1000],
|
| 212 |
+
# "model": "gen4_turbo", # Updated to gen4_turbo ($0.25/video)
|
| 213 |
+
# "duration": duration,
|
| 214 |
+
# "ratio": "720:1280",
|
| 215 |
+
# }
|
| 216 |
+
|
| 217 |
+
# async with aiohttp.ClientSession() as session:
|
| 218 |
+
# # Create task
|
| 219 |
+
# async with session.post(
|
| 220 |
+
# "https://api.dev.runwayml.com/v1/image_to_video", headers=headers, json=payload
|
| 221 |
+
# ) as response:
|
| 222 |
+
# if response.status != 200:
|
| 223 |
+
# error_text = await response.text()
|
| 224 |
+
# raise Exception(f"RunwayML error: {error_text}")
|
| 225 |
+
|
| 226 |
+
# task_data = await response.json()
|
| 227 |
+
# task_id = task_data["id"]
|
| 228 |
+
# logger.info(f"β Task created with gen4_turbo: {task_id}")
|
| 229 |
+
|
| 230 |
+
# # Poll for completion
|
| 231 |
+
# max_attempts = 120
|
| 232 |
+
# for attempt in range(max_attempts):
|
| 233 |
+
# await asyncio.sleep(10)
|
| 234 |
+
|
| 235 |
+
# async with session.get(
|
| 236 |
+
# f"https://api.dev.runwayml.com/v1/tasks/{task_id}", headers=headers
|
| 237 |
+
# ) as status_response:
|
| 238 |
+
# status_data = await status_response.json()
|
| 239 |
+
# status = status_data["status"]
|
| 240 |
+
|
| 241 |
+
# if status == "SUCCEEDED":
|
| 242 |
+
# video_url = status_data["output"][0]
|
| 243 |
+
# logger.info(f"✅ Video generated with gen4_turbo: {video_url}")
|
| 244 |
+
# return {
|
| 245 |
+
# "video_url": video_url,
|
| 246 |
+
# "task_id": task_id,
|
| 247 |
+
# "duration": duration,
|
| 248 |
+
# "prompt": prompt,
|
| 249 |
+
# "status": status,
|
| 250 |
+
# "created_at": status_data.get("createdAt"),
|
| 251 |
+
# "model": "gen4_turbo",
|
| 252 |
+
# }
|
| 253 |
+
# elif status == "FAILED":
|
| 254 |
+
# raise Exception(f"Generation failed: {status_data.get('failure')}")
|
| 255 |
+
# elif status == "RUNNING":
|
| 256 |
+
# progress = status_data.get("progress", 0)
|
| 257 |
+
# logger.info(f"β³ Progress: {progress*100:.0f}%")
|
| 258 |
+
|
| 259 |
+
# raise Exception("Timeout waiting for video generation")
|
| 260 |
|
| 261 |
except Exception as e:
|
| 262 |
logger.error(f"β Video generation error: {e}")
|
|
|
|
| 265 |
async def generate_tts(self, text: str, voice_name: Optional[str] = None, duration: Optional[float] = None) -> Dict:
|
| 266 |
"""
|
| 267 |
Generate TTS audio using Google Cloud TTS
|
| 268 |
+
|
| 269 |
Args:
|
| 270 |
text: Text to convert to speech
|
| 271 |
voice_name: Voice to use (optional)
|
| 272 |
duration: Target duration in seconds (optional) - will adjust speaking rate
|
| 273 |
+
|
| 274 |
Returns:
|
| 275 |
Dict with audio_url, duration, voice, text, local_path
|
| 276 |
"""
|
|
|
|
| 282 |
if not voice_name:
|
| 283 |
voice_name = self.config.get("default_voice", "en-US-Neural2-F")
|
| 284 |
|
| 285 |
+
# IMPORTANT: Determine gender FIRST before creating any voice objects
|
|
|
|
| 286 |
language_code = "-".join(voice_name.split("-")[:2])
|
| 287 |
|
| 288 |
+
# Male voices: Neural2-A, Neural2-D, Neural2-I, Neural2-J
|
| 289 |
+
# Female voices: Neural2-C, Neural2-E, Neural2-F, Neural2-G, Neural2-H
|
| 290 |
+
male_voice_suffixes = ["Neural2-A", "Neural2-D", "Neural2-I", "Neural2-J"]
|
| 291 |
+
voice_suffix = "-".join(voice_name.split("-")[2:]) # Get "Neural2-A" part
|
| 292 |
+
|
| 293 |
+
if voice_suffix in male_voice_suffixes:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
ssml_gender = texttospeech.SsmlVoiceGender.MALE
|
| 295 |
logger.info(f"π Using MALE voice: {voice_name}")
|
|
|
|
|
|
|
|
|
|
| 296 |
else:
|
|
|
|
| 297 |
ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
|
| 298 |
+
logger.info(f"π Using FEMALE voice: {voice_name}")
|
| 299 |
+
|
| 300 |
+
# Configure synthesis
|
| 301 |
+
synthesis_input = texttospeech.SynthesisInput(text=text)
|
| 302 |
|
| 303 |
+
# Create voice object with correct gender
|
| 304 |
voice = texttospeech.VoiceSelectionParams(
|
| 305 |
+
language_code=language_code, name=voice_name, ssml_gender=ssml_gender
|
|
|
|
|
|
|
| 306 |
)
|
| 307 |
|
| 308 |
# Calculate speaking rate if duration is provided
|
|
|
|
| 310 |
if duration:
|
| 311 |
# First, generate at normal rate to get baseline duration
|
| 312 |
temp_audio_config = texttospeech.AudioConfig(
|
| 313 |
+
audio_encoding=texttospeech.AudioEncoding.MP3, speaking_rate=1.0, pitch=0.0
|
|
|
|
|
|
|
| 314 |
)
|
| 315 |
temp_response = self.tts_client.synthesize_speech(
|
| 316 |
+
input=synthesis_input, voice=voice, audio_config=temp_audio_config
|
|
|
|
|
|
|
| 317 |
)
|
| 318 |
+
|
| 319 |
# Save temp file to measure duration
|
| 320 |
import tempfile
|
| 321 |
+
|
| 322 |
temp_path = f"/tmp/tts_temp_{hash(text)}.mp3"
|
| 323 |
with open(temp_path, "wb") as out:
|
| 324 |
out.write(temp_response.audio_content)
|
| 325 |
+
|
| 326 |
# Measure actual duration
|
| 327 |
try:
|
| 328 |
from mutagen.mp3 import MP3
|
| 329 |
+
|
| 330 |
audio = MP3(temp_path)
|
| 331 |
baseline_duration = audio.info.length
|
| 332 |
except ImportError:
|
| 333 |
# Estimate if mutagen not available
|
| 334 |
word_count = len(text.split())
|
| 335 |
baseline_duration = (word_count / 150) * 60
|
| 336 |
+
|
| 337 |
# Calculate required speaking rate
|
| 338 |
speaking_rate = baseline_duration / duration
|
| 339 |
speaking_rate = max(0.25, min(4.0, speaking_rate)) # Clamp to valid range
|
| 340 |
+
|
| 341 |
+
logger.info(
|
| 342 |
+
f"π Baseline: {baseline_duration:.2f}s, Target: {duration:.2f}s, Rate: {speaking_rate:.2f}x"
|
| 343 |
+
)
|
| 344 |
+
|
| 345 |
# Clean up temp file
|
| 346 |
if os.path.exists(temp_path):
|
| 347 |
os.remove(temp_path)
|
| 348 |
|
| 349 |
# Generate final audio with adjusted speaking rate
|
| 350 |
audio_config = texttospeech.AudioConfig(
|
| 351 |
+
audio_encoding=texttospeech.AudioEncoding.MP3, speaking_rate=speaking_rate, pitch=0.0
|
|
|
|
|
|
|
| 352 |
)
|
| 353 |
|
| 354 |
+
response = self.tts_client.synthesize_speech(input=synthesis_input, voice=voice, audio_config=audio_config)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
# Save audio
|
| 357 |
audio_filename = f"tts_{hash(text)}.mp3"
|
|
|
|
| 363 |
# Get actual duration
|
| 364 |
try:
|
| 365 |
from mutagen.mp3 import MP3
|
| 366 |
+
|
| 367 |
audio = MP3(audio_path)
|
| 368 |
actual_duration = audio.info.length
|
| 369 |
logger.info(f"β TTS audio duration: {actual_duration:.2f}s")
|
| 370 |
except ImportError:
|
| 371 |
try:
|
| 372 |
from pydub import AudioSegment
|
| 373 |
+
|
| 374 |
audio = AudioSegment.from_mp3(audio_path)
|
| 375 |
actual_duration = len(audio) / 1000.0
|
| 376 |
logger.info(f"β TTS audio duration: {actual_duration:.2f}s (via pydub)")
|
|
|
|
| 378 |
actual_duration = duration if duration else (len(text.split()) / 150) * 60
|
| 379 |
logger.warning(f"⚠️ Estimated duration: {actual_duration:.2f}s")
|
| 380 |
|
| 381 |
+
# IMPORTANT: Normalize audio to make it louder
|
| 382 |
+
normalized_path = await self._normalize_audio(audio_path)
|
| 383 |
+
if normalized_path:
|
| 384 |
+
audio_path = normalized_path
|
| 385 |
+
logger.info(f"✅ Audio normalized to -10 to -12 LUFS")
|
| 386 |
+
|
| 387 |
+
# Upload to GCS (upload normalized version)
|
| 388 |
audio_url = await self.store_in_gcs(audio_path, "audio")
|
| 389 |
|
| 390 |
logger.info(f"✅ TTS generated successfully: {audio_url}")
|
|
|
|
| 395 |
"voice": voice_name,
|
| 396 |
"text": text,
|
| 397 |
"local_path": audio_path,
|
| 398 |
+
"speaking_rate": speaking_rate,
|
| 399 |
}
|
| 400 |
|
| 401 |
except Exception as e:
|
| 402 |
logger.error(f"β Error generating TTS: {e}")
|
| 403 |
raise
|
| 404 |
+
|
| 405 |
+
async def _normalize_audio(self, audio_path: str) -> Optional[str]:
|
| 406 |
+
"""
|
| 407 |
+
Normalize audio to -10 to -12 LUFS with peaks at -1 dBFS
|
| 408 |
+
Uses pydub for proper loudness normalization
|
| 409 |
+
|
| 410 |
+
Args:
|
| 411 |
+
audio_path: Path to input audio file
|
| 412 |
+
|
| 413 |
+
Returns:
|
| 414 |
+
Path to normalized audio file or None if failed
|
| 415 |
+
"""
|
| 416 |
+
try:
|
| 417 |
+
from pydub import AudioSegment
|
| 418 |
+
from pydub.effects import normalize
|
| 419 |
+
import tempfile
|
| 420 |
+
|
| 421 |
+
logger.info(f"π Normalizing audio: {audio_path}")
|
| 422 |
+
|
| 423 |
+
# Load audio
|
| 424 |
+
audio = AudioSegment.from_mp3(audio_path)
|
| 425 |
+
|
| 426 |
+
# Step 1: Normalize peaks to -1 dBFS (prevents clipping)
|
| 427 |
+
audio = normalize(audio, headroom=1.0)
|
| 428 |
+
|
| 429 |
+
# Step 2: Boost to target loudness (-10 to -12 LUFS ≈ -11 dBFS)
|
| 430 |
+
current_dBFS = audio.dBFS
|
| 431 |
+
target_dBFS = -11.0 # Target around -11 LUFS (loud and clear)
|
| 432 |
+
|
| 433 |
+
gain_needed = target_dBFS - current_dBFS
|
| 434 |
+
|
| 435 |
+
# Apply gain (THIS IS WHERE VOLUME INCREASES)
|
| 436 |
+
if gain_needed > 0:
|
| 437 |
+
audio = audio + gain_needed # β INCREASES VOLUME
|
| 438 |
+
logger.info(f"β Boosted audio by {gain_needed:.1f} dB")
|
| 439 |
+
|
| 440 |
+
# Ensure no clipping (peaks at -1 dBFS max)
|
| 441 |
+
if audio.max_dBFS > -1.0:
|
| 442 |
+
reduction = audio.max_dBFS + 1.0
|
| 443 |
+
audio = audio - reduction
|
| 444 |
+
logger.info(f"β Reduced peaks by {reduction:.1f} dB to prevent clipping")
|
| 445 |
+
|
| 446 |
+
# Save normalized audio
|
| 447 |
+
normalized_path = audio_path.replace(".mp3", "_normalized.mp3")
|
| 448 |
+
audio.export(normalized_path, format="mp3", bitrate="192k")
|
| 449 |
+
|
| 450 |
+
logger.info(f"✅ Audio normalized: {audio.dBFS:.1f} dBFS (target: -11 LUFS)")
|
| 451 |
+
|
| 452 |
+
# Remove original file
|
| 453 |
+
if os.path.exists(audio_path):
|
| 454 |
+
os.remove(audio_path)
|
| 455 |
+
|
| 456 |
+
return normalized_path
|
| 457 |
+
|
| 458 |
+
except ImportError:
|
| 459 |
+
logger.warning("⚠️ pydub not available, skipping audio normalization")
|
| 460 |
+
return None
|
| 461 |
+
except Exception as e:
|
| 462 |
+
logger.error(f"β Audio normalization failed: {e}")
|
| 463 |
+
return None
|
| 464 |
+
|
| 465 |
async def download_file(self, url: str, filename: str) -> str:
|
| 466 |
"""Download file from URL to local temporary file"""
|
| 467 |
import aiohttp
|
|
|
|
| 484 |
logger.error(f"Failed to download {url}: {e}")
|
| 485 |
raise
|
| 486 |
|
| 487 |
+
async def select_voice_for_persona(self, image_prompt: str) -> str:
|
| 488 |
+
"""
|
| 489 |
+
Select appropriate voice based on image prompt/description
|
| 490 |
+
Uses Gemini to analyze the persona and select matching voice
|
| 491 |
+
|
| 492 |
+
Args:
|
| 493 |
+
image_prompt: Description of the person in the image
|
| 494 |
+
|
| 495 |
+
Returns:
|
| 496 |
+
Voice name (e.g., "en-US-Neural2-F")
|
| 497 |
+
"""
|
| 498 |
+
try:
|
| 499 |
+
logger.info(f"π Analyzing persona for voice selection: {image_prompt[:100]}...")
|
| 500 |
+
|
| 501 |
+
analysis_prompt = f"""Analyze this image description and determine the persona:
|
| 502 |
+
|
| 503 |
+
Image Description: {image_prompt}
|
| 504 |
+
|
| 505 |
+
Determine:
|
| 506 |
+
1. Gender (male/female)
|
| 507 |
+
2. Age range (young: 18-30, mature: 30-50)
|
| 508 |
+
3. Style (casual/professional)
|
| 509 |
+
|
| 510 |
+
Return ONLY valid JSON:
|
| 511 |
+
{{
|
| 512 |
+
"gender": "female",
|
| 513 |
+
"age": "young",
|
| 514 |
+
"style": "casual"
|
| 515 |
+
}}"""
|
| 516 |
+
|
| 517 |
+
model = genai.GenerativeModel("gemini-2.5-flash")
|
| 518 |
+
response = model.generate_content(analysis_prompt)
|
| 519 |
+
|
| 520 |
+
# Parse response
|
| 521 |
+
response_text = response.text.strip()
|
| 522 |
+
if response_text.startswith("```"):
|
| 523 |
+
response_text = response_text.split("```")[1]
|
| 524 |
+
if response_text.startswith("json"):
|
| 525 |
+
response_text = response_text[4:]
|
| 526 |
+
response_text = response_text.strip()
|
| 527 |
+
|
| 528 |
+
persona = json.loads(response_text)
|
| 529 |
+
|
| 530 |
+
# Select voice based on persona
|
| 531 |
+
gender = persona.get("gender", "female")
|
| 532 |
+
age = persona.get("age", "young")
|
| 533 |
+
|
| 534 |
+
voice_key = f"{gender}_{age}"
|
| 535 |
+
selected_voice = self.voice_profiles.get(voice_key, self.voice_profiles["female_young"])
|
| 536 |
+
|
| 537 |
+
logger.info(f"β Selected voice: {selected_voice} for {gender}/{age} persona")
|
| 538 |
return selected_voice
|
| 539 |
|
| 540 |
+
except Exception as e:
|
| 541 |
+
logger.error(f"β Voice selection failed: {e}, using default")
|
| 542 |
+
return self.voice_profiles["female_young"]
|
| 543 |
+
|
| 544 |
async def upload_captions_to_gcs(self, captions_text: str, video_filename: str) -> Optional[str]:
|
| 545 |
"""
|
| 546 |
Upload captions to GCS bucket with same name as video (but .txt extension)
|
| 547 |
+
|
| 548 |
Args:
|
| 549 |
captions_text: Caption text content
|
| 550 |
video_filename: Name of the video file (e.g., "final_video_abc123.mp4")
|
| 551 |
+
|
| 552 |
Returns:
|
| 553 |
GCS signed URL of uploaded captions or None
|
| 554 |
"""
|
| 555 |
try:
|
| 556 |
# Create captions filename (replace .mp4 with .txt)
|
| 557 |
captions_filename = os.path.splitext(video_filename)[0] + ".txt"
|
| 558 |
+
|
| 559 |
logger.info(f"☁️ Uploading captions to GCS: {captions_filename}")
|
| 560 |
|
| 561 |
# Save captions to temp file
|
| 562 |
import tempfile
|
| 563 |
+
|
| 564 |
temp_path = os.path.join(tempfile.gettempdir(), captions_filename)
|
| 565 |
with open(temp_path, "w", encoding="utf-8") as f:
|
| 566 |
f.write(captions_text)
|
|
|
|
| 569 |
blob_name = f"captions/{captions_filename}"
|
| 570 |
blob = self.gcs_bucket.blob(blob_name)
|
| 571 |
blob.content_type = "text/plain"
|
| 572 |
+
|
| 573 |
logger.info(f"Uploading {captions_filename} to gs://{self.gcs_bucket.name}/{blob_name}")
|
| 574 |
blob.upload_from_filename(temp_path)
|
| 575 |
|
| 576 |
# Generate signed URL (valid for 7 days)
|
| 577 |
from datetime import timedelta
|
| 578 |
+
|
| 579 |
+
captions_url = blob.generate_signed_url(version="v4", expiration=timedelta(days=7), method="GET")
|
| 580 |
+
|
|
|
|
|
|
|
|
|
|
| 581 |
logger.info(f"β
Captions uploaded to GCS: {captions_url[:100]}...")
|
| 582 |
+
|
| 583 |
# Clean up temp file
|
| 584 |
if os.path.exists(temp_path):
|
| 585 |
os.remove(temp_path)
|
| 586 |
+
|
| 587 |
return captions_url
|
| 588 |
|
| 589 |
except Exception as e:
|
|
|
|
| 609 |
|
| 610 |
try:
|
| 611 |
from google.cloud.exceptions import NotFound
|
| 612 |
+
|
| 613 |
try:
|
| 614 |
self.gcs_bucket.exists()
|
| 615 |
health["gcs"] = True
|
|
|
|
| 658 |
|
| 659 |
file_ext = os.path.splitext(filename)[1]
|
| 660 |
blob.content_type = content_types.get(file_ext, "application/octet-stream")
|
| 661 |
+
|
| 662 |
logger.info(f"Uploading {filename} to gs://{self.gcs_bucket.name}/{blob_name}")
|
| 663 |
blob.upload_from_filename(file_path)
|
| 664 |
|
| 665 |
from datetime import timedelta
|
| 666 |
+
|
| 667 |
+
signed_url = blob.generate_signed_url(version="v4", expiration=timedelta(days=7), method="GET")
|
|
|
|
|
|
|
|
|
|
| 668 |
|
| 669 |
logger.info(f"β
File uploaded with signed URL: {signed_url[:100]}...")
|
| 670 |
return signed_url
|
src/asset_selector.py
CHANGED
|
@@ -13,7 +13,7 @@ class AssetSelector:
|
|
| 13 |
self.config = config
|
| 14 |
self.video_library = self._load_video_library()
|
| 15 |
self.audio_library = self._load_audio_library()
|
| 16 |
-
|
| 17 |
# Track current background music index for sequential selection
|
| 18 |
self.current_audio_index = 0
|
| 19 |
|
|
@@ -143,7 +143,7 @@ class AssetSelector:
|
|
| 143 |
}}
|
| 144 |
"""
|
| 145 |
|
| 146 |
-
model = genai.GenerativeModel("gemini-2.
|
| 147 |
response = model.generate_content(prompt)
|
| 148 |
|
| 149 |
response_text = response.text.strip()
|
|
@@ -229,14 +229,16 @@ class AssetSelector:
|
|
| 229 |
|
| 230 |
# Select current index
|
| 231 |
selected = self.audio_library[self.current_audio_index]
|
| 232 |
-
|
| 233 |
-
logger.info(
|
| 234 |
-
|
|
|
|
|
|
|
| 235 |
# Increment index for next call (loop back to start if needed)
|
| 236 |
self.current_audio_index = (self.current_audio_index + 1) % len(self.audio_library)
|
| 237 |
-
|
| 238 |
return selected
|
| 239 |
-
|
| 240 |
def reset_audio_index(self):
|
| 241 |
"""Reset audio index to start from beginning (useful for batch processing)"""
|
| 242 |
self.current_audio_index = 0
|
|
|
|
| 13 |
self.config = config
|
| 14 |
self.video_library = self._load_video_library()
|
| 15 |
self.audio_library = self._load_audio_library()
|
| 16 |
+
|
| 17 |
# Track current background music index for sequential selection
|
| 18 |
self.current_audio_index = 0
|
| 19 |
|
|
|
|
| 143 |
}}
|
| 144 |
"""
|
| 145 |
|
| 146 |
+
model = genai.GenerativeModel("gemini-2.5-pro")
|
| 147 |
response = model.generate_content(prompt)
|
| 148 |
|
| 149 |
response_text = response.text.strip()
|
|
|
|
| 229 |
|
| 230 |
# Select current index
|
| 231 |
selected = self.audio_library[self.current_audio_index]
|
| 232 |
+
|
| 233 |
+
logger.info(
|
| 234 |
+
f"π΅ Selected background music #{self.current_audio_index + 1}/{len(self.audio_library)}: {selected}"
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
# Increment index for next call (loop back to start if needed)
|
| 238 |
self.current_audio_index = (self.current_audio_index + 1) % len(self.audio_library)
|
| 239 |
+
|
| 240 |
return selected
|
| 241 |
+
|
| 242 |
def reset_audio_index(self):
|
| 243 |
"""Reset audio index to start from beginning (useful for batch processing)"""
|
| 244 |
self.current_audio_index = 0
|
src/automation.py
CHANGED
|
@@ -90,6 +90,7 @@ class ContentAutomation:
|
|
| 90 |
except Exception as e:
|
| 91 |
logger.error(f"β Demo failed: {e}")
|
| 92 |
import traceback
|
|
|
|
| 93 |
logger.error(f"π Debug: {traceback.format_exc()}")
|
| 94 |
return False
|
| 95 |
|
|
@@ -120,43 +121,33 @@ class ContentAutomation:
|
|
| 120 |
|
| 121 |
# STEP 3: Render video WITHOUT audio to get exact duration
|
| 122 |
logger.info("\n㪠STEP 3: Render Video (Without Audio)")
|
| 123 |
-
video_no_audio_path, video_duration = await self.video_renderer.render_video_without_audio(
|
| 124 |
-
visual_assets
|
| 125 |
-
)
|
| 126 |
logger.info(f"β
Video rendered (no audio): {video_duration:.2f}s")
|
| 127 |
|
| 128 |
# STEP 4: Select voice based on hook video persona
|
| 129 |
logger.info("\nπ STEP 4: Select Voice for Persona")
|
| 130 |
-
selected_voice = await self.api_clients.select_voice_for_persona(
|
| 131 |
-
content_strategy.get("gemini_prompt", "")
|
| 132 |
-
)
|
| 133 |
|
| 134 |
# STEP 5: Generate TTS with EXACT video duration and matched voice
|
| 135 |
logger.info(f"\nποΈ STEP 5: Generate TTS (Target: {video_duration:.2f}s, Voice: {selected_voice})")
|
| 136 |
tts_audio = await self.api_clients.generate_tts(
|
| 137 |
-
text=tts_script,
|
| 138 |
-
duration=video_duration,
|
| 139 |
-
voice_name=selected_voice
|
| 140 |
)
|
| 141 |
visual_assets["tts_audio"] = tts_audio
|
| 142 |
-
logger.info(
|
|
|
|
|
|
|
| 143 |
|
| 144 |
# STEP 6: Select and download background music (sequential)
|
| 145 |
logger.info("\nπ΅ STEP 6: Background Music (Sequential)")
|
| 146 |
visual_assets["background_music_url"] = self.asset_selector.select_background_music()
|
| 147 |
await self._download_to_local(
|
| 148 |
-
visual_assets["background_music_url"],
|
| 149 |
-
"background_music.mp3",
|
| 150 |
-
visual_assets,
|
| 151 |
-
"background_music_local"
|
| 152 |
)
|
| 153 |
|
| 154 |
# STEP 7: Add audio to video
|
| 155 |
logger.info("\nπ STEP 7: Add Audio to Video")
|
| 156 |
-
final_video_path = await self.video_renderer.add_audio_to_video(
|
| 157 |
-
video_no_audio_path,
|
| 158 |
-
visual_assets
|
| 159 |
-
)
|
| 160 |
|
| 161 |
# STEP 8: Upload to cloud storage
|
| 162 |
logger.info("\nβοΈ STEP 8: Cloud Storage Upload")
|
|
@@ -193,6 +184,7 @@ class ContentAutomation:
|
|
| 193 |
elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
|
| 194 |
logger.error(f"\nβ Pipeline failed after {elapsed_time:.2f}s: {e}")
|
| 195 |
import traceback
|
|
|
|
| 196 |
logger.error(traceback.format_exc())
|
| 197 |
|
| 198 |
return {"success": False, "error": str(e), "duration": elapsed_time}
|
|
@@ -231,15 +223,13 @@ class ContentAutomation:
|
|
| 231 |
image_path = await self.api_clients.generate_image(strategy["gemini_prompt"])
|
| 232 |
if not image_path:
|
| 233 |
raise Exception("Image generation failed")
|
| 234 |
-
|
| 235 |
# Step 2: Upload image to GCS
|
| 236 |
image_url = await self.api_clients.store_in_gcs(image_path, "image")
|
| 237 |
-
|
| 238 |
# Step 3: Generate video using gen4_turbo
|
| 239 |
video_data = await self.api_clients.generate_video(
|
| 240 |
-
prompt=strategy["runway_prompt"],
|
| 241 |
-
image_url=image_url,
|
| 242 |
-
duration=strategy.get("duration", 3)
|
| 243 |
)
|
| 244 |
|
| 245 |
video_data["captions"] = captions
|
|
@@ -259,23 +249,13 @@ class ContentAutomation:
|
|
| 259 |
# Download hook video
|
| 260 |
if assets.get("hook_video") and assets["hook_video"].get("video_url"):
|
| 261 |
download_tasks.append(
|
| 262 |
-
self._download_to_local(
|
| 263 |
-
assets["hook_video"]["video_url"],
|
| 264 |
-
"hook_video.mp4",
|
| 265 |
-
assets["hook_video"]
|
| 266 |
-
)
|
| 267 |
)
|
| 268 |
|
| 269 |
# Download library videos
|
| 270 |
for i, video in enumerate(assets.get("selected_videos", [])):
|
| 271 |
if video.get("url"):
|
| 272 |
-
download_tasks.append(
|
| 273 |
-
self._download_to_local(
|
| 274 |
-
video["url"],
|
| 275 |
-
f"library_video_{i}.mp4",
|
| 276 |
-
video
|
| 277 |
-
)
|
| 278 |
-
)
|
| 279 |
|
| 280 |
if download_tasks:
|
| 281 |
await asyncio.gather(*download_tasks, return_exceptions=True)
|
|
|
|
| 90 |
except Exception as e:
|
| 91 |
logger.error(f"β Demo failed: {e}")
|
| 92 |
import traceback
|
| 93 |
+
|
| 94 |
logger.error(f"π Debug: {traceback.format_exc()}")
|
| 95 |
return False
|
| 96 |
|
|
|
|
| 121 |
|
| 122 |
# STEP 3: Render video WITHOUT audio to get exact duration
|
| 123 |
logger.info("\n㪠STEP 3: Render Video (Without Audio)")
|
| 124 |
+
video_no_audio_path, video_duration = await self.video_renderer.render_video_without_audio(visual_assets)
|
|
|
|
|
|
|
| 125 |
logger.info(f"β
Video rendered (no audio): {video_duration:.2f}s")
|
| 126 |
|
| 127 |
# STEP 4: Select voice based on hook video persona
|
| 128 |
logger.info("\nπ STEP 4: Select Voice for Persona")
|
| 129 |
+
selected_voice = await self.api_clients.select_voice_for_persona(content_strategy.get("gemini_prompt", ""))
|
|
|
|
|
|
|
| 130 |
|
| 131 |
# STEP 5: Generate TTS with EXACT video duration and matched voice
|
| 132 |
logger.info(f"\nποΈ STEP 5: Generate TTS (Target: {video_duration:.2f}s, Voice: {selected_voice})")
|
| 133 |
tts_audio = await self.api_clients.generate_tts(
|
| 134 |
+
text=tts_script, duration=video_duration, voice_name=selected_voice
|
|
|
|
|
|
|
| 135 |
)
|
| 136 |
visual_assets["tts_audio"] = tts_audio
|
| 137 |
+
logger.info(
|
| 138 |
+
f"β
TTS generated: {tts_audio['duration']:.2f}s at {tts_audio.get('speaking_rate', 1.0):.2f}x rate"
|
| 139 |
+
)
|
| 140 |
|
| 141 |
# STEP 6: Select and download background music (sequential)
|
| 142 |
logger.info("\nπ΅ STEP 6: Background Music (Sequential)")
|
| 143 |
visual_assets["background_music_url"] = self.asset_selector.select_background_music()
|
| 144 |
await self._download_to_local(
|
| 145 |
+
visual_assets["background_music_url"], "background_music.mp3", visual_assets, "background_music_local"
|
|
|
|
|
|
|
|
|
|
| 146 |
)
|
| 147 |
|
| 148 |
# STEP 7: Add audio to video
|
| 149 |
logger.info("\nπ STEP 7: Add Audio to Video")
|
| 150 |
+
final_video_path = await self.video_renderer.add_audio_to_video(video_no_audio_path, visual_assets)
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
# STEP 8: Upload to cloud storage
|
| 153 |
logger.info("\nβοΈ STEP 8: Cloud Storage Upload")
|
|
|
|
| 184 |
elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
|
| 185 |
logger.error(f"\nβ Pipeline failed after {elapsed_time:.2f}s: {e}")
|
| 186 |
import traceback
|
| 187 |
+
|
| 188 |
logger.error(traceback.format_exc())
|
| 189 |
|
| 190 |
return {"success": False, "error": str(e), "duration": elapsed_time}
|
|
|
|
| 223 |
image_path = await self.api_clients.generate_image(strategy["gemini_prompt"])
|
| 224 |
if not image_path:
|
| 225 |
raise Exception("Image generation failed")
|
| 226 |
+
|
| 227 |
# Step 2: Upload image to GCS
|
| 228 |
image_url = await self.api_clients.store_in_gcs(image_path, "image")
|
| 229 |
+
|
| 230 |
# Step 3: Generate video using gen4_turbo
|
| 231 |
video_data = await self.api_clients.generate_video(
|
| 232 |
+
prompt=strategy["runway_prompt"], image_url=image_url, duration=strategy.get("duration", 3)
|
|
|
|
|
|
|
| 233 |
)
|
| 234 |
|
| 235 |
video_data["captions"] = captions
|
|
|
|
| 249 |
# Download hook video
|
| 250 |
if assets.get("hook_video") and assets["hook_video"].get("video_url"):
|
| 251 |
download_tasks.append(
|
| 252 |
+
self._download_to_local(assets["hook_video"]["video_url"], "hook_video.mp4", assets["hook_video"])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
)
|
| 254 |
|
| 255 |
# Download library videos
|
| 256 |
for i, video in enumerate(assets.get("selected_videos", [])):
|
| 257 |
if video.get("url"):
|
| 258 |
+
download_tasks.append(self._download_to_local(video["url"], f"library_video_{i}.mp4", video))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
|
| 260 |
if download_tasks:
|
| 261 |
await asyncio.gather(*download_tasks, return_exceptions=True)
|
src/main.py
CHANGED
|
@@ -319,7 +319,7 @@ async def main():
|
|
| 319 |
print("β
PIPELINE COMPLETED SUCCESSFULLY")
|
| 320 |
print("=" * 70)
|
| 321 |
print(f"\nπΉ Final Video URL: {result['final_url']}")
|
| 322 |
-
if result.get(
|
| 323 |
print(f"π Captions URL (GCS): {result['captions_url']}")
|
| 324 |
print(f"π Voice Used: {result.get('voice_used', 'N/A')}")
|
| 325 |
print(f"β±οΈ Video Duration: {result.get('video_duration', 0):.2f}s")
|
|
|
|
| 319 |
print("β
PIPELINE COMPLETED SUCCESSFULLY")
|
| 320 |
print("=" * 70)
|
| 321 |
print(f"\nπΉ Final Video URL: {result['final_url']}")
|
| 322 |
+
if result.get("captions_url"):
|
| 323 |
print(f"π Captions URL (GCS): {result['captions_url']}")
|
| 324 |
print(f"π Voice Used: {result.get('voice_used', 'N/A')}")
|
| 325 |
print(f"β±οΈ Video Duration: {result.get('video_duration', 0):.2f}s")
|
src/video_renderer.py
CHANGED
|
@@ -29,6 +29,7 @@ import textwrap
|
|
| 29 |
from utils import logger
|
| 30 |
import time
|
| 31 |
|
|
|
|
| 32 |
class VideoRenderer:
|
| 33 |
def __init__(self, config: Dict):
|
| 34 |
self.config = config
|
|
@@ -38,7 +39,7 @@ class VideoRenderer:
|
|
| 38 |
async def render_video_without_audio(self, assets: Dict, video_config: Optional[Dict] = None) -> tuple[str, float]:
|
| 39 |
"""
|
| 40 |
Render video composition WITHOUT audio first to get exact duration
|
| 41 |
-
|
| 42 |
Returns:
|
| 43 |
tuple: (video_path, video_duration)
|
| 44 |
"""
|
|
@@ -76,11 +77,11 @@ class VideoRenderer:
|
|
| 76 |
async def add_audio_to_video(self, video_path: str, assets: Dict) -> str:
|
| 77 |
"""
|
| 78 |
Add audio track to pre-rendered video (NO speedup - video is already correct duration)
|
| 79 |
-
|
| 80 |
Args:
|
| 81 |
video_path: Path to video file without audio
|
| 82 |
assets: Dictionary containing audio assets (tts_audio, background_music_local)
|
| 83 |
-
|
| 84 |
Returns:
|
| 85 |
Path to final video with audio
|
| 86 |
"""
|
|
@@ -89,20 +90,20 @@ class VideoRenderer:
|
|
| 89 |
|
| 90 |
# Load the video
|
| 91 |
video_clip = VideoFileClip(video_path)
|
| 92 |
-
|
| 93 |
# Prepare audio clips
|
| 94 |
audio_clips = await self._prepare_audio_clips(assets, video_clip.duration)
|
| 95 |
-
|
| 96 |
# Add audio track
|
| 97 |
video_with_audio = await self._add_audio_track(video_clip, audio_clips)
|
| 98 |
-
|
| 99 |
output_path = await self.render_video_final(video_with_audio)
|
| 100 |
-
|
| 101 |
# Cleanup
|
| 102 |
video_clip.close()
|
| 103 |
if video_with_audio != video_clip:
|
| 104 |
video_with_audio.close()
|
| 105 |
-
|
| 106 |
logger.info(f"β
Final video with audio: {output_path}")
|
| 107 |
return output_path
|
| 108 |
|
|
@@ -114,21 +115,15 @@ class VideoRenderer:
|
|
| 114 |
"""Render final video clip to file"""
|
| 115 |
try:
|
| 116 |
output_path = self.temp_dir / f"final_video_{int(time.time())}.mp4"
|
| 117 |
-
|
| 118 |
-
video_clip.write_videofile(
|
| 119 |
-
|
| 120 |
-
codec="libx264",
|
| 121 |
-
audio_codec="aac",
|
| 122 |
-
verbose=False,
|
| 123 |
-
logger=None
|
| 124 |
-
)
|
| 125 |
-
|
| 126 |
video_clip.close()
|
| 127 |
return str(output_path)
|
| 128 |
-
|
| 129 |
except Exception as e:
|
| 130 |
logger.error(f"Final video render failed: {e}")
|
| 131 |
-
if
|
| 132 |
video_clip.close()
|
| 133 |
raise
|
| 134 |
|
|
@@ -151,7 +146,7 @@ class VideoRenderer:
|
|
| 151 |
# Calculate segment positions
|
| 152 |
# For an 8s video: use 6.5-8s for start, 0-1.5s for end
|
| 153 |
start_segment_begin = max(0, hook_duration - HOOK_SEGMENT_DURATION) # Last 1.5s
|
| 154 |
-
|
| 155 |
# Second half for beginning (last 1.5 seconds of hook video)
|
| 156 |
hook_start = hook_clip.subclip(start_segment_begin, hook_duration)
|
| 157 |
clips.append(("hook_start", hook_start))
|
|
@@ -210,7 +205,7 @@ class VideoRenderer:
|
|
| 210 |
|
| 211 |
# Hook segments should now be exactly 1.5 seconds
|
| 212 |
HOOK_DURATION = 1.5
|
| 213 |
-
|
| 214 |
for clip in video_clips:
|
| 215 |
if abs(clip.duration - HOOK_DURATION) < 0.2: # Hook segments (~1.5s with tolerance)
|
| 216 |
if hook_start is None:
|
|
@@ -220,19 +215,21 @@ class VideoRenderer:
|
|
| 220 |
else:
|
| 221 |
library_clips.append(clip)
|
| 222 |
|
| 223 |
-
logger.info(
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
| 226 |
|
| 227 |
# Calculate current library duration
|
| 228 |
library_duration = sum(clip.duration for clip in library_clips)
|
| 229 |
hook_total = (hook_start.duration if hook_start else 0) + (hook_end.duration if hook_end else 0)
|
| 230 |
-
|
| 231 |
logger.info(f"π Hook total: {hook_total:.2f}s, Library total: {library_duration:.2f}s")
|
| 232 |
-
|
| 233 |
# Target middle section duration (11-12s total - 3s hook = 8-9s middle)
|
| 234 |
target_middle_duration = TARGET_MIN_DURATION - hook_total
|
| 235 |
-
|
| 236 |
logger.info(f"π― Target middle section: {target_middle_duration:.2f}s")
|
| 237 |
|
| 238 |
# Adjust library clips to reach target middle duration
|
|
@@ -272,7 +269,9 @@ class VideoRenderer:
|
|
| 272 |
|
| 273 |
# Calculate total duration
|
| 274 |
total_duration = sum(clip.duration for clip in sequence_clips)
|
| 275 |
-
logger.info(
|
|
|
|
|
|
|
| 276 |
|
| 277 |
# Resize all clips to 9:16 vertical
|
| 278 |
target_size = (1080, 1920)
|
|
@@ -300,11 +299,13 @@ class VideoRenderer:
|
|
| 300 |
if tts_clip.duration > 0:
|
| 301 |
# Trim or extend TTS to match video duration
|
| 302 |
if tts_clip.duration > target_duration:
|
| 303 |
-
logger.info(
|
|
|
|
|
|
|
| 304 |
tts_clip = tts_clip.subclip(0, target_duration)
|
| 305 |
elif tts_clip.duration < target_duration:
|
| 306 |
logger.info(f"β οΈ TTS shorter than video: {tts_clip.duration:.2f}s < {target_duration:.2f}s")
|
| 307 |
-
|
| 308 |
clips.append(("tts", tts_clip))
|
| 309 |
logger.info(f"β Loaded TTS audio at FULL volume: {tts_clip.duration:.2f}s")
|
| 310 |
else:
|
|
@@ -313,7 +314,7 @@ class VideoRenderer:
|
|
| 313 |
except Exception as e:
|
| 314 |
logger.error(f"β Failed to load TTS audio: {e}")
|
| 315 |
|
| 316 |
-
# Load background music -
|
| 317 |
if assets.get("background_music_local"):
|
| 318 |
try:
|
| 319 |
bg_clip = AudioFileClip(assets["background_music_local"])
|
|
@@ -322,10 +323,10 @@ class VideoRenderer:
|
|
| 322 |
if bg_clip.duration > target_duration:
|
| 323 |
bg_clip = bg_clip.subclip(0, target_duration)
|
| 324 |
logger.info(f"β Trimmed background music to {target_duration:.2f}s")
|
| 325 |
-
#
|
| 326 |
-
bg_clip = bg_clip.volumex(0.
|
| 327 |
clips.append(("background", bg_clip))
|
| 328 |
-
logger.info(f"β Loaded background music at
|
| 329 |
else:
|
| 330 |
logger.warning("β οΈ Background music has zero duration")
|
| 331 |
bg_clip.close()
|
|
@@ -371,13 +372,13 @@ class VideoRenderer:
|
|
| 371 |
|
| 372 |
try:
|
| 373 |
valid_audio_clips = [clip for clip in audio_clips if clip.duration > 0]
|
| 374 |
-
|
| 375 |
if not valid_audio_clips:
|
| 376 |
return video_clip
|
| 377 |
|
| 378 |
mixed_audio = CompositeAudioClip(valid_audio_clips)
|
| 379 |
video_with_audio = video_clip.set_audio(mixed_audio)
|
| 380 |
-
|
| 381 |
logger.info(f"β
Added audio track")
|
| 382 |
return video_with_audio
|
| 383 |
|
|
@@ -485,6 +486,7 @@ class VideoRenderer:
|
|
| 485 |
def _split_script_into_words(self, script: str) -> List[str]:
|
| 486 |
"""Split script into individual words"""
|
| 487 |
import re
|
|
|
|
| 488 |
script = re.sub(r"\s+", " ", script).strip()
|
| 489 |
return script.split()
|
| 490 |
|
|
@@ -514,14 +516,9 @@ class VideoRenderer:
|
|
| 514 |
|
| 515 |
try:
|
| 516 |
logger.info(f"πΉ Rendering video (no audio): {filename}")
|
| 517 |
-
|
| 518 |
video_clip.write_videofile(
|
| 519 |
-
str(output_path),
|
| 520 |
-
codec="libx264",
|
| 521 |
-
fps=24,
|
| 522 |
-
verbose=False,
|
| 523 |
-
logger=None,
|
| 524 |
-
audio=False # No audio
|
| 525 |
)
|
| 526 |
|
| 527 |
return str(output_path)
|
|
@@ -583,6 +580,7 @@ class VideoRenderer:
|
|
| 583 |
try:
|
| 584 |
import librosa
|
| 585 |
import soundfile as sf
|
|
|
|
| 586 |
has_librosa = True
|
| 587 |
except ImportError:
|
| 588 |
has_librosa = False
|
|
@@ -634,7 +632,7 @@ class VideoRenderer:
|
|
| 634 |
"""Clean up temporary video/audio clips"""
|
| 635 |
for clip in clips:
|
| 636 |
try:
|
| 637 |
-
if hasattr(clip,
|
| 638 |
clip.close()
|
| 639 |
except Exception as e:
|
| 640 |
# Silently ignore cleanup errors
|
|
@@ -644,7 +642,8 @@ class VideoRenderer:
|
|
| 644 |
"""Cleanup on destruction"""
|
| 645 |
try:
|
| 646 |
import shutil
|
| 647 |
-
|
|
|
|
| 648 |
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
| 649 |
except Exception:
|
| 650 |
# Silently ignore cleanup errors
|
|
|
|
| 29 |
from utils import logger
|
| 30 |
import time
|
| 31 |
|
| 32 |
+
|
| 33 |
class VideoRenderer:
|
| 34 |
def __init__(self, config: Dict):
|
| 35 |
self.config = config
|
|
|
|
| 39 |
async def render_video_without_audio(self, assets: Dict, video_config: Optional[Dict] = None) -> tuple[str, float]:
|
| 40 |
"""
|
| 41 |
Render video composition WITHOUT audio first to get exact duration
|
| 42 |
+
|
| 43 |
Returns:
|
| 44 |
tuple: (video_path, video_duration)
|
| 45 |
"""
|
|
|
|
| 77 |
async def add_audio_to_video(self, video_path: str, assets: Dict) -> str:
|
| 78 |
"""
|
| 79 |
Add audio track to pre-rendered video (NO speedup - video is already correct duration)
|
| 80 |
+
|
| 81 |
Args:
|
| 82 |
video_path: Path to video file without audio
|
| 83 |
assets: Dictionary containing audio assets (tts_audio, background_music_local)
|
| 84 |
+
|
| 85 |
Returns:
|
| 86 |
Path to final video with audio
|
| 87 |
"""
|
|
|
|
| 90 |
|
| 91 |
# Load the video
|
| 92 |
video_clip = VideoFileClip(video_path)
|
| 93 |
+
|
| 94 |
# Prepare audio clips
|
| 95 |
audio_clips = await self._prepare_audio_clips(assets, video_clip.duration)
|
| 96 |
+
|
| 97 |
# Add audio track
|
| 98 |
video_with_audio = await self._add_audio_track(video_clip, audio_clips)
|
| 99 |
+
|
| 100 |
output_path = await self.render_video_final(video_with_audio)
|
| 101 |
+
|
| 102 |
# Cleanup
|
| 103 |
video_clip.close()
|
| 104 |
if video_with_audio != video_clip:
|
| 105 |
video_with_audio.close()
|
| 106 |
+
|
| 107 |
logger.info(f"β
Final video with audio: {output_path}")
|
| 108 |
return output_path
|
| 109 |
|
|
|
|
| 115 |
"""Render final video clip to file"""
|
| 116 |
try:
|
| 117 |
output_path = self.temp_dir / f"final_video_{int(time.time())}.mp4"
|
| 118 |
+
|
| 119 |
+
video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", verbose=False, logger=None)
|
| 120 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
video_clip.close()
|
| 122 |
return str(output_path)
|
| 123 |
+
|
| 124 |
except Exception as e:
|
| 125 |
logger.error(f"Final video render failed: {e}")
|
| 126 |
+
if "video_clip" in locals():
|
| 127 |
video_clip.close()
|
| 128 |
raise
|
| 129 |
|
|
|
|
| 146 |
# Calculate segment positions
|
| 147 |
# For an 8s video: use 6.5-8s for start, 0-1.5s for end
|
| 148 |
start_segment_begin = max(0, hook_duration - HOOK_SEGMENT_DURATION) # Last 1.5s
|
| 149 |
+
|
| 150 |
# Second half for beginning (last 1.5 seconds of hook video)
|
| 151 |
hook_start = hook_clip.subclip(start_segment_begin, hook_duration)
|
| 152 |
clips.append(("hook_start", hook_start))
|
|
|
|
| 205 |
|
| 206 |
# Hook segments should now be exactly 1.5 seconds
|
| 207 |
HOOK_DURATION = 1.5
|
| 208 |
+
|
| 209 |
for clip in video_clips:
|
| 210 |
if abs(clip.duration - HOOK_DURATION) < 0.2: # Hook segments (~1.5s with tolerance)
|
| 211 |
if hook_start is None:
|
|
|
|
| 215 |
else:
|
| 216 |
library_clips.append(clip)
|
| 217 |
|
| 218 |
+
logger.info(
|
| 219 |
+
f"β Identified: hook_start={hook_start.duration if hook_start else 0:.2f}s, "
|
| 220 |
+
f"hook_end={hook_end.duration if hook_end else 0:.2f}s, "
|
| 221 |
+
f"library_clips={len(library_clips)}"
|
| 222 |
+
)
|
| 223 |
|
| 224 |
# Calculate current library duration
|
| 225 |
library_duration = sum(clip.duration for clip in library_clips)
|
| 226 |
hook_total = (hook_start.duration if hook_start else 0) + (hook_end.duration if hook_end else 0)
|
| 227 |
+
|
| 228 |
logger.info(f"π Hook total: {hook_total:.2f}s, Library total: {library_duration:.2f}s")
|
| 229 |
+
|
| 230 |
# Target middle section duration (11-12s total - 3s hook = 8-9s middle)
|
| 231 |
target_middle_duration = TARGET_MIN_DURATION - hook_total
|
| 232 |
+
|
| 233 |
logger.info(f"π― Target middle section: {target_middle_duration:.2f}s")
|
| 234 |
|
| 235 |
# Adjust library clips to reach target middle duration
|
|
|
|
| 269 |
|
| 270 |
# Calculate total duration
|
| 271 |
total_duration = sum(clip.duration for clip in sequence_clips)
|
| 272 |
+
logger.info(
|
| 273 |
+
f"π Total video sequence duration: {total_duration:.2f}s (target: {TARGET_MIN_DURATION}-{TARGET_MAX_DURATION}s)"
|
| 274 |
+
)
|
| 275 |
|
| 276 |
# Resize all clips to 9:16 vertical
|
| 277 |
target_size = (1080, 1920)
|
|
|
|
| 299 |
if tts_clip.duration > 0:
|
| 300 |
# Trim or extend TTS to match video duration
|
| 301 |
if tts_clip.duration > target_duration:
|
| 302 |
+
logger.info(
|
| 303 |
+
f"β οΈ TTS longer than video, trimming: {tts_clip.duration:.2f}s -> {target_duration:.2f}s"
|
| 304 |
+
)
|
| 305 |
tts_clip = tts_clip.subclip(0, target_duration)
|
| 306 |
elif tts_clip.duration < target_duration:
|
| 307 |
logger.info(f"β οΈ TTS shorter than video: {tts_clip.duration:.2f}s < {target_duration:.2f}s")
|
| 308 |
+
|
| 309 |
clips.append(("tts", tts_clip))
|
| 310 |
logger.info(f"β Loaded TTS audio at FULL volume: {tts_clip.duration:.2f}s")
|
| 311 |
else:
|
|
|
|
| 314 |
except Exception as e:
|
| 315 |
logger.error(f"β Failed to load TTS audio: {e}")
|
| 316 |
|
| 317 |
+
# Load background music - INCREASED volume for better presence
|
| 318 |
if assets.get("background_music_local"):
|
| 319 |
try:
|
| 320 |
bg_clip = AudioFileClip(assets["background_music_local"])
|
|
|
|
| 323 |
if bg_clip.duration > target_duration:
|
| 324 |
bg_clip = bg_clip.subclip(0, target_duration)
|
| 325 |
logger.info(f"β Trimmed background music to {target_duration:.2f}s")
|
| 326 |
+
# Increase volume from 8% to 25% for better audibility
|
| 327 |
+
bg_clip = bg_clip.volumex(0.25)
|
| 328 |
clips.append(("background", bg_clip))
|
| 329 |
+
logger.info(f"β Loaded background music at 25% volume: {bg_clip.duration:.2f}s")
|
| 330 |
else:
|
| 331 |
logger.warning("β οΈ Background music has zero duration")
|
| 332 |
bg_clip.close()
|
|
|
|
| 372 |
|
| 373 |
try:
|
| 374 |
valid_audio_clips = [clip for clip in audio_clips if clip.duration > 0]
|
| 375 |
+
|
| 376 |
if not valid_audio_clips:
|
| 377 |
return video_clip
|
| 378 |
|
| 379 |
mixed_audio = CompositeAudioClip(valid_audio_clips)
|
| 380 |
video_with_audio = video_clip.set_audio(mixed_audio)
|
| 381 |
+
|
| 382 |
logger.info(f"β
Added audio track")
|
| 383 |
return video_with_audio
|
| 384 |
|
|
|
|
| 486 |
def _split_script_into_words(self, script: str) -> List[str]:
|
| 487 |
"""Split script into individual words"""
|
| 488 |
import re
|
| 489 |
+
|
| 490 |
script = re.sub(r"\s+", " ", script).strip()
|
| 491 |
return script.split()
|
| 492 |
|
|
|
|
| 516 |
|
| 517 |
try:
|
| 518 |
logger.info(f"πΉ Rendering video (no audio): {filename}")
|
| 519 |
+
|
| 520 |
video_clip.write_videofile(
|
| 521 |
+
str(output_path), codec="libx264", fps=24, verbose=False, logger=None, audio=False # No audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
)
|
| 523 |
|
| 524 |
return str(output_path)
|
|
|
|
| 580 |
try:
|
| 581 |
import librosa
|
| 582 |
import soundfile as sf
|
| 583 |
+
|
| 584 |
has_librosa = True
|
| 585 |
except ImportError:
|
| 586 |
has_librosa = False
|
|
|
|
| 632 |
"""Clean up temporary video/audio clips"""
|
| 633 |
for clip in clips:
|
| 634 |
try:
|
| 635 |
+
if hasattr(clip, "close"):
|
| 636 |
clip.close()
|
| 637 |
except Exception as e:
|
| 638 |
# Silently ignore cleanup errors
|
|
|
|
| 642 |
"""Cleanup on destruction"""
|
| 643 |
try:
|
| 644 |
import shutil
|
| 645 |
+
|
| 646 |
+
if hasattr(self, "temp_dir") and self.temp_dir.exists():
|
| 647 |
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
| 648 |
except Exception:
|
| 649 |
# Silently ignore cleanup errors
|