topcoderkz committed on
Commit
0c4ba75
·
1 Parent(s): e598b7d

Refactor code, remove deepseek integration

Browse files
Files changed (6) hide show
  1. batch.sh +1 -1
  2. src/api_clients.py +263 -197
  3. src/asset_selector.py +9 -7
  4. src/automation.py +15 -35
  5. src/main.py +1 -1
  6. src/video_renderer.py +44 -45
batch.sh CHANGED
@@ -1,6 +1,6 @@
1
  #!/bin/bash
2
 
3
  # Process first 5 strategies
4
- for i in {0..1}; do
5
  python src/main.py --csv content_strategies.csv --index $i --output ./outputs/videos/video_$i
6
  done
 
1
  #!/bin/bash
2
 
3
  # Process first 5 strategies
4
+ for i in {0..0}; do
5
  python src/main.py --csv content_strategies.csv --index $i --output ./outputs/videos/video_$i
6
  done
src/api_clients.py CHANGED
@@ -30,15 +30,15 @@ class APIClients:
30
  # RunwayML API configuration
31
  self.runway_api_key = config.get("runwayml_api_key") or os.getenv("RUNWAYML_API_KEY")
32
  self.runway_base_url = "https://api.dev.runwayml.com/v1"
33
-
34
  # Voice profiles for different personas
35
  self.voice_profiles = {
36
  "female_young": "en-US-Neural2-F", # Young female voice
37
  "female_mature": "en-US-Neural2-E", # Mature female voice
38
  "female_casual": "en-US-Neural2-G", # Casual female voice
39
- "male_young": "en-US-Neural2-D", # Young male voice
40
- "male_mature": "en-US-Neural2-A", # Mature male voice
41
- "male_casual": "en-US-Neural2-J", # Casual male voice
42
  }
43
 
44
  async def enhance_prompt(self, prompt: str) -> str:
@@ -60,7 +60,7 @@ class APIClients:
60
  Return only the enhanced prompt, nothing else.
61
  """
62
 
63
- model = genai.GenerativeModel("gemini-2.0-flash-exp")
64
  response = model.generate_content(enhancement_instruction)
65
 
66
  enhanced_prompt = response.text.strip()
@@ -74,39 +74,39 @@ class APIClients:
74
  async def generate_image(self, prompt: str) -> Optional[str]:
75
  """
76
  Generate image using Vertex AI Imagen 4 Ultra
77
-
78
  Args:
79
  prompt: Image generation prompt
80
-
81
  Returns:
82
  Local path to generated image or None
83
  """
84
  try:
85
- import vertexai
86
- from vertexai.preview.vision_models import ImageGenerationModel
87
 
88
- logger.info(f"🎨 Generating image with Imagen 4 Ultra: {prompt[:200]}...")
89
 
90
- vertexai.init(project=self.config.get("gcp_project_id"), location="us-central1")
91
 
92
- # Use correct Imagen 4 Ultra model name
93
- model = ImageGenerationModel.from_pretrained("imagen-4.0-ultra-generate-001")
94
 
95
- images = model.generate_images(
96
- prompt=prompt,
97
- number_of_images=1,
98
- aspect_ratio="9:16", # Vertical for TikTok/Instagram
99
- safety_filter_level="block_some",
100
- person_generation="allow_adult",
101
- )
102
 
103
- # Save to temp file
104
- import tempfile
105
- output_path = f"/tmp/hook_image_{hash(prompt)}.png"
106
- images[0].save(location=output_path, include_generation_parameters=False)
107
- # output_path = '/Users/topcoderkz/Downloads/gen4-ultra.png'
108
- logger.info(f"✓ Image generated with Imagen 4 Ultra: {output_path}")
109
- return output_path
110
 
111
  except Exception as e:
112
  logger.error(f"❌ Imagen 4 Ultra generation failed: {e}")
@@ -130,7 +130,7 @@ class APIClients:
130
 
131
  Return ONLY the caption text, nothing else."""
132
 
133
- model = genai.GenerativeModel("gemini-2.0-flash-exp")
134
  response = model.generate_content(instruction)
135
 
136
  caption = response.text.strip()
@@ -164,7 +164,7 @@ class APIClients:
164
  "video_prompt": "..."
165
  }}"""
166
 
167
- model = genai.GenerativeModel("gemini-2.0-flash-exp")
168
  response = model.generate_content(instruction)
169
 
170
  result = json.loads(response.text.strip())
@@ -182,7 +182,7 @@ class APIClients:
182
  async def generate_video(self, prompt: str, duration: int, image_url: str) -> Dict:
183
  """
184
  Generate video using RunwayML gen4_turbo ($0.25 per video / 25 credits)
185
-
186
  Args:
187
  prompt: Text prompt for video generation
188
  duration: Video duration in seconds
@@ -190,76 +190,73 @@ class APIClients:
190
  """
191
  try:
192
  logger.info(f"🎬 Generating video with gen4_turbo: {prompt[:100]}...")
193
- # return {
194
- # "video_url": 'https://dnznrvs05pmza.cloudfront.net/764d8b31-4e1f-4ba2-bf4f-360cf029e0b7.mp4?_jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJrZXlIYXNoIjoiMGIyZjMyMzc5NDA4ZTU0NCIsImJ1Y2tldCI6InJ1bndheS10YXNrLWFydGlmYWN0cyIsInN0YWdlIjoicHJvZCIsImV4cCI6MTc2MDQwMDAwMH0.5ltZPfO-gWilm_lt6sK-tPvgwJUgPluOjeeUOOIbyEE',
195
- # "task_id": '08fbc334-7d36-45c2-8b71-7f20fa075f10',
196
- # "duration": duration,
197
- # "prompt": prompt,
198
- # "status": 'SUCCEEDED',
199
- # "created_at": '2025-10-12T18:57:27.240Z',
200
- # "model": "gen4_turbo"
201
- # }
202
-
203
- headers = {
204
- "Authorization": f"Bearer {self.runway_api_key}",
205
- "Content-Type": "application/json",
206
- "X-Runway-Version": "2024-11-06",
207
  }
208
 
209
- payload = {
210
- "promptImage": image_url,
211
- "promptText": prompt[:1000],
212
- "model": "gen4_turbo", # Updated to gen4_turbo ($0.25/video)
213
- "duration": duration,
214
- "ratio": "1280:720"
215
- }
216
 
217
- async with aiohttp.ClientSession() as session:
218
- # Create task
219
- async with session.post(
220
- "https://api.dev.runwayml.com/v1/image_to_video",
221
- headers=headers,
222
- json=payload
223
- ) as response:
224
- if response.status != 200:
225
- error_text = await response.text()
226
- raise Exception(f"RunwayML error: {error_text}")
227
-
228
- task_data = await response.json()
229
- task_id = task_data["id"]
230
- logger.info(f"✓ Task created with gen4_turbo: {task_id}")
231
-
232
- # Poll for completion
233
- max_attempts = 120
234
- for attempt in range(max_attempts):
235
- await asyncio.sleep(10)
236
-
237
- async with session.get(
238
- f"https://api.dev.runwayml.com/v1/tasks/{task_id}",
239
- headers=headers
240
- ) as status_response:
241
- status_data = await status_response.json()
242
- status = status_data["status"]
243
-
244
- if status == "SUCCEEDED":
245
- video_url = status_data["output"][0]
246
- logger.info(f"✅ Video generated with gen4_turbo: {video_url}")
247
- return {
248
- "video_url": video_url,
249
- "task_id": task_id,
250
- "duration": duration,
251
- "prompt": prompt,
252
- "status": status,
253
- "created_at": status_data.get("createdAt"),
254
- "model": "gen4_turbo"
255
- }
256
- elif status == "FAILED":
257
- raise Exception(f"Generation failed: {status_data.get('failure')}")
258
- elif status == "RUNNING":
259
- progress = status_data.get("progress", 0)
260
- logger.info(f"⏳ Progress: {progress*100:.0f}%")
261
-
262
- raise Exception("Timeout waiting for video generation")
 
 
 
 
 
263
 
264
  except Exception as e:
265
  logger.error(f"❌ Video generation error: {e}")
@@ -268,12 +265,12 @@ class APIClients:
268
  async def generate_tts(self, text: str, voice_name: Optional[str] = None, duration: Optional[float] = None) -> Dict:
269
  """
270
  Generate TTS audio using Google Cloud TTS
271
-
272
  Args:
273
  text: Text to convert to speech
274
  voice_name: Voice to use (optional)
275
  duration: Target duration in seconds (optional) - will adjust speaking rate
276
-
277
  Returns:
278
  Dict with audio_url, duration, voice, text, local_path
279
  """
@@ -285,37 +282,27 @@ class APIClients:
285
  if not voice_name:
286
  voice_name = self.config.get("default_voice", "en-US-Neural2-F")
287
 
288
- # Configure synthesis
289
- synthesis_input = texttospeech.SynthesisInput(text=text)
290
  language_code = "-".join(voice_name.split("-")[:2])
291
 
292
- # CORRECTED: Proper gender mapping for US Neural2 voices
293
- male_voices = {
294
- "en-US-Neural2-A", "en-US-Neural2-D", "en-US-Neural2-I", "en-US-Neural2-J",
295
- "en-US-Studio-M" # Add other male voices if needed
296
- }
297
- female_voices = {
298
- "en-US-Neural2-C", "en-US-Neural2-E", "en-US-Neural2-F",
299
- "en-US-Neural2-G", "en-US-Neural2-H", "en-US-Studio-O",
300
- "en-US-Standard-A" # Add other female voices if needed
301
- }
302
-
303
- # Determine gender from full voice name
304
- if voice_name in male_voices:
305
  ssml_gender = texttospeech.SsmlVoiceGender.MALE
306
  logger.info(f"🎭 Using MALE voice: {voice_name}")
307
- elif voice_name in female_voices:
308
- ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
309
- logger.info(f"🎭 Using FEMALE voice: {voice_name}")
310
  else:
311
- # Default to FEMALE for unknown voices (or you could skip ssml_gender)
312
  ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
313
- logger.warning(f"🎭 Voice {voice_name} not in gender map, defaulting to FEMALE")
 
 
 
314
 
 
315
  voice = texttospeech.VoiceSelectionParams(
316
- language_code=language_code,
317
- name=voice_name,
318
- ssml_gender=ssml_gender
319
  )
320
 
321
  # Calculate speaking rate if duration is provided
@@ -323,54 +310,48 @@ class APIClients:
323
  if duration:
324
  # First, generate at normal rate to get baseline duration
325
  temp_audio_config = texttospeech.AudioConfig(
326
- audio_encoding=texttospeech.AudioEncoding.MP3,
327
- speaking_rate=1.0,
328
- pitch=0.0
329
  )
330
  temp_response = self.tts_client.synthesize_speech(
331
- input=synthesis_input,
332
- voice=voice,
333
- audio_config=temp_audio_config
334
  )
335
-
336
  # Save temp file to measure duration
337
  import tempfile
 
338
  temp_path = f"/tmp/tts_temp_{hash(text)}.mp3"
339
  with open(temp_path, "wb") as out:
340
  out.write(temp_response.audio_content)
341
-
342
  # Measure actual duration
343
  try:
344
  from mutagen.mp3 import MP3
 
345
  audio = MP3(temp_path)
346
  baseline_duration = audio.info.length
347
  except ImportError:
348
  # Estimate if mutagen not available
349
  word_count = len(text.split())
350
  baseline_duration = (word_count / 150) * 60
351
-
352
  # Calculate required speaking rate
353
  speaking_rate = baseline_duration / duration
354
  speaking_rate = max(0.25, min(4.0, speaking_rate)) # Clamp to valid range
355
-
356
- logger.info(f"📊 Baseline: {baseline_duration:.2f}s, Target: {duration:.2f}s, Rate: {speaking_rate:.2f}x")
357
-
 
 
358
  # Clean up temp file
359
  if os.path.exists(temp_path):
360
  os.remove(temp_path)
361
 
362
  # Generate final audio with adjusted speaking rate
363
  audio_config = texttospeech.AudioConfig(
364
- audio_encoding=texttospeech.AudioEncoding.MP3,
365
- speaking_rate=speaking_rate,
366
- pitch=0.0
367
  )
368
 
369
- response = self.tts_client.synthesize_speech(
370
- input=synthesis_input,
371
- voice=voice,
372
- audio_config=audio_config
373
- )
374
 
375
  # Save audio
376
  audio_filename = f"tts_{hash(text)}.mp3"
@@ -382,12 +363,14 @@ class APIClients:
382
  # Get actual duration
383
  try:
384
  from mutagen.mp3 import MP3
 
385
  audio = MP3(audio_path)
386
  actual_duration = audio.info.length
387
  logger.info(f"✓ TTS audio duration: {actual_duration:.2f}s")
388
  except ImportError:
389
  try:
390
  from pydub import AudioSegment
 
391
  audio = AudioSegment.from_mp3(audio_path)
392
  actual_duration = len(audio) / 1000.0
393
  logger.info(f"✓ TTS audio duration: {actual_duration:.2f}s (via pydub)")
@@ -395,7 +378,13 @@ class APIClients:
395
  actual_duration = duration if duration else (len(text.split()) / 150) * 60
396
  logger.warning(f"⚠️ Estimated duration: {actual_duration:.2f}s")
397
 
398
- # Upload to GCS
 
 
 
 
 
 
399
  audio_url = await self.store_in_gcs(audio_path, "audio")
400
 
401
  logger.info(f"✅ TTS generated successfully: {audio_url}")
@@ -406,13 +395,73 @@ class APIClients:
406
  "voice": voice_name,
407
  "text": text,
408
  "local_path": audio_path,
409
- "speaking_rate": speaking_rate
410
  }
411
 
412
  except Exception as e:
413
  logger.error(f"❌ Error generating TTS: {e}")
414
  raise
415
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  async def download_file(self, url: str, filename: str) -> str:
417
  """Download file from URL to local temporary file"""
418
  import aiohttp
@@ -435,61 +484,83 @@ class APIClients:
435
  logger.error(f"Failed to download {url}: {e}")
436
  raise
437
 
438
- async def select_voice_for_persona(self, gemini_prompt: str) -> str:
439
- """Select appropriate voice based on persona with CORRECT gender mapping"""
440
-
441
- # Default to female voice for most content
442
- female_voices = [
443
- "en-US-Neural2-C", # Female (was missing from your list)
444
- "en-US-Neural2-E", # Female (was missing from your list)
445
- "en-US-Neural2-F", # Female βœ“
446
- "en-US-Neural2-G", # Female (was missing from your list)
447
- "en-US-Neural2-H", # Female βœ“
448
- "en-US-Studio-O", # Female βœ“
449
- "en-US-Standard-A", # Female βœ“
450
- ]
451
-
452
- male_voices = [
453
- "en-US-Neural2-A", # Male (was missing from your list)
454
- "en-US-Neural2-D", # Male βœ“
455
- "en-US-Neural2-I", # Male (was missing from your list)
456
- "en-US-Neural2-J", # Male (was missing from your list)
457
- "en-US-Studio-M", # Male βœ“
458
- ]
459
-
460
- # Simple persona detection from prompt
461
- prompt_lower = gemini_prompt.lower()
462
-
463
- # If prompt suggests male persona, use male voice
464
- if any(word in prompt_lower for word in ["male", "man", "boy", "gentleman", "his", "he "]):
465
- selected_voice = male_voices[0] # Use first male voice
466
- logger.info(f"🎭 Selected MALE voice for persona: {selected_voice}")
467
- return selected_voice
468
- else:
469
- # Default to female voice
470
- selected_voice = female_voices[0] # Use first female voice
471
- logger.info(f"🎭 Selected FEMALE voice for persona: {selected_voice}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  return selected_voice
473
 
 
 
 
 
474
  async def upload_captions_to_gcs(self, captions_text: str, video_filename: str) -> Optional[str]:
475
  """
476
  Upload captions to GCS bucket with same name as video (but .txt extension)
477
-
478
  Args:
479
  captions_text: Caption text content
480
  video_filename: Name of the video file (e.g., "final_video_abc123.mp4")
481
-
482
  Returns:
483
  GCS signed URL of uploaded captions or None
484
  """
485
  try:
486
  # Create captions filename (replace .mp4 with .txt)
487
  captions_filename = os.path.splitext(video_filename)[0] + ".txt"
488
-
489
  logger.info(f"☁️ Uploading captions to GCS: {captions_filename}")
490
 
491
  # Save captions to temp file
492
  import tempfile
 
493
  temp_path = os.path.join(tempfile.gettempdir(), captions_filename)
494
  with open(temp_path, "w", encoding="utf-8") as f:
495
  f.write(captions_text)
@@ -498,24 +569,21 @@ class APIClients:
498
  blob_name = f"captions/{captions_filename}"
499
  blob = self.gcs_bucket.blob(blob_name)
500
  blob.content_type = "text/plain"
501
-
502
  logger.info(f"Uploading {captions_filename} to gs://{self.gcs_bucket.name}/{blob_name}")
503
  blob.upload_from_filename(temp_path)
504
 
505
  # Generate signed URL (valid for 7 days)
506
  from datetime import timedelta
507
- captions_url = blob.generate_signed_url(
508
- version="v4",
509
- expiration=timedelta(days=7),
510
- method="GET"
511
- )
512
-
513
  logger.info(f"✅ Captions uploaded to GCS: {captions_url[:100]}...")
514
-
515
  # Clean up temp file
516
  if os.path.exists(temp_path):
517
  os.remove(temp_path)
518
-
519
  return captions_url
520
 
521
  except Exception as e:
@@ -541,6 +609,7 @@ class APIClients:
541
 
542
  try:
543
  from google.cloud.exceptions import NotFound
 
544
  try:
545
  self.gcs_bucket.exists()
546
  health["gcs"] = True
@@ -589,16 +658,13 @@ class APIClients:
589
 
590
  file_ext = os.path.splitext(filename)[1]
591
  blob.content_type = content_types.get(file_ext, "application/octet-stream")
592
-
593
  logger.info(f"Uploading {filename} to gs://{self.gcs_bucket.name}/{blob_name}")
594
  blob.upload_from_filename(file_path)
595
 
596
  from datetime import timedelta
597
- signed_url = blob.generate_signed_url(
598
- version="v4",
599
- expiration=timedelta(days=7),
600
- method="GET"
601
- )
602
 
603
  logger.info(f"✅ File uploaded with signed URL: {signed_url[:100]}...")
604
  return signed_url
 
30
  # RunwayML API configuration
31
  self.runway_api_key = config.get("runwayml_api_key") or os.getenv("RUNWAYML_API_KEY")
32
  self.runway_base_url = "https://api.dev.runwayml.com/v1"
33
+
34
  # Voice profiles for different personas
35
  self.voice_profiles = {
36
  "female_young": "en-US-Neural2-F", # Young female voice
37
  "female_mature": "en-US-Neural2-E", # Mature female voice
38
  "female_casual": "en-US-Neural2-G", # Casual female voice
39
+ "male_young": "en-US-Neural2-D", # Young male voice
40
+ "male_mature": "en-US-Neural2-A", # Mature male voice
41
+ "male_casual": "en-US-Neural2-J", # Casual male voice
42
  }
43
 
44
  async def enhance_prompt(self, prompt: str) -> str:
 
60
  Return only the enhanced prompt, nothing else.
61
  """
62
 
63
+ model = genai.GenerativeModel("gemini-2.5-flash")
64
  response = model.generate_content(enhancement_instruction)
65
 
66
  enhanced_prompt = response.text.strip()
 
74
  async def generate_image(self, prompt: str) -> Optional[str]:
75
  """
76
  Generate image using Vertex AI Imagen 4 Ultra
77
+
78
  Args:
79
  prompt: Image generation prompt
80
+
81
  Returns:
82
  Local path to generated image or None
83
  """
84
  try:
85
+ # import vertexai
86
+ # from vertexai.preview.vision_models import ImageGenerationModel
87
 
88
+ # logger.info(f"🎨 Generating image with Imagen 4 Ultra: {prompt[:200]}...")
89
 
90
+ # vertexai.init(project=self.config.get("gcp_project_id"), location="us-central1")
91
 
92
+ # # Use correct Imagen 4 Ultra model name
93
+ # model = ImageGenerationModel.from_pretrained("imagen-4.0-ultra-generate-001")
94
 
95
+ # images = model.generate_images(
96
+ # prompt=prompt,
97
+ # number_of_images=1,
98
+ # aspect_ratio="9:16",
99
+ # safety_filter_level="block_some",
100
+ # person_generation="allow_adult",
101
+ # )
102
 
103
+ # # Save to temp file
104
+ # import tempfile
105
+ # output_path = f"/tmp/hook_image_{hash(prompt)}.png"
106
+ # images[0].save(location=output_path, include_generation_parameters=False)
107
+ # logger.info(f"✓ Image generated with Imagen 4 Ultra (9:16): {output_path}")
108
+ # return output_path
109
+ return "/tmp/hook_image_391248835665466790.png"
110
 
111
  except Exception as e:
112
  logger.error(f"❌ Imagen 4 Ultra generation failed: {e}")
 
130
 
131
  Return ONLY the caption text, nothing else."""
132
 
133
+ model = genai.GenerativeModel("gemini-2.5-flash")
134
  response = model.generate_content(instruction)
135
 
136
  caption = response.text.strip()
 
164
  "video_prompt": "..."
165
  }}"""
166
 
167
+ model = genai.GenerativeModel("gemini-2.5-flash")
168
  response = model.generate_content(instruction)
169
 
170
  result = json.loads(response.text.strip())
 
182
  async def generate_video(self, prompt: str, duration: int, image_url: str) -> Dict:
183
  """
184
  Generate video using RunwayML gen4_turbo ($0.25 per video / 25 credits)
185
+
186
  Args:
187
  prompt: Text prompt for video generation
188
  duration: Video duration in seconds
 
190
  """
191
  try:
192
  logger.info(f"🎬 Generating video with gen4_turbo: {prompt[:100]}...")
193
+ return {
194
+ "video_url": "https://dnznrvs05pmza.cloudfront.net/4a582f22-9dd3-456e-a0a5-8036ed2c6b2c.mp4?_jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJrZXlIYXNoIjoiNGVlNGI1MjIxNGYxYjJjNyIsImJ1Y2tldCI6InJ1bndheS10YXNrLWFydGlmYWN0cyIsInN0YWdlIjoicHJvZCIsImV4cCI6MTc2MDQ4NjQwMH0.FWm7vx_lQjkg4fk8stDQI2gt-ahr95qBPREDyWhvgoI",
195
+ "task_id": "61cdffe3-e84e-4c45-a611-bb9c48e6a485",
196
+ "duration": 3,
197
+ "prompt": prompt,
198
+ "status": "SUCCEEDED",
199
+ "created_at": "2025-10-13T22:56:06.290Z",
200
+ "model": "gen4_turbo",
 
 
 
 
 
 
201
  }
202
 
203
+ # headers = {
204
+ # "Authorization": f"Bearer {self.runway_api_key}",
205
+ # "Content-Type": "application/json",
206
+ # "X-Runway-Version": "2024-11-06",
207
+ # }
 
 
208
 
209
+ # payload = {
210
+ # "promptImage": image_url,
211
+ # "promptText": prompt[:1000],
212
+ # "model": "gen4_turbo", # Updated to gen4_turbo ($0.25/video)
213
+ # "duration": duration,
214
+ # "ratio": "720:1280",
215
+ # }
216
+
217
+ # async with aiohttp.ClientSession() as session:
218
+ # # Create task
219
+ # async with session.post(
220
+ # "https://api.dev.runwayml.com/v1/image_to_video", headers=headers, json=payload
221
+ # ) as response:
222
+ # if response.status != 200:
223
+ # error_text = await response.text()
224
+ # raise Exception(f"RunwayML error: {error_text}")
225
+
226
+ # task_data = await response.json()
227
+ # task_id = task_data["id"]
228
+ # logger.info(f"✓ Task created with gen4_turbo: {task_id}")
229
+
230
+ # # Poll for completion
231
+ # max_attempts = 120
232
+ # for attempt in range(max_attempts):
233
+ # await asyncio.sleep(10)
234
+
235
+ # async with session.get(
236
+ # f"https://api.dev.runwayml.com/v1/tasks/{task_id}", headers=headers
237
+ # ) as status_response:
238
+ # status_data = await status_response.json()
239
+ # status = status_data["status"]
240
+
241
+ # if status == "SUCCEEDED":
242
+ # video_url = status_data["output"][0]
243
+ # logger.info(f"✅ Video generated with gen4_turbo: {video_url}")
244
+ # return {
245
+ # "video_url": video_url,
246
+ # "task_id": task_id,
247
+ # "duration": duration,
248
+ # "prompt": prompt,
249
+ # "status": status,
250
+ # "created_at": status_data.get("createdAt"),
251
+ # "model": "gen4_turbo",
252
+ # }
253
+ # elif status == "FAILED":
254
+ # raise Exception(f"Generation failed: {status_data.get('failure')}")
255
+ # elif status == "RUNNING":
256
+ # progress = status_data.get("progress", 0)
257
+ # logger.info(f"⏳ Progress: {progress*100:.0f}%")
258
+
259
+ # raise Exception("Timeout waiting for video generation")
260
 
261
  except Exception as e:
262
  logger.error(f"❌ Video generation error: {e}")
 
265
  async def generate_tts(self, text: str, voice_name: Optional[str] = None, duration: Optional[float] = None) -> Dict:
266
  """
267
  Generate TTS audio using Google Cloud TTS
268
+
269
  Args:
270
  text: Text to convert to speech
271
  voice_name: Voice to use (optional)
272
  duration: Target duration in seconds (optional) - will adjust speaking rate
273
+
274
  Returns:
275
  Dict with audio_url, duration, voice, text, local_path
276
  """
 
282
  if not voice_name:
283
  voice_name = self.config.get("default_voice", "en-US-Neural2-F")
284
 
285
+ # IMPORTANT: Determine gender FIRST before creating any voice objects
 
286
  language_code = "-".join(voice_name.split("-")[:2])
287
 
288
+ # Male voices: Neural2-A, Neural2-D, Neural2-I, Neural2-J
289
+ # Female voices: Neural2-C, Neural2-E, Neural2-F, Neural2-G, Neural2-H
290
+ male_voice_suffixes = ["Neural2-A", "Neural2-D", "Neural2-I", "Neural2-J"]
291
+ voice_suffix = "-".join(voice_name.split("-")[2:]) # Get "Neural2-A" part
292
+
293
+ if voice_suffix in male_voice_suffixes:
 
 
 
 
 
 
 
294
  ssml_gender = texttospeech.SsmlVoiceGender.MALE
295
  logger.info(f"🎭 Using MALE voice: {voice_name}")
 
 
 
296
  else:
 
297
  ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
298
+ logger.info(f"🎭 Using FEMALE voice: {voice_name}")
299
+
300
+ # Configure synthesis
301
+ synthesis_input = texttospeech.SynthesisInput(text=text)
302
 
303
+ # Create voice object with correct gender
304
  voice = texttospeech.VoiceSelectionParams(
305
+ language_code=language_code, name=voice_name, ssml_gender=ssml_gender
 
 
306
  )
307
 
308
  # Calculate speaking rate if duration is provided
 
310
  if duration:
311
  # First, generate at normal rate to get baseline duration
312
  temp_audio_config = texttospeech.AudioConfig(
313
+ audio_encoding=texttospeech.AudioEncoding.MP3, speaking_rate=1.0, pitch=0.0
 
 
314
  )
315
  temp_response = self.tts_client.synthesize_speech(
316
+ input=synthesis_input, voice=voice, audio_config=temp_audio_config
 
 
317
  )
318
+
319
  # Save temp file to measure duration
320
  import tempfile
321
+
322
  temp_path = f"/tmp/tts_temp_{hash(text)}.mp3"
323
  with open(temp_path, "wb") as out:
324
  out.write(temp_response.audio_content)
325
+
326
  # Measure actual duration
327
  try:
328
  from mutagen.mp3 import MP3
329
+
330
  audio = MP3(temp_path)
331
  baseline_duration = audio.info.length
332
  except ImportError:
333
  # Estimate if mutagen not available
334
  word_count = len(text.split())
335
  baseline_duration = (word_count / 150) * 60
336
+
337
  # Calculate required speaking rate
338
  speaking_rate = baseline_duration / duration
339
  speaking_rate = max(0.25, min(4.0, speaking_rate)) # Clamp to valid range
340
+
341
+ logger.info(
342
+ f"📊 Baseline: {baseline_duration:.2f}s, Target: {duration:.2f}s, Rate: {speaking_rate:.2f}x"
343
+ )
344
+
345
  # Clean up temp file
346
  if os.path.exists(temp_path):
347
  os.remove(temp_path)
348
 
349
  # Generate final audio with adjusted speaking rate
350
  audio_config = texttospeech.AudioConfig(
351
+ audio_encoding=texttospeech.AudioEncoding.MP3, speaking_rate=speaking_rate, pitch=0.0
 
 
352
  )
353
 
354
+ response = self.tts_client.synthesize_speech(input=synthesis_input, voice=voice, audio_config=audio_config)
 
 
 
 
355
 
356
  # Save audio
357
  audio_filename = f"tts_{hash(text)}.mp3"
 
363
  # Get actual duration
364
  try:
365
  from mutagen.mp3 import MP3
366
+
367
  audio = MP3(audio_path)
368
  actual_duration = audio.info.length
369
  logger.info(f"✓ TTS audio duration: {actual_duration:.2f}s")
370
  except ImportError:
371
  try:
372
  from pydub import AudioSegment
373
+
374
  audio = AudioSegment.from_mp3(audio_path)
375
  actual_duration = len(audio) / 1000.0
376
  logger.info(f"✓ TTS audio duration: {actual_duration:.2f}s (via pydub)")
 
378
  actual_duration = duration if duration else (len(text.split()) / 150) * 60
379
  logger.warning(f"⚠️ Estimated duration: {actual_duration:.2f}s")
380
 
381
+ # IMPORTANT: Normalize audio to make it louder
382
+ normalized_path = await self._normalize_audio(audio_path)
383
+ if normalized_path:
384
+ audio_path = normalized_path
385
+ logger.info(f"✅ Audio normalized to -10 to -12 LUFS")
386
+
387
+ # Upload to GCS (upload normalized version)
388
  audio_url = await self.store_in_gcs(audio_path, "audio")
389
 
390
  logger.info(f"✅ TTS generated successfully: {audio_url}")
 
395
  "voice": voice_name,
396
  "text": text,
397
  "local_path": audio_path,
398
+ "speaking_rate": speaking_rate,
399
  }
400
 
401
  except Exception as e:
402
  logger.error(f"❌ Error generating TTS: {e}")
403
  raise
404
+
405
+ async def _normalize_audio(self, audio_path: str) -> Optional[str]:
406
+ """
407
+ Normalize audio to -10 to -12 LUFS with peaks at -1 dBFS
408
+ Uses pydub for proper loudness normalization
409
+
410
+ Args:
411
+ audio_path: Path to input audio file
412
+
413
+ Returns:
414
+ Path to normalized audio file or None if failed
415
+ """
416
+ try:
417
+ from pydub import AudioSegment
418
+ from pydub.effects import normalize
419
+ import tempfile
420
+
421
+ logger.info(f"🔊 Normalizing audio: {audio_path}")
422
+
423
+ # Load audio
424
+ audio = AudioSegment.from_mp3(audio_path)
425
+
426
+ # Step 1: Normalize peaks to -1 dBFS (prevents clipping)
427
+ audio = normalize(audio, headroom=1.0)
428
+
429
+ # Step 2: Boost to target loudness (-10 to -12 LUFS ≈ -11 dBFS)
430
+ current_dBFS = audio.dBFS
431
+ target_dBFS = -11.0 # Target around -11 LUFS (loud and clear)
432
+
433
+ gain_needed = target_dBFS - current_dBFS
434
+
435
+ # Apply gain (THIS IS WHERE VOLUME INCREASES)
436
+ if gain_needed > 0:
437
+ audio = audio + gain_needed # ← INCREASES VOLUME
438
+ logger.info(f"✓ Boosted audio by {gain_needed:.1f} dB")
439
+
440
+ # Ensure no clipping (peaks at -1 dBFS max)
441
+ if audio.max_dBFS > -1.0:
442
+ reduction = audio.max_dBFS + 1.0
443
+ audio = audio - reduction
444
+ logger.info(f"✓ Reduced peaks by {reduction:.1f} dB to prevent clipping")
445
+
446
+ # Save normalized audio
447
+ normalized_path = audio_path.replace(".mp3", "_normalized.mp3")
448
+ audio.export(normalized_path, format="mp3", bitrate="192k")
449
+
450
+ logger.info(f"✅ Audio normalized: {audio.dBFS:.1f} dBFS (target: -11 LUFS)")
451
+
452
+ # Remove original file
453
+ if os.path.exists(audio_path):
454
+ os.remove(audio_path)
455
+
456
+ return normalized_path
457
+
458
+ except ImportError:
459
+ logger.warning("⚠️ pydub not available, skipping audio normalization")
460
+ return None
461
+ except Exception as e:
462
+ logger.error(f"❌ Audio normalization failed: {e}")
463
+ return None
464
+
465
  async def download_file(self, url: str, filename: str) -> str:
466
  """Download file from URL to local temporary file"""
467
  import aiohttp
 
484
  logger.error(f"Failed to download {url}: {e}")
485
  raise
486
 
487
+ async def select_voice_for_persona(self, image_prompt: str) -> str:
488
+ """
489
+ Select appropriate voice based on image prompt/description
490
+ Uses Gemini to analyze the persona and select matching voice
491
+
492
+ Args:
493
+ image_prompt: Description of the person in the image
494
+
495
+ Returns:
496
+ Voice name (e.g., "en-US-Neural2-F")
497
+ """
498
+ try:
499
+ logger.info(f"🎭 Analyzing persona for voice selection: {image_prompt[:100]}...")
500
+
501
+ analysis_prompt = f"""Analyze this image description and determine the persona:
502
+
503
+ Image Description: {image_prompt}
504
+
505
+ Determine:
506
+ 1. Gender (male/female)
507
+ 2. Age range (young: 18-30, mature: 30-50)
508
+ 3. Style (casual/professional)
509
+
510
+ Return ONLY valid JSON:
511
+ {{
512
+ "gender": "female",
513
+ "age": "young",
514
+ "style": "casual"
515
+ }}"""
516
+
517
+ model = genai.GenerativeModel("gemini-2.5-flash")
518
+ response = model.generate_content(analysis_prompt)
519
+
520
+ # Parse response
521
+ response_text = response.text.strip()
522
+ if response_text.startswith("```"):
523
+ response_text = response_text.split("```")[1]
524
+ if response_text.startswith("json"):
525
+ response_text = response_text[4:]
526
+ response_text = response_text.strip()
527
+
528
+ persona = json.loads(response_text)
529
+
530
+ # Select voice based on persona
531
+ gender = persona.get("gender", "female")
532
+ age = persona.get("age", "young")
533
+
534
+ voice_key = f"{gender}_{age}"
535
+ selected_voice = self.voice_profiles.get(voice_key, self.voice_profiles["female_young"])
536
+
537
+ logger.info(f"✓ Selected voice: {selected_voice} for {gender}/{age} persona")
538
  return selected_voice
539
 
540
+ except Exception as e:
541
+ logger.error(f"❌ Voice selection failed: {e}, using default")
542
+ return self.voice_profiles["female_young"]
543
+
544
  async def upload_captions_to_gcs(self, captions_text: str, video_filename: str) -> Optional[str]:
545
  """
546
  Upload captions to GCS bucket with same name as video (but .txt extension)
547
+
548
  Args:
549
  captions_text: Caption text content
550
  video_filename: Name of the video file (e.g., "final_video_abc123.mp4")
551
+
552
  Returns:
553
  GCS signed URL of uploaded captions or None
554
  """
555
  try:
556
  # Create captions filename (replace .mp4 with .txt)
557
  captions_filename = os.path.splitext(video_filename)[0] + ".txt"
558
+
559
  logger.info(f"☁️ Uploading captions to GCS: {captions_filename}")
560
 
561
  # Save captions to temp file
562
  import tempfile
563
+
564
  temp_path = os.path.join(tempfile.gettempdir(), captions_filename)
565
  with open(temp_path, "w", encoding="utf-8") as f:
566
  f.write(captions_text)
 
569
  blob_name = f"captions/{captions_filename}"
570
  blob = self.gcs_bucket.blob(blob_name)
571
  blob.content_type = "text/plain"
572
+
573
  logger.info(f"Uploading {captions_filename} to gs://{self.gcs_bucket.name}/{blob_name}")
574
  blob.upload_from_filename(temp_path)
575
 
576
  # Generate signed URL (valid for 7 days)
577
  from datetime import timedelta
578
+
579
+ captions_url = blob.generate_signed_url(version="v4", expiration=timedelta(days=7), method="GET")
580
+
 
 
 
581
  logger.info(f"✅ Captions uploaded to GCS: {captions_url[:100]}...")
582
+
583
  # Clean up temp file
584
  if os.path.exists(temp_path):
585
  os.remove(temp_path)
586
+
587
  return captions_url
588
 
589
  except Exception as e:
 
609
 
610
  try:
611
  from google.cloud.exceptions import NotFound
612
+
613
  try:
614
  self.gcs_bucket.exists()
615
  health["gcs"] = True
 
658
 
659
  file_ext = os.path.splitext(filename)[1]
660
  blob.content_type = content_types.get(file_ext, "application/octet-stream")
661
+
662
  logger.info(f"Uploading {filename} to gs://{self.gcs_bucket.name}/{blob_name}")
663
  blob.upload_from_filename(file_path)
664
 
665
  from datetime import timedelta
666
+
667
+ signed_url = blob.generate_signed_url(version="v4", expiration=timedelta(days=7), method="GET")
 
 
 
668
 
669
  logger.info(f"✅ File uploaded with signed URL: {signed_url[:100]}...")
670
  return signed_url
src/asset_selector.py CHANGED
@@ -13,7 +13,7 @@ class AssetSelector:
13
  self.config = config
14
  self.video_library = self._load_video_library()
15
  self.audio_library = self._load_audio_library()
16
-
17
  # Track current background music index for sequential selection
18
  self.current_audio_index = 0
19
 
@@ -143,7 +143,7 @@ class AssetSelector:
143
  }}
144
  """
145
 
146
- model = genai.GenerativeModel("gemini-2.0-flash-exp")
147
  response = model.generate_content(prompt)
148
 
149
  response_text = response.text.strip()
@@ -229,14 +229,16 @@ class AssetSelector:
229
 
230
  # Select current index
231
  selected = self.audio_library[self.current_audio_index]
232
-
233
- logger.info(f"🎵 Selected background music #{self.current_audio_index + 1}/{len(self.audio_library)}: {selected}")
234
-
 
 
235
  # Increment index for next call (loop back to start if needed)
236
  self.current_audio_index = (self.current_audio_index + 1) % len(self.audio_library)
237
-
238
  return selected
239
-
240
  def reset_audio_index(self):
241
  """Reset audio index to start from beginning (useful for batch processing)"""
242
  self.current_audio_index = 0
 
13
  self.config = config
14
  self.video_library = self._load_video_library()
15
  self.audio_library = self._load_audio_library()
16
+
17
  # Track current background music index for sequential selection
18
  self.current_audio_index = 0
19
 
 
143
  }}
144
  """
145
 
146
+ model = genai.GenerativeModel("gemini-2.5-pro")
147
  response = model.generate_content(prompt)
148
 
149
  response_text = response.text.strip()
 
229
 
230
  # Select current index
231
  selected = self.audio_library[self.current_audio_index]
232
+
233
+ logger.info(
234
+ f"🎵 Selected background music #{self.current_audio_index + 1}/{len(self.audio_library)}: {selected}"
235
+ )
236
+
237
  # Increment index for next call (loop back to start if needed)
238
  self.current_audio_index = (self.current_audio_index + 1) % len(self.audio_library)
239
+
240
  return selected
241
+
242
  def reset_audio_index(self):
243
  """Reset audio index to start from beginning (useful for batch processing)"""
244
  self.current_audio_index = 0
src/automation.py CHANGED
@@ -90,6 +90,7 @@ class ContentAutomation:
90
  except Exception as e:
91
  logger.error(f"❌ Demo failed: {e}")
92
  import traceback
 
93
  logger.error(f"📋 Debug: {traceback.format_exc()}")
94
  return False
95
 
@@ -120,43 +121,33 @@ class ContentAutomation:
120
 
121
  # STEP 3: Render video WITHOUT audio to get exact duration
122
  logger.info("\n🎬 STEP 3: Render Video (Without Audio)")
123
- video_no_audio_path, video_duration = await self.video_renderer.render_video_without_audio(
124
- visual_assets
125
- )
126
  logger.info(f"✅ Video rendered (no audio): {video_duration:.2f}s")
127
 
128
  # STEP 4: Select voice based on hook video persona
129
  logger.info("\n🎭 STEP 4: Select Voice for Persona")
130
- selected_voice = await self.api_clients.select_voice_for_persona(
131
- content_strategy.get("gemini_prompt", "")
132
- )
133
 
134
  # STEP 5: Generate TTS with EXACT video duration and matched voice
135
  logger.info(f"\n🎙️ STEP 5: Generate TTS (Target: {video_duration:.2f}s, Voice: {selected_voice})")
136
  tts_audio = await self.api_clients.generate_tts(
137
- text=tts_script,
138
- duration=video_duration,
139
- voice_name=selected_voice
140
  )
141
  visual_assets["tts_audio"] = tts_audio
142
- logger.info(f"✅ TTS generated: {tts_audio['duration']:.2f}s at {tts_audio.get('speaking_rate', 1.0):.2f}x rate")
 
 
143
 
144
  # STEP 6: Select and download background music (sequential)
145
  logger.info("\n🎵 STEP 6: Background Music (Sequential)")
146
  visual_assets["background_music_url"] = self.asset_selector.select_background_music()
147
  await self._download_to_local(
148
- visual_assets["background_music_url"],
149
- "background_music.mp3",
150
- visual_assets,
151
- "background_music_local"
152
  )
153
 
154
  # STEP 7: Add audio to video
155
  logger.info("\n🔊 STEP 7: Add Audio to Video")
156
- final_video_path = await self.video_renderer.add_audio_to_video(
157
- video_no_audio_path,
158
- visual_assets
159
- )
160
 
161
  # STEP 8: Upload to cloud storage
162
  logger.info("\n☁️ STEP 8: Cloud Storage Upload")
@@ -193,6 +184,7 @@ class ContentAutomation:
193
  elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
194
  logger.error(f"\n❌ Pipeline failed after {elapsed_time:.2f}s: {e}")
195
  import traceback
 
196
  logger.error(traceback.format_exc())
197
 
198
  return {"success": False, "error": str(e), "duration": elapsed_time}
@@ -231,15 +223,13 @@ class ContentAutomation:
231
  image_path = await self.api_clients.generate_image(strategy["gemini_prompt"])
232
  if not image_path:
233
  raise Exception("Image generation failed")
234
-
235
  # Step 2: Upload image to GCS
236
  image_url = await self.api_clients.store_in_gcs(image_path, "image")
237
-
238
  # Step 3: Generate video using gen4_turbo
239
  video_data = await self.api_clients.generate_video(
240
- prompt=strategy["runway_prompt"],
241
- image_url=image_url,
242
- duration=strategy.get("duration", 3)
243
  )
244
 
245
  video_data["captions"] = captions
@@ -259,23 +249,13 @@ class ContentAutomation:
259
  # Download hook video
260
  if assets.get("hook_video") and assets["hook_video"].get("video_url"):
261
  download_tasks.append(
262
- self._download_to_local(
263
- assets["hook_video"]["video_url"],
264
- "hook_video.mp4",
265
- assets["hook_video"]
266
- )
267
  )
268
 
269
  # Download library videos
270
  for i, video in enumerate(assets.get("selected_videos", [])):
271
  if video.get("url"):
272
- download_tasks.append(
273
- self._download_to_local(
274
- video["url"],
275
- f"library_video_{i}.mp4",
276
- video
277
- )
278
- )
279
 
280
  if download_tasks:
281
  await asyncio.gather(*download_tasks, return_exceptions=True)
 
90
  except Exception as e:
91
  logger.error(f"❌ Demo failed: {e}")
92
  import traceback
93
+
94
  logger.error(f"📋 Debug: {traceback.format_exc()}")
95
  return False
96
 
 
121
 
122
  # STEP 3: Render video WITHOUT audio to get exact duration
123
  logger.info("\n🎬 STEP 3: Render Video (Without Audio)")
124
+ video_no_audio_path, video_duration = await self.video_renderer.render_video_without_audio(visual_assets)
 
 
125
  logger.info(f"✅ Video rendered (no audio): {video_duration:.2f}s")
126
 
127
  # STEP 4: Select voice based on hook video persona
128
  logger.info("\n🎭 STEP 4: Select Voice for Persona")
129
+ selected_voice = await self.api_clients.select_voice_for_persona(content_strategy.get("gemini_prompt", ""))
 
 
130
 
131
  # STEP 5: Generate TTS with EXACT video duration and matched voice
132
  logger.info(f"\n🎙️ STEP 5: Generate TTS (Target: {video_duration:.2f}s, Voice: {selected_voice})")
133
  tts_audio = await self.api_clients.generate_tts(
134
+ text=tts_script, duration=video_duration, voice_name=selected_voice
 
 
135
  )
136
  visual_assets["tts_audio"] = tts_audio
137
+ logger.info(
138
+ f"✅ TTS generated: {tts_audio['duration']:.2f}s at {tts_audio.get('speaking_rate', 1.0):.2f}x rate"
139
+ )
140
 
141
  # STEP 6: Select and download background music (sequential)
142
  logger.info("\n🎵 STEP 6: Background Music (Sequential)")
143
  visual_assets["background_music_url"] = self.asset_selector.select_background_music()
144
  await self._download_to_local(
145
+ visual_assets["background_music_url"], "background_music.mp3", visual_assets, "background_music_local"
 
 
 
146
  )
147
 
148
  # STEP 7: Add audio to video
149
  logger.info("\n🔊 STEP 7: Add Audio to Video")
150
+ final_video_path = await self.video_renderer.add_audio_to_video(video_no_audio_path, visual_assets)
 
 
 
151
 
152
  # STEP 8: Upload to cloud storage
153
  logger.info("\n☁️ STEP 8: Cloud Storage Upload")
 
184
  elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
185
  logger.error(f"\n❌ Pipeline failed after {elapsed_time:.2f}s: {e}")
186
  import traceback
187
+
188
  logger.error(traceback.format_exc())
189
 
190
  return {"success": False, "error": str(e), "duration": elapsed_time}
 
223
  image_path = await self.api_clients.generate_image(strategy["gemini_prompt"])
224
  if not image_path:
225
  raise Exception("Image generation failed")
226
+
227
  # Step 2: Upload image to GCS
228
  image_url = await self.api_clients.store_in_gcs(image_path, "image")
229
+
230
  # Step 3: Generate video using gen4_turbo
231
  video_data = await self.api_clients.generate_video(
232
+ prompt=strategy["runway_prompt"], image_url=image_url, duration=strategy.get("duration", 3)
 
 
233
  )
234
 
235
  video_data["captions"] = captions
 
249
  # Download hook video
250
  if assets.get("hook_video") and assets["hook_video"].get("video_url"):
251
  download_tasks.append(
252
+ self._download_to_local(assets["hook_video"]["video_url"], "hook_video.mp4", assets["hook_video"])
 
 
 
 
253
  )
254
 
255
  # Download library videos
256
  for i, video in enumerate(assets.get("selected_videos", [])):
257
  if video.get("url"):
258
+ download_tasks.append(self._download_to_local(video["url"], f"library_video_{i}.mp4", video))
 
 
 
 
 
 
259
 
260
  if download_tasks:
261
  await asyncio.gather(*download_tasks, return_exceptions=True)
src/main.py CHANGED
@@ -319,7 +319,7 @@ async def main():
319
  print("✅ PIPELINE COMPLETED SUCCESSFULLY")
320
  print("=" * 70)
321
  print(f"\n📹 Final Video URL: {result['final_url']}")
322
- if result.get('captions_url'):
323
  print(f"📝 Captions URL (GCS): {result['captions_url']}")
324
  print(f"🎭 Voice Used: {result.get('voice_used', 'N/A')}")
325
  print(f"⏱️ Video Duration: {result.get('video_duration', 0):.2f}s")
 
319
  print("✅ PIPELINE COMPLETED SUCCESSFULLY")
320
  print("=" * 70)
321
  print(f"\n📹 Final Video URL: {result['final_url']}")
322
+ if result.get("captions_url"):
323
  print(f"📝 Captions URL (GCS): {result['captions_url']}")
324
  print(f"🎭 Voice Used: {result.get('voice_used', 'N/A')}")
325
  print(f"⏱️ Video Duration: {result.get('video_duration', 0):.2f}s")
src/video_renderer.py CHANGED
@@ -29,6 +29,7 @@ import textwrap
29
  from utils import logger
30
  import time
31
 
 
32
  class VideoRenderer:
33
  def __init__(self, config: Dict):
34
  self.config = config
@@ -38,7 +39,7 @@ class VideoRenderer:
38
  async def render_video_without_audio(self, assets: Dict, video_config: Optional[Dict] = None) -> tuple[str, float]:
39
  """
40
  Render video composition WITHOUT audio first to get exact duration
41
-
42
  Returns:
43
  tuple: (video_path, video_duration)
44
  """
@@ -76,11 +77,11 @@ class VideoRenderer:
76
  async def add_audio_to_video(self, video_path: str, assets: Dict) -> str:
77
  """
78
  Add audio track to pre-rendered video (NO speedup - video is already correct duration)
79
-
80
  Args:
81
  video_path: Path to video file without audio
82
  assets: Dictionary containing audio assets (tts_audio, background_music_local)
83
-
84
  Returns:
85
  Path to final video with audio
86
  """
@@ -89,20 +90,20 @@ class VideoRenderer:
89
 
90
  # Load the video
91
  video_clip = VideoFileClip(video_path)
92
-
93
  # Prepare audio clips
94
  audio_clips = await self._prepare_audio_clips(assets, video_clip.duration)
95
-
96
  # Add audio track
97
  video_with_audio = await self._add_audio_track(video_clip, audio_clips)
98
-
99
  output_path = await self.render_video_final(video_with_audio)
100
-
101
  # Cleanup
102
  video_clip.close()
103
  if video_with_audio != video_clip:
104
  video_with_audio.close()
105
-
106
  logger.info(f"✅ Final video with audio: {output_path}")
107
  return output_path
108
 
@@ -114,21 +115,15 @@ class VideoRenderer:
114
  """Render final video clip to file"""
115
  try:
116
  output_path = self.temp_dir / f"final_video_{int(time.time())}.mp4"
117
-
118
- video_clip.write_videofile(
119
- str(output_path),
120
- codec="libx264",
121
- audio_codec="aac",
122
- verbose=False,
123
- logger=None
124
- )
125
-
126
  video_clip.close()
127
  return str(output_path)
128
-
129
  except Exception as e:
130
  logger.error(f"Final video render failed: {e}")
131
- if 'video_clip' in locals():
132
  video_clip.close()
133
  raise
134
 
@@ -151,7 +146,7 @@ class VideoRenderer:
151
  # Calculate segment positions
152
  # For an 8s video: use 6.5-8s for start, 0-1.5s for end
153
  start_segment_begin = max(0, hook_duration - HOOK_SEGMENT_DURATION) # Last 1.5s
154
-
155
  # Second half for beginning (last 1.5 seconds of hook video)
156
  hook_start = hook_clip.subclip(start_segment_begin, hook_duration)
157
  clips.append(("hook_start", hook_start))
@@ -210,7 +205,7 @@ class VideoRenderer:
210
 
211
  # Hook segments should now be exactly 1.5 seconds
212
  HOOK_DURATION = 1.5
213
-
214
  for clip in video_clips:
215
  if abs(clip.duration - HOOK_DURATION) < 0.2: # Hook segments (~1.5s with tolerance)
216
  if hook_start is None:
@@ -220,19 +215,21 @@ class VideoRenderer:
220
  else:
221
  library_clips.append(clip)
222
 
223
- logger.info(f"✓ Identified: hook_start={hook_start.duration if hook_start else 0:.2f}s, "
224
- f"hook_end={hook_end.duration if hook_end else 0:.2f}s, "
225
- f"library_clips={len(library_clips)}")
 
 
226
 
227
  # Calculate current library duration
228
  library_duration = sum(clip.duration for clip in library_clips)
229
  hook_total = (hook_start.duration if hook_start else 0) + (hook_end.duration if hook_end else 0)
230
-
231
  logger.info(f"📊 Hook total: {hook_total:.2f}s, Library total: {library_duration:.2f}s")
232
-
233
  # Target middle section duration (11-12s total - 3s hook = 8-9s middle)
234
  target_middle_duration = TARGET_MIN_DURATION - hook_total
235
-
236
  logger.info(f"🎯 Target middle section: {target_middle_duration:.2f}s")
237
 
238
  # Adjust library clips to reach target middle duration
@@ -272,7 +269,9 @@ class VideoRenderer:
272
 
273
  # Calculate total duration
274
  total_duration = sum(clip.duration for clip in sequence_clips)
275
- logger.info(f"📊 Total video sequence duration: {total_duration:.2f}s (target: {TARGET_MIN_DURATION}-{TARGET_MAX_DURATION}s)")
 
 
276
 
277
  # Resize all clips to 9:16 vertical
278
  target_size = (1080, 1920)
@@ -300,11 +299,13 @@ class VideoRenderer:
300
  if tts_clip.duration > 0:
301
  # Trim or extend TTS to match video duration
302
  if tts_clip.duration > target_duration:
303
- logger.info(f"⚠️ TTS longer than video, trimming: {tts_clip.duration:.2f}s -> {target_duration:.2f}s")
 
 
304
  tts_clip = tts_clip.subclip(0, target_duration)
305
  elif tts_clip.duration < target_duration:
306
  logger.info(f"⚠️ TTS shorter than video: {tts_clip.duration:.2f}s < {target_duration:.2f}s")
307
-
308
  clips.append(("tts", tts_clip))
309
  logger.info(f"✓ Loaded TTS audio at FULL volume: {tts_clip.duration:.2f}s")
310
  else:
@@ -313,7 +314,7 @@ class VideoRenderer:
313
  except Exception as e:
314
  logger.error(f"❌ Failed to load TTS audio: {e}")
315
 
316
- # Load background music - VERY LOW volume
317
  if assets.get("background_music_local"):
318
  try:
319
  bg_clip = AudioFileClip(assets["background_music_local"])
@@ -322,10 +323,10 @@ class VideoRenderer:
322
  if bg_clip.duration > target_duration:
323
  bg_clip = bg_clip.subclip(0, target_duration)
324
  logger.info(f"✓ Trimmed background music to {target_duration:.2f}s")
325
- # Reduce volume significantly
326
- bg_clip = bg_clip.volumex(0.08)
327
  clips.append(("background", bg_clip))
328
- logger.info(f"✓ Loaded background music at 8% volume: {bg_clip.duration:.2f}s")
329
  else:
330
  logger.warning("⚠️ Background music has zero duration")
331
  bg_clip.close()
@@ -371,13 +372,13 @@ class VideoRenderer:
371
 
372
  try:
373
  valid_audio_clips = [clip for clip in audio_clips if clip.duration > 0]
374
-
375
  if not valid_audio_clips:
376
  return video_clip
377
 
378
  mixed_audio = CompositeAudioClip(valid_audio_clips)
379
  video_with_audio = video_clip.set_audio(mixed_audio)
380
-
381
  logger.info(f"✅ Added audio track")
382
  return video_with_audio
383
 
@@ -485,6 +486,7 @@ class VideoRenderer:
485
  def _split_script_into_words(self, script: str) -> List[str]:
486
  """Split script into individual words"""
487
  import re
 
488
  script = re.sub(r"\s+", " ", script).strip()
489
  return script.split()
490
 
@@ -514,14 +516,9 @@ class VideoRenderer:
514
 
515
  try:
516
  logger.info(f"📹 Rendering video (no audio): {filename}")
517
-
518
  video_clip.write_videofile(
519
- str(output_path),
520
- codec="libx264",
521
- fps=24,
522
- verbose=False,
523
- logger=None,
524
- audio=False # No audio
525
  )
526
 
527
  return str(output_path)
@@ -583,6 +580,7 @@ class VideoRenderer:
583
  try:
584
  import librosa
585
  import soundfile as sf
 
586
  has_librosa = True
587
  except ImportError:
588
  has_librosa = False
@@ -634,7 +632,7 @@ class VideoRenderer:
634
  """Clean up temporary video/audio clips"""
635
  for clip in clips:
636
  try:
637
- if hasattr(clip, 'close'):
638
  clip.close()
639
  except Exception as e:
640
  # Silently ignore cleanup errors
@@ -644,7 +642,8 @@ class VideoRenderer:
644
  """Cleanup on destruction"""
645
  try:
646
  import shutil
647
- if hasattr(self, 'temp_dir') and self.temp_dir.exists():
 
648
  shutil.rmtree(self.temp_dir, ignore_errors=True)
649
  except Exception:
650
  # Silently ignore cleanup errors
 
29
  from utils import logger
30
  import time
31
 
32
+
33
  class VideoRenderer:
34
  def __init__(self, config: Dict):
35
  self.config = config
 
39
  async def render_video_without_audio(self, assets: Dict, video_config: Optional[Dict] = None) -> tuple[str, float]:
40
  """
41
  Render video composition WITHOUT audio first to get exact duration
42
+
43
  Returns:
44
  tuple: (video_path, video_duration)
45
  """
 
77
  async def add_audio_to_video(self, video_path: str, assets: Dict) -> str:
78
  """
79
  Add audio track to pre-rendered video (NO speedup - video is already correct duration)
80
+
81
  Args:
82
  video_path: Path to video file without audio
83
  assets: Dictionary containing audio assets (tts_audio, background_music_local)
84
+
85
  Returns:
86
  Path to final video with audio
87
  """
 
90
 
91
  # Load the video
92
  video_clip = VideoFileClip(video_path)
93
+
94
  # Prepare audio clips
95
  audio_clips = await self._prepare_audio_clips(assets, video_clip.duration)
96
+
97
  # Add audio track
98
  video_with_audio = await self._add_audio_track(video_clip, audio_clips)
99
+
100
  output_path = await self.render_video_final(video_with_audio)
101
+
102
  # Cleanup
103
  video_clip.close()
104
  if video_with_audio != video_clip:
105
  video_with_audio.close()
106
+
107
  logger.info(f"✅ Final video with audio: {output_path}")
108
  return output_path
109
 
 
115
  """Render final video clip to file"""
116
  try:
117
  output_path = self.temp_dir / f"final_video_{int(time.time())}.mp4"
118
+
119
+ video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", verbose=False, logger=None)
120
+
 
 
 
 
 
 
121
  video_clip.close()
122
  return str(output_path)
123
+
124
  except Exception as e:
125
  logger.error(f"Final video render failed: {e}")
126
+ if "video_clip" in locals():
127
  video_clip.close()
128
  raise
129
 
 
146
  # Calculate segment positions
147
  # For an 8s video: use 6.5-8s for start, 0-1.5s for end
148
  start_segment_begin = max(0, hook_duration - HOOK_SEGMENT_DURATION) # Last 1.5s
149
+
150
  # Second half for beginning (last 1.5 seconds of hook video)
151
  hook_start = hook_clip.subclip(start_segment_begin, hook_duration)
152
  clips.append(("hook_start", hook_start))
 
205
 
206
  # Hook segments should now be exactly 1.5 seconds
207
  HOOK_DURATION = 1.5
208
+
209
  for clip in video_clips:
210
  if abs(clip.duration - HOOK_DURATION) < 0.2: # Hook segments (~1.5s with tolerance)
211
  if hook_start is None:
 
215
  else:
216
  library_clips.append(clip)
217
 
218
+ logger.info(
219
+ f"✓ Identified: hook_start={hook_start.duration if hook_start else 0:.2f}s, "
220
+ f"hook_end={hook_end.duration if hook_end else 0:.2f}s, "
221
+ f"library_clips={len(library_clips)}"
222
+ )
223
 
224
  # Calculate current library duration
225
  library_duration = sum(clip.duration for clip in library_clips)
226
  hook_total = (hook_start.duration if hook_start else 0) + (hook_end.duration if hook_end else 0)
227
+
228
  logger.info(f"📊 Hook total: {hook_total:.2f}s, Library total: {library_duration:.2f}s")
229
+
230
  # Target middle section duration (11-12s total - 3s hook = 8-9s middle)
231
  target_middle_duration = TARGET_MIN_DURATION - hook_total
232
+
233
  logger.info(f"🎯 Target middle section: {target_middle_duration:.2f}s")
234
 
235
  # Adjust library clips to reach target middle duration
 
269
 
270
  # Calculate total duration
271
  total_duration = sum(clip.duration for clip in sequence_clips)
272
+ logger.info(
273
+ f"📊 Total video sequence duration: {total_duration:.2f}s (target: {TARGET_MIN_DURATION}-{TARGET_MAX_DURATION}s)"
274
+ )
275
 
276
  # Resize all clips to 9:16 vertical
277
  target_size = (1080, 1920)
 
299
  if tts_clip.duration > 0:
300
  # Trim or extend TTS to match video duration
301
  if tts_clip.duration > target_duration:
302
+ logger.info(
303
+ f"⚠️ TTS longer than video, trimming: {tts_clip.duration:.2f}s -> {target_duration:.2f}s"
304
+ )
305
  tts_clip = tts_clip.subclip(0, target_duration)
306
  elif tts_clip.duration < target_duration:
307
  logger.info(f"⚠️ TTS shorter than video: {tts_clip.duration:.2f}s < {target_duration:.2f}s")
308
+
309
  clips.append(("tts", tts_clip))
310
  logger.info(f"✓ Loaded TTS audio at FULL volume: {tts_clip.duration:.2f}s")
311
  else:
 
314
  except Exception as e:
315
  logger.error(f"❌ Failed to load TTS audio: {e}")
316
 
317
+ # Load background music - INCREASED volume for better presence
318
  if assets.get("background_music_local"):
319
  try:
320
  bg_clip = AudioFileClip(assets["background_music_local"])
 
323
  if bg_clip.duration > target_duration:
324
  bg_clip = bg_clip.subclip(0, target_duration)
325
  logger.info(f"✓ Trimmed background music to {target_duration:.2f}s")
326
+ # Increase volume from 8% to 25% for better audibility
327
+ bg_clip = bg_clip.volumex(0.25)
328
  clips.append(("background", bg_clip))
329
+ logger.info(f"✓ Loaded background music at 25% volume: {bg_clip.duration:.2f}s")
330
  else:
331
  logger.warning("⚠️ Background music has zero duration")
332
  bg_clip.close()
 
372
 
373
  try:
374
  valid_audio_clips = [clip for clip in audio_clips if clip.duration > 0]
375
+
376
  if not valid_audio_clips:
377
  return video_clip
378
 
379
  mixed_audio = CompositeAudioClip(valid_audio_clips)
380
  video_with_audio = video_clip.set_audio(mixed_audio)
381
+
382
  logger.info(f"✅ Added audio track")
383
  return video_with_audio
384
 
 
486
  def _split_script_into_words(self, script: str) -> List[str]:
487
  """Split script into individual words"""
488
  import re
489
+
490
  script = re.sub(r"\s+", " ", script).strip()
491
  return script.split()
492
 
 
516
 
517
  try:
518
  logger.info(f"📹 Rendering video (no audio): {filename}")
519
+
520
  video_clip.write_videofile(
521
+ str(output_path), codec="libx264", fps=24, verbose=False, logger=None, audio=False # No audio
 
 
 
 
 
522
  )
523
 
524
  return str(output_path)
 
580
  try:
581
  import librosa
582
  import soundfile as sf
583
+
584
  has_librosa = True
585
  except ImportError:
586
  has_librosa = False
 
632
  """Clean up temporary video/audio clips"""
633
  for clip in clips:
634
  try:
635
+ if hasattr(clip, "close"):
636
  clip.close()
637
  except Exception as e:
638
  # Silently ignore cleanup errors
 
642
  """Cleanup on destruction"""
643
  try:
644
  import shutil
645
+
646
+ if hasattr(self, "temp_dir") and self.temp_dir.exists():
647
  shutil.rmtree(self.temp_dir, ignore_errors=True)
648
  except Exception:
649
  # Silently ignore cleanup errors