Spaces:

Elvoro
/

Tools

Running

App Files Community

topcoderkz committed on Oct 13, 2025

Commit

0c4ba75

1 Parent(s): e598b7d

Refactor code, remove deepseek integration

Browse files

Files changed (6) hide show

batch.sh +1 -1
src/api_clients.py +263 -197
src/asset_selector.py +9 -7
src/automation.py +15 -35
src/main.py +1 -1
src/video_renderer.py +44 -45

batch.sh CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/bin/bash
 # Process first 5 strategies
-for i in {0..1}; do
     python src/main.py --csv content_strategies.csv --index $i --output ./outputs/videos/video_$i
 done

 #!/bin/bash
 # Process first 5 strategies
+for i in {0..0}; do
     python src/main.py --csv content_strategies.csv --index $i --output ./outputs/videos/video_$i
 done

src/api_clients.py CHANGED Viewed

@@ -30,15 +30,15 @@ class APIClients:
         # RunwayML API configuration
         self.runway_api_key = config.get("runwayml_api_key") or os.getenv("RUNWAYML_API_KEY")
         self.runway_base_url = "https://api.dev.runwayml.com/v1"
         # Voice profiles for different personas
         self.voice_profiles = {
             "female_young": "en-US-Neural2-F",  # Young female voice
             "female_mature": "en-US-Neural2-E",  # Mature female voice
             "female_casual": "en-US-Neural2-G",  # Casual female voice
-            "male_young": "en-US-Neural2-D",    # Young male voice
-            "male_mature": "en-US-Neural2-A",   # Mature male voice
-            "male_casual": "en-US-Neural2-J",   # Casual male voice
         }
     async def enhance_prompt(self, prompt: str) -> str:
@@ -60,7 +60,7 @@ class APIClients:
             Return only the enhanced prompt, nothing else.
             """
-            model = genai.GenerativeModel("gemini-2.0-flash-exp")
             response = model.generate_content(enhancement_instruction)
             enhanced_prompt = response.text.strip()
@@ -74,39 +74,39 @@ class APIClients:
     async def generate_image(self, prompt: str) -> Optional[str]:
         """
         Generate image using Vertex AI Imagen 4 Ultra
         Args:
             prompt: Image generation prompt
         Returns:
             Local path to generated image or None
         """
         try:
-            import vertexai
-            from vertexai.preview.vision_models import ImageGenerationModel
-            logger.info(f"🎨 Generating image with Imagen 4 Ultra: {prompt[:200]}...")
-            vertexai.init(project=self.config.get("gcp_project_id"), location="us-central1")
-            # Use correct Imagen 4 Ultra model name
-            model = ImageGenerationModel.from_pretrained("imagen-4.0-ultra-generate-001")
-            images = model.generate_images(
-                prompt=prompt,
-                number_of_images=1,
-                aspect_ratio="9:16",  # Vertical for TikTok/Instagram
-                safety_filter_level="block_some",
-                person_generation="allow_adult",
-            )
-            # Save to temp file
-            import tempfile
-            output_path = f"/tmp/hook_image_{hash(prompt)}.png"
-            images[0].save(location=output_path, include_generation_parameters=False)
-            # output_path = '/Users/topcoderkz/Downloads/gen4-ultra.png'
-            logger.info(f"✓ Image generated with Imagen 4 Ultra: {output_path}")
-            return output_path
         except Exception as e:
             logger.error(f"❌ Imagen 4 Ultra generation failed: {e}")
@@ -130,7 +130,7 @@ class APIClients:
     Return ONLY the caption text, nothing else."""
-            model = genai.GenerativeModel("gemini-2.0-flash-exp")
             response = model.generate_content(instruction)
             caption = response.text.strip()
@@ -164,7 +164,7 @@ class APIClients:
     "video_prompt": "..."
     }}"""
-            model = genai.GenerativeModel("gemini-2.0-flash-exp")
             response = model.generate_content(instruction)
             result = json.loads(response.text.strip())
@@ -182,7 +182,7 @@ class APIClients:
     async def generate_video(self, prompt: str, duration: int, image_url: str) -> Dict:
         """
         Generate video using RunwayML gen4_turbo ($0.25 per video / 25 credits)
         Args:
             prompt: Text prompt for video generation
             duration: Video duration in seconds
@@ -190,76 +190,73 @@ class APIClients:
         """
         try:
             logger.info(f"🎬 Generating video with gen4_turbo: {prompt[:100]}...")
-            # return {
-            #     "video_url": 'https://dnznrvs05pmza.cloudfront.net/764d8b31-4e1f-4ba2-bf4f-360cf029e0b7.mp4?_jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJrZXlIYXNoIjoiMGIyZjMyMzc5NDA4ZTU0NCIsImJ1Y2tldCI6InJ1bndheS10YXNrLWFydGlmYWN0cyIsInN0YWdlIjoicHJvZCIsImV4cCI6MTc2MDQwMDAwMH0.5ltZPfO-gWilm_lt6sK-tPvgwJUgPluOjeeUOOIbyEE',
-            #     "task_id": '08fbc334-7d36-45c2-8b71-7f20fa075f10',
-            #     "duration": duration,
-            #     "prompt": prompt,
-            #     "status": 'SUCCEEDED',
-            #     "created_at": '2025-10-12T18:57:27.240Z',
-            #     "model": "gen4_turbo"
-            # }
-            headers = {
-                "Authorization": f"Bearer {self.runway_api_key}",
-                "Content-Type": "application/json",
-                "X-Runway-Version": "2024-11-06",
             }
-            payload = {
-                "promptImage": image_url,
-                "promptText": prompt[:1000],
-                "model": "gen4_turbo",  # Updated to gen4_turbo ($0.25/video)
-                "duration": duration,
-                "ratio": "1280:720"
-            }
-            async with aiohttp.ClientSession() as session:
-                # Create task
-                async with session.post(
-                    "https://api.dev.runwayml.com/v1/image_to_video",
-                    headers=headers,
-                    json=payload
-                ) as response:
-                    if response.status != 200:
-                        error_text = await response.text()
-                        raise Exception(f"RunwayML error: {error_text}")
-                    task_data = await response.json()
-                    task_id = task_data["id"]
-                    logger.info(f"✓ Task created with gen4_turbo: {task_id}")
-                # Poll for completion
-                max_attempts = 120
-                for attempt in range(max_attempts):
-                    await asyncio.sleep(10)
-                    async with session.get(
-                        f"https://api.dev.runwayml.com/v1/tasks/{task_id}",
-                        headers=headers
-                    ) as status_response:
-                        status_data = await status_response.json()
-                        status = status_data["status"]
-                        if status == "SUCCEEDED":
-                            video_url = status_data["output"][0]
-                            logger.info(f"✅ Video generated with gen4_turbo: {video_url}")
-                            return {
-                                "video_url": video_url,
-                                "task_id": task_id,
-                                "duration": duration,
-                                "prompt": prompt,
-                                "status": status,
-                                "created_at": status_data.get("createdAt"),
-                                "model": "gen4_turbo"
-                            }
-                        elif status == "FAILED":
-                            raise Exception(f"Generation failed: {status_data.get('failure')}")
-                        elif status == "RUNNING":
-                            progress = status_data.get("progress", 0)
-                            logger.info(f"⏳ Progress: {progress*100:.0f}%")
-                raise Exception("Timeout waiting for video generation")
         except Exception as e:
             logger.error(f"❌ Video generation error: {e}")
@@ -268,12 +265,12 @@ class APIClients:
     async def generate_tts(self, text: str, voice_name: Optional[str] = None, duration: Optional[float] = None) -> Dict:
         """
         Generate TTS audio using Google Cloud TTS
         Args:
             text: Text to convert to speech
             voice_name: Voice to use (optional)
             duration: Target duration in seconds (optional) - will adjust speaking rate
         Returns:
             Dict with audio_url, duration, voice, text, local_path
         """
@@ -285,37 +282,27 @@ class APIClients:
             if not voice_name:
                 voice_name = self.config.get("default_voice", "en-US-Neural2-F")
-            # Configure synthesis
-            synthesis_input = texttospeech.SynthesisInput(text=text)
             language_code = "-".join(voice_name.split("-")[:2])
-            # CORRECTED: Proper gender mapping for US Neural2 voices
-            male_voices = {
-                "en-US-Neural2-A", "en-US-Neural2-D", "en-US-Neural2-I", "en-US-Neural2-J",
-                "en-US-Studio-M"  # Add other male voices if needed
-            }
-            female_voices = {
-                "en-US-Neural2-C", "en-US-Neural2-E", "en-US-Neural2-F",
-                "en-US-Neural2-G", "en-US-Neural2-H", "en-US-Studio-O",
-                "en-US-Standard-A"  # Add other female voices if needed
-            }
-            # Determine gender from full voice name
-            if voice_name in male_voices:
                 ssml_gender = texttospeech.SsmlVoiceGender.MALE
                 logger.info(f"🎭 Using MALE voice: {voice_name}")
-            elif voice_name in female_voices:
-                ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
-                logger.info(f"🎭 Using FEMALE voice: {voice_name}")
             else:
-                # Default to FEMALE for unknown voices (or you could skip ssml_gender)
                 ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
-                logger.warning(f"🎭 Voice {voice_name} not in gender map, defaulting to FEMALE")
             voice = texttospeech.VoiceSelectionParams(
-                language_code=language_code,
-                name=voice_name,
-                ssml_gender=ssml_gender
             )
             # Calculate speaking rate if duration is provided
@@ -323,54 +310,48 @@ class APIClients:
             if duration:
                 # First, generate at normal rate to get baseline duration
                 temp_audio_config = texttospeech.AudioConfig(
-                    audio_encoding=texttospeech.AudioEncoding.MP3,
-                    speaking_rate=1.0,
-                    pitch=0.0
                 )
                 temp_response = self.tts_client.synthesize_speech(
-                    input=synthesis_input,
-                    voice=voice,
-                    audio_config=temp_audio_config
                 )
                 # Save temp file to measure duration
                 import tempfile
                 temp_path = f"/tmp/tts_temp_{hash(text)}.mp3"
                 with open(temp_path, "wb") as out:
                     out.write(temp_response.audio_content)
                 # Measure actual duration
                 try:
                     from mutagen.mp3 import MP3
                     audio = MP3(temp_path)
                     baseline_duration = audio.info.length
                 except ImportError:
                     # Estimate if mutagen not available
                     word_count = len(text.split())
                     baseline_duration = (word_count / 150) * 60
                 # Calculate required speaking rate
                 speaking_rate = baseline_duration / duration
                 speaking_rate = max(0.25, min(4.0, speaking_rate))  # Clamp to valid range
-                logger.info(f"📊 Baseline: {baseline_duration:.2f}s, Target: {duration:.2f}s, Rate: {speaking_rate:.2f}x")
                 # Clean up temp file
                 if os.path.exists(temp_path):
                     os.remove(temp_path)
             # Generate final audio with adjusted speaking rate
             audio_config = texttospeech.AudioConfig(
-                audio_encoding=texttospeech.AudioEncoding.MP3,
-                speaking_rate=speaking_rate,
-                pitch=0.0
             )
-            response = self.tts_client.synthesize_speech(
-                input=synthesis_input,
-                voice=voice,
-                audio_config=audio_config
-            )
             # Save audio
             audio_filename = f"tts_{hash(text)}.mp3"
@@ -382,12 +363,14 @@ class APIClients:
             # Get actual duration
             try:
                 from mutagen.mp3 import MP3
                 audio = MP3(audio_path)
                 actual_duration = audio.info.length
                 logger.info(f"✓ TTS audio duration: {actual_duration:.2f}s")
             except ImportError:
                 try:
                     from pydub import AudioSegment
                     audio = AudioSegment.from_mp3(audio_path)
                     actual_duration = len(audio) / 1000.0
                     logger.info(f"✓ TTS audio duration: {actual_duration:.2f}s (via pydub)")
@@ -395,7 +378,13 @@ class APIClients:
                     actual_duration = duration if duration else (len(text.split()) / 150) * 60
                     logger.warning(f"⚠️ Estimated duration: {actual_duration:.2f}s")
-            # Upload to GCS
             audio_url = await self.store_in_gcs(audio_path, "audio")
             logger.info(f"✅ TTS generated successfully: {audio_url}")
@@ -406,13 +395,73 @@ class APIClients:
                 "voice": voice_name,
                 "text": text,
                 "local_path": audio_path,
-                "speaking_rate": speaking_rate
             }
         except Exception as e:
             logger.error(f"❌ Error generating TTS: {e}")
             raise
     async def download_file(self, url: str, filename: str) -> str:
         """Download file from URL to local temporary file"""
         import aiohttp
@@ -435,61 +484,83 @@ class APIClients:
             logger.error(f"Failed to download {url}: {e}")
             raise
-    async def select_voice_for_persona(self, gemini_prompt: str) -> str:
-        """Select appropriate voice based on persona with CORRECT gender mapping"""
-        # Default to female voice for most content
-        female_voices = [
-            "en-US-Neural2-C",  # Female (was missing from your list)
-            "en-US-Neural2-E",  # Female (was missing from your list)
-            "en-US-Neural2-F",  # Female ✓
-            "en-US-Neural2-G",  # Female (was missing from your list)
-            "en-US-Neural2-H",  # Female ✓
-            "en-US-Studio-O",   # Female ✓
-            "en-US-Standard-A", # Female ✓
-        ]
-        male_voices = [
-            "en-US-Neural2-A",  # Male (was missing from your list)
-            "en-US-Neural2-D",  # Male ✓
-            "en-US-Neural2-I",  # Male (was missing from your list)
-            "en-US-Neural2-J",  # Male (was missing from your list)
-            "en-US-Studio-M",   # Male ✓
-        ]
-        # Simple persona detection from prompt
-        prompt_lower = gemini_prompt.lower()
-        # If prompt suggests male persona, use male voice
-        if any(word in prompt_lower for word in ["male", "man", "boy", "gentleman", "his", "he "]):
-            selected_voice = male_voices[0]  # Use first male voice
-            logger.info(f"🎭 Selected MALE voice for persona: {selected_voice}")
-            return selected_voice
-        else:
-            # Default to female voice
-            selected_voice = female_voices[0]  # Use first female voice
-            logger.info(f"🎭 Selected FEMALE voice for persona: {selected_voice}")
             return selected_voice
     async def upload_captions_to_gcs(self, captions_text: str, video_filename: str) -> Optional[str]:
         """
         Upload captions to GCS bucket with same name as video (but .txt extension)
         Args:
             captions_text: Caption text content
             video_filename: Name of the video file (e.g., "final_video_abc123.mp4")
         Returns:
             GCS signed URL of uploaded captions or None
         """
         try:
             # Create captions filename (replace .mp4 with .txt)
             captions_filename = os.path.splitext(video_filename)[0] + ".txt"
             logger.info(f"☁️ Uploading captions to GCS: {captions_filename}")
             # Save captions to temp file
             import tempfile
             temp_path = os.path.join(tempfile.gettempdir(), captions_filename)
             with open(temp_path, "w", encoding="utf-8") as f:
                 f.write(captions_text)
@@ -498,24 +569,21 @@ class APIClients:
             blob_name = f"captions/{captions_filename}"
             blob = self.gcs_bucket.blob(blob_name)
             blob.content_type = "text/plain"
             logger.info(f"Uploading {captions_filename} to gs://{self.gcs_bucket.name}/{blob_name}")
             blob.upload_from_filename(temp_path)
             # Generate signed URL (valid for 7 days)
             from datetime import timedelta
-            captions_url = blob.generate_signed_url(
-                version="v4",
-                expiration=timedelta(days=7),
-                method="GET"
-            )
             logger.info(f"✅ Captions uploaded to GCS: {captions_url[:100]}...")
             # Clean up temp file
             if os.path.exists(temp_path):
                 os.remove(temp_path)
             return captions_url
         except Exception as e:
@@ -541,6 +609,7 @@ class APIClients:
         try:
             from google.cloud.exceptions import NotFound
             try:
                 self.gcs_bucket.exists()
                 health["gcs"] = True
@@ -589,16 +658,13 @@ class APIClients:
             file_ext = os.path.splitext(filename)[1]
             blob.content_type = content_types.get(file_ext, "application/octet-stream")
             logger.info(f"Uploading {filename} to gs://{self.gcs_bucket.name}/{blob_name}")
             blob.upload_from_filename(file_path)
             from datetime import timedelta
-            signed_url = blob.generate_signed_url(
-                version="v4",
-                expiration=timedelta(days=7),
-                method="GET"
-            )
             logger.info(f"✅ File uploaded with signed URL: {signed_url[:100]}...")
             return signed_url

         # RunwayML API configuration
         self.runway_api_key = config.get("runwayml_api_key") or os.getenv("RUNWAYML_API_KEY")
         self.runway_base_url = "https://api.dev.runwayml.com/v1"
         # Voice profiles for different personas
         self.voice_profiles = {
             "female_young": "en-US-Neural2-F",  # Young female voice
             "female_mature": "en-US-Neural2-E",  # Mature female voice
             "female_casual": "en-US-Neural2-G",  # Casual female voice
+            "male_young": "en-US-Neural2-D",  # Young male voice
+            "male_mature": "en-US-Neural2-A",  # Mature male voice
+            "male_casual": "en-US-Neural2-J",  # Casual male voice
         }
     async def enhance_prompt(self, prompt: str) -> str:
             Return only the enhanced prompt, nothing else.
             """
+            model = genai.GenerativeModel("gemini-2.5-flash")
             response = model.generate_content(enhancement_instruction)
             enhanced_prompt = response.text.strip()
     async def generate_image(self, prompt: str) -> Optional[str]:
         """
         Generate image using Vertex AI Imagen 4 Ultra
         Args:
             prompt: Image generation prompt
         Returns:
             Local path to generated image or None
         """
         try:
+            # import vertexai
+            # from vertexai.preview.vision_models import ImageGenerationModel
+            # logger.info(f"🎨 Generating image with Imagen 4 Ultra: {prompt[:200]}...")
+            # vertexai.init(project=self.config.get("gcp_project_id"), location="us-central1")
+            # # Use correct Imagen 4 Ultra model name
+            # model = ImageGenerationModel.from_pretrained("imagen-4.0-ultra-generate-001")
+            # images = model.generate_images(
+            #     prompt=prompt,
+            #     number_of_images=1,
+            #     aspect_ratio="9:16",
+            #     safety_filter_level="block_some",
+            #     person_generation="allow_adult",
+            # )
+            # # Save to temp file
+            # import tempfile
+            # output_path = f"/tmp/hook_image_{hash(prompt)}.png"
+            # images[0].save(location=output_path, include_generation_parameters=False)
+            # logger.info(f"✓ Image generated with Imagen 4 Ultra (9:16): {output_path}")
+            # return output_path
+            return "/tmp/hook_image_391248835665466790.png"
         except Exception as e:
             logger.error(f"❌ Imagen 4 Ultra generation failed: {e}")
     Return ONLY the caption text, nothing else."""
+            model = genai.GenerativeModel("gemini-2.5-flash")
             response = model.generate_content(instruction)
             caption = response.text.strip()
     "video_prompt": "..."
     }}"""
+            model = genai.GenerativeModel("gemini-2.5-flash")
             response = model.generate_content(instruction)
             result = json.loads(response.text.strip())
     async def generate_video(self, prompt: str, duration: int, image_url: str) -> Dict:
         """
         Generate video using RunwayML gen4_turbo ($0.25 per video / 25 credits)
         Args:
             prompt: Text prompt for video generation
             duration: Video duration in seconds
         """
         try:
             logger.info(f"🎬 Generating video with gen4_turbo: {prompt[:100]}...")
+            return {
+                "video_url": "https://dnznrvs05pmza.cloudfront.net/4a582f22-9dd3-456e-a0a5-8036ed2c6b2c.mp4?_jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJrZXlIYXNoIjoiNGVlNGI1MjIxNGYxYjJjNyIsImJ1Y2tldCI6InJ1bndheS10YXNrLWFydGlmYWN0cyIsInN0YWdlIjoicHJvZCIsImV4cCI6MTc2MDQ4NjQwMH0.FWm7vx_lQjkg4fk8stDQI2gt-ahr95qBPREDyWhvgoI",
+                "task_id": "61cdffe3-e84e-4c45-a611-bb9c48e6a485",
+                "duration": 3,
+                "prompt": prompt,
+                "status": "SUCCEEDED",
+                "created_at": "2025-10-13T22:56:06.290Z",
+                "model": "gen4_turbo",
             }
+            # headers = {z``
+            #     "Authorization": f"Bearer {self.runway_api_key}",
+            #     "Content-Type": "application/json",
+            #     "X-Runway-Version": "2024-11-06",
+            # }
+            # payload = {
+            #     "promptImage": image_url,
+            #     "promptText": prompt[:1000],
+            #     "model": "gen4_turbo",  # Updated to gen4_turbo ($0.25/video)
+            #     "duration": duration,
+            #     "ratio": "720:1280",
+            # }
+            # async with aiohttp.ClientSession() as session:
+            #     # Create task
+            #     async with session.post(
+            #         "https://api.dev.runwayml.com/v1/image_to_video", headers=headers, json=payload
+            #     ) as response:
+            #         if response.status != 200:
+            #             error_text = await response.text()
+            #             raise Exception(f"RunwayML error: {error_text}")
+            #         task_data = await response.json()
+            #         task_id = task_data["id"]
+            #         logger.info(f"✓ Task created with gen4_turbo: {task_id}")
+            #     # Poll for completion
+            #     max_attempts = 120
+            #     for attempt in range(max_attempts):
+            #         await asyncio.sleep(10)
+            #         async with session.get(
+            #             f"https://api.dev.runwayml.com/v1/tasks/{task_id}", headers=headers
+            #         ) as status_response:
+            #             status_data = await status_response.json()
+            #             status = status_data["status"]
+            #             if status == "SUCCEEDED":
+            #                 video_url = status_data["output"][0]
+            #                 logger.info(f"✅ Video generated with gen4_turbo: {video_url}")
+            #                 return {
+            #                     "video_url": video_url,
+            #                     "task_id": task_id,
+            #                     "duration": duration,
+            #                     "prompt": prompt,
+            #                     "status": status,
+            #                     "created_at": status_data.get("createdAt"),
+            #                     "model": "gen4_turbo",
+            #                 }
+            #             elif status == "FAILED":
+            #                 raise Exception(f"Generation failed: {status_data.get('failure')}")
+            #             elif status == "RUNNING":
+            #                 progress = status_data.get("progress", 0)
+            #                 logger.info(f"⏳ Progress: {progress*100:.0f}%")
+            #     raise Exception("Timeout waiting for video generation")
         except Exception as e:
             logger.error(f"❌ Video generation error: {e}")
     async def generate_tts(self, text: str, voice_name: Optional[str] = None, duration: Optional[float] = None) -> Dict:
         """
         Generate TTS audio using Google Cloud TTS
         Args:
             text: Text to convert to speech
             voice_name: Voice to use (optional)
             duration: Target duration in seconds (optional) - will adjust speaking rate
         Returns:
             Dict with audio_url, duration, voice, text, local_path
         """
             if not voice_name:
                 voice_name = self.config.get("default_voice", "en-US-Neural2-F")
+            # IMPORTANT: Determine gender FIRST before creating any voice objects
             language_code = "-".join(voice_name.split("-")[:2])
+            # Male voices: Neural2-A, Neural2-D, Neural2-I, Neural2-J
+            # Female voices: Neural2-C, Neural2-E, Neural2-F, Neural2-G, Neural2-H
+            male_voice_suffixes = ["Neural2-A", "Neural2-D", "Neural2-I", "Neural2-J"]
+            voice_suffix = "-".join(voice_name.split("-")[2:])  # Get "Neural2-A" part
+            if voice_suffix in male_voice_suffixes:
                 ssml_gender = texttospeech.SsmlVoiceGender.MALE
                 logger.info(f"🎭 Using MALE voice: {voice_name}")
             else:
                 ssml_gender = texttospeech.SsmlVoiceGender.FEMALE
+                logger.info(f"🎭 Using FEMALE voice: {voice_name}")
+            # Configure synthesis
+            synthesis_input = texttospeech.SynthesisInput(text=text)
+            # Create voice object with correct gender
             voice = texttospeech.VoiceSelectionParams(
+                language_code=language_code, name=voice_name, ssml_gender=ssml_gender
             )
             # Calculate speaking rate if duration is provided
             if duration:
                 # First, generate at normal rate to get baseline duration
                 temp_audio_config = texttospeech.AudioConfig(
+                    audio_encoding=texttospeech.AudioEncoding.MP3, speaking_rate=1.0, pitch=0.0
                 )
                 temp_response = self.tts_client.synthesize_speech(
+                    input=synthesis_input, voice=voice, audio_config=temp_audio_config
                 )
                 # Save temp file to measure duration
                 import tempfile
                 temp_path = f"/tmp/tts_temp_{hash(text)}.mp3"
                 with open(temp_path, "wb") as out:
                     out.write(temp_response.audio_content)
                 # Measure actual duration
                 try:
                     from mutagen.mp3 import MP3
                     audio = MP3(temp_path)
                     baseline_duration = audio.info.length
                 except ImportError:
                     # Estimate if mutagen not available
                     word_count = len(text.split())
                     baseline_duration = (word_count / 150) * 60
                 # Calculate required speaking rate
                 speaking_rate = baseline_duration / duration
                 speaking_rate = max(0.25, min(4.0, speaking_rate))  # Clamp to valid range
+                logger.info(
+                    f"📊 Baseline: {baseline_duration:.2f}s, Target: {duration:.2f}s, Rate: {speaking_rate:.2f}x"
+                )
                 # Clean up temp file
                 if os.path.exists(temp_path):
                     os.remove(temp_path)
             # Generate final audio with adjusted speaking rate
             audio_config = texttospeech.AudioConfig(
+                audio_encoding=texttospeech.AudioEncoding.MP3, speaking_rate=speaking_rate, pitch=0.0
             )
+            response = self.tts_client.synthesize_speech(input=synthesis_input, voice=voice, audio_config=audio_config)
             # Save audio
             audio_filename = f"tts_{hash(text)}.mp3"
             # Get actual duration
             try:
                 from mutagen.mp3 import MP3
                 audio = MP3(audio_path)
                 actual_duration = audio.info.length
                 logger.info(f"✓ TTS audio duration: {actual_duration:.2f}s")
             except ImportError:
                 try:
                     from pydub import AudioSegment
                     audio = AudioSegment.from_mp3(audio_path)
                     actual_duration = len(audio) / 1000.0
                     logger.info(f"✓ TTS audio duration: {actual_duration:.2f}s (via pydub)")
                     actual_duration = duration if duration else (len(text.split()) / 150) * 60
                     logger.warning(f"⚠️ Estimated duration: {actual_duration:.2f}s")
+            # IMPORTANT: Normalize audio to make it louder
+            normalized_path = await self._normalize_audio(audio_path)
+            if normalized_path:
+                audio_path = normalized_path
+                logger.info(f"✅ Audio normalized to -10 to -12 LUFS")
+            # Upload to GCS (upload normalized version)
             audio_url = await self.store_in_gcs(audio_path, "audio")
             logger.info(f"✅ TTS generated successfully: {audio_url}")
                 "voice": voice_name,
                 "text": text,
                 "local_path": audio_path,
+                "speaking_rate": speaking_rate,
             }
         except Exception as e:
             logger.error(f"❌ Error generating TTS: {e}")
             raise
+    async def _normalize_audio(self, audio_path: str) -> Optional[str]:
+        """
+        Normalize audio to -10 to -12 LUFS with peaks at -1 dBFS
+        Uses pydub for proper loudness normalization
+        Args:
+            audio_path: Path to input audio file
+        Returns:
+            Path to normalized audio file or None if failed
+        """
+        try:
+            from pydub import AudioSegment
+            from pydub.effects import normalize
+            import tempfile
+            logger.info(f"🔊 Normalizing audio: {audio_path}")
+            # Load audio
+            audio = AudioSegment.from_mp3(audio_path)
+            # Step 1: Normalize peaks to -1 dBFS (prevents clipping)
+            audio = normalize(audio, headroom=1.0)
+            # Step 2: Boost to target loudness (-10 to -12 LUFS ≈ -11 dBFS)
+            current_dBFS = audio.dBFS
+            target_dBFS = -11.0  # Target around -11 LUFS (loud and clear)
+            gain_needed = target_dBFS - current_dBFS
+            # Apply gain (THIS IS WHERE VOLUME INCREASES)
+            if gain_needed > 0:
+                audio = audio + gain_needed  # ← INCREASES VOLUME
+                logger.info(f"✓ Boosted audio by {gain_needed:.1f} dB")
+            # Ensure no clipping (peaks at -1 dBFS max)
+            if audio.max_dBFS > -1.0:
+                reduction = audio.max_dBFS + 1.0
+                audio = audio - reduction
+                logger.info(f"✓ Reduced peaks by {reduction:.1f} dB to prevent clipping")
+            # Save normalized audio
+            normalized_path = audio_path.replace(".mp3", "_normalized.mp3")
+            audio.export(normalized_path, format="mp3", bitrate="192k")
+            logger.info(f"✅ Audio normalized: {audio.dBFS:.1f} dBFS (target: -11 LUFS)")
+            # Remove original file
+            if os.path.exists(audio_path):
+                os.remove(audio_path)
+            return normalized_path
+        except ImportError:
+            logger.warning("⚠️ pydub not available, skipping audio normalization")
+            return None
+        except Exception as e:
+            logger.error(f"❌ Audio normalization failed: {e}")
+            return None
     async def download_file(self, url: str, filename: str) -> str:
         """Download file from URL to local temporary file"""
         import aiohttp
             logger.error(f"Failed to download {url}: {e}")
             raise
+    async def select_voice_for_persona(self, image_prompt: str) -> str:
+        """
+        Select appropriate voice based on image prompt/description
+        Uses Gemini to analyze the persona and select matching voice
+        Args:
+            image_prompt: Description of the person in the image
+        Returns:
+            Voice name (e.g., "en-US-Neural2-F")
+        """
+        try:
+            logger.info(f"🎭 Analyzing persona for voice selection: {image_prompt[:100]}...")
+            analysis_prompt = f"""Analyze this image description and determine the persona:
+Image Description: {image_prompt}
+Determine:
+1. Gender (male/female)
+2. Age range (young: 18-30, mature: 30-50)
+3. Style (casual/professional)
+Return ONLY valid JSON:
+{{
+    "gender": "female",
+    "age": "young",
+    "style": "casual"
+}}"""
+            model = genai.GenerativeModel("gemini-2.5-flash")
+            response = model.generate_content(analysis_prompt)
+            # Parse response
+            response_text = response.text.strip()
+            if response_text.startswith("```"):
+                response_text = response_text.split("```")[1]
+                if response_text.startswith("json"):
+                    response_text = response_text[4:]
+                response_text = response_text.strip()
+            persona = json.loads(response_text)
+            # Select voice based on persona
+            gender = persona.get("gender", "female")
+            age = persona.get("age", "young")
+            voice_key = f"{gender}_{age}"
+            selected_voice = self.voice_profiles.get(voice_key, self.voice_profiles["female_young"])
+            logger.info(f"✓ Selected voice: {selected_voice} for {gender}/{age} persona")
             return selected_voice
+        except Exception as e:
+            logger.error(f"❌ Voice selection failed: {e}, using default")
+            return self.voice_profiles["female_young"]
     async def upload_captions_to_gcs(self, captions_text: str, video_filename: str) -> Optional[str]:
         """
         Upload captions to GCS bucket with same name as video (but .txt extension)
         Args:
             captions_text: Caption text content
             video_filename: Name of the video file (e.g., "final_video_abc123.mp4")
         Returns:
             GCS signed URL of uploaded captions or None
         """
         try:
             # Create captions filename (replace .mp4 with .txt)
             captions_filename = os.path.splitext(video_filename)[0] + ".txt"
             logger.info(f"☁️ Uploading captions to GCS: {captions_filename}")
             # Save captions to temp file
             import tempfile
             temp_path = os.path.join(tempfile.gettempdir(), captions_filename)
             with open(temp_path, "w", encoding="utf-8") as f:
                 f.write(captions_text)
             blob_name = f"captions/{captions_filename}"
             blob = self.gcs_bucket.blob(blob_name)
             blob.content_type = "text/plain"
             logger.info(f"Uploading {captions_filename} to gs://{self.gcs_bucket.name}/{blob_name}")
             blob.upload_from_filename(temp_path)
             # Generate signed URL (valid for 7 days)
             from datetime import timedelta
+            captions_url = blob.generate_signed_url(version="v4", expiration=timedelta(days=7), method="GET")
             logger.info(f"✅ Captions uploaded to GCS: {captions_url[:100]}...")
             # Clean up temp file
             if os.path.exists(temp_path):
                 os.remove(temp_path)
             return captions_url
         except Exception as e:
         try:
             from google.cloud.exceptions import NotFound
             try:
                 self.gcs_bucket.exists()
                 health["gcs"] = True
             file_ext = os.path.splitext(filename)[1]
             blob.content_type = content_types.get(file_ext, "application/octet-stream")
             logger.info(f"Uploading {filename} to gs://{self.gcs_bucket.name}/{blob_name}")
             blob.upload_from_filename(file_path)
             from datetime import timedelta
+            signed_url = blob.generate_signed_url(version="v4", expiration=timedelta(days=7), method="GET")
             logger.info(f"✅ File uploaded with signed URL: {signed_url[:100]}...")
             return signed_url

src/asset_selector.py CHANGED Viewed

@@ -13,7 +13,7 @@ class AssetSelector:
         self.config = config
         self.video_library = self._load_video_library()
         self.audio_library = self._load_audio_library()
         # Track current background music index for sequential selection
         self.current_audio_index = 0
@@ -143,7 +143,7 @@ class AssetSelector:
             }}
             """
-            model = genai.GenerativeModel("gemini-2.0-flash-exp")
             response = model.generate_content(prompt)
             response_text = response.text.strip()
@@ -229,14 +229,16 @@ class AssetSelector:
         # Select current index
         selected = self.audio_library[self.current_audio_index]
-        logger.info(f"🎵 Selected background music #{self.current_audio_index + 1}/{len(self.audio_library)}: {selected}")
         # Increment index for next call (loop back to start if needed)
         self.current_audio_index = (self.current_audio_index + 1) % len(self.audio_library)
         return selected
     def reset_audio_index(self) -> None:
         """
         Reset audio index to start from beginning (useful for batch processing)
         After this call the next sequential background-music selection reads
         index 0 of the audio library again
         """
         self.current_audio_index = 0

         self.config = config
         self.video_library = self._load_video_library()
         self.audio_library = self._load_audio_library()
         # Track current background music index for sequential selection
         self.current_audio_index = 0
             }}
             """
+            model = genai.GenerativeModel("gemini-2.5-pro")
             response = model.generate_content(prompt)
             response_text = response.text.strip()
         # Select current index
         selected = self.audio_library[self.current_audio_index]
+        logger.info(
+            f"🎵 Selected background music #{self.current_audio_index + 1}/{len(self.audio_library)}: {selected}"
+        )
         # Increment index for next call (loop back to start if needed)
         self.current_audio_index = (self.current_audio_index + 1) % len(self.audio_library)
         return selected
     def reset_audio_index(self) -> None:
         """
         Reset audio index to start from beginning (useful for batch processing)
         After this call the next sequential background-music selection reads
         index 0 of the audio library again
         """
         self.current_audio_index = 0

src/automation.py CHANGED Viewed

@@ -90,6 +90,7 @@ class ContentAutomation:
         except Exception as e:
             logger.error(f"❌ Demo failed: {e}")
             import traceback
             logger.error(f"📋 Debug: {traceback.format_exc()}")
             return False
@@ -120,43 +121,33 @@ class ContentAutomation:
             # STEP 3: Render video WITHOUT audio to get exact duration
             logger.info("\n🎬 STEP 3: Render Video (Without Audio)")
-            video_no_audio_path, video_duration = await self.video_renderer.render_video_without_audio(
-                visual_assets
-            )
             logger.info(f"✅ Video rendered (no audio): {video_duration:.2f}s")
             # STEP 4: Select voice based on hook video persona
             logger.info("\n🎭 STEP 4: Select Voice for Persona")
-            selected_voice = await self.api_clients.select_voice_for_persona(
-                content_strategy.get("gemini_prompt", "")
-            )
             # STEP 5: Generate TTS with EXACT video duration and matched voice
             logger.info(f"\n🎙️ STEP 5: Generate TTS (Target: {video_duration:.2f}s, Voice: {selected_voice})")
             tts_audio = await self.api_clients.generate_tts(
-                text=tts_script,
-                duration=video_duration,
-                voice_name=selected_voice
             )
             visual_assets["tts_audio"] = tts_audio
-            logger.info(f"✅ TTS generated: {tts_audio['duration']:.2f}s at {tts_audio.get('speaking_rate', 1.0):.2f}x rate")
             # STEP 6: Select and download background music (sequential)
             logger.info("\n🎵 STEP 6: Background Music (Sequential)")
             visual_assets["background_music_url"] = self.asset_selector.select_background_music()
             await self._download_to_local(
-                visual_assets["background_music_url"],
-                "background_music.mp3",
-                visual_assets,
-                "background_music_local"
             )
             # STEP 7: Add audio to video
             logger.info("\n🔊 STEP 7: Add Audio to Video")
-            final_video_path = await self.video_renderer.add_audio_to_video(
-                video_no_audio_path,
-                visual_assets
-            )
             # STEP 8: Upload to cloud storage
             logger.info("\n☁️ STEP 8: Cloud Storage Upload")
@@ -193,6 +184,7 @@ class ContentAutomation:
             elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
             logger.error(f"\n❌ Pipeline failed after {elapsed_time:.2f}s: {e}")
             import traceback
             logger.error(traceback.format_exc())
             return {"success": False, "error": str(e), "duration": elapsed_time}
@@ -231,15 +223,13 @@ class ContentAutomation:
             image_path = await self.api_clients.generate_image(strategy["gemini_prompt"])
             if not image_path:
                 raise Exception("Image generation failed")
             # Step 2: Upload image to GCS
             image_url = await self.api_clients.store_in_gcs(image_path, "image")
             # Step 3: Generate video using gen4_turbo
             video_data = await self.api_clients.generate_video(
-                prompt=strategy["runway_prompt"],
-                image_url=image_url,
-                duration=strategy.get("duration", 3)
             )
             video_data["captions"] = captions
@@ -259,23 +249,13 @@ class ContentAutomation:
         # Download hook video
         if assets.get("hook_video") and assets["hook_video"].get("video_url"):
             download_tasks.append(
-                self._download_to_local(
-                    assets["hook_video"]["video_url"],
-                    "hook_video.mp4",
-                    assets["hook_video"]
-                )
             )
         # Download library videos
         for i, video in enumerate(assets.get("selected_videos", [])):
             if video.get("url"):
-                download_tasks.append(
-                    self._download_to_local(
-                        video["url"],
-                        f"library_video_{i}.mp4",
-                        video
-                    )
-                )
         if download_tasks:
             await asyncio.gather(*download_tasks, return_exceptions=True)

         except Exception as e:
             logger.error(f"❌ Demo failed: {e}")
             import traceback
             logger.error(f"📋 Debug: {traceback.format_exc()}")
             return False
             # STEP 3: Render video WITHOUT audio to get exact duration
             logger.info("\n🎬 STEP 3: Render Video (Without Audio)")
+            video_no_audio_path, video_duration = await self.video_renderer.render_video_without_audio(visual_assets)
             logger.info(f"✅ Video rendered (no audio): {video_duration:.2f}s")
             # STEP 4: Select voice based on hook video persona
             logger.info("\n🎭 STEP 4: Select Voice for Persona")
+            selected_voice = await self.api_clients.select_voice_for_persona(content_strategy.get("gemini_prompt", ""))
             # STEP 5: Generate TTS with EXACT video duration and matched voice
             logger.info(f"\n🎙️ STEP 5: Generate TTS (Target: {video_duration:.2f}s, Voice: {selected_voice})")
             tts_audio = await self.api_clients.generate_tts(
+                text=tts_script, duration=video_duration, voice_name=selected_voice
             )
             visual_assets["tts_audio"] = tts_audio
+            logger.info(
+                f"✅ TTS generated: {tts_audio['duration']:.2f}s at {tts_audio.get('speaking_rate', 1.0):.2f}x rate"
+            )
             # STEP 6: Select and download background music (sequential)
             logger.info("\n🎵 STEP 6: Background Music (Sequential)")
             visual_assets["background_music_url"] = self.asset_selector.select_background_music()
             await self._download_to_local(
+                visual_assets["background_music_url"], "background_music.mp3", visual_assets, "background_music_local"
             )
             # STEP 7: Add audio to video
             logger.info("\n🔊 STEP 7: Add Audio to Video")
+            final_video_path = await self.video_renderer.add_audio_to_video(video_no_audio_path, visual_assets)
             # STEP 8: Upload to cloud storage
             logger.info("\n☁️ STEP 8: Cloud Storage Upload")
             elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
             logger.error(f"\n❌ Pipeline failed after {elapsed_time:.2f}s: {e}")
             import traceback
             logger.error(traceback.format_exc())
             return {"success": False, "error": str(e), "duration": elapsed_time}
             image_path = await self.api_clients.generate_image(strategy["gemini_prompt"])
             if not image_path:
                 raise Exception("Image generation failed")
             # Step 2: Upload image to GCS
             image_url = await self.api_clients.store_in_gcs(image_path, "image")
             # Step 3: Generate video using gen4_turbo
             video_data = await self.api_clients.generate_video(
+                prompt=strategy["runway_prompt"], image_url=image_url, duration=strategy.get("duration", 3)
             )
             video_data["captions"] = captions
         # Download hook video
         if assets.get("hook_video") and assets["hook_video"].get("video_url"):
             download_tasks.append(
+                self._download_to_local(assets["hook_video"]["video_url"], "hook_video.mp4", assets["hook_video"])
             )
         # Download library videos
         for i, video in enumerate(assets.get("selected_videos", [])):
             if video.get("url"):
+                download_tasks.append(self._download_to_local(video["url"], f"library_video_{i}.mp4", video))
         if download_tasks:
             await asyncio.gather(*download_tasks, return_exceptions=True)

src/main.py CHANGED Viewed

@@ -319,7 +319,7 @@ async def main():
             print("✅ PIPELINE COMPLETED SUCCESSFULLY")
             print("=" * 70)
             print(f"\n📹 Final Video URL: {result['final_url']}")
-            if result.get('captions_url'):
                 print(f"📝 Captions URL (GCS): {result['captions_url']}")
             print(f"🎭 Voice Used: {result.get('voice_used', 'N/A')}")
             print(f"⏱️  Video Duration: {result.get('video_duration', 0):.2f}s")

             print("✅ PIPELINE COMPLETED SUCCESSFULLY")
             print("=" * 70)
             print(f"\n📹 Final Video URL: {result['final_url']}")
+            if result.get("captions_url"):
                 print(f"📝 Captions URL (GCS): {result['captions_url']}")
             print(f"🎭 Voice Used: {result.get('voice_used', 'N/A')}")
             print(f"⏱️  Video Duration: {result.get('video_duration', 0):.2f}s")

src/video_renderer.py CHANGED Viewed

@@ -29,6 +29,7 @@ import textwrap
 from utils import logger
 import time
 class VideoRenderer:
     def __init__(self, config: Dict):
         self.config = config
@@ -38,7 +39,7 @@ class VideoRenderer:
     async def render_video_without_audio(self, assets: Dict, video_config: Optional[Dict] = None) -> tuple[str, float]:
         """
         Render video composition WITHOUT audio first to get exact duration
         Returns:
             tuple: (video_path, video_duration)
         """
@@ -76,11 +77,11 @@ class VideoRenderer:
     async def add_audio_to_video(self, video_path: str, assets: Dict) -> str:
         """
         Add audio track to pre-rendered video (NO speedup - video is already correct duration)
         Args:
             video_path: Path to video file without audio
             assets: Dictionary containing audio assets (tts_audio, background_music_local)
         Returns:
             Path to final video with audio
         """
@@ -89,20 +90,20 @@ class VideoRenderer:
             # Load the video
             video_clip = VideoFileClip(video_path)
             # Prepare audio clips
             audio_clips = await self._prepare_audio_clips(assets, video_clip.duration)
             # Add audio track
             video_with_audio = await self._add_audio_track(video_clip, audio_clips)
             output_path = await self.render_video_final(video_with_audio)
             # Cleanup
             video_clip.close()
             if video_with_audio != video_clip:
                 video_with_audio.close()
             logger.info(f"✅ Final video with audio: {output_path}")
             return output_path
@@ -114,21 +115,15 @@ class VideoRenderer:
         """Render final video clip to file"""
         try:
             output_path = self.temp_dir / f"final_video_{int(time.time())}.mp4"
-            video_clip.write_videofile(
-                str(output_path),
-                codec="libx264",
-                audio_codec="aac",
-                verbose=False,
-                logger=None
-            )
             video_clip.close()
             return str(output_path)
         except Exception as e:
             logger.error(f"Final video render failed: {e}")
-            if 'video_clip' in locals():
                 video_clip.close()
             raise
@@ -151,7 +146,7 @@ class VideoRenderer:
                 # Calculate segment positions
                 # For an 8s video: use 6.5-8s for start, 0-1.5s for end
                 start_segment_begin = max(0, hook_duration - HOOK_SEGMENT_DURATION)  # Last 1.5s
                 # Second half for beginning (last 1.5 seconds of hook video)
                 hook_start = hook_clip.subclip(start_segment_begin, hook_duration)
                 clips.append(("hook_start", hook_start))
@@ -210,7 +205,7 @@ class VideoRenderer:
             # Hook segments should now be exactly 1.5 seconds
             HOOK_DURATION = 1.5
             for clip in video_clips:
                 if abs(clip.duration - HOOK_DURATION) < 0.2:  # Hook segments (~1.5s with tolerance)
                     if hook_start is None:
@@ -220,19 +215,21 @@ class VideoRenderer:
                 else:
                     library_clips.append(clip)
-            logger.info(f"✓ Identified: hook_start={hook_start.duration if hook_start else 0:.2f}s, "
-                       f"hook_end={hook_end.duration if hook_end else 0:.2f}s, "
-                       f"library_clips={len(library_clips)}")
             # Calculate current library duration
             library_duration = sum(clip.duration for clip in library_clips)
             hook_total = (hook_start.duration if hook_start else 0) + (hook_end.duration if hook_end else 0)
             logger.info(f"📊 Hook total: {hook_total:.2f}s, Library total: {library_duration:.2f}s")
             # Target middle section duration (11-12s total - 3s hook = 8-9s middle)
             target_middle_duration = TARGET_MIN_DURATION - hook_total
             logger.info(f"🎯 Target middle section: {target_middle_duration:.2f}s")
             # Adjust library clips to reach target middle duration
@@ -272,7 +269,9 @@ class VideoRenderer:
             # Calculate total duration
             total_duration = sum(clip.duration for clip in sequence_clips)
-            logger.info(f"📊 Total video sequence duration: {total_duration:.2f}s (target: {TARGET_MIN_DURATION}-{TARGET_MAX_DURATION}s)")
             # Resize all clips to 9:16 vertical
             target_size = (1080, 1920)
@@ -300,11 +299,13 @@ class VideoRenderer:
                     if tts_clip.duration > 0:
                         # Trim or extend TTS to match video duration
                         if tts_clip.duration > target_duration:
-                            logger.info(f"⚠️ TTS longer than video, trimming: {tts_clip.duration:.2f}s -> {target_duration:.2f}s")
                             tts_clip = tts_clip.subclip(0, target_duration)
                         elif tts_clip.duration < target_duration:
                             logger.info(f"⚠️ TTS shorter than video: {tts_clip.duration:.2f}s < {target_duration:.2f}s")
                         clips.append(("tts", tts_clip))
                         logger.info(f"✓ Loaded TTS audio at FULL volume: {tts_clip.duration:.2f}s")
                     else:
@@ -313,7 +314,7 @@ class VideoRenderer:
                 except Exception as e:
                     logger.error(f"❌ Failed to load TTS audio: {e}")
-            # Load background music - VERY LOW volume
             if assets.get("background_music_local"):
                 try:
                     bg_clip = AudioFileClip(assets["background_music_local"])
@@ -322,10 +323,10 @@ class VideoRenderer:
                         if bg_clip.duration > target_duration:
                             bg_clip = bg_clip.subclip(0, target_duration)
                             logger.info(f"✓ Trimmed background music to {target_duration:.2f}s")
-                        # Reduce volume significantly
-                        bg_clip = bg_clip.volumex(0.08)
                         clips.append(("background", bg_clip))
-                        logger.info(f"✓ Loaded background music at 8% volume: {bg_clip.duration:.2f}s")
                     else:
                         logger.warning("⚠️ Background music has zero duration")
                         bg_clip.close()
@@ -371,13 +372,13 @@ class VideoRenderer:
         try:
             valid_audio_clips = [clip for clip in audio_clips if clip.duration > 0]
             if not valid_audio_clips:
                 return video_clip
             mixed_audio = CompositeAudioClip(valid_audio_clips)
             video_with_audio = video_clip.set_audio(mixed_audio)
             logger.info(f"✅ Added audio track")
             return video_with_audio
@@ -485,6 +486,7 @@ class VideoRenderer:
     def _split_script_into_words(self, script: str) -> List[str]:
         """Split script into individual words"""
         import re
         script = re.sub(r"\s+", " ", script).strip()
         return script.split()
@@ -514,14 +516,9 @@ class VideoRenderer:
         try:
             logger.info(f"📹 Rendering video (no audio): {filename}")
             video_clip.write_videofile(
-                str(output_path),
-                codec="libx264",
-                fps=24,
-                verbose=False,
-                logger=None,
-                audio=False  # No audio
             )
             return str(output_path)
@@ -583,6 +580,7 @@ class VideoRenderer:
             try:
                 import librosa
                 import soundfile as sf
                 has_librosa = True
             except ImportError:
                 has_librosa = False
@@ -634,7 +632,7 @@ class VideoRenderer:
         """Clean up temporary video/audio clips"""
         for clip in clips:
             try:
-                if hasattr(clip, 'close'):
                     clip.close()
             except Exception as e:
                 # Silently ignore cleanup errors
@@ -644,7 +642,8 @@ class VideoRenderer:
         """Cleanup on destruction"""
         try:
             import shutil
-            if hasattr(self, 'temp_dir') and self.temp_dir.exists():
                 shutil.rmtree(self.temp_dir, ignore_errors=True)
         except Exception:
             # Silently ignore cleanup errors

 from utils import logger
 import time
 class VideoRenderer:
     def __init__(self, config: Dict):
         self.config = config
     async def render_video_without_audio(self, assets: Dict, video_config: Optional[Dict] = None) -> tuple[str, float]:
         """
         Render video composition WITHOUT audio first to get exact duration
         Returns:
             tuple: (video_path, video_duration)
         """
     async def add_audio_to_video(self, video_path: str, assets: Dict) -> str:
         """
         Add audio track to pre-rendered video (NO speedup - video is already correct duration)
         Args:
             video_path: Path to video file without audio
             assets: Dictionary containing audio assets (tts_audio, background_music_local)
         Returns:
             Path to final video with audio
         """
             # Load the video
             video_clip = VideoFileClip(video_path)
             # Prepare audio clips
             audio_clips = await self._prepare_audio_clips(assets, video_clip.duration)
             # Add audio track
             video_with_audio = await self._add_audio_track(video_clip, audio_clips)
             output_path = await self.render_video_final(video_with_audio)
             # Cleanup
             video_clip.close()
             if video_with_audio != video_clip:
                 video_with_audio.close()
             logger.info(f"✅ Final video with audio: {output_path}")
             return output_path
         """Render final video clip to file"""
         try:
             output_path = self.temp_dir / f"final_video_{int(time.time())}.mp4"
+            video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", verbose=False, logger=None)
             video_clip.close()
             return str(output_path)
         except Exception as e:
             logger.error(f"Final video render failed: {e}")
+            if "video_clip" in locals():
                 video_clip.close()
             raise
                 # Calculate segment positions
                 # For an 8s video: use 6.5-8s for start, 0-1.5s for end
                 start_segment_begin = max(0, hook_duration - HOOK_SEGMENT_DURATION)  # Last 1.5s
                 # Second half for beginning (last 1.5 seconds of hook video)
                 hook_start = hook_clip.subclip(start_segment_begin, hook_duration)
                 clips.append(("hook_start", hook_start))
             # Hook segments should now be exactly 1.5 seconds
             HOOK_DURATION = 1.5
             for clip in video_clips:
                 if abs(clip.duration - HOOK_DURATION) < 0.2:  # Hook segments (~1.5s with tolerance)
                     if hook_start is None:
                 else:
                     library_clips.append(clip)
+            logger.info(
+                f"✓ Identified: hook_start={hook_start.duration if hook_start else 0:.2f}s, "
+                f"hook_end={hook_end.duration if hook_end else 0:.2f}s, "
+                f"library_clips={len(library_clips)}"
+            )
             # Calculate current library duration
             library_duration = sum(clip.duration for clip in library_clips)
             hook_total = (hook_start.duration if hook_start else 0) + (hook_end.duration if hook_end else 0)
             logger.info(f"📊 Hook total: {hook_total:.2f}s, Library total: {library_duration:.2f}s")
             # Target middle section duration (11-12s total - 3s hook = 8-9s middle)
             target_middle_duration = TARGET_MIN_DURATION - hook_total
             logger.info(f"🎯 Target middle section: {target_middle_duration:.2f}s")
             # Adjust library clips to reach target middle duration
             # Calculate total duration
             total_duration = sum(clip.duration for clip in sequence_clips)
+            logger.info(
+                f"📊 Total video sequence duration: {total_duration:.2f}s (target: {TARGET_MIN_DURATION}-{TARGET_MAX_DURATION}s)"
+            )
             # Resize all clips to 9:16 vertical
             target_size = (1080, 1920)
                     if tts_clip.duration > 0:
                         # Trim or extend TTS to match video duration
                         if tts_clip.duration > target_duration:
+                            logger.info(
+                                f"⚠️ TTS longer than video, trimming: {tts_clip.duration:.2f}s -> {target_duration:.2f}s"
+                            )
                             tts_clip = tts_clip.subclip(0, target_duration)
                         elif tts_clip.duration < target_duration:
                             logger.info(f"⚠️ TTS shorter than video: {tts_clip.duration:.2f}s < {target_duration:.2f}s")
                         clips.append(("tts", tts_clip))
                         logger.info(f"✓ Loaded TTS audio at FULL volume: {tts_clip.duration:.2f}s")
                     else:
                 except Exception as e:
                     logger.error(f"❌ Failed to load TTS audio: {e}")
+            # Load background music - INCREASED volume for better presence
             if assets.get("background_music_local"):
                 try:
                     bg_clip = AudioFileClip(assets["background_music_local"])
                         if bg_clip.duration > target_duration:
                             bg_clip = bg_clip.subclip(0, target_duration)
                             logger.info(f"✓ Trimmed background music to {target_duration:.2f}s")
+                        # Increase volume from 8% to 25% for better audibility
+                        bg_clip = bg_clip.volumex(0.25)
                         clips.append(("background", bg_clip))
+                        logger.info(f"✓ Loaded background music at 25% volume: {bg_clip.duration:.2f}s")
                     else:
                         logger.warning("⚠️ Background music has zero duration")
                         bg_clip.close()
         try:
             valid_audio_clips = [clip for clip in audio_clips if clip.duration > 0]
             if not valid_audio_clips:
                 return video_clip
             mixed_audio = CompositeAudioClip(valid_audio_clips)
             video_with_audio = video_clip.set_audio(mixed_audio)
             logger.info(f"✅ Added audio track")
             return video_with_audio
     def _split_script_into_words(self, script: str) -> List[str]:
         """Split script into individual words"""
         import re
         script = re.sub(r"\s+", " ", script).strip()
         return script.split()
         try:
             logger.info(f"📹 Rendering video (no audio): {filename}")
             video_clip.write_videofile(
+                str(output_path), codec="libx264", fps=24, verbose=False, logger=None, audio=False  # No audio
             )
             return str(output_path)
             try:
                 import librosa
                 import soundfile as sf
                 has_librosa = True
             except ImportError:
                 has_librosa = False
         """Clean up temporary video/audio clips"""
         for clip in clips:
             try:
+                if hasattr(clip, "close"):
                     clip.close()
             except Exception as e:
                 # Silently ignore cleanup errors
         """Cleanup on destruction"""
         try:
             import shutil
+            if hasattr(self, "temp_dir") and self.temp_dir.exists():
                 shutil.rmtree(self.temp_dir, ignore_errors=True)
         except Exception:
             # Silently ignore cleanup errors