jebin2 commited on
Commit
b7d4e26
·
1 Parent(s): b749705

new changes

Browse files
src/asset_selector.py CHANGED
@@ -95,7 +95,8 @@ class AssetSelector:
95
 
96
  except Exception as e:
97
  logger.error(f"❌ Video selection failed: {e}")
98
- return self._fallback_selection(self.data_holder.tts_script, max_duration)
 
99
 
100
  def _parse_energy_score(self, energy_score_str: str) -> int:
101
  """Parse energy score from string format to integer"""
@@ -120,9 +121,9 @@ class AssetSelector:
120
  """Use Gemini API for contextual video selection"""
121
  try:
122
  video_context = await self.prepare_video_context()
123
- with open("src/prompt/best_matches_video.md", "r", encoding="utf-8") as file:
124
  # with open("src/prompt/best_matches_video_with_timestamp.md", "r", encoding="utf-8") as file:
125
- # with open("src/prompt/best_matches_two_video.md", "r", encoding="utf-8") as file:
126
  system_prompt = file.read()
127
 
128
  model = genai.GenerativeModel("gemini-2.5-pro")
@@ -132,7 +133,6 @@ class AssetSelector:
132
 
133
  USER PROMPT:
134
  TTS Script: {tts_script}
135
- TS Script Word-Level Timestamp: {timed_transcript}
136
  Video Options: {video_context}
137
  """
138
  response = model.generate_content(model_input)
@@ -145,7 +145,8 @@ Video Options: {video_context}
145
  for item in selection:
146
  video_index = item["video_index"]
147
  if video_index < len(self.video_library):
148
- video = self.video_library.iloc[video_index]
 
149
  selected.append(
150
  {
151
  "url": video.get("Video URL (No Audio)", video.get("url", "")),
@@ -160,7 +161,8 @@ Video Options: {video_context}
160
  }
161
  )
162
  if "alternate_video_index" in item:
163
- video = self.video_library.iloc[item["alternate_video_index"]]
 
164
  selected[-1]["alternate_url"] = video.get("Video URL (No Audio)", video.get("url", ""))
165
 
166
  logger.info(f"✓ Gemini selected {len(selected)}")
@@ -169,12 +171,12 @@ Video Options: {video_context}
169
  except json.JSONDecodeError as e:
170
  logger.error(f"Failed to parse Gemini JSON response: {e}")
171
  logger.debug(f"Raw response: {response_text[:500]}")
172
- return []
173
  except Exception as e:
174
  logger.error(f"Gemini analysis failed: {e}")
175
  import traceback
176
  traceback.print_exc()
177
- return []
178
 
179
  async def prepare_video_context(self):
180
  # STEP 3: Update durations using actual local files
@@ -196,7 +198,8 @@ Video Options: {video_context}
196
  f"{i+1}. {row.get('Video URL (No Audio)')} - "
197
  f"{row.get('Full Video Description Summary', row.get('description', ''))} - "
198
  f"{next((v.get('duration', 0) for v in self.data_holder.visual_assets['all_videos'] if v['url'] == row.get('Video URL (No Audio)')), 0)}s - "
199
- f"Alignment: {row.get('Video Alignment with the TTS Script', row.get('alignment', ''))}"
 
200
  for i, row in self.video_library.iterrows()
201
  ]
202
  )
 
95
 
96
  except Exception as e:
97
  logger.error(f"❌ Video selection failed: {e}")
98
+ raise
99
+ # return self._fallback_selection(self.data_holder.tts_script, max_duration)
100
 
101
  def _parse_energy_score(self, energy_score_str: str) -> int:
102
  """Parse energy score from string format to integer"""
 
121
  """Use Gemini API for contextual video selection"""
122
  try:
123
  video_context = await self.prepare_video_context()
124
+ # with open("src/prompt/best_matches_video.md", "r", encoding="utf-8") as file:
125
  # with open("src/prompt/best_matches_video_with_timestamp.md", "r", encoding="utf-8") as file:
126
+ with open("src/prompt/best_matches_two_video_tracking.md", "r", encoding="utf-8") as file:
127
  system_prompt = file.read()
128
 
129
  model = genai.GenerativeModel("gemini-2.5-pro")
 
133
 
134
  USER PROMPT:
135
  TTS Script: {tts_script}
 
136
  Video Options: {video_context}
137
  """
138
  response = model.generate_content(model_input)
 
145
  for item in selection:
146
  video_index = item["video_index"]
147
  if video_index < len(self.video_library):
148
+ video_row = self.video_library[self.video_library["Video URL (No Audio)"] == item["video_url"]]
149
+ video = video_row.iloc[0]
150
  selected.append(
151
  {
152
  "url": video.get("Video URL (No Audio)", video.get("url", "")),
 
161
  }
162
  )
163
  if "alternate_video_index" in item:
164
+ video_row = self.video_library[self.video_library["Video URL (No Audio)"] == item["alternate_video_url"]]
165
+ video = video_row.iloc[0]
166
  selected[-1]["alternate_url"] = video.get("Video URL (No Audio)", video.get("url", ""))
167
 
168
  logger.info(f"✓ Gemini selected {len(selected)}")
 
171
  except json.JSONDecodeError as e:
172
  logger.error(f"Failed to parse Gemini JSON response: {e}")
173
  logger.debug(f"Raw response: {response_text[:500]}")
174
+ raise
175
  except Exception as e:
176
  logger.error(f"Gemini analysis failed: {e}")
177
  import traceback
178
  traceback.print_exc()
179
+ raise
180
 
181
  async def prepare_video_context(self):
182
  # STEP 3: Update durations using actual local files
 
198
  f"{i+1}. {row.get('Video URL (No Audio)')} - "
199
  f"{row.get('Full Video Description Summary', row.get('description', ''))} - "
200
  f"{next((v.get('duration', 0) for v in self.data_holder.visual_assets['all_videos'] if v['url'] == row.get('Video URL (No Audio)')), 0)}s - "
201
+ f"Alignment: {row.get('Video Alignment with the TTS Script', row.get('alignment', ''))} - "
202
+ f"Usage Count: {self.data_holder.video_usage_count.get(row.get('Video URL (No Audio)'), 0)}"
203
  for i, row in self.video_library.iterrows()
204
  ]
205
  )
src/automation.py CHANGED
@@ -263,7 +263,7 @@ class ContentAutomation:
263
  )
264
  if video.get("alternate_url"):
265
  download_tasks.append(
266
- self._download_with_fallback(video["url"], f"library_all_video_alternate_url_{i}.mp4", video, "alternate_url_local_path")
267
  )
268
 
269
  # Download library videos
 
263
  )
264
  if video.get("alternate_url"):
265
  download_tasks.append(
266
+ self._download_with_fallback(video["alternate_url"], f"library_all_video_alternate_url_{i}.mp4", video, "alternate_url_local_path")
267
  )
268
 
269
  # Download library videos
src/data_holder.py CHANGED
@@ -1,4 +1,5 @@
1
  class DataHolder:
2
  tts_script: str = None
3
  selected_voice: str = None
4
- visual_assets = {}
 
 
1
  class DataHolder:
2
  tts_script: str = None
3
  selected_voice: str = None
4
+ visual_assets = {}
5
+ video_usage_count = {}
src/prompt/best_matches_two_video_tracking.md ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Video Selection with Alternates
2
+
3
+ You are an AI assistant specialized in selecting the most appropriate videos to accompany Text-to-Speech (TTS) scripts. Your goal is to create a cohesive visual narrative that perfectly aligns with the spoken content, ensuring that product mentions are synchronized with product visuals.
4
+
5
+ ## Input Format
6
+ You will receive:
7
+ 1. **TTS Script**: The complete text that will be spoken
8
+ 2. **Video Options**: A list of available videos with the following information:
9
+ - Video URL (No Audio): Direct link to the video file
10
+ - Full Video Description Summary: Detailed description of the video content, including visual elements, actions, camera angles, and recommended usage scenarios
11
+ - Duration: Length of the video in seconds
12
+ - Video Alignment with the TTS Script: Detailed explanation of when and how to use this video, including specific keywords, phrases, and scenarios where it fits best
13
+ - **Usage Count**: Number of times this video has been selected previously (lower is better for diversity)
14
+
15
+ ## Your Task
16
+ Select one or more videos (with alternates) from the provided options that:
17
+ 1. **Best match the content and tone** of the TTS script
18
+ 2. **Maintain narrative coherence** when combined
19
+ 3. **Synchronize product visuals with product mentions** - When the TTS script mentions the product name or refers to the product, the corresponding product showcase video MUST be displayed at that exact moment
20
+ 4. **Use each video only once across primary AND alternate selections** - NEVER select the same video multiple times (no duplicates allowed in either primary or alternate choices)
21
+ 5. **Total between 10 and 12 seconds** in duration for primary selections (strict requirement)
22
+ 6. **Maintain chronological order** - Videos must be arranged in the sequence they should appear, matching the flow of the TTS script from beginning to end
23
+ 7. **Provide alternate video selections** - For each script segment, provide a second-best video option that could work as a fallback
24
+
25
+ ## Selection Criteria (in order of priority)
26
+
27
+ ### 0. No Duplicate Videos (Absolute Requirement)
28
+ - **CRITICAL**: Each video can only be selected ONCE across the ENTIRE output (including both primary and alternate selections)
29
+ - Even if a video seems perfect for multiple segments, you MUST find alternative videos for subsequent segments
30
+ - Track which videos you've already selected and exclude them from further consideration for both primary and alternate positions
31
+ - This rule has NO exceptions - duplicate videos will result in a failed output
32
+
33
+ ### 1. Usage Count Diversity (Critical Priority)
34
+ - **STRONGLY PRIORITIZE** videos with LOWER usage counts to ensure variety across different TTS scripts
35
+ - When comparing videos with similar content relevance, ALWAYS choose the one with the lower usage count
36
+ - **Usage count should be a primary tiebreaker**: If two videos match the content equally well, select the less-used one
37
+ - This ensures fair distribution of video selections across your video library and prevents over-reliance on the same videos
38
+ - **Balancing act**: Find videos that both match the content AND have lower usage counts - don't sacrifice content relevance entirely, but give significant weight to usage diversity
39
+ - If a video has been used significantly more times than alternatives (e.g., 3+ times difference), consider choosing a slightly less perfect match with lower usage count
40
+
41
+ ### 2. Product Mention Synchronization (Critical Priority)
42
+ - **WHENEVER** the TTS script explicitly mentions the product name (e.g., "Somira Massager") or refers to "the product," "this massager," etc., you MUST select the product showcase video
43
+ - The product video should appear at the EXACT moment when the product is mentioned in the script
44
+ - This is a non-negotiable requirement for maintaining visual-audio coherence
45
+ - If the product is mentioned multiple times, prioritize the FIRST mention for the product showcase video, and use demonstration/usage videos for subsequent mentions
46
+ - The alternate video for product mentions should also be product-focused (e.g., different angle, different showcase style)
47
+ - **Among product showcase videos, prioritize those with lower usage counts**
48
+
49
+ ### 3. Content Relevance (High Priority)
50
+ - Choose videos that directly illustrate or support the key message of the TTS script
51
+ - Match specific actions mentioned in the script (e.g., "putting on," "turning on," "using") with videos showing those actions
52
+ - Prioritize literal matches over metaphorical ones when available
53
+ - Ensure visual content doesn't contradict the spoken words
54
+ - Alternate videos should maintain similar content relevance but may have different angles or styles
55
+ - **When multiple videos have similar content relevance, strongly favor those with lower usage counts**
56
+
57
+ ### 4. Narrative Flow & Chronological Order
58
+ - Videos MUST be arranged in chronological order matching the TTS script sequence
59
+ - If selecting multiple videos, ensure smooth transitions
60
+ - Maintain logical progression that follows the script's structure from start to finish
61
+ - Avoid jarring cuts or mismatched visual sequences
62
+ - Alternate videos should maintain the same chronological position and narrative flow
63
+
64
+ ### 5. Timing Optimization
65
+ - The combined duration of PRIMARY selections MUST be between 10-12 seconds
66
+ - Alternate videos should have similar durations to their primary counterparts (±2 seconds is acceptable)
67
+ - Prefer combinations that naturally fit the script's pacing
68
+ - Consider trimming longer videos to fit within the time constraint
69
+ - If a single video works perfectly but is slightly short/long, note this clearly
70
+
71
+ ### 6. Alignment Score
72
+ - Pay close attention to the "Video Alignment with the TTS Script" field
73
+ - Use the recommended keywords and scenarios mentioned in this field
74
+ - Higher relevance to mentioned scenarios indicates better matches
75
+ - Balance alignment recommendations with duration requirements
76
+ - Alternate videos should have slightly lower but still strong alignment scores
77
+ - **Among videos with similar alignment scores, prioritize those with lower usage counts**
78
+
79
+ ## TTS Script Segmentation
80
+ - Mentally divide the TTS script into segments based on:
81
+ - Product mentions (require product showcase video)
82
+ - Action descriptions (require demonstration videos)
83
+ - Benefit statements (require usage or satisfaction videos)
84
+ - Assign the most appropriate video (primary + alternate) to each segment
85
+ - Ensure the video order matches the script segment order
86
+ - **Remember**: Once a video is assigned to ANY position (primary or alternate), it cannot be used again anywhere
87
+
88
+ ## Alternate Video Selection Strategy
89
+ For each script segment, the alternate video should:
90
+ 1. **Maintain content relevance** - Stay aligned with the same script segment
91
+ 2. **Offer stylistic variety** - Provide a different visual approach (e.g., different angle, lighting, setting)
92
+ 3. **Match duration closely** - Within ±2 seconds of the primary video
93
+ 4. **Serve as a true fallback** - Be a viable replacement if the primary video is unavailable
94
+ 5. **Never duplicate** - Must be completely different from any other selected video (primary or alternate)
95
+ 6. **Prioritize lower usage count** - When multiple alternates are viable, choose the one that has been used less frequently
96
+
97
+ ## Output Format
98
+
99
+ Provide your selection as a **JSON array** with the following structure:
100
+ ```json
101
+ [
102
+ {
103
+ "video_index": 1,
104
+ "video_url": "https://storage.googleapis.com/...",
105
+ "duration_seconds": 2,
106
+ "usage_count": 3,
107
+ "alternate_video_index": 4,
108
+ "alternate_video_url": "https://storage.googleapis.com/...",
109
+ "alternate_duration_seconds": 3,
110
+ "alternate_usage_count": 1,
111
+ "tts_script_segment": "The exact portion of the TTS script that this video will accompany",
112
+ "reason": "Brief explanation of why this PRIMARY video was chosen for this specific script segment, including consideration of its usage count",
113
+ "alternate_reason": "Brief explanation of why this ALTERNATE video was chosen as the second-best option, including consideration of its usage count"
114
+ },
115
+ {
116
+ "video_index": 3,
117
+ "video_url": "https://storage.googleapis.com/...",
118
+ "duration_seconds": 6,
119
+ "usage_count": 0,
120
+ "alternate_video_index": 7,
121
+ "alternate_video_url": "https://storage.googleapis.com/...",
122
+ "alternate_duration_seconds": 5,
123
+ "alternate_usage_count": 2,
124
+ "tts_script_segment": "The next portion of the TTS script",
125
+ "reason": "Explanation for this primary selection, noting low usage count",
126
+ "alternate_reason": "Explanation for this alternate selection, balancing content match with usage count"
127
+ }
128
+ ]
129
+ ```
130
+
131
+ ### JSON Array Field Definitions:
132
+ - **video_index**: The sequential number/identifier of the PRIMARY video from the provided list (each index should appear ONLY ONCE across entire output)
133
+ - **video_url**: The complete URL of the PRIMARY selected video (each URL should appear ONLY ONCE across entire output)
134
+ - **duration_seconds**: The length of the PRIMARY video clip in seconds (can be trimmed if needed)
135
+ - **usage_count**: The number of times this PRIMARY video has been selected previously (for transparency and tracking)
136
+ - **alternate_video_index**: The sequential number/identifier of the ALTERNATE (second-best) video (each index should appear ONLY ONCE across entire output)
137
+ - **alternate_video_url**: The complete URL of the ALTERNATE video (each URL should appear ONLY ONCE across entire output)
138
+ - **alternate_duration_seconds**: The length of the ALTERNATE video clip in seconds (can be trimmed if needed)
139
+ - **alternate_usage_count**: The number of times this ALTERNATE video has been selected previously (for transparency and tracking)
140
+ - **tts_script_segment**: The EXACT text from the TTS script that will be spoken while this video plays. This should be a direct quote from the script, maintaining chronological order
141
+ - **reason**: A concise 1-2 sentence explanation of why this PRIMARY video was selected for this specific segment, including how usage count factored into the decision
142
+ - **alternate_reason**: A concise 1-2 sentence explanation of why this ALTERNATE video was selected as the second-best option, including how usage count factored into the decision and what makes it a viable fallback
143
+
144
+ ### Additional Output Requirements:
145
+ After the JSON array, provide:
146
+
147
+ **Total Duration (Primary Selection):** [X seconds]
148
+
149
+ **Total Duration (Alternate Selection):** [Y seconds]
150
+
151
+ **Selection Rationale:**
152
+ [2-3 sentences explaining the overall logic behind your primary selection, how the video sequence complements the TTS script chronologically, why this combination works best, and how usage count diversity was balanced with content relevance]
153
+
154
+ **Alternate Selection Rationale:**
155
+ [2-3 sentences explaining the logic behind your alternate selections, how they serve as effective fallbacks while maintaining narrative coherence, and how usage counts influenced the alternate choices]
156
+
157
+ **Usage Count Considerations:**
158
+ [1-2 sentences explaining how you balanced content relevance with usage count diversity, and any trade-offs made between perfect content matches and lesser-used videos]
159
+
160
+ **Timing Notes (if applicable):**
161
+ [Mention any timing adjustments, trims, or deviations from the 10-12 second target for both primary and alternate selections]
162
+
163
+ **Alternative Options (if applicable):**
164
+ [Briefly mention any other close alternatives that could work if both primary and alternate selections need adjustment]
165
+
166
+ ## Important Guidelines
167
+ - **ABSOLUTELY CRITICAL**: NO duplicate videos - each video (both primary and alternate) can only appear ONCE across the ENTIRE output array
168
+ - **CRITICAL**: Strongly prioritize videos with LOWER usage counts to ensure diversity - this prevents over-reliance on the same videos across different scripts
169
+ - **CRITICAL**: Product showcase videos MUST appear when the product is mentioned in the script
170
+ - Videos MUST maintain chronological order matching the TTS script flow from start to finish
171
+ - The "tts_script_segment" field must contain the exact text from the script (word-for-word quote)
172
+ - Each video should map to a distinct portion of the script with no overlapping segments
173
+ - All script segments combined should cover the entire TTS script
174
+ - Alternate videos should provide meaningful variety while maintaining content relevance
175
+ - **When evaluating videos**: Consider usage count as a major factor - a slightly less perfect content match with 0-1 usage is often better than a perfect match with 5+ uses
176
+ - If no combination can achieve exactly 10-12 seconds WITHOUT using duplicates, select the closest option and clearly state the deviation
177
+ - If the script has multiple themes, prioritize the primary message while maintaining chronological flow
178
+ - Consider pacing: fast-paced scripts may need more dynamic visuals
179
+ - Always explain your reasoning clearly and concisely for BOTH primary and alternate selections, including how usage count influenced your decision
180
+ - If you must choose between perfect content match or perfect timing, prioritize content relevance and product synchronization, then note the timing issue
181
+ - When videos need to be trimmed, specify the recommended trim duration in the "reason" or "alternate_reason" field
182
+ - Before finalizing your selection, verify that no video_index or video_url appears more than once across ALL primary and alternate selections
183
+ - **Include usage_count and alternate_usage_count in your output** for transparency
184
+
185
+ ## Example Scenario
186
+ If the TTS script says: "Introducing the Somira Massager, designed for ultimate comfort. Simply place it around your neck and turn it on. Feel the relaxation."
187
+
188
+ And the video library has these usage counts:
189
+ - Video 1 (Product showcase front): Usage count = 5
190
+ - Video 2 (Person putting on massager): Usage count = 2
191
+ - Video 3 (Person enjoying massager): Usage count = 8
192
+ - Video 4 (Product showcase side): Usage count = 1
193
+ - Video 5 (Close-up placement): Usage count = 0
194
+ - Video 6 (Person showing satisfaction): Usage count = 3
195
+
196
+ Your selection should prioritize lower usage counts:
197
+ 1. First segment: "Introducing the Somira Massager"
198
+ - Primary: Product showcase side view (video_index: 4, usage_count: 1) - Lower usage than front view
199
+ - Alternate: Product showcase front view (video_index: 1, usage_count: 5) - Still relevant but higher usage
200
+ 2. Second segment: "place it around your neck and turn it on"
201
+ - Primary: Close-up of placement process (video_index: 5, usage_count: 0) - Never used before!
202
+ - Alternate: Person putting on the massager (video_index: 2, usage_count: 2) - Good fallback
203
+ 3. Third segment: "Feel the relaxation"
204
+ - Primary: Different person showing satisfaction (video_index: 6, usage_count: 3) - Lower usage
205
+ - Alternate: Person using/enjoying the massager (video_index: 3, usage_count: 8) - Works but heavily used
206
+
207
+ All in chronological order, with usage count diversity prioritized, and **NO video used more than once across all selections**.
208
+
209
+ ## Pre-Submission Checklist
210
+ Before providing your final output, verify:
211
+ - ✅ No video_index appears more than once (across primary AND alternate selections)
212
+ - ✅ No video_url appears more than once (across primary AND alternate selections)
213
+ - ✅ Videos are in chronological order matching the script
214
+ - ✅ Product video appears when product is mentioned (in primary selection)
215
+ - ✅ **Videos with lower usage counts were prioritized when content relevance was similar**
216
+ - ✅ **Reasoning includes explanation of how usage count influenced the decision**
217
+ - ✅ Total duration of PRIMARY videos is within 10-12 seconds (or noted if not possible)
218
+ - ✅ Alternate videos have similar durations to their primary counterparts
219
+ - ✅ All tts_script_segments are direct quotes from the original script
220
+ - ✅ Each alternate video is a viable fallback with clear reasoning
221
+ - ✅ JSON format is valid and complete with all required fields (including usage_count fields)
222
+ - ✅ Both "reason" and "alternate_reason" fields are filled for every segment
src/video_renderer.py CHANGED
@@ -34,6 +34,10 @@ import json_repair
34
  from data_holder import DataHolder
35
  import numpy as np
36
 
 
 
 
 
37
  class VideoRenderer:
38
  def __init__(self, config: Dict, data_holder: DataHolder = None):
39
  self.config = config
@@ -58,7 +62,9 @@ class VideoRenderer:
58
  video_clips = await self._prepare_video_clips_natural_speed()
59
 
60
  # Create video sequence with natural speed
61
- final_video = await self._create_video_sequence_natural_speed(video_clips, video_config)
 
 
62
 
63
  # Render video WITHOUT audio
64
  output_path = await self._render_video_only(final_video)
@@ -120,6 +126,7 @@ class VideoRenderer:
120
  utils.calculate_video_durations(selected_videos, all_tts_script_segment, assets["timed_transcript"], original_duration)
121
 
122
  target_size = (1080, 1920)
 
123
  # Load library videos - NO speed adjustments
124
  for i, lib_video in enumerate(selected_videos):
125
  if lib_video.get("local_path"):
@@ -137,13 +144,29 @@ class VideoRenderer:
137
  prev_clip = clips[-1][1]
138
  prev_clip_file = selected_videos[-2]["local_path"]
139
 
140
- prev_clip, lib_clip = await self._prepare_clip(lib_video["local_path"], original_clip, lib_hook_start, lib_hook_end, lib_video["duration"], prev_clip, prev_clip_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  if prev_clip:
142
  clip_name, _ = clips[-1]
143
  clips[-1] = (clip_name, prev_clip)
144
 
145
  lib_clip = lib_clip.without_audio()
146
  clips.append((f"library_{i}", lib_clip))
 
 
 
147
  logger.info(f"✓ Loaded library video {i}: {lib_clip.duration:.2f}s (NATURAL SPEED)")
148
  except Exception as e:
149
  import traceback
@@ -164,7 +187,7 @@ class VideoRenderer:
164
  pass
165
  raise
166
 
167
- async def _prepare_clip(self, original_clip_path, original_clip, lib_hook_start, lib_hook_end, target_duration: float, prev_clip, prev_clip_file):
168
  # Validate inputs
169
  if target_duration <= 0:
170
  raise ValueError(f"Invalid target_duration: {target_duration}")
@@ -176,147 +199,153 @@ class VideoRenderer:
176
  # Handle start hook case
177
  if lib_hook_start:
178
  return self._prepare_with_start_hook(
179
- original_clip_path, original_clip, lib_hook_start,
180
  target_duration, prev_clip
181
  )
182
 
183
  # Handle end hook case
184
  elif lib_hook_end:
185
  return self._prepare_with_end_hook(
186
- original_clip_path, original_clip, lib_hook_end,
187
- target_duration, prev_clip, prev_clip_file
188
  )
189
 
190
  # No hooks - just extend/trim the original clip
191
  else:
192
  logger.info("No hooks detected, adjusting original clip duration only")
193
- result = self._extend_or_trim_clip(original_clip_path, original_clip, target_duration)
194
- return prev_clip, result
195
 
196
- def _prepare_with_start_hook(self, original_clip_path, original_clip, lib_hook_start, target_duration, prev_clip):
197
  """Handle clip preparation when a start hook is present."""
198
  logger.info(f"Start hook detected with duration {lib_hook_start.duration:.2f}s")
199
- total_start = lib_hook_start.duration + original_clip.duration
200
 
201
  # Case 1: Target fits within start hook + original clip
202
- if target_duration <= total_start:
203
  logger.info("Target duration fits start hook + original clip, concatenating and trimming")
204
  result = concatenate_videoclips([lib_hook_start, original_clip], method="compose").subclip(0, target_duration)
205
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
206
- return prev_clip, result
207
 
208
  # Case 2: Need to extend beyond original clip
209
- logger.info("Target duration exceeds start hook + original clip, trying extension methods")
210
- extended_clip = None
211
-
212
- try:
213
- # Try interpolation first
214
- interpolated_file = utils.interpolate_video(original_clip_path)
215
- if interpolated_file:
216
- interpolated = VideoFileClip(interpolated_file)
217
- interpolated = self._resize_for_vertical(interpolated)
218
- total_interpolated = lib_hook_start.duration + interpolated.duration
219
- logger.info(f"Interpolated clip duration: {interpolated.duration:.2f}s, total with hook: {total_interpolated:.2f}s")
220
-
221
- if target_duration <= total_interpolated:
222
- logger.info("Target duration fits start hook + interpolated clip")
223
- result = concatenate_videoclips([lib_hook_start, interpolated], method="compose").subclip(0, target_duration)
224
- logger.info(f"Prepared clip duration: {result.duration:.2f}s")
225
- return prev_clip, result
226
-
227
- # Interpolation wasn't long enough, close it
228
- interpolated.close()
229
-
230
- except Exception as e:
231
- logger.warning(f"Interpolation failed: {e}")
232
- if extended_clip:
233
- extended_clip.close()
234
-
235
- # Try looping or ping-pong
236
- if utils.is_video_loopable(original_clip_path) or utils.is_loopable_phash(original_clip_path):
237
- logger.info("Original clip is loopable, creating loop")
238
- loop_clip = self.loop_clip(original_clip, target_duration)
239
- elif utils.is_video_zoomable_tail(original_clip):
240
- loop_clip = self.zoom_clip(original_clip, target_duration)
241
- else:
242
- logger.info("Using ping-pong reverse looping as fallback")
243
- reversed_clip = VideoFileClip(utils.reverse_clip(original_clip_path))
244
- reversed_clip = self._resize_for_vertical(reversed_clip)
245
- loop_clip = concatenate_videoclips([original_clip, reversed_clip, original_clip, reversed_clip], method="compose")
246
-
247
- result = concatenate_videoclips([lib_hook_start, loop_clip], method="compose").subclip(0, target_duration)
248
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
249
- return prev_clip, result
250
 
251
 
252
- def _prepare_with_end_hook(self, original_clip_path, original_clip, lib_hook_end,
253
- target_duration, prev_clip, prev_clip_file):
254
  """Handle clip preparation when an end hook is present."""
 
255
  logger.info(f"End hook detected with duration {lib_hook_end.duration:.2f}s")
256
  total_duration = original_clip.duration + lib_hook_end.duration
257
  logger.info(f"Combined original + end hook duration: {total_duration:.2f}s")
258
-
 
259
  # Case 1: Combined duration exceeds target - need to trim
260
- if total_duration > target_duration:
261
  trim_duration = target_duration - lib_hook_end.duration
262
 
263
  if trim_duration > 0:
264
  logger.info(f"Trimming original clip from {original_clip.duration:.2f}s to {trim_duration:.2f}s to fit end hook")
265
  original_clip = original_clip.subclip(0, trim_duration)
 
266
  else:
267
- logger.info(f"Target duration {target_duration:.2f}s shorter than end hook alone, trimming end hook itself")
268
- result = lib_hook_end.subclip(0, target_duration)
269
- logger.info(f"Prepared clip duration: {result.duration:.2f}s")
270
- return prev_clip, result
 
271
 
272
  # Case 2: Combined duration is less than target - need to extend original
273
- elif total_duration < target_duration:
274
  remaining = target_duration - lib_hook_end.duration
275
  logger.info(f"Original + end hook too short, need to extend original by {remaining:.2f}s")
276
 
277
- original_clip = self._extend_clip_to_duration(original_clip_path, original_clip, remaining)
278
 
279
  # Case 3: Exact match or after trimming/extending
280
  logger.info("Concatenating original clip and end hook")
281
 
282
  # Handle very short original clips
283
  if original_clip.duration < 1:
284
- if prev_clip and prev_clip_file:
285
- logger.info("Original clip too short, extending previous clip instead")
286
- prev_clip = self._extend_or_trim_clip(prev_clip_file, prev_clip, prev_clip.duration + original_clip.duration)
287
- result = lib_hook_end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  else:
289
- result = concatenate_videoclips([original_clip, lib_hook_end], method="compose")
290
 
291
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
292
- return prev_clip, result
293
 
294
 
295
- def _extend_or_trim_clip(self, original_clip_path, original_clip, target_duration):
296
  """
297
  Extend or trim a clip to match target duration.
298
 
299
  Returns:
300
  VideoFileClip: The adjusted clip
301
  """
302
- current_duration = original_clip.duration
303
-
304
- # Case 1: Clip is already the right duration
305
- if abs(current_duration - target_duration) < 0.01: # Small tolerance for floating point
306
- logger.info(f"Clip duration {current_duration:.2f}s already matches target duration")
307
- return original_clip
308
-
309
- # Case 2: Clip is too long - trim it
310
- if current_duration > target_duration:
311
- logger.info(f"Trimming clip from {current_duration:.2f}s to {target_duration:.2f}s")
312
- result = original_clip.subclip(0, target_duration)
313
- logger.info(f"Prepared clip duration: {result.duration:.2f}s")
314
- return result
315
-
316
- # Case 3: Clip is too short - extend it
317
- logger.info(f"Clip too short by {target_duration - current_duration:.2f}s, extending")
318
- return self._extend_clip_to_duration(original_clip_path, original_clip, target_duration)
319
-
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
  def _extend_clip_to_duration(self, original_clip_path, original_clip, target_duration):
322
  """
@@ -360,13 +389,15 @@ class VideoRenderer:
360
 
361
  # Fallback to ping-pong reverse looping
362
  logger.info("Using ping-pong reverse looping as fallback for extension")
363
- reversed_clip = VideoFileClip(utils.reverse_clip(original_clip_path))
364
- reversed_clip = self._resize_for_vertical(reversed_clip)
365
  loop_clip = concatenate_videoclips([original_clip, reversed_clip, original_clip, reversed_clip], method="compose")
366
  result = loop_clip.subclip(0, target_duration)
367
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
368
  return result
369
 
 
 
 
370
 
371
  def loop_clip(self, clip, target_duration):
372
  loop_count = int(target_duration // clip.duration) + 1 # how many loops needed
@@ -831,7 +862,7 @@ class VideoRenderer:
831
  bg_volume = base_volume * 1.1
832
 
833
  # Clamp for safety
834
- return max(0.15, min(1.0, bg_volume))
835
 
836
 
837
  async def _prepare_audio_clips(self, assets: Dict, target_duration: float) -> List[AudioFileClip]:
 
34
  from data_holder import DataHolder
35
  import numpy as np
36
 
37
+ ALLOWED_BG_MUSIC_VOLUME = 0.10
38
+ REVERSE_THRESHOLD = 0.5
39
+ HOOK_VIDEO_DURATION = 1.5
40
+
41
  class VideoRenderer:
42
  def __init__(self, config: Dict, data_holder: DataHolder = None):
43
  self.config = config
 
62
  video_clips = await self._prepare_video_clips_natural_speed()
63
 
64
  # Create video sequence with natural speed
65
+ # final_video = await self._create_video_sequence_natural_speed(video_clips, video_config)
66
+ final_video = concatenate_videoclips(video_clips, method="compose")
67
+ final_video = final_video.without_audio()
68
 
69
  # Render video WITHOUT audio
70
  output_path = await self._render_video_only(final_video)
 
126
  utils.calculate_video_durations(selected_videos, all_tts_script_segment, assets["timed_transcript"], original_duration)
127
 
128
  target_size = (1080, 1920)
129
+ extra_secs = 0.0
130
  # Load library videos - NO speed adjustments
131
  for i, lib_video in enumerate(selected_videos):
132
  if lib_video.get("local_path"):
 
144
  prev_clip = clips[-1][1]
145
  prev_clip_file = selected_videos[-2]["local_path"]
146
 
147
+ prev_clip, lib_clip, extra_secs = await self._prepare_clip(
148
+ lib_video=lib_video,
149
+ original_clip_path=lib_video["local_path"],
150
+ alternate_url_local_path=lib_video["alternate_url_local_path"],
151
+ original_clip=original_clip,
152
+ lib_hook_start=lib_hook_start,
153
+ lib_hook_end=lib_hook_end,
154
+ target_duration=lib_video["duration"],
155
+ extra_secs=extra_secs,
156
+ prev_clip=prev_clip,
157
+ prev_clip_file=prev_clip_file
158
+ )
159
+ if extra_secs > 0: # ignore tiny floating-point diffs
160
+ logger.info(f"⏱️ Added {extra_secs:.2f}s extra to match target duration ({lib_video['duration']:.2f}s)")
161
  if prev_clip:
162
  clip_name, _ = clips[-1]
163
  clips[-1] = (clip_name, prev_clip)
164
 
165
  lib_clip = lib_clip.without_audio()
166
  clips.append((f"library_{i}", lib_clip))
167
+ self.data_holder.video_usage_count.update({
168
+ f"{lib_video['url']}": self.data_holder.video_usage_count.get(f"{lib_video['url']}", 0) + 1
169
+ })
170
  logger.info(f"✓ Loaded library video {i}: {lib_clip.duration:.2f}s (NATURAL SPEED)")
171
  except Exception as e:
172
  import traceback
 
187
  pass
188
  raise
189
 
190
+ async def _prepare_clip(self, lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_start, lib_hook_end, target_duration: float, extra_secs, prev_clip, prev_clip_file):
191
  # Validate inputs
192
  if target_duration <= 0:
193
  raise ValueError(f"Invalid target_duration: {target_duration}")
 
199
  # Handle start hook case
200
  if lib_hook_start:
201
  return self._prepare_with_start_hook(
202
+ lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_start,
203
  target_duration, prev_clip
204
  )
205
 
206
  # Handle end hook case
207
  elif lib_hook_end:
208
  return self._prepare_with_end_hook(
209
+ lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_end,
210
+ target_duration, extra_secs, prev_clip, prev_clip_file
211
  )
212
 
213
  # No hooks - just extend/trim the original clip
214
  else:
215
  logger.info("No hooks detected, adjusting original clip duration only")
216
+ result, extra_secs = self._extend_or_trim_clip(lib_video, original_clip_path, alternate_url_local_path, original_clip, target_duration)
217
+ return prev_clip, result, extra_secs
218
 
219
+ def _prepare_with_start_hook(self, lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_start, target_duration, prev_clip):
220
  """Handle clip preparation when a start hook is present."""
221
  logger.info(f"Start hook detected with duration {lib_hook_start.duration:.2f}s")
222
+ total_duration = lib_hook_start.duration + original_clip.duration
223
 
224
  # Case 1: Target fits within start hook + original clip
225
+ if target_duration <= total_duration:
226
  logger.info("Target duration fits start hook + original clip, concatenating and trimming")
227
  result = concatenate_videoclips([lib_hook_start, original_clip], method="compose").subclip(0, target_duration)
228
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
229
+ return prev_clip, result, 0.0
230
 
231
  # Case 2: Need to extend beyond original clip
232
+ modified_clip, extra_secs = self._extend_or_trim_clip(lib_video, original_clip_path, alternate_url_local_path, original_clip, target_duration-HOOK_VIDEO_DURATION)
233
+
234
+ result = concatenate_videoclips([lib_hook_start, modified_clip], method="compose").subclip(0, target_duration)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
236
+ return prev_clip, result, extra_secs
237
 
238
 
239
+ def _prepare_with_end_hook(self, lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_end,
240
+ target_duration, extra_secs, prev_clip, prev_clip_file):
241
  """Handle clip preparation when an end hook is present."""
242
+ temp_original_clip = original_clip
243
  logger.info(f"End hook detected with duration {lib_hook_end.duration:.2f}s")
244
  total_duration = original_clip.duration + lib_hook_end.duration
245
  logger.info(f"Combined original + end hook duration: {total_duration:.2f}s")
246
+
247
+ cur_extra_secs = 0.0
248
  # Case 1: Combined duration exceeds target - need to trim
249
+ if target_duration <= total_duration:
250
  trim_duration = target_duration - lib_hook_end.duration
251
 
252
  if trim_duration > 0:
253
  logger.info(f"Trimming original clip from {original_clip.duration:.2f}s to {trim_duration:.2f}s to fit end hook")
254
  original_clip = original_clip.subclip(0, trim_duration)
255
+ cur_extra_secs = 0.0
256
  else:
257
+ # Target shorter than hook take last part of hook
258
+ start_trim = max(0, lib_hook_end.duration - target_duration)
259
+ result = lib_hook_end.subclip(start_trim, lib_hook_end.duration)
260
+ logger.info(f"Prepared end-only clip: {result.duration:.2f}s")
261
+ return prev_clip, result, 0.0
262
 
263
  # Case 2: Combined duration is less than target - need to extend original
264
+ elif target_duration > total_duration:
265
  remaining = target_duration - lib_hook_end.duration
266
  logger.info(f"Original + end hook too short, need to extend original by {remaining:.2f}s")
267
 
268
+ original_clip, cur_extra_secs = self._extend_or_trim_clip(lib_video, original_clip_path, alternate_url_local_path, original_clip, remaining)
269
 
270
  # Case 3: Exact match or after trimming/extending
271
  logger.info("Concatenating original clip and end hook")
272
 
273
  # Handle very short original clips
274
  if original_clip.duration < 1:
275
+ if original_clip.duration + extra_secs > 1:
276
+ # Determine how much of extra_secs is actually used to extend this clip
277
+ possible_new_duration = original_clip.duration + extra_secs
278
+ new_duration = min(possible_new_duration, temp_original_clip.duration)
279
+ used_extra = max(0.0, new_duration - original_clip.duration)
280
+
281
+ logger.info(
282
+ f"Extending original clip from {original_clip.duration:.2f}s → {new_duration:.2f}s "
283
+ f"(used_extra={used_extra:.2f}s, available_extra={extra_secs:.2f}s)"
284
+ )
285
+
286
+ # Apply the extension
287
+ original_clip = temp_original_clip.subclip(0, new_duration)
288
+
289
+ # Now, trim the previous clip by exactly how much we actually used
290
+ new_prev_duration = prev_clip.duration - used_extra
291
+ logger.info(
292
+ f"✂️ Trimming previous clip by {used_extra:.2f}s → new duration {new_prev_duration:.2f}s"
293
+ )
294
+ prev_clip = prev_clip.subclip(0, new_prev_duration)
295
+
296
+ result = concatenate_videoclips([original_clip, lib_hook_end], method="compose").subclip(0, target_duration)
297
+ cur_extra_secs = 0.0
298
+
299
+ else:
300
+ if prev_clip and prev_clip_file:
301
+ logger.info("Original clip too short, extending previous clip instead")
302
+ prev_clip, extra_secs = self._extend_or_trim_clip(lib_video, prev_clip_file, alternate_url_local_path, prev_clip, prev_clip.duration + original_clip.duration)
303
+ result = lib_hook_end.subclip(max(0, lib_hook_end.duration - target_duration), lib_hook_end.duration)
304
+
305
  else:
306
+ result = concatenate_videoclips([original_clip, lib_hook_end], method="compose").subclip(0, target_duration)
307
 
308
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
309
+ return prev_clip, result, cur_extra_secs
310
 
311
 
312
+ def _extend_or_trim_clip(self, lib_video, original_clip_path, alternate_url_local_path, original_clip, target_duration):
313
  """
314
  Extend or trim a clip to match target duration.
315
 
316
  Returns:
317
  VideoFileClip: The adjusted clip
318
  """
319
+ total_duration = original_clip.duration
320
+
321
+ # Case 0: Equal
322
+ if abs(target_duration - total_duration) < 0.01: # 10ms tolerance
323
+ return original_clip, 0.0
324
+
325
+ # Case 1: Target is less than or equal to clip duration
326
+ if target_duration <= total_duration:
327
+ logger.info("Target duration fits original clip, trimming")
328
+ return original_clip.subclip(0, target_duration), 0.0
329
+
330
+ # Case 2: Target is greater than clip duration
331
+ elif target_duration > total_duration:
332
+ if alternate_url_local_path is None or (target_duration - total_duration <= REVERSE_THRESHOLD): # Small tolerance for floating point
333
+ logger.info("Reversing clip.")
334
+ reversed_clip = self.reverse_clip(original_clip_path)
335
+ loop_clip = concatenate_videoclips([original_clip, reversed_clip, original_clip, reversed_clip], method="compose")
336
+ return loop_clip.subclip(0, target_duration), target_duration - original_clip.duration
337
+ else:
338
+ logger.info("Using extra clip.")
339
+ self.data_holder.video_usage_count.update({
340
+ f"{lib_video['alternate_url']}": self.data_holder.video_usage_count.get(f"{lib_video['alternate_url']}", 0) + 1
341
+ })
342
+ alternate_clip = VideoFileClip(alternate_url_local_path)
343
+ alternate_clip = alternate_clip.subclip(0, target_duration - total_duration)
344
+ alternate_clip = self._resize_for_vertical(alternate_clip)
345
+ combined = concatenate_videoclips([original_clip, alternate_clip, original_clip, alternate_clip], method="compose")
346
+ result = combined.subclip(0, target_duration)
347
+ extra_secs = max(0.0, target_duration - original_clip.duration - alternate_clip.duration)
348
+ return result, extra_secs
349
 
350
  def _extend_clip_to_duration(self, original_clip_path, original_clip, target_duration):
351
  """
 
389
 
390
  # Fallback to ping-pong reverse looping
391
  logger.info("Using ping-pong reverse looping as fallback for extension")
392
+ reversed_clip = self.reverse_clip(original_clip_path)
 
393
  loop_clip = concatenate_videoclips([original_clip, reversed_clip, original_clip, reversed_clip], method="compose")
394
  result = loop_clip.subclip(0, target_duration)
395
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
396
  return result
397
 
398
+ def reverse_clip(self, clip_path):
399
+ reversed_clip = VideoFileClip(utils.reverse_clip(clip_path))
400
+ return self._resize_for_vertical(reversed_clip)
401
 
402
  def loop_clip(self, clip, target_duration):
403
  loop_count = int(target_duration // clip.duration) + 1 # how many loops needed
 
862
  bg_volume = base_volume * 1.1
863
 
864
  # Clamp for safety
865
+ return max(ALLOWED_BG_MUSIC_VOLUME, min(1.0, bg_volume))
866
 
867
 
868
  async def _prepare_audio_clips(self, assets: Dict, target_duration: float) -> List[AudioFileClip]: