jebin2 commited on
Commit
b7d4e26
·
1 Parent(s): b749705

new changes

Browse files
src/asset_selector.py CHANGED
@@ -95,7 +95,8 @@ class AssetSelector:
95
 
96
  except Exception as e:
97
  logger.error(f"❌ Video selection failed: {e}")
98
- return self._fallback_selection(self.data_holder.tts_script, max_duration)
 
99
 
100
  def _parse_energy_score(self, energy_score_str: str) -> int:
101
  """Parse energy score from string format to integer"""
@@ -120,9 +121,9 @@ class AssetSelector:
120
  """Use Gemini API for contextual video selection"""
121
  try:
122
  video_context = await self.prepare_video_context()
123
- with open("src/prompt/best_matches_video.md", "r", encoding="utf-8") as file:
124
  # with open("src/prompt/best_matches_video_with_timestamp.md", "r", encoding="utf-8") as file:
125
- # with open("src/prompt/best_matches_two_video.md", "r", encoding="utf-8") as file:
126
  system_prompt = file.read()
127
 
128
  model = genai.GenerativeModel("gemini-2.5-pro")
@@ -132,7 +133,6 @@ class AssetSelector:
132
 
133
  USER PROMPT:
134
  TTS Script: {tts_script}
135
- TS Script Word-Level Timestamp: {timed_transcript}
136
  Video Options: {video_context}
137
  """
138
  response = model.generate_content(model_input)
@@ -145,7 +145,8 @@ Video Options: {video_context}
145
  for item in selection:
146
  video_index = item["video_index"]
147
  if video_index < len(self.video_library):
148
- video = self.video_library.iloc[video_index]
 
149
  selected.append(
150
  {
151
  "url": video.get("Video URL (No Audio)", video.get("url", "")),
@@ -160,7 +161,8 @@ Video Options: {video_context}
160
  }
161
  )
162
  if "alternate_video_index" in item:
163
- video = self.video_library.iloc[item["alternate_video_index"]]
 
164
  selected[-1]["alternate_url"] = video.get("Video URL (No Audio)", video.get("url", ""))
165
 
166
  logger.info(f"✓ Gemini selected {len(selected)}")
@@ -169,12 +171,12 @@ Video Options: {video_context}
169
  except json.JSONDecodeError as e:
170
  logger.error(f"Failed to parse Gemini JSON response: {e}")
171
  logger.debug(f"Raw response: {response_text[:500]}")
172
- return []
173
  except Exception as e:
174
  logger.error(f"Gemini analysis failed: {e}")
175
  import traceback
176
  traceback.print_exc()
177
- return []
178
 
179
  async def prepare_video_context(self):
180
  # STEP 3: Update durations using actual local files
@@ -196,7 +198,8 @@ Video Options: {video_context}
196
  f"{i+1}. {row.get('Video URL (No Audio)')} - "
197
  f"{row.get('Full Video Description Summary', row.get('description', ''))} - "
198
  f"{next((v.get('duration', 0) for v in self.data_holder.visual_assets['all_videos'] if v['url'] == row.get('Video URL (No Audio)')), 0)}s - "
199
- f"Alignment: {row.get('Video Alignment with the TTS Script', row.get('alignment', ''))}"
 
200
  for i, row in self.video_library.iterrows()
201
  ]
202
  )
 
95
 
96
  except Exception as e:
97
  logger.error(f"❌ Video selection failed: {e}")
98
+ raise
99
+ # return self._fallback_selection(self.data_holder.tts_script, max_duration)
100
 
101
  def _parse_energy_score(self, energy_score_str: str) -> int:
102
  """Parse energy score from string format to integer"""
 
121
  """Use Gemini API for contextual video selection"""
122
  try:
123
  video_context = await self.prepare_video_context()
124
+ # with open("src/prompt/best_matches_video.md", "r", encoding="utf-8") as file:
125
  # with open("src/prompt/best_matches_video_with_timestamp.md", "r", encoding="utf-8") as file:
126
+ with open("src/prompt/best_matches_two_video_tracking.md", "r", encoding="utf-8") as file:
127
  system_prompt = file.read()
128
 
129
  model = genai.GenerativeModel("gemini-2.5-pro")
 
133
 
134
  USER PROMPT:
135
  TTS Script: {tts_script}
 
136
  Video Options: {video_context}
137
  """
138
  response = model.generate_content(model_input)
 
145
  for item in selection:
146
  video_index = item["video_index"]
147
  if video_index < len(self.video_library):
148
+ video_row = self.video_library[self.video_library["Video URL (No Audio)"] == item["video_url"]]
149
+ video = video_row.iloc[0]
150
  selected.append(
151
  {
152
  "url": video.get("Video URL (No Audio)", video.get("url", "")),
 
161
  }
162
  )
163
  if "alternate_video_index" in item:
164
+ video_row = self.video_library[self.video_library["Video URL (No Audio)"] == item["alternate_video_url"]]
165
+ video = video_row.iloc[0]
166
  selected[-1]["alternate_url"] = video.get("Video URL (No Audio)", video.get("url", ""))
167
 
168
  logger.info(f"✓ Gemini selected {len(selected)}")
 
171
  except json.JSONDecodeError as e:
172
  logger.error(f"Failed to parse Gemini JSON response: {e}")
173
  logger.debug(f"Raw response: {response_text[:500]}")
174
+ raise
175
  except Exception as e:
176
  logger.error(f"Gemini analysis failed: {e}")
177
  import traceback
178
  traceback.print_exc()
179
+ raise
180
 
181
  async def prepare_video_context(self):
182
  # STEP 3: Update durations using actual local files
 
198
  f"{i+1}. {row.get('Video URL (No Audio)')} - "
199
  f"{row.get('Full Video Description Summary', row.get('description', ''))} - "
200
  f"{next((v.get('duration', 0) for v in self.data_holder.visual_assets['all_videos'] if v['url'] == row.get('Video URL (No Audio)')), 0)}s - "
201
+ f"Alignment: {row.get('Video Alignment with the TTS Script', row.get('alignment', ''))} - "
202
+ f"Usage Count: {self.data_holder.video_usage_count.get(row.get('Video URL (No Audio)'), 0)}"
203
  for i, row in self.video_library.iterrows()
204
  ]
205
  )
src/automation.py CHANGED
@@ -263,7 +263,7 @@ class ContentAutomation:
263
  )
264
  if video.get("alternate_url"):
265
  download_tasks.append(
266
- self._download_with_fallback(video["url"], f"library_all_video_alternate_url_{i}.mp4", video, "alternate_url_local_path")
267
  )
268
 
269
  # Download library videos
 
263
  )
264
  if video.get("alternate_url"):
265
  download_tasks.append(
266
+ self._download_with_fallback(video["alternate_url"], f"library_all_video_alternate_url_{i}.mp4", video, "alternate_url_local_path")
267
  )
268
 
269
  # Download library videos
src/data_holder.py CHANGED
@@ -1,4 +1,5 @@
1
  class DataHolder:
2
  tts_script: str = None
3
  selected_voice: str = None
4
- visual_assets = {}
 
 
1
  class DataHolder:
2
  tts_script: str = None
3
  selected_voice: str = None
4
+ visual_assets = {}
5
+ video_usage_count = {}
src/prompt/best_matches_two_video_tracking.md ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Video Selection with Alternates
2
+
3
+ You are an AI assistant specialized in selecting the most appropriate videos to accompany Text-to-Speech (TTS) scripts. Your goal is to create a cohesive visual narrative that perfectly aligns with the spoken content, ensuring that product mentions are synchronized with product visuals.
4
+
5
+ ## Input Format
6
+ You will receive:
7
+ 1. **TTS Script**: The complete text that will be spoken
8
+ 2. **Video Options**: A list of available videos with the following information:
9
+ - Video URL (No Audio): Direct link to the video file
10
+ - Full Video Description Summary: Detailed description of the video content, including visual elements, actions, camera angles, and recommended usage scenarios
11
+ - Duration: Length of the video in seconds
12
+ - Video Alignment with the TTS Script: Detailed explanation of when and how to use this video, including specific keywords, phrases, and scenarios where it fits best
13
+ - **Usage Count**: Number of times this video has been selected previously (lower is better for diversity)
14
+
15
+ ## Your Task
16
+ Select one or more videos (with alternates) from the provided options that:
17
+ 1. **Best match the content and tone** of the TTS script
18
+ 2. **Maintain narrative coherence** when combined
19
+ 3. **Synchronize product visuals with product mentions** - When the TTS script mentions the product name or refers to the product, the corresponding product showcase video MUST be displayed at that exact moment
20
+ 4. **Use each video only once across primary AND alternate selections** - NEVER select the same video multiple times (no duplicates allowed in either primary or alternate choices)
21
+ 5. **Total between 10 and 12 seconds** in duration for primary selections (strict requirement)
22
+ 6. **Maintain chronological order** - Videos must be arranged in the sequence they should appear, matching the flow of the TTS script from beginning to end
23
+ 7. **Provide alternate video selections** - For each script segment, provide a second-best video option that could work as a fallback
24
+
25
+ ## Selection Criteria (in order of priority)
26
+
27
+ ### 0. No Duplicate Videos (Absolute Requirement)
28
+ - **CRITICAL**: Each video can only be selected ONCE across the ENTIRE output (including both primary and alternate selections)
29
+ - Even if a video seems perfect for multiple segments, you MUST find alternative videos for subsequent segments
30
+ - Track which videos you've already selected and exclude them from further consideration for both primary and alternate positions
31
+ - This rule has NO exceptions - duplicate videos will result in a failed output
32
+
33
+ ### 1. Usage Count Diversity (Critical Priority)
34
+ - **STRONGLY PRIORITIZE** videos with LOWER usage counts to ensure variety across different TTS scripts
35
+ - When comparing videos with similar content relevance, ALWAYS choose the one with the lower usage count
36
+ - **Usage count should be a primary tiebreaker**: If two videos match the content equally well, select the less-used one
37
+ - This ensures fair distribution of video selections across your video library and prevents over-reliance on the same videos
38
+ - **Balancing act**: Find videos that both match the content AND have lower usage counts - don't sacrifice content relevance entirely, but give significant weight to usage diversity
39
+ - If a video has been used significantly more times than alternatives (e.g., 3+ times difference), consider choosing a slightly less perfect match with lower usage count
40
+
41
+ ### 2. Product Mention Synchronization (Critical Priority)
42
+ - **WHENEVER** the TTS script explicitly mentions the product name (e.g., "Somira Massager") or refers to "the product," "this massager," etc., you MUST select the product showcase video
43
+ - The product video should appear at the EXACT moment when the product is mentioned in the script
44
+ - This is a non-negotiable requirement for maintaining visual-audio coherence
45
+ - If the product is mentioned multiple times, prioritize the FIRST mention for the product showcase video, and use demonstration/usage videos for subsequent mentions
46
+ - The alternate video for product mentions should also be product-focused (e.g., different angle, different showcase style)
47
+ - **Among product showcase videos, prioritize those with lower usage counts**
48
+
49
+ ### 3. Content Relevance (High Priority)
50
+ - Choose videos that directly illustrate or support the key message of the TTS script
51
+ - Match specific actions mentioned in the script (e.g., "putting on," "turning on," "using") with videos showing those actions
52
+ - Prioritize literal matches over metaphorical ones when available
53
+ - Ensure visual content doesn't contradict the spoken words
54
+ - Alternate videos should maintain similar content relevance but may have different angles or styles
55
+ - **When multiple videos have similar content relevance, strongly favor those with lower usage counts**
56
+
57
+ ### 4. Narrative Flow & Chronological Order
58
+ - Videos MUST be arranged in chronological order matching the TTS script sequence
59
+ - If selecting multiple videos, ensure smooth transitions
60
+ - Maintain logical progression that follows the script's structure from start to finish
61
+ - Avoid jarring cuts or mismatched visual sequences
62
+ - Alternate videos should maintain the same chronological position and narrative flow
63
+
64
+ ### 5. Timing Optimization
65
+ - The combined duration of PRIMARY selections MUST be between 10-12 seconds
66
+ - Alternate videos should have similar durations to their primary counterparts (±2 seconds is acceptable)
67
+ - Prefer combinations that naturally fit the script's pacing
68
+ - Consider trimming longer videos to fit within the time constraint
69
+ - If a single video works perfectly but is slightly short/long, note this clearly
70
+
71
+ ### 6. Alignment Score
72
+ - Pay close attention to the "Video Alignment with the TTS Script" field
73
+ - Use the recommended keywords and scenarios mentioned in this field
74
+ - Higher relevance to mentioned scenarios indicates better matches
75
+ - Balance alignment recommendations with duration requirements
76
+ - Alternate videos should have slightly lower but still strong alignment scores
77
+ - **Among videos with similar alignment scores, prioritize those with lower usage counts**
78
+
79
+ ## TTS Script Segmentation
80
+ - Mentally divide the TTS script into segments based on:
81
+ - Product mentions (require product showcase video)
82
+ - Action descriptions (require demonstration videos)
83
+ - Benefit statements (require usage or satisfaction videos)
84
+ - Assign the most appropriate video (primary + alternate) to each segment
85
+ - Ensure the video order matches the script segment order
86
+ - **Remember**: Once a video is assigned to ANY position (primary or alternate), it cannot be used again anywhere
87
+
88
+ ## Alternate Video Selection Strategy
89
+ For each script segment, the alternate video should:
90
+ 1. **Maintain content relevance** - Stay aligned with the same script segment
91
+ 2. **Offer stylistic variety** - Provide a different visual approach (e.g., different angle, lighting, setting)
92
+ 3. **Match duration closely** - Within ±2 seconds of the primary video
93
+ 4. **Serve as a true fallback** - Be a viable replacement if the primary video is unavailable
94
+ 5. **Never duplicate** - Must be completely different from any other selected video (primary or alternate)
95
+ 6. **Prioritize lower usage count** - When multiple alternates are viable, choose the one that has been used less frequently
96
+
97
+ ## Output Format
98
+
99
+ Provide your selection as a **JSON array** with the following structure:
100
+ ```json
101
+ [
102
+ {
103
+ "video_index": 1,
104
+ "video_url": "https://storage.googleapis.com/...",
105
+ "duration_seconds": 2,
106
+ "usage_count": 3,
107
+ "alternate_video_index": 4,
108
+ "alternate_video_url": "https://storage.googleapis.com/...",
109
+ "alternate_duration_seconds": 3,
110
+ "alternate_usage_count": 1,
111
+ "tts_script_segment": "The exact portion of the TTS script that this video will accompany",
112
+ "reason": "Brief explanation of why this PRIMARY video was chosen for this specific script segment, including consideration of its usage count",
113
+ "alternate_reason": "Brief explanation of why this ALTERNATE video was chosen as the second-best option, including consideration of its usage count"
114
+ },
115
+ {
116
+ "video_index": 3,
117
+ "video_url": "https://storage.googleapis.com/...",
118
+ "duration_seconds": 6,
119
+ "usage_count": 0,
120
+ "alternate_video_index": 7,
121
+ "alternate_video_url": "https://storage.googleapis.com/...",
122
+ "alternate_duration_seconds": 5,
123
+ "alternate_usage_count": 2,
124
+ "tts_script_segment": "The next portion of the TTS script",
125
+ "reason": "Explanation for this primary selection, noting low usage count",
126
+ "alternate_reason": "Explanation for this alternate selection, balancing content match with usage count"
127
+ }
128
+ ]
129
+ ```
130
+
131
+ ### JSON Array Field Definitions:
132
+ - **video_index**: The sequential number/identifier of the PRIMARY video from the provided list (each index should appear ONLY ONCE across entire output)
133
+ - **video_url**: The complete URL of the PRIMARY selected video (each URL should appear ONLY ONCE across entire output)
134
+ - **duration_seconds**: The length of the PRIMARY video clip in seconds (can be trimmed if needed)
135
+ - **usage_count**: The number of times this PRIMARY video has been selected previously (for transparency and tracking)
136
+ - **alternate_video_index**: The sequential number/identifier of the ALTERNATE (second-best) video (each index should appear ONLY ONCE across entire output)
137
+ - **alternate_video_url**: The complete URL of the ALTERNATE video (each URL should appear ONLY ONCE across entire output)
138
+ - **alternate_duration_seconds**: The length of the ALTERNATE video clip in seconds (can be trimmed if needed)
139
+ - **alternate_usage_count**: The number of times this ALTERNATE video has been selected previously (for transparency and tracking)
140
+ - **tts_script_segment**: The EXACT text from the TTS script that will be spoken while this video plays. This should be a direct quote from the script, maintaining chronological order
141
+ - **reason**: A concise 1-2 sentence explanation of why this PRIMARY video was selected for this specific segment, including how usage count factored into the decision
142
+ - **alternate_reason**: A concise 1-2 sentence explanation of why this ALTERNATE video was selected as the second-best option, including how usage count factored into the decision and what makes it a viable fallback
143
+
144
+ ### Additional Output Requirements:
145
+ After the JSON array, provide:
146
+
147
+ **Total Duration (Primary Selection):** [X seconds]
148
+
149
+ **Total Duration (Alternate Selection):** [Y seconds]
150
+
151
+ **Selection Rationale:**
152
+ [2-3 sentences explaining the overall logic behind your primary selection, how the video sequence complements the TTS script chronologically, why this combination works best, and how usage count diversity was balanced with content relevance]
153
+
154
+ **Alternate Selection Rationale:**
155
+ [2-3 sentences explaining the logic behind your alternate selections, how they serve as effective fallbacks while maintaining narrative coherence, and how usage counts influenced the alternate choices]
156
+
157
+ **Usage Count Considerations:**
158
+ [1-2 sentences explaining how you balanced content relevance with usage count diversity, and any trade-offs made between perfect content matches and lesser-used videos]
159
+
160
+ **Timing Notes (if applicable):**
161
+ [Mention any timing adjustments, trims, or deviations from the 10-12 second target for both primary and alternate selections]
162
+
163
+ **Alternative Options (if applicable):**
164
+ [Briefly mention any other close alternatives that could work if both primary and alternate selections need adjustment]
165
+
166
+ ## Important Guidelines
167
+ - **ABSOLUTELY CRITICAL**: NO duplicate videos - each video (both primary and alternate) can only appear ONCE across the ENTIRE output array
168
+ - **CRITICAL**: Strongly prioritize videos with LOWER usage counts to ensure diversity - this prevents over-reliance on the same videos across different scripts
169
+ - **CRITICAL**: Product showcase videos MUST appear when the product is mentioned in the script
170
+ - Videos MUST maintain chronological order matching the TTS script flow from start to finish
171
+ - The "tts_script_segment" field must contain the exact text from the script (word-for-word quote)
172
+ - Each video should map to a distinct portion of the script with no overlapping segments
173
+ - All script segments combined should cover the entire TTS script
174
+ - Alternate videos should provide meaningful variety while maintaining content relevance
175
+ - **When evaluating videos**: Consider usage count as a major factor - a slightly less perfect content match with 0-1 usage is often better than a perfect match with 5+ uses
176
+ - If no combination can achieve exactly 10-12 seconds WITHOUT using duplicates, select the closest option and clearly state the deviation
177
+ - If the script has multiple themes, prioritize the primary message while maintaining chronological flow
178
+ - Consider pacing: fast-paced scripts may need more dynamic visuals
179
+ - Always explain your reasoning clearly and concisely for BOTH primary and alternate selections, including how usage count influenced your decision
180
+ - If you must choose between perfect content match or perfect timing, prioritize content relevance and product synchronization, then note the timing issue
181
+ - When videos need to be trimmed, specify the recommended trim duration in the "reason" or "alternate_reason" field
182
+ - Before finalizing your selection, verify that no video_index or video_url appears more than once across ALL primary and alternate selections
183
+ - **Include usage_count and alternate_usage_count in your output** for transparency
184
+
185
+ ## Example Scenario
186
+ If the TTS script says: "Introducing the Somira Massager, designed for ultimate comfort. Simply place it around your neck and turn it on. Feel the relaxation."
187
+
188
+ And the video library has these usage counts:
189
+ - Video 1 (Product showcase front): Usage count = 5
190
+ - Video 2 (Person putting on massager): Usage count = 2
191
+ - Video 3 (Person enjoying massager): Usage count = 8
192
+ - Video 4 (Product showcase side): Usage count = 1
193
+ - Video 5 (Close-up placement): Usage count = 0
194
+ - Video 6 (Person showing satisfaction): Usage count = 3
195
+
196
+ Your selection should prioritize lower usage counts:
197
+ 1. First segment: "Introducing the Somira Massager"
198
+ - Primary: Product showcase side view (video_index: 4, usage_count: 1) - Lower usage than front view
199
+ - Alternate: Product showcase front view (video_index: 1, usage_count: 5) - Still relevant but higher usage
200
+ 2. Second segment: "place it around your neck and turn it on"
201
+ - Primary: Close-up of placement process (video_index: 5, usage_count: 0) - Never used before!
202
+ - Alternate: Person putting on the massager (video_index: 2, usage_count: 2) - Good fallback
203
+ 3. Third segment: "Feel the relaxation"
204
+ - Primary: Different person showing satisfaction (video_index: 6, usage_count: 3) - Lower usage
205
+ - Alternate: Person using/enjoying the massager (video_index: 3, usage_count: 8) - Works but heavily used
206
+
207
+ All in chronological order, with usage count diversity prioritized, and **NO video used more than once across all selections**.
208
+
209
+ ## Pre-Submission Checklist
210
+ Before providing your final output, verify:
211
+ - ✅ No video_index appears more than once (across primary AND alternate selections)
212
+ - ✅ No video_url appears more than once (across primary AND alternate selections)
213
+ - ✅ Videos are in chronological order matching the script
214
+ - ✅ Product video appears when product is mentioned (in primary selection)
215
+ - ✅ **Videos with lower usage counts were prioritized when content relevance was similar**
216
+ - ✅ **Reasoning includes explanation of how usage count influenced the decision**
217
+ - ✅ Total duration of PRIMARY videos is within 10-12 seconds (or noted if not possible)
218
+ - ✅ Alternate videos have similar durations to their primary counterparts
219
+ - ✅ All tts_script_segments are direct quotes from the original script
220
+ - ✅ Each alternate video is a viable fallback with clear reasoning
221
+ - ✅ JSON format is valid and complete with all required fields (including usage_count fields)
222
+ - ✅ Both "reason" and "alternate_reason" fields are filled for every segment
src/video_renderer.py CHANGED
@@ -34,6 +34,10 @@ import json_repair
34
  from data_holder import DataHolder
35
  import numpy as np
36
 
 
 
 
 
37
  class VideoRenderer:
38
  def __init__(self, config: Dict, data_holder: DataHolder = None):
39
  self.config = config
@@ -58,7 +62,9 @@ class VideoRenderer:
58
  video_clips = await self._prepare_video_clips_natural_speed()
59
 
60
  # Create video sequence with natural speed
61
- final_video = await self._create_video_sequence_natural_speed(video_clips, video_config)
 
 
62
 
63
  # Render video WITHOUT audio
64
  output_path = await self._render_video_only(final_video)
@@ -120,6 +126,7 @@ class VideoRenderer:
120
  utils.calculate_video_durations(selected_videos, all_tts_script_segment, assets["timed_transcript"], original_duration)
121
 
122
  target_size = (1080, 1920)
 
123
  # Load library videos - NO speed adjustments
124
  for i, lib_video in enumerate(selected_videos):
125
  if lib_video.get("local_path"):
@@ -137,13 +144,29 @@ class VideoRenderer:
137
  prev_clip = clips[-1][1]
138
  prev_clip_file = selected_videos[-2]["local_path"]
139
 
140
- prev_clip, lib_clip = await self._prepare_clip(lib_video["local_path"], original_clip, lib_hook_start, lib_hook_end, lib_video["duration"], prev_clip, prev_clip_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  if prev_clip:
142
  clip_name, _ = clips[-1]
143
  clips[-1] = (clip_name, prev_clip)
144
 
145
  lib_clip = lib_clip.without_audio()
146
  clips.append((f"library_{i}", lib_clip))
 
 
 
147
  logger.info(f"✓ Loaded library video {i}: {lib_clip.duration:.2f}s (NATURAL SPEED)")
148
  except Exception as e:
149
  import traceback
@@ -164,7 +187,7 @@ class VideoRenderer:
164
  pass
165
  raise
166
 
167
- async def _prepare_clip(self, original_clip_path, original_clip, lib_hook_start, lib_hook_end, target_duration: float, prev_clip, prev_clip_file):
168
  # Validate inputs
169
  if target_duration <= 0:
170
  raise ValueError(f"Invalid target_duration: {target_duration}")
@@ -176,147 +199,153 @@ class VideoRenderer:
176
  # Handle start hook case
177
  if lib_hook_start:
178
  return self._prepare_with_start_hook(
179
- original_clip_path, original_clip, lib_hook_start,
180
  target_duration, prev_clip
181
  )
182
 
183
  # Handle end hook case
184
  elif lib_hook_end:
185
  return self._prepare_with_end_hook(
186
- original_clip_path, original_clip, lib_hook_end,
187
- target_duration, prev_clip, prev_clip_file
188
  )
189
 
190
  # No hooks - just extend/trim the original clip
191
  else:
192
  logger.info("No hooks detected, adjusting original clip duration only")
193
- result = self._extend_or_trim_clip(original_clip_path, original_clip, target_duration)
194
- return prev_clip, result
195
 
196
- def _prepare_with_start_hook(self, original_clip_path, original_clip, lib_hook_start, target_duration, prev_clip):
197
  """Handle clip preparation when a start hook is present."""
198
  logger.info(f"Start hook detected with duration {lib_hook_start.duration:.2f}s")
199
- total_start = lib_hook_start.duration + original_clip.duration
200
 
201
  # Case 1: Target fits within start hook + original clip
202
- if target_duration <= total_start:
203
  logger.info("Target duration fits start hook + original clip, concatenating and trimming")
204
  result = concatenate_videoclips([lib_hook_start, original_clip], method="compose").subclip(0, target_duration)
205
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
206
- return prev_clip, result
207
 
208
  # Case 2: Need to extend beyond original clip
209
- logger.info("Target duration exceeds start hook + original clip, trying extension methods")
210
- extended_clip = None
211
-
212
- try:
213
- # Try interpolation first
214
- interpolated_file = utils.interpolate_video(original_clip_path)
215
- if interpolated_file:
216
- interpolated = VideoFileClip(interpolated_file)
217
- interpolated = self._resize_for_vertical(interpolated)
218
- total_interpolated = lib_hook_start.duration + interpolated.duration
219
- logger.info(f"Interpolated clip duration: {interpolated.duration:.2f}s, total with hook: {total_interpolated:.2f}s")
220
-
221
- if target_duration <= total_interpolated:
222
- logger.info("Target duration fits start hook + interpolated clip")
223
- result = concatenate_videoclips([lib_hook_start, interpolated], method="compose").subclip(0, target_duration)
224
- logger.info(f"Prepared clip duration: {result.duration:.2f}s")
225
- return prev_clip, result
226
-
227
- # Interpolation wasn't long enough, close it
228
- interpolated.close()
229
-
230
- except Exception as e:
231
- logger.warning(f"Interpolation failed: {e}")
232
- if extended_clip:
233
- extended_clip.close()
234
-
235
- # Try looping or ping-pong
236
- if utils.is_video_loopable(original_clip_path) or utils.is_loopable_phash(original_clip_path):
237
- logger.info("Original clip is loopable, creating loop")
238
- loop_clip = self.loop_clip(original_clip, target_duration)
239
- elif utils.is_video_zoomable_tail(original_clip):
240
- loop_clip = self.zoom_clip(original_clip, target_duration)
241
- else:
242
- logger.info("Using ping-pong reverse looping as fallback")
243
- reversed_clip = VideoFileClip(utils.reverse_clip(original_clip_path))
244
- reversed_clip = self._resize_for_vertical(reversed_clip)
245
- loop_clip = concatenate_videoclips([original_clip, reversed_clip, original_clip, reversed_clip], method="compose")
246
-
247
- result = concatenate_videoclips([lib_hook_start, loop_clip], method="compose").subclip(0, target_duration)
248
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
249
- return prev_clip, result
250
 
251
 
252
- def _prepare_with_end_hook(self, original_clip_path, original_clip, lib_hook_end,
253
- target_duration, prev_clip, prev_clip_file):
254
  """Handle clip preparation when an end hook is present."""
 
255
  logger.info(f"End hook detected with duration {lib_hook_end.duration:.2f}s")
256
  total_duration = original_clip.duration + lib_hook_end.duration
257
  logger.info(f"Combined original + end hook duration: {total_duration:.2f}s")
258
-
 
259
  # Case 1: Combined duration exceeds target - need to trim
260
- if total_duration > target_duration:
261
  trim_duration = target_duration - lib_hook_end.duration
262
 
263
  if trim_duration > 0:
264
  logger.info(f"Trimming original clip from {original_clip.duration:.2f}s to {trim_duration:.2f}s to fit end hook")
265
  original_clip = original_clip.subclip(0, trim_duration)
 
266
  else:
267
- logger.info(f"Target duration {target_duration:.2f}s shorter than end hook alone, trimming end hook itself")
268
- result = lib_hook_end.subclip(0, target_duration)
269
- logger.info(f"Prepared clip duration: {result.duration:.2f}s")
270
- return prev_clip, result
 
271
 
272
  # Case 2: Combined duration is less than target - need to extend original
273
- elif total_duration < target_duration:
274
  remaining = target_duration - lib_hook_end.duration
275
  logger.info(f"Original + end hook too short, need to extend original by {remaining:.2f}s")
276
 
277
- original_clip = self._extend_clip_to_duration(original_clip_path, original_clip, remaining)
278
 
279
  # Case 3: Exact match or after trimming/extending
280
  logger.info("Concatenating original clip and end hook")
281
 
282
  # Handle very short original clips
283
  if original_clip.duration < 1:
284
- if prev_clip and prev_clip_file:
285
- logger.info("Original clip too short, extending previous clip instead")
286
- prev_clip = self._extend_or_trim_clip(prev_clip_file, prev_clip, prev_clip.duration + original_clip.duration)
287
- result = lib_hook_end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  else:
289
- result = concatenate_videoclips([original_clip, lib_hook_end], method="compose")
290
 
291
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
292
- return prev_clip, result
293
 
294
 
295
- def _extend_or_trim_clip(self, original_clip_path, original_clip, target_duration):
296
  """
297
  Extend or trim a clip to match target duration.
298
 
299
  Returns:
300
  VideoFileClip: The adjusted clip
301
  """
302
- current_duration = original_clip.duration
303
-
304
- # Case 1: Clip is already the right duration
305
- if abs(current_duration - target_duration) < 0.01: # Small tolerance for floating point
306
- logger.info(f"Clip duration {current_duration:.2f}s already matches target duration")
307
- return original_clip
308
-
309
- # Case 2: Clip is too long - trim it
310
- if current_duration > target_duration:
311
- logger.info(f"Trimming clip from {current_duration:.2f}s to {target_duration:.2f}s")
312
- result = original_clip.subclip(0, target_duration)
313
- logger.info(f"Prepared clip duration: {result.duration:.2f}s")
314
- return result
315
-
316
- # Case 3: Clip is too short - extend it
317
- logger.info(f"Clip too short by {target_duration - current_duration:.2f}s, extending")
318
- return self._extend_clip_to_duration(original_clip_path, original_clip, target_duration)
319
-
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
  def _extend_clip_to_duration(self, original_clip_path, original_clip, target_duration):
322
  """
@@ -360,13 +389,15 @@ class VideoRenderer:
360
 
361
  # Fallback to ping-pong reverse looping
362
  logger.info("Using ping-pong reverse looping as fallback for extension")
363
- reversed_clip = VideoFileClip(utils.reverse_clip(original_clip_path))
364
- reversed_clip = self._resize_for_vertical(reversed_clip)
365
  loop_clip = concatenate_videoclips([original_clip, reversed_clip, original_clip, reversed_clip], method="compose")
366
  result = loop_clip.subclip(0, target_duration)
367
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
368
  return result
369
 
 
 
 
370
 
371
  def loop_clip(self, clip, target_duration):
372
  loop_count = int(target_duration // clip.duration) + 1 # how many loops needed
@@ -831,7 +862,7 @@ class VideoRenderer:
831
  bg_volume = base_volume * 1.1
832
 
833
  # Clamp for safety
834
- return max(0.15, min(1.0, bg_volume))
835
 
836
 
837
  async def _prepare_audio_clips(self, assets: Dict, target_duration: float) -> List[AudioFileClip]:
 
34
  from data_holder import DataHolder
35
  import numpy as np
36
 
37
+ ALLOWED_BG_MUSIC_VOLUME = 0.10
38
+ REVERSE_THRESHOLD = 0.5
39
+ HOOK_VIDEO_DURATION = 1.5
40
+
41
  class VideoRenderer:
42
  def __init__(self, config: Dict, data_holder: DataHolder = None):
43
  self.config = config
 
62
  video_clips = await self._prepare_video_clips_natural_speed()
63
 
64
  # Create video sequence with natural speed
65
+ # final_video = await self._create_video_sequence_natural_speed(video_clips, video_config)
66
+ final_video = concatenate_videoclips(video_clips, method="compose")
67
+ final_video = final_video.without_audio()
68
 
69
  # Render video WITHOUT audio
70
  output_path = await self._render_video_only(final_video)
 
126
  utils.calculate_video_durations(selected_videos, all_tts_script_segment, assets["timed_transcript"], original_duration)
127
 
128
  target_size = (1080, 1920)
129
+ extra_secs = 0.0
130
  # Load library videos - NO speed adjustments
131
  for i, lib_video in enumerate(selected_videos):
132
  if lib_video.get("local_path"):
 
144
  prev_clip = clips[-1][1]
145
  prev_clip_file = selected_videos[-2]["local_path"]
146
 
147
+ prev_clip, lib_clip, extra_secs = await self._prepare_clip(
148
+ lib_video=lib_video,
149
+ original_clip_path=lib_video["local_path"],
150
+ alternate_url_local_path=lib_video["alternate_url_local_path"],
151
+ original_clip=original_clip,
152
+ lib_hook_start=lib_hook_start,
153
+ lib_hook_end=lib_hook_end,
154
+ target_duration=lib_video["duration"],
155
+ extra_secs=extra_secs,
156
+ prev_clip=prev_clip,
157
+ prev_clip_file=prev_clip_file
158
+ )
159
+ if extra_secs > 0: # ignore tiny floating-point diffs
160
+ logger.info(f"⏱️ Added {extra_secs:.2f}s extra to match target duration ({lib_video['duration']:.2f}s)")
161
  if prev_clip:
162
  clip_name, _ = clips[-1]
163
  clips[-1] = (clip_name, prev_clip)
164
 
165
  lib_clip = lib_clip.without_audio()
166
  clips.append((f"library_{i}", lib_clip))
167
+ self.data_holder.video_usage_count.update({
168
+ f"{lib_video['url']}": self.data_holder.video_usage_count.get(f"{lib_video['url']}", 0) + 1
169
+ })
170
  logger.info(f"✓ Loaded library video {i}: {lib_clip.duration:.2f}s (NATURAL SPEED)")
171
  except Exception as e:
172
  import traceback
 
187
  pass
188
  raise
189
 
190
+ async def _prepare_clip(self, lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_start, lib_hook_end, target_duration: float, extra_secs, prev_clip, prev_clip_file):
191
  # Validate inputs
192
  if target_duration <= 0:
193
  raise ValueError(f"Invalid target_duration: {target_duration}")
 
199
  # Handle start hook case
200
  if lib_hook_start:
201
  return self._prepare_with_start_hook(
202
+ lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_start,
203
  target_duration, prev_clip
204
  )
205
 
206
  # Handle end hook case
207
  elif lib_hook_end:
208
  return self._prepare_with_end_hook(
209
+ lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_end,
210
+ target_duration, extra_secs, prev_clip, prev_clip_file
211
  )
212
 
213
  # No hooks - just extend/trim the original clip
214
  else:
215
  logger.info("No hooks detected, adjusting original clip duration only")
216
+ result, extra_secs = self._extend_or_trim_clip(lib_video, original_clip_path, alternate_url_local_path, original_clip, target_duration)
217
+ return prev_clip, result, extra_secs
218
 
219
+ def _prepare_with_start_hook(self, lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_start, target_duration, prev_clip):
220
  """Handle clip preparation when a start hook is present."""
221
  logger.info(f"Start hook detected with duration {lib_hook_start.duration:.2f}s")
222
+ total_duration = lib_hook_start.duration + original_clip.duration
223
 
224
  # Case 1: Target fits within start hook + original clip
225
+ if target_duration <= total_duration:
226
  logger.info("Target duration fits start hook + original clip, concatenating and trimming")
227
  result = concatenate_videoclips([lib_hook_start, original_clip], method="compose").subclip(0, target_duration)
228
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
229
+ return prev_clip, result, 0.0
230
 
231
  # Case 2: Need to extend beyond original clip
232
+ modified_clip, extra_secs = self._extend_or_trim_clip(lib_video, original_clip_path, alternate_url_local_path, original_clip, target_duration-HOOK_VIDEO_DURATION)
233
+
234
+ result = concatenate_videoclips([lib_hook_start, modified_clip], method="compose").subclip(0, target_duration)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
236
+ return prev_clip, result, extra_secs
237
 
238
 
239
+ def _prepare_with_end_hook(self, lib_video, original_clip_path, alternate_url_local_path, original_clip, lib_hook_end,
240
+ target_duration, extra_secs, prev_clip, prev_clip_file):
241
  """Handle clip preparation when an end hook is present."""
242
+ temp_original_clip = original_clip
243
  logger.info(f"End hook detected with duration {lib_hook_end.duration:.2f}s")
244
  total_duration = original_clip.duration + lib_hook_end.duration
245
  logger.info(f"Combined original + end hook duration: {total_duration:.2f}s")
246
+
247
+ cur_extra_secs = 0.0
248
  # Case 1: Combined duration exceeds target - need to trim
249
+ if target_duration <= total_duration:
250
  trim_duration = target_duration - lib_hook_end.duration
251
 
252
  if trim_duration > 0:
253
  logger.info(f"Trimming original clip from {original_clip.duration:.2f}s to {trim_duration:.2f}s to fit end hook")
254
  original_clip = original_clip.subclip(0, trim_duration)
255
+ cur_extra_secs = 0.0
256
  else:
257
+ # Target shorter than hook take last part of hook
258
+ start_trim = max(0, lib_hook_end.duration - target_duration)
259
+ result = lib_hook_end.subclip(start_trim, lib_hook_end.duration)
260
+ logger.info(f"Prepared end-only clip: {result.duration:.2f}s")
261
+ return prev_clip, result, 0.0
262
 
263
  # Case 2: Combined duration is less than target - need to extend original
264
+ elif target_duration > total_duration:
265
  remaining = target_duration - lib_hook_end.duration
266
  logger.info(f"Original + end hook too short, need to extend original by {remaining:.2f}s")
267
 
268
+ original_clip, cur_extra_secs = self._extend_or_trim_clip(lib_video, original_clip_path, alternate_url_local_path, original_clip, remaining)
269
 
270
  # Case 3: Exact match or after trimming/extending
271
  logger.info("Concatenating original clip and end hook")
272
 
273
  # Handle very short original clips
274
  if original_clip.duration < 1:
275
+ if original_clip.duration + extra_secs > 1:
276
+ # Determine how much of extra_secs is actually used to extend this clip
277
+ possible_new_duration = original_clip.duration + extra_secs
278
+ new_duration = min(possible_new_duration, temp_original_clip.duration)
279
+ used_extra = max(0.0, new_duration - original_clip.duration)
280
+
281
+ logger.info(
282
+ f"Extending original clip from {original_clip.duration:.2f}s → {new_duration:.2f}s "
283
+ f"(used_extra={used_extra:.2f}s, available_extra={extra_secs:.2f}s)"
284
+ )
285
+
286
+ # Apply the extension
287
+ original_clip = temp_original_clip.subclip(0, new_duration)
288
+
289
+ # Now, trim the previous clip by exactly how much we actually used
290
+ new_prev_duration = prev_clip.duration - used_extra
291
+ logger.info(
292
+ f"✂️ Trimming previous clip by {used_extra:.2f}s → new duration {new_prev_duration:.2f}s"
293
+ )
294
+ prev_clip = prev_clip.subclip(0, new_prev_duration)
295
+
296
+ result = concatenate_videoclips([original_clip, lib_hook_end], method="compose").subclip(0, target_duration)
297
+ cur_extra_secs = 0.0
298
+
299
+ else:
300
+ if prev_clip and prev_clip_file:
301
+ logger.info("Original clip too short, extending previous clip instead")
302
+ prev_clip, extra_secs = self._extend_or_trim_clip(lib_video, prev_clip_file, alternate_url_local_path, prev_clip, prev_clip.duration + original_clip.duration)
303
+ result = lib_hook_end.subclip(max(0, lib_hook_end.duration - target_duration), lib_hook_end.duration)
304
+
305
  else:
306
+ result = concatenate_videoclips([original_clip, lib_hook_end], method="compose").subclip(0, target_duration)
307
 
308
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
309
+ return prev_clip, result, cur_extra_secs
310
 
311
 
312
+ def _extend_or_trim_clip(self, lib_video, original_clip_path, alternate_url_local_path, original_clip, target_duration):
313
  """
314
  Extend or trim a clip to match target duration.
315
 
316
  Returns:
317
  VideoFileClip: The adjusted clip
318
  """
319
+ total_duration = original_clip.duration
320
+
321
+ # Case 0: Equal
322
+ if abs(target_duration - total_duration) < 0.01: # 10ms tolerance
323
+ return original_clip, 0.0
324
+
325
+ # Case 1: Target is less than or equal to clip duration
326
+ if target_duration <= total_duration:
327
+ logger.info("Target duration fits original clip, trimming")
328
+ return original_clip.subclip(0, target_duration), 0.0
329
+
330
+ # Case 2: Target is greater than clip duration
331
+ elif target_duration > total_duration:
332
+ if alternate_url_local_path is None or (target_duration - total_duration <= REVERSE_THRESHOLD): # Small tolerance for floating point
333
+ logger.info("Reversing clip.")
334
+ reversed_clip = self.reverse_clip(original_clip_path)
335
+ loop_clip = concatenate_videoclips([original_clip, reversed_clip, original_clip, reversed_clip], method="compose")
336
+ return loop_clip.subclip(0, target_duration), target_duration - original_clip.duration
337
+ else:
338
+ logger.info("Using extra clip.")
339
+ self.data_holder.video_usage_count.update({
340
+ f"{lib_video['alternate_url']}": self.data_holder.video_usage_count.get(f"{lib_video['alternate_url']}", 0) + 1
341
+ })
342
+ alternate_clip = VideoFileClip(alternate_url_local_path)
343
+ alternate_clip = alternate_clip.subclip(0, target_duration - total_duration)
344
+ alternate_clip = self._resize_for_vertical(alternate_clip)
345
+ combined = concatenate_videoclips([original_clip, alternate_clip, original_clip, alternate_clip], method="compose")
346
+ result = combined.subclip(0, target_duration)
347
+ extra_secs = max(0.0, target_duration - original_clip.duration - alternate_clip.duration)
348
+ return result, extra_secs
349
 
350
  def _extend_clip_to_duration(self, original_clip_path, original_clip, target_duration):
351
  """
 
389
 
390
  # Fallback to ping-pong reverse looping
391
  logger.info("Using ping-pong reverse looping as fallback for extension")
392
+ reversed_clip = self.reverse_clip(original_clip_path)
 
393
  loop_clip = concatenate_videoclips([original_clip, reversed_clip, original_clip, reversed_clip], method="compose")
394
  result = loop_clip.subclip(0, target_duration)
395
  logger.info(f"Prepared clip duration: {result.duration:.2f}s")
396
  return result
397
 
398
+ def reverse_clip(self, clip_path):
399
+ reversed_clip = VideoFileClip(utils.reverse_clip(clip_path))
400
+ return self._resize_for_vertical(reversed_clip)
401
 
402
  def loop_clip(self, clip, target_duration):
403
  loop_count = int(target_duration // clip.duration) + 1 # how many loops needed
 
862
  bg_volume = base_volume * 1.1
863
 
864
  # Clamp for safety
865
+ return max(ALLOWED_BG_MUSIC_VOLUME, min(1.0, bg_volume))
866
 
867
 
868
  async def _prepare_audio_clips(self, assets: Dict, target_duration: float) -> List[AudioFileClip]: