topcoderkz committed on
Commit
b1bee74
·
1 Parent(s): e4d57c9

Refactor code, remove deepseek integration

Browse files
.env.example CHANGED
@@ -1,7 +1,6 @@
1
  # API Keys
2
  GEMINI_API_KEY=your_gemini_api_key_here
3
  RUNWAYML_API_KEY=your_runwayml_api_key_here
4
- DEEPSEEK_API_KEY=your_deepseek_api_key_here
5
  GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json
6
 
7
  # Cloud Storage
 
1
  # API Keys
2
  GEMINI_API_KEY=your_gemini_api_key_here
3
  RUNWAYML_API_KEY=your_runwayml_api_key_here
 
4
  GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json
5
 
6
  # Cloud Storage
config/api_keys.yaml CHANGED
@@ -6,10 +6,6 @@ runwayml:
6
  base_url: "https://api.runwayml.com/v1"
7
  timeout: 300
8
 
9
- deepseek:
10
- base_url: "https://api.deepseek.com/v1"
11
- model: "deepseek-chat"
12
-
13
  tts:
14
  provider: "google"
15
  voice: "en-US-Neural2-F"
 
6
  base_url: "https://api.runwayml.com/v1"
7
  timeout: 300
8
 
 
 
 
 
9
  tts:
10
  provider: "google"
11
  voice: "en-US-Neural2-F"
requirements.txt CHANGED
@@ -3,8 +3,11 @@ aiohttp==3.9.5
3
  aiosignal==1.4.0
4
  annotated-types==0.7.0
5
  attrs==25.3.0
 
 
6
  cachetools==5.5.2
7
  certifi==2025.8.3
 
8
  charset-normalizer==3.4.3
9
  decorator==4.4.2
10
  frozenlist==1.7.0
@@ -26,19 +29,31 @@ httplib2==0.31.0
26
  idna==3.10
27
  imageio==2.37.0
28
  imageio-ffmpeg==0.6.0
 
 
 
 
29
  moviepy==1.0.3
 
30
  multidict==6.6.4
 
 
31
  numpy==1.26.4
 
32
  pandas==2.3.3
33
  pillow==11.3.0
 
 
34
  proglog==0.1.12
35
  propcache==0.4.0
36
  proto-plus==1.26.1
37
  protobuf==5.29.5
38
  pyasn1==0.6.1
39
  pyasn1_modules==0.4.2
 
40
  pydantic==2.11.10
41
  pydantic_core==2.33.2
 
42
  pyparsing==3.2.5
43
  python-dateutil==2.9.0.post0
44
  python-dotenv==1.0.1
@@ -46,7 +61,15 @@ pytz==2025.2
46
  PyYAML==6.0.3
47
  requests==2.32.5
48
  rsa==4.9.1
 
 
49
  six==1.17.0
 
 
 
 
 
 
50
  tqdm==4.67.1
51
  typing-inspection==0.4.2
52
  typing_extensions==4.15.0
 
3
  aiosignal==1.4.0
4
  annotated-types==0.7.0
5
  attrs==25.3.0
6
+ audioop-lts==0.2.2
7
+ audioread==3.0.1
8
  cachetools==5.5.2
9
  certifi==2025.8.3
10
+ cffi==2.0.0
11
  charset-normalizer==3.4.3
12
  decorator==4.4.2
13
  frozenlist==1.7.0
 
29
  idna==3.10
30
  imageio==2.37.0
31
  imageio-ffmpeg==0.6.0
32
+ joblib==1.5.2
33
+ lazy_loader==0.4
34
+ librosa==0.11.0
35
+ llvmlite==0.45.1
36
  moviepy==1.0.3
37
+ msgpack==1.1.2
38
  multidict==6.6.4
39
+ mutagen==1.47.0
40
+ numba==0.62.1
41
  numpy==1.26.4
42
+ packaging==25.0
43
  pandas==2.3.3
44
  pillow==11.3.0
45
+ platformdirs==4.5.0
46
+ pooch==1.8.2
47
  proglog==0.1.12
48
  propcache==0.4.0
49
  proto-plus==1.26.1
50
  protobuf==5.29.5
51
  pyasn1==0.6.1
52
  pyasn1_modules==0.4.2
53
+ pycparser==2.23
54
  pydantic==2.11.10
55
  pydantic_core==2.33.2
56
+ pydub==0.25.1
57
  pyparsing==3.2.5
58
  python-dateutil==2.9.0.post0
59
  python-dotenv==1.0.1
 
61
  PyYAML==6.0.3
62
  requests==2.32.5
63
  rsa==4.9.1
64
+ scikit-learn==1.7.2
65
+ scipy==1.16.2
66
  six==1.17.0
67
+ soundfile==0.13.1
68
+ soxr==1.0.0
69
+ standard-aifc==3.13.0
70
+ standard-chunk==3.13.0
71
+ standard-sunau==3.13.0
72
+ threadpoolctl==3.6.0
73
  tqdm==4.67.1
74
  typing-inspection==0.4.2
75
  typing_extensions==4.15.0
src/api_clients.py CHANGED
@@ -17,7 +17,7 @@ class APIClients:
17
 
18
  # Initialize Gemini client
19
  self.gemini_client = genai
20
- genai.configure(api_key=config.get('gemini_api_key') or os.getenv('GEMINI_API_KEY'))
21
 
22
  # Initialize GCS client
23
  self.gcs_client = storage.Client()
@@ -41,7 +41,7 @@ class APIClients:
41
  Enhanced prompt optimized for video generation
42
  """
43
  try:
44
- logger.info(f"Enhancing prompt with Gemini: {prompt[:100]}...")
45
 
46
  enhancement_instruction = f"""
47
  You are a prompt enhancement specialist for video generation AI.
@@ -61,7 +61,7 @@ class APIClients:
61
  response = model.generate_content(enhancement_instruction)
62
 
63
  enhanced_prompt = response.text.strip()
64
- logger.info(f"Enhanced prompt: {enhanced_prompt[:100]}...")
65
  return enhanced_prompt
66
 
67
  except Exception as e:
@@ -71,70 +71,93 @@ class APIClients:
71
 
72
  async def generate_video(self, prompt: str, duration: int = 10) -> Dict:
73
  """
74
- Generate video using RunwayML Gen-4 API
75
  """
76
  try:
77
- logger.info(f"Generating video with RunwayML: {prompt[:100]}...")
78
 
79
  headers = {
80
  "Authorization": f"Bearer {self.runway_api_key}",
81
  "Content-Type": "application/json",
82
- "X-Runway-Version": "1.0.0" # Add this required header
83
  }
84
 
85
  payload = {
86
- "promptText": prompt,
87
- "model": "gen4",
88
  "duration": duration,
89
- "ratio": "16:9",
90
- "watermark": False
91
  }
92
-
93
  async with aiohttp.ClientSession() as session:
94
- # Create generation task
95
  async with session.post(
96
- f"{self.runway_base_url}/generations",
97
  headers=headers,
98
  json=payload
99
  ) as response:
100
  if response.status != 200:
101
  error_text = await response.text()
102
- raise Exception(f"RunwayML API error: {error_text}")
103
 
104
  task_data = await response.json()
105
  task_id = task_data['id']
106
  logger.info(f"Video generation task created: {task_id}")
107
 
108
  # Poll for completion
109
- max_attempts = 60 # 5 minutes max
110
  attempt = 0
111
 
112
  while attempt < max_attempts:
113
- await asyncio.sleep(5) # Check every 5 seconds
114
 
115
  async with session.get(
116
- f"{self.runway_base_url}/generations/{task_id}",
117
  headers=headers
118
  ) as status_response:
 
 
 
 
119
  status_data = await status_response.json()
120
  status = status_data['status']
121
 
122
  if status == 'SUCCEEDED':
123
- video_url = status_data['output'][0]
 
 
 
 
 
124
  logger.info(f"Video generated successfully: {video_url}")
125
  return {
126
  'video_url': video_url,
127
  'task_id': task_id,
128
  'duration': duration,
129
- 'prompt': prompt
 
 
130
  }
 
131
  elif status == 'FAILED':
132
- raise Exception(f"Video generation failed: {status_data.get('failure')}")
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  attempt += 1
135
- logger.info(f"Video generation in progress... ({status})")
136
 
137
- raise Exception("Video generation timeout")
138
 
139
  except Exception as e:
140
  logger.error(f"Error generating video with RunwayML: {e}")
@@ -145,7 +168,7 @@ class APIClients:
145
  Generate TTS audio using Google Cloud TTS
146
  """
147
  try:
148
- logger.info(f"Generating TTS for text: {text[:100]}...")
149
 
150
  if not voice_name:
151
  voice_name = self.config.get('default_voice', 'en-US-Neural2-F')
@@ -168,12 +191,10 @@ class APIClients:
168
  pitch=0.0
169
  )
170
 
171
- # Remove TimePointingType as it's not available in this version
172
  response = self.tts_client.synthesize_speech(
173
  input=synthesis_input,
174
  voice=voice,
175
  audio_config=audio_config
176
- # Remove: enable_time_pointing=[texttospeech.TimePointingType.SSML_MARK]
177
  )
178
 
179
  # Save audio to temporary file
@@ -184,18 +205,37 @@ class APIClients:
184
  with open(audio_path, "wb") as out:
185
  out.write(response.audio_content)
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  # Upload to GCS
188
  audio_url = await self.store_in_gcs(audio_path, 'audio')
189
 
190
- # Remove lip sync data extraction
191
  logger.info(f"TTS generated successfully: {audio_url}")
192
 
193
  return {
194
  'audio_url': audio_url,
195
- 'duration': len(response.audio_content) / 32000, # Approximate
196
  'voice': voice_name,
197
  'text': text,
198
- 'local_path': audio_path # Add local path directly
199
  }
200
 
201
  except Exception as e:
@@ -279,91 +319,23 @@ class APIClients:
279
  else:
280
  logger.error(" ❌ TTS API: Not configured")
281
 
282
- # Check DeepSeek configuration
283
- deepseek_key = self.config.get('deepseek_api_key')
284
- if deepseek_key and len(deepseek_key) > 10:
285
- logger.info(" βœ… DeepSeek API: Configured")
286
- else:
287
- logger.warning(" ⚠️ DeepSeek API: Not configured")
288
-
289
  all_healthy = all(health.values())
290
  status = "βœ… All systems operational!" if all_healthy else "⚠️ Some services have issues"
291
  logger.info(f"\n{status}")
292
 
293
  return health
294
 
295
- async def select_videos(self, tts_script: str, count: int = 3) -> List[Dict]:
296
- """
297
- AI agent selects videos based on script using Gemini
298
-
299
- Args:
300
- tts_script: The TTS script to analyze
301
- count: Number of videos to select (max 3)
302
-
303
- Returns:
304
- List of selected video metadata
305
- """
306
- try:
307
- logger.info(f"Selecting {count} videos for script...")
308
-
309
- # Use Gemini to analyze script and suggest video keywords
310
- analysis_prompt = f"""
311
- Analyze this product advertisement script and identify {count} key visual moments
312
- that should be represented with video clips. For each moment, provide:
313
- 1. A descriptive keyword/phrase
314
- 2. The timing (start-end seconds if mentioned)
315
- 3. Visual style preference (product closeup, lifestyle, abstract, etc.)
316
-
317
- Script: {tts_script}
318
-
319
- Return as JSON array with format:
320
- [{{"keyword": "...", "timing": "0-5", "style": "..."}}, ...]
321
- """
322
- model = genai.GenerativeModel('gemini-2.0-flash-exp')
323
- response = model.generate_content(analysis_prompt)
324
-
325
- # Parse Gemini response
326
- try:
327
- suggestions = json.loads(response.text.strip())
328
- except:
329
- # Fallback to keyword extraction
330
- keywords = self._extract_keywords(tts_script)
331
- suggestions = [
332
- {"keyword": kw, "timing": f"{i*5}-{(i+1)*5}", "style": "general"}
333
- for i, kw in enumerate(keywords[:count])
334
- ]
335
-
336
- # Select videos from library based on suggestions
337
- selected_videos = []
338
- for i, suggestion in enumerate(suggestions[:count]):
339
- video_id = (hash(suggestion['keyword']) + i) % self.config['video_library_size'] + 1
340
- selected_videos.append({
341
- 'id': video_id,
342
- 'url': f"gs://{self.config['gcs_bucket_name']}/library/video{video_id}.mp4",
343
- 'keyword': suggestion['keyword'],
344
- 'timing': suggestion.get('timing', f"{i*5}-{(i+1)*5}"),
345
- 'style': suggestion.get('style', 'general'),
346
- 'reason': f"Matches: {suggestion['keyword']}"
347
- })
348
-
349
- logger.info(f"Selected {len(selected_videos)} videos")
350
- return selected_videos
351
-
352
- except Exception as e:
353
- logger.error(f"Error selecting videos: {e}")
354
- # Fallback selection
355
- return self._fallback_video_selection(tts_script, count)
356
-
357
  async def store_in_gcs(self, file_path: str, content_type: str = 'video') -> str:
358
  """
359
- Store file in Google Cloud Storage
360
 
361
  Args:
362
  file_path: Local file path
363
  content_type: Type of content ('video', 'audio', etc.)
364
 
365
  Returns:
366
- GCS public URL
367
  """
368
  try:
369
  logger.info(f"Storing file in GCS: {file_path}")
@@ -386,59 +358,18 @@ class APIClients:
386
  # Upload file
387
  blob.upload_from_filename(file_path)
388
 
389
- # Make public (optional)
390
- blob.make_public()
 
 
 
 
 
391
 
392
- gcs_url = blob.public_url
393
- logger.info(f"File uploaded to: {gcs_url}")
394
 
395
- return gcs_url
396
 
397
  except Exception as e:
398
  logger.error(f"Error storing file in GCS: {e}")
399
  raise
400
-
401
- def _extract_keywords(self, text: str) -> List[str]:
402
- """Extract keywords from TTS script"""
403
- text_lower = text.lower()
404
- keywords = []
405
-
406
- key_phrases = [
407
- 'somira massager', 'neck pain', 'product', 'massager',
408
- 'solution', 'comfort', 'using the product', 'relaxation',
409
- 'relief', 'wellness', 'ergonomic', 'design'
410
- ]
411
-
412
- for phrase in key_phrases:
413
- if phrase in text_lower:
414
- keywords.append(phrase)
415
-
416
- return keywords if keywords else ['general', 'product', 'lifestyle']
417
-
418
- def _extract_timing_data(self, tts_response) -> Dict:
419
- """Extract timing data from TTS response for lip sync"""
420
- # This would parse the timepoints from Azure TTS response
421
- # Simplified version
422
- return {
423
- 'timestamps': [],
424
- 'phonemes': [],
425
- 'words': []
426
- }
427
-
428
- def _fallback_video_selection(self, text: str, count: int) -> List[Dict]:
429
- """Fallback video selection if AI selection fails"""
430
- keywords = self._extract_keywords(text)
431
- selected_videos = []
432
-
433
- for i in range(min(count, 3)):
434
- video_id = (hash(text) + i) % self.config['video_library_size'] + 1
435
- selected_videos.append({
436
- 'id': video_id,
437
- 'url': f"gs://{self.config['gcs_bucket_name']}/library/video{video_id}.mp4",
438
- 'keyword': keywords[i % len(keywords)] if keywords else "general",
439
- 'timing': f"{i*5}-{(i+1)*5}",
440
- 'style': 'general',
441
- 'reason': f'Fallback selection for: {keywords[i % len(keywords)] if keywords else "general"}'
442
- })
443
-
444
- return selected_videos
 
17
 
18
  # Initialize Gemini client
19
  self.gemini_client = genai
20
+ genai.configure(api_key=config.get('gemini_api_key'))
21
 
22
  # Initialize GCS client
23
  self.gcs_client = storage.Client()
 
41
  Enhanced prompt optimized for video generation
42
  """
43
  try:
44
+ logger.info(f"Enhancing prompt with Gemini: {prompt[:300]}...")
45
 
46
  enhancement_instruction = f"""
47
  You are a prompt enhancement specialist for video generation AI.
 
61
  response = model.generate_content(enhancement_instruction)
62
 
63
  enhanced_prompt = response.text.strip()
64
+ logger.info(f"Enhanced prompt: {enhanced_prompt[:300]}...")
65
  return enhanced_prompt
66
 
67
  except Exception as e:
 
71
 
72
  async def generate_video(self, prompt: str, duration: int = 10) -> Dict:
73
  """
74
+ Generate video using RunwayML API
75
  """
76
  try:
77
+ logger.info(f"Generating video with RunwayML: {prompt[:1000]}...")
78
 
79
  headers = {
80
  "Authorization": f"Bearer {self.runway_api_key}",
81
  "Content-Type": "application/json",
82
+ "X-Runway-Version": "2024-11-06"
83
  }
84
 
85
  payload = {
86
+ "promptText": prompt[:1000],
87
+ "model": "veo3",
88
  "duration": duration,
89
+ "ratio": "1280:720", # Standard HD ratio
90
+ # "seed": 42 # Optional: for reproducibility
91
  }
92
+
93
  async with aiohttp.ClientSession() as session:
94
+ # Create video generation task
95
  async with session.post(
96
+ "https://api.dev.runwayml.com/v1/text_to_video",
97
  headers=headers,
98
  json=payload
99
  ) as response:
100
  if response.status != 200:
101
  error_text = await response.text()
102
+ raise Exception(f"RunwayML API error ({response.status}): {error_text}")
103
 
104
  task_data = await response.json()
105
  task_id = task_data['id']
106
  logger.info(f"Video generation task created: {task_id}")
107
 
108
  # Poll for completion
109
+ max_attempts = 120 # 20 minutes max (video generation can take time)
110
  attempt = 0
111
 
112
  while attempt < max_attempts:
113
+ await asyncio.sleep(10) # Check every 10 seconds
114
 
115
  async with session.get(
116
+ f"https://api.dev.runwayml.com/v1/tasks/{task_id}", # Correct tasks endpoint
117
  headers=headers
118
  ) as status_response:
119
+ if status_response.status != 200:
120
+ error_text = await status_response.text()
121
+ raise Exception(f"Task status check failed: {error_text}")
122
+
123
  status_data = await status_response.json()
124
  status = status_data['status']
125
 
126
  if status == 'SUCCEEDED':
127
+ video_urls = status_data['output'] # Returns array of URLs
128
+ video_url = video_urls[0] if video_urls else None
129
+
130
+ if not video_url:
131
+ raise Exception("No video URL in successful response")
132
+
133
  logger.info(f"Video generated successfully: {video_url}")
134
  return {
135
  'video_url': video_url,
136
  'task_id': task_id,
137
  'duration': duration,
138
+ 'prompt': prompt,
139
+ 'status': status,
140
+ 'created_at': status_data.get('createdAt')
141
  }
142
+
143
  elif status == 'FAILED':
144
+ failure_msg = status_data.get('failure', 'Unknown error')
145
+ failure_code = status_data.get('failureCode', 'UNKNOWN')
146
+ raise Exception(f"Video generation failed: {failure_msg} (Code: {failure_code})")
147
+ elif status == 'THROTTLED':
148
+ logger.warning("Video generation throttled, retrying...")
149
+ elif status == 'PENDING':
150
+ logger.info("Video generation pending...")
151
+ elif status == 'RUNNING':
152
+ # Still processing
153
+ progress = status_data.get('progress', 0)
154
+ logger.info(f"Video generation {status.lower()}: {progress*100:.1f}% complete")
155
+ else:
156
+ logger.warning(f"Unknown status: {status}")
157
 
158
  attempt += 1
 
159
 
160
+ raise Exception(f"Video generation timeout after {max_attempts * 5} seconds")
161
 
162
  except Exception as e:
163
  logger.error(f"Error generating video with RunwayML: {e}")
 
168
  Generate TTS audio using Google Cloud TTS
169
  """
170
  try:
171
+ logger.info(f"Generating TTS for text: {text[:300]}...")
172
 
173
  if not voice_name:
174
  voice_name = self.config.get('default_voice', 'en-US-Neural2-F')
 
191
  pitch=0.0
192
  )
193
 
 
194
  response = self.tts_client.synthesize_speech(
195
  input=synthesis_input,
196
  voice=voice,
197
  audio_config=audio_config
 
198
  )
199
 
200
  # Save audio to temporary file
 
205
  with open(audio_path, "wb") as out:
206
  out.write(response.audio_content)
207
 
208
+ # Get actual audio duration using mutagen or pydub
209
+ try:
210
+ from mutagen.mp3 import MP3
211
+ audio = MP3(audio_path)
212
+ duration = audio.info.length
213
+ logger.info(f"Audio duration: {duration:.2f}s")
214
+ except ImportError:
215
+ # Fallback: use pydub if mutagen not available
216
+ try:
217
+ from pydub import AudioSegment
218
+ audio = AudioSegment.from_mp3(audio_path)
219
+ duration = len(audio) / 1000.0 # Convert milliseconds to seconds
220
+ logger.info(f"Audio duration: {duration:.2f}s (via pydub)")
221
+ except ImportError:
222
+ # Last resort: estimate based on text length
223
+ # Average speaking rate: ~150 words per minute
224
+ word_count = len(text.split())
225
+ duration = (word_count / 150) * 60
226
+ logger.warning(f"⚠️ Could not determine exact duration, estimating: {duration:.2f}s")
227
+
228
  # Upload to GCS
229
  audio_url = await self.store_in_gcs(audio_path, 'audio')
230
 
 
231
  logger.info(f"TTS generated successfully: {audio_url}")
232
 
233
  return {
234
  'audio_url': audio_url,
235
+ 'duration': duration,
236
  'voice': voice_name,
237
  'text': text,
238
+ 'local_path': audio_path
239
  }
240
 
241
  except Exception as e:
 
319
  else:
320
  logger.error(" ❌ TTS API: Not configured")
321
 
322
+
 
 
 
 
 
 
323
  all_healthy = all(health.values())
324
  status = "βœ… All systems operational!" if all_healthy else "⚠️ Some services have issues"
325
  logger.info(f"\n{status}")
326
 
327
  return health
328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  async def store_in_gcs(self, file_path: str, content_type: str = 'video') -> str:
330
  """
331
+ Store file in Google Cloud Storage with signed URL
332
 
333
  Args:
334
  file_path: Local file path
335
  content_type: Type of content ('video', 'audio', etc.)
336
 
337
  Returns:
338
+ Signed URL with temporary access
339
  """
340
  try:
341
  logger.info(f"Storing file in GCS: {file_path}")
 
358
  # Upload file
359
  blob.upload_from_filename(file_path)
360
 
361
+ # Generate signed URL (valid for 7 days)
362
+ from datetime import timedelta
363
+ signed_url = blob.generate_signed_url(
364
+ version="v4",
365
+ expiration=timedelta(days=7),
366
+ method="GET"
367
+ )
368
 
369
+ logger.info(f"File uploaded with signed URL: {signed_url[:100]}...")
 
370
 
371
+ return signed_url
372
 
373
  except Exception as e:
374
  logger.error(f"Error storing file in GCS: {e}")
375
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/asset_selector.py CHANGED
@@ -1,11 +1,11 @@
1
- """
2
- AI-powered asset selection using DeepSeek for contextual video matching
3
- """
4
  import pandas as pd
5
  import aiohttp
6
  import json
7
  from typing import List, Dict, Optional
8
  from utils import logger
 
 
 
9
 
10
 
11
  class AssetSelector:
@@ -15,39 +15,58 @@ class AssetSelector:
15
  self.audio_library = self._load_audio_library()
16
 
17
  def _load_video_library(self) -> pd.DataFrame:
18
- """Load video library from CSV data"""
19
  try:
20
- # Create a simple video library from your provided data
21
- video_data = [
22
- {
23
- 'url': 'https://storage.googleapis.com/somira/Somira%20Massager.mp4',
24
- 'duration': 2,
25
- 'alignment': 'product mention, solution, features',
26
- 'energy': 5,
27
- 'description': 'Product showcase'
28
- },
29
- {
30
- 'url': 'https://storage.googleapis.com/somira/FemaleWomenPuttingOnNeckMassagerr.mp4',
31
- 'duration': 2,
32
- 'alignment': 'using the product, turning on, operation',
33
- 'energy': 35,
34
- 'description': 'Product usage demonstration'
35
- },
36
- {
37
- 'url': 'https://storage.googleapis.com/somira/PersonEnjoyingTheNeckMassager.mp4',
38
- 'duration': 1.5,
39
- 'alignment': 'comfort, relaxation, satisfaction',
40
- 'energy': 40,
41
- 'description': 'User satisfaction'
42
- },
43
- # Add more videos as needed for testing
44
- ]
45
-
46
- return pd.DataFrame(video_data)
47
 
48
  except Exception as e:
49
- logger.error(f"Failed to load video library: {e}")
50
  return pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  def _load_audio_library(self) -> List[str]:
53
  """Load audio library URLs"""
@@ -65,16 +84,21 @@ class AssetSelector:
65
  List of selected video metadata
66
  """
67
  try:
68
- logger.info(f"πŸ€– AI video selection for script: {tts_script[:100]}...")
69
 
70
- # Use DeepSeek for intelligent selection
71
- selected_videos = await self._analyze_with_deepseek(tts_script, max_duration)
72
 
73
  if not selected_videos:
74
  logger.warning("⚠️ AI selection failed, using fallback")
75
  selected_videos = self._fallback_selection(tts_script, max_duration)
76
 
77
- total_duration = sum(v['duration'] for v in selected_videos)
 
 
 
 
 
78
  logger.info(f"βœ“ Selected {len(selected_videos)} videos, total: {total_duration}s")
79
 
80
  return selected_videos
@@ -83,12 +107,36 @@ class AssetSelector:
83
  logger.error(f"❌ Video selection failed: {e}")
84
  return self._fallback_selection(tts_script, max_duration)
85
 
86
- async def _analyze_with_deepseek(self, tts_script: str, max_duration: int) -> List[Dict]:
87
- """Use DeepSeek API for contextual video selection"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  try:
89
  # Prepare video library context
90
  video_context = "\n".join([
91
- f"{i}. {row['description']} - {row['duration']}s - Alignment: {row['alignment']}"
92
  for i, row in self.video_library.iterrows()
93
  ])
94
 
@@ -104,7 +152,7 @@ class AssetSelector:
104
  - Total duration under {max_duration} seconds
105
  - Energy level appropriateness
106
 
107
- Return JSON format:
108
  {{
109
  "selected_videos": [
110
  {{
@@ -118,52 +166,44 @@ class AssetSelector:
118
  }}
119
  """
120
 
121
- # DeepSeek API call
122
- headers = {
123
- "Authorization": f"Bearer {self.config.get('deepseek_api_key')}",
124
- "Content-Type": "application/json"
125
- }
126
 
127
- payload = {
128
- "model": "deepseek-chat",
129
- "messages": [
130
- {"role": "system", "content": "You are a video editor AI that selects the most relevant videos for advertising content."},
131
- {"role": "user", "content": prompt}
132
- ],
133
- "temperature": 0.3,
134
- "max_tokens": 2000
135
- }
136
 
137
- async with aiohttp.ClientSession() as session:
138
- async with session.post(
139
- "https://api.deepseek.com/v1/chat/completions",
140
- headers=headers,
141
- json=payload
142
- ) as response:
143
- if response.status == 200:
144
- result = await response.json()
145
- selection = json.loads(result['choices'][0]['message']['content'])
146
-
147
- # Map to actual video data
148
- selected = []
149
- for item in selection['selected_videos']:
150
- if item['index'] < len(self.video_library):
151
- video = self.video_library.iloc[item['index']]
152
- selected.append({
153
- 'url': video['url'],
154
- 'duration': video['duration'],
155
- 'reason': item['reason'],
156
- 'alignment': video['alignment'],
157
- 'energy': video['energy']
158
- })
159
-
160
- return selected
161
- else:
162
- logger.error(f"DeepSeek API error: {response.status}")
163
- return []
164
 
 
 
 
 
165
  except Exception as e:
166
- logger.error(f"DeepSeek analysis failed: {e}")
167
  return []
168
 
169
  def _fallback_selection(self, tts_script: str, max_duration: int) -> List[Dict]:
@@ -212,19 +252,6 @@ class AssetSelector:
212
 
213
  return selected[:3] # Max 3 videos
214
 
215
- def _find_video_for_category(self, category: str) -> Optional[Dict]:
216
- """Find best video for a category"""
217
- for _, row in self.video_library.iterrows():
218
- if category in str(row['alignment']).lower():
219
- return {
220
- 'url': row['url'],
221
- 'duration': row['duration'],
222
- 'reason': f"Matches {category} category",
223
- 'alignment': row['alignment'],
224
- 'energy': row['energy']
225
- }
226
- return None
227
-
228
  def select_background_music(self) -> str:
229
  """Select background music using round-robin"""
230
  import random
 
 
 
 
1
  import pandas as pd
2
  import aiohttp
3
  import json
4
  from typing import List, Dict, Optional
5
  from utils import logger
6
+ import google.generativeai as genai
7
+ import os
8
+ import re
9
 
10
 
11
  class AssetSelector:
 
15
  self.audio_library = self._load_audio_library()
16
 
17
  def _load_video_library(self) -> pd.DataFrame:
18
+ """Load video library from specific CSV file"""
19
  try:
20
+ # Use path relative to this file
21
+ current_dir = os.path.dirname(os.path.abspath(__file__))
22
+ csv_filename = os.path.join(current_dir, "somira_video_library.csv")
23
+
24
+ if not os.path.exists(csv_filename):
25
+ logger.error(f"CSV file not found: {csv_filename}")
26
+ return pd.DataFrame()
27
+
28
+ # Load the CSV
29
+ df = pd.read_csv(csv_filename)
30
+
31
+ # Parse energy scores if the column exists
32
+ if 'Energy Score (0-100)' in df.columns:
33
+ df['energy_score'] = df['Energy Score (0-100)'].apply(self._parse_energy_score)
34
+
35
+ # Parse durations - convert to integers
36
+ if 'Duration' in df.columns:
37
+ df['duration'] = df['Duration'].apply(self._parse_duration)
38
+ elif 'duration' in df.columns:
39
+ df['duration'] = df['duration'].apply(self._parse_duration)
40
+
41
+ logger.info(f"Successfully loaded video library with {len(df)} entries")
42
+ return df
 
 
 
 
43
 
44
  except Exception as e:
45
+ logger.error(f"Failed to load video library from CSV: {e}")
46
  return pd.DataFrame()
47
+
48
+ def _parse_duration(self, duration_str: str) -> int:
49
+ """
50
+ Parse duration from various string formats to integer seconds.
51
+ Handles formats like: "2 seconds", "3 seconds", "1.5 seconds", "2s", etc.
52
+ """
53
+ try:
54
+ if pd.isna(duration_str) or duration_str == "":
55
+ return 0
56
+
57
+ # Convert to string and lowercase
58
+ duration_str = str(duration_str).lower().strip()
59
+
60
+ # Extract numbers - handle decimals too
61
+ numbers = re.findall(r'(\d+\.?\d*)', duration_str)
62
+ if numbers:
63
+ # Convert to float first to handle decimals, then round to int
64
+ return int(float(numbers[0]))
65
+
66
+ return 0
67
+ except (ValueError, TypeError) as e:
68
+ logger.warning(f"Failed to parse duration '{duration_str}': {e}")
69
+ return 0
70
 
71
  def _load_audio_library(self) -> List[str]:
72
  """Load audio library URLs"""
 
84
  List of selected video metadata
85
  """
86
  try:
87
+ logger.info(f"πŸ€– AI video selection for script: {tts_script[:300]}...")
88
 
89
+ # Use Gemini for intelligent selection
90
+ selected_videos = await self._analyze_with_gemini(tts_script, max_duration)
91
 
92
  if not selected_videos:
93
  logger.warning("⚠️ AI selection failed, using fallback")
94
  selected_videos = self._fallback_selection(tts_script, max_duration)
95
 
96
+ # Ensure all durations are integers before summing
97
+ for video in selected_videos:
98
+ if isinstance(video.get('duration'), str):
99
+ video['duration'] = self._parse_duration(video['duration'])
100
+
101
+ total_duration = sum(int(v.get('duration', 0)) for v in selected_videos)
102
  logger.info(f"βœ“ Selected {len(selected_videos)} videos, total: {total_duration}s")
103
 
104
  return selected_videos
 
107
  logger.error(f"❌ Video selection failed: {e}")
108
  return self._fallback_selection(tts_script, max_duration)
109
 
110
+ def _parse_energy_score(self, energy_score_str: str) -> int:
111
+ """
112
+ Parse energy score from string format to integer.
113
+ Handles formats like: "5 out of 100", "35 out of 100", "40 out of 100"
114
+ """
115
+ try:
116
+ if pd.isna(energy_score_str) or energy_score_str == "":
117
+ return 0
118
+
119
+ # Extract the first number from strings like "5 out of 100"
120
+ match = re.search(r'(\d+)\s*out of\s*\d+', str(energy_score_str))
121
+ if match:
122
+ return int(match.group(1))
123
+
124
+ # Try to extract just a number if no "out of" pattern
125
+ numbers = re.findall(r'\d+', str(energy_score_str))
126
+ if numbers:
127
+ return int(numbers[0])
128
+
129
+ return 0
130
+ except (ValueError, TypeError) as e:
131
+ logger.warning(f"Failed to parse energy score '{energy_score_str}': {e}")
132
+ return 0
133
+
134
+ async def _analyze_with_gemini(self, tts_script: str, max_duration: int) -> List[Dict]:
135
+ """Use Gemini API for contextual video selection"""
136
  try:
137
  # Prepare video library context
138
  video_context = "\n".join([
139
+ f"{i}. {row.get('Full Video Description Summary', row.get('description', ''))} - {row.get('duration', 0)}s - Alignment: {row.get('Video Alignment with the TTS Script', row.get('alignment', ''))}"
140
  for i, row in self.video_library.iterrows()
141
  ])
142
 
 
152
  - Total duration under {max_duration} seconds
153
  - Energy level appropriateness
154
 
155
+ Return ONLY valid JSON in this exact format (no markdown, no extra text):
156
  {{
157
  "selected_videos": [
158
  {{
 
166
  }}
167
  """
168
 
169
+ # Gemini API call
170
+ model = genai.GenerativeModel('gemini-2.0-flash-exp')
171
+ response = model.generate_content(prompt)
 
 
172
 
173
+ # Extract and parse JSON response
174
+ response_text = response.text.strip()
 
 
 
 
 
 
 
175
 
176
+ # Remove markdown code blocks if present
177
+ if response_text.startswith('```'):
178
+ response_text = response_text.split('```')[1]
179
+ if response_text.startswith('json'):
180
+ response_text = response_text[4:]
181
+ response_text = response_text.strip()
182
+
183
+ selection = json.loads(response_text)
184
+
185
+ # Map to actual video data
186
+ selected = []
187
+ for item in selection['selected_videos']:
188
+ if item['index'] < len(self.video_library):
189
+ video = self.video_library.iloc[item['index']]
190
+ selected.append({
191
+ 'url': video.get('Video URL (No Audio)', video.get('url', '')),
192
+ 'duration': video.get('duration', 0),
193
+ 'reason': item['reason'],
194
+ 'alignment': video.get('Video Alignment with the TTS Script', video.get('alignment', '')),
195
+ 'energy': video.get('energy_score', 0)
196
+ })
197
+
198
+ logger.info(f"βœ“ Gemini selected {len(selected)} videos: {selection.get('rationale', '')}")
199
+ return selected
 
 
 
200
 
201
+ except json.JSONDecodeError as e:
202
+ logger.error(f"Failed to parse Gemini JSON response: {e}")
203
+ logger.debug(f"Raw response: {response_text[:500]}")
204
+ return []
205
  except Exception as e:
206
+ logger.error(f"Gemini analysis failed: {e}")
207
  return []
208
 
209
  def _fallback_selection(self, tts_script: str, max_duration: int) -> List[Dict]:
 
252
 
253
  return selected[:3] # Max 3 videos
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  def select_background_music(self) -> str:
256
  """Select background music using round-robin"""
257
  import random
src/automation.py CHANGED
@@ -114,61 +114,6 @@ class ContentAutomation:
114
  logger.error(f"πŸ“‹ Debug: {traceback.format_exc()}")
115
  return False
116
 
117
- async def local_test(self):
118
- """Run a local test without external APIs"""
119
- logger.info("πŸ§ͺ Running local functionality test...")
120
-
121
- try:
122
- # Test 1: Check if we can create basic video clips
123
- logger.info("1. Testing video clip creation...")
124
- from moviepy.editor import ColorClip
125
- test_clip = ColorClip(size=(100, 100), color=(255, 0, 0), duration=1)
126
- test_clip = test_clip.set_fps(24) # Add FPS
127
- test_clip.write_videofile('/tmp/test_color.mp4', verbose=False, logger=None)
128
- test_clip.close()
129
- logger.info(" βœ… Video clip creation: OK")
130
-
131
- # Test 2: Check if we can create audio clips
132
- logger.info("2. Testing audio clip creation...")
133
- from moviepy.editor import AudioClip
134
- import numpy as np
135
-
136
- def make_tone(duration):
137
- return lambda t: 0.1 * np.sin(440 * 2 * np.pi * t)
138
-
139
- test_audio = AudioClip(make_tone(1), duration=1)
140
- test_audio.write_audiofile('/tmp/test_audio.mp3', verbose=False, logger=None)
141
- test_audio.close()
142
- logger.info(" βœ… Audio clip creation: OK")
143
-
144
- # Test 3: Check video rendering with simple assets
145
- logger.info("3. Testing video rendering pipeline...")
146
- test_assets = {
147
- 'selected_videos': [
148
- {
149
- 'local_path': '/tmp/test_color.mp4',
150
- 'duration': 1,
151
- 'reason': 'Test video'
152
- }
153
- ],
154
- 'tts_audio': {
155
- 'local_path': '/tmp/test_audio.mp3',
156
- 'duration': 1
157
- },
158
- 'tts_script': 'Test script.',
159
- 'background_music_local': '/tmp/test_audio.mp3'
160
- }
161
-
162
- output_path = await self.video_renderer.render_video(test_assets)
163
- logger.info(f" βœ… Video rendering: OK - {output_path}")
164
-
165
- logger.info("\nπŸŽ‰ Local functionality test passed!")
166
- return True
167
-
168
- except Exception as e:
169
- logger.error(f"❌ Local test failed: {e}")
170
- return False
171
-
172
  async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
173
  """
174
  Execute complete production video pipeline with better error handling
@@ -182,8 +127,8 @@ class ContentAutomation:
182
  assets = await self._generate_assets_parallel(content_strategy, tts_script)
183
 
184
  # Check if we have minimum required assets
185
- if not assets.get('selected_videos') or not assets.get('tts_audio'):
186
- raise ValueError("Missing critical assets: videos or TTS audio")
187
 
188
  # Step 2: Download all remote assets
189
  logger.info("\n⬇️ STEP 2: Downloading Remote Assets")
@@ -261,8 +206,17 @@ class ContentAutomation:
261
  # Generate video
262
  video_data = await self.api_clients.generate_video(
263
  enhanced_prompt,
264
- duration=5 # 5-second hook video
265
  )
 
 
 
 
 
 
 
 
 
266
 
267
  return video_data
268
 
@@ -376,33 +330,3 @@ class ContentAutomation:
376
  print("❌ System has significant issues that need attention")
377
 
378
  return health_status
379
-
380
- async def basic_test(self):
381
- """Basic test without external APIs"""
382
- logger.info("πŸ§ͺ Running basic pipeline test...")
383
-
384
- # Use local test assets
385
- test_assets = {
386
- 'selected_videos': [
387
- {
388
- 'url': 'https://example.com/video1.mp4',
389
- 'duration': 2,
390
- 'reason': 'Test video 1',
391
- 'local_path': '/tmp/test_video1.mp4' # You'd need to create this
392
- }
393
- ],
394
- 'tts_audio': {
395
- 'local_path': '/tmp/test_audio.mp3', # You'd need to create this
396
- 'duration': 10
397
- },
398
- 'background_music_local': '/tmp/test_music.mp3',
399
- 'tts_script': 'Test script for video generation.'
400
- }
401
-
402
- try:
403
- final_video_path = await self.video_renderer.render_video(test_assets)
404
- logger.info(f"βœ… Basic test passed: {final_video_path}")
405
- return True
406
- except Exception as e:
407
- logger.error(f"❌ Basic test failed: {e}")
408
- return False
 
114
  logger.error(f"πŸ“‹ Debug: {traceback.format_exc()}")
115
  return False
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
118
  """
119
  Execute complete production video pipeline with better error handling
 
127
  assets = await self._generate_assets_parallel(content_strategy, tts_script)
128
 
129
  # Check if we have minimum required assets
130
+ if not assets.get('selected_videos') or not assets.get('tts_audio') or not assets.get('hook_video'):
131
+ raise ValueError("Missing critical assets: hook video or library videos or TTS audio")
132
 
133
  # Step 2: Download all remote assets
134
  logger.info("\n⬇️ STEP 2: Downloading Remote Assets")
 
206
  # Generate video
207
  video_data = await self.api_clients.generate_video(
208
  enhanced_prompt,
209
+ duration=8
210
  )
211
+ # TODO: Mocking video generation for now
212
+ # return {
213
+ # 'video_url': 'https://dnznrvs05pmza.cloudfront.net/veo3/projects/vertex-ai-claude-431722/locations/us-central1/publishers/google/models/veo-3.0-generate-001/operations/12d22a72-16b2-4767-a9f4-edc8589bb199/A_slow__deliberate_dolly_in_shot_focuses_on_a_blonde_woman_in_her_early_30s__positioned_within_the_p.mp4?_jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJrZXlIYXNoIjoiYTJkMzQzOTlkZDM3YWU1ZCIsImJ1Y2tldCI6InJ1bndheS10YXNrLWFydGlmYWN0cyIsInN0YWdlIjoicHJvZCIsImV4cCI6MTc2MDE0MDgwMH0.pWG8lw7CE4No8VeRTxPuHSHin6sANds6ScnmoCydzmA',
214
+ # 'task_id': '0328498f-7ea8-46a5-9c6d-f997770abeb6',
215
+ # 'duration': 8,
216
+ # 'prompt': prompt,
217
+ # 'status': 'SUCCEEDED',
218
+ # 'created_at': '2025-10-08T20:52:09.879Z',
219
+ # }
220
 
221
  return video_data
222
 
 
330
  print("❌ System has significant issues that need attention")
331
 
332
  return health_status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/somira_video_library.csv ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Video URL (No Audio),Full Video Description Summary,Duration,Video Alignment with the TTS Script,Product Visibility,Energy Score (0-100)
2
+ https://storage.googleapis.com/somira/Somira%20Massager.mp4,"The video clearly displays the Somira Massager product from a front-view perspective against a professional white and light beige background. It features a static image with a slow zoom-in effect. The video contains no people, no significant action, and minimal visual excitement. Its primary purpose is to quickly and simply show the Somira Massager product itself. Therefore, this video is ideal for use whenever the script explicitly mentions terms like ""Somira Massager,"" ""the product,"" ""massage product,"" or any indirect or direct references to this specific product. The focal point of the image is centered, and the product image appears slightly black-and-white for a professional appearance. ",2 seconds,"This video can be used when the TTS script explicitly mentions words or phrases such as ""Somira Massager,"" ""product,"" or ""massager."" Additionally, the video fits well when the script discusses a solution to a problem, highlights product features, or indirectly refers to the product without naming it explicitly. This video is versatile and serves as a reliable fallback option whenever clear product imagery is required.",The product is fully visible. ,"5 out of 100.
3
+ This video is calm and only contains a slight zoom with little motion. "
4
+ https://storage.googleapis.com/somira/FemaleWomenPuttingOnNeckMassagerr.mp4,"The video shows a blonde-haired woman seated upright on a couch at home, directly facing the camera at eye level, in a brightly lit setting with white and light beige colors. During the first 5 seconds, she calmly lifts the Somira neck massager over her head, places it around her neck, and inserts her arms into its built-in armrests. In the final 3 seconds, she clearly presses the power button on the massager, starting the massage function. The camera remains completely still throughout, with no movement or distraction, ensuring a consistent focus on the woman and the product. Her facial expression appears comfortable and satisfied, resembling a happy customer. The video is instructional, clean, and clearly emphasizes product usage.",2 seconds,"Use this video when the TTS script explicitly mentions actions like ""using the product,"" ""turning on the massager,"" or references to ""easy product operation."" It is also suitable whenever the script discusses product comfort, ease-of-use, user satisfaction, or demonstrates how the Somira neck massager works. Additionally, this video aligns well when highlighting a positive customer experience, comfort benefits, or general product functionality.",The product is visible. ,"35 out of 100.
5
+ The camera does not move, but the woman performs movement with her upper body. "
6
+ https://storage.googleapis.com/somira/PersonUsingTheMassagerProductt.mp4,"The video shows a person seated upright and directly facing the camera at eye level, already wearing the Somira neck massager around their neck. The massager is actively running throughout the entire 6-second duration, clearly demonstrating its massage functionality. The person remains mostly still with minimal movement, visibly enjoying the massage. The video does not display the heat feature. Due to limited action and engagement, it's recommended to trim the video length to approximately the first 1–3 seconds for optimal use. This clip effectively demonstrates normal usage and user satisfaction with the product.",2 seconds,"Use this video when the TTS script refers explicitly or implicitly to ""massager usage,"" ""relaxation,"" ""comfort,"" ""customer satisfaction,"" or ""product in action."" It is suitable for many scenarios, including general product benefits, demonstrating product effectiveness, showcasing user enjoyment, or emphasizing the massage functionality. Avoid this clip when specifically discussing or emphasizing the heat feature, as it is not displayed here. Due to its versatility, this video can act as a reliable fallback for showcasing the product in use during many scenarios. ",The product is visible. ,"20 out of 100.
7
+ The camera does not move, but the woman wearing the activated massager slightly moves her upper body. "
8
+ https://storage.googleapis.com/somira/MassagerMassageAndHeatFeatureWhiteBackgroundd.mp4,"The video provides a close-up view of the Somira Massager, clearly showing it powered on with both the massage function and the red-light heat therapy feature activated. It gradually moves closer towards the massage knots to highlight key product details. The original background has been digitally replaced with plain white to prominently showcase the massager without distraction. This handheld mobile shot is smooth but engaging, focusing exclusively on the product with no visible people. The video should be trimmed in length for optimal engagement. It is ideal as a general or fallback clip for clearly demonstrating product features.",2 seconds,"Use this video when the TTS script explicitly mentions or implies ""massage knots,"" ""heat therapy,"" ""red-light,"" ""product features,"" ""massager functionality,"" or ""product close-up."" Additionally, it's suitable when discussing detailed product benefits, highlighting massage effectiveness, or referring generally to the Somira Massager. Due to its clarity, versatility, and focus on specific product functions, it also works effectively as a fallback or filler video in many scenarios.",The product is partially visible. ,"40 out of 100.
9
+ The camera moves and the massager is activated and moving."
10
+ https://storage.googleapis.com/somira/PersonWearingSomiraMassagerr.mp4,"A blonde-haired woman is standing upright against a soft white and light beige wall. She is already wearing the Somira neck massager on her neck. The scene is lit with bright, professional lighting and framed with a static, eye-level camera. Throughout the clip, the woman slowly rotates her upper body left and right in a smooth, calm motion to present the massager from multiple angles. Her facial expression is relaxed and content, reinforcing a feeling of comfort and product satisfaction. The setting is minimal and distraction-free, maintaining focus on the product and its wearability. No additional movement occurs. For better viewer engagement, trimming the video is recommended.",2 seconds,"Use this video when the TTS script explicitly mentions the design, comfort, style, or wearability of the Somira neck massager. It is particularly suitable for emphasizing themes like relaxation, comfort while wearing, product aesthetics, or phrases highlighting a calming experience. Additionally, use this video to visually reinforce narratives related to user satisfaction, ease of wear, or ergonomic design, as the gentle rotations and relaxed expressions effectively communicate these aspects to the viewer.",The product is visible. ,35 out of 100.
11
+ https://storage.googleapis.com/somira/PersonEnjoyingTheNeckMassager.mp4,"This selfie video depicts a person seated comfortably on a couch in a relaxed home environment, positioned centrally in frame. The individual leans gently backward, resting their head and neck fully onto the Somira neck massager, which is draped securely behind the neck and over the shoulders in the standard usage position. Their eyes are closed, and the expression is visibly calm, serene, and deeply contented, clearly conveying comfort and satisfaction derived from the massager’s relaxing effects. The clip is very short with only 1.5 seconds in length. The neutral background and clear framing emphasize the person’s peaceful facial expression and the immediate benefits of using the Somira massager.",1.5 seconds,"Use this video clip whenever the TTS script emphasizes comfort, relaxation, immediate relief from neck pain, tension relief, or product satisfaction. It’s especially suitable for phrases like ""experience soothing relief,"" ""relax your neck muscles,"" ""comfortable massage,"" or when highlighting general satisfaction and wellness from using the Somira massager. Its brief, highly focused nature makes it a versatile visual insert when quickly underscoring the relaxing benefits of the product.",The product is visible. ,40 out of 100.
12
+ https://storage.googleapis.com/somira/PersonCuriouslyEnjoyingNeckMassager.mp4,"This selfie video shows a person comfortably seated on a couch in a calm, homely setting, with the Somira neck massager positioned securely behind their neck and over their shoulders in the standard usage position. The individual's facial expression is notably curious and slightly skeptical, with eyes partially open and an expression suggesting a combination of mild discomfort and pleasure. The nuanced expression effectively communicates the sensation of deep-tissue massage, capturing the essence of therapeutic pressure in targeting muscle knots. The brief 1.5-second clip highlights a genuine reaction, reflecting an authentic ""hurts-so-good"" experience typical of effective, targeted massage therapy.",1.5 seconds,"Use this clip specifically when the TTS script references therapeutic massage effects, deep-tissue relief, or phrases such as ""deep massage,"" ""working out tough knots,"" or ""therapeutic sensation."" It's particularly suitable in scenarios emphasizing realistic user reactions to initial discomfort combined with beneficial relief from deep massage. Avoid using in generalized relaxation contexts, and instead align with scripts highlighting the intensity or effectiveness of targeted therapeutic massages.",The product is visible. ,40 out of 100.
13
+ https://storage.googleapis.com/somira/PersonPainfullyEnjoyingNeckMassage.mp4,"The selfie video features a person seated comfortably on a couch within a relaxed home environment, using the Somira neck massager placed around their neck and shoulders. The individual's facial expression vividly conveys an experience of intense massage therapy, characterized by slightly furrowed eyebrows, partially squinted eyes, and a subtly tense mouth indicating discomfort coupled with therapeutic relief. Despite the visible intensity, the expression clearly communicates beneficial outcomes and appreciation of the massage’s deep effectiveness. The brief, expressive 1.5-second clip authentically portrays the experience of intense massage pressure effectively relieving stubborn muscle tension.",1.5 seconds,"Align this video carefully with TTS narratives mentioning intense therapeutic massages, deep muscle relief, phrases such as ""intensive massage,"" ""deep therapeutic effects,"" or explicit descriptions of initial discomfort transforming into beneficial relief. Ideal for illustrating realistic, impactful massage experiences, use this clip selectively to reinforce messaging on deep massage benefits, effectively targeting muscle tension, knots, and substantial relief from severe neck discomfort or stiffness.",The product is visible. ,40 out of 100.
14
+ https://storage.googleapis.com/somira/WeirdChiropractorTreatmentNeckPain.mp4,"The video shows a brief, unusual chiropractic treatment for neck pain, lasting approximately one second. It features a chiropractor holding a towel wrapped under the patient's neck, pulling upward while pushing down firmly on the patient's shoulders with his bare feet. Only the chiropractor's hands, feet, towel, and the patient's uncomfortable facial expression are visible, creating a strikingly peculiar visual. This video strongly communicates a unique, niche treatment method rather than conventional chiropractic care.",1 second,"Use this video specifically when the TTS script mentions unusual or alternative chiropractic techniques, unconventional neck pain treatments, or emphasizes unique therapeutic experiences that are a bad solution compared to using the Somira massager product. Avoid using this clip in generalized wellness or standard chiropractic care contexts due to its peculiar nature. Only use this clip when it matches the context specifically. ",The product is not visible.,60 out of 100.
15
+ https://storage.googleapis.com/somira/NeckMassage.mp4,"This three-second video presents a serene, professional point-of-view (POV) of an expert masseuse performing a relaxing massage on a person's upper back. The setting emphasizes tranquility, with a soothing atmosphere enhanced by soft lighting, flowers, and candles visible in the background. The massage technique demonstrated is gentle, rhythmic, and intended to induce calmness and relaxation.",3 seconds,"Use this video whenever the TTS script discusses relaxation, professional massage therapy, stress relief, or tranquil and soothing massage experiences. It is ideal for scripts highlighting comfort, wellness practices, spa-like environments, or peaceful therapeutic care. It is also ideal to show that the Somira massager feels like a professional massage!",The product is not visible.,40 out of 100.
16
+ https://storage.googleapis.com/somira/PersonEnjoyingMassagerInBed.mp4,"This three-second clip captures a medium close-up of a person comfortably lying in bed, visibly relaxed with closed eyes, enjoying the soothing effects of the Somira neck massager. The massager is positioned snugly behind the neck, sandwiched between the pillow and neck, clearly demonstrating the activated red-light heat function. The person's tranquil expression and comfortable setting strongly convey a moment of peaceful self-care and wellness.",3 seconds,"Ideal for TTS scripts emphasizing relaxation, bedtime routines, comfort, wellness, stress relief, or self-care rituals. Use this clip particularly when mentioning the heat feature of the Somira massager or illustrating calming, end-of-day relaxation scenarios.",The product is visible. ,35 out of 100.
17
+ https://storage.googleapis.com/somira/OldManWearingSomira.mp4,"An older man, approximately fifty years old, sits on a couch at home, visibly experiencing intense but beneficial discomfort from using the Somira massager. His expression vividly conveys a combination of pain and relief, indicating deep tissue or therapeutic massage effects, particularly effective for tension relief in the shoulders and upper back.",3 seconds,"Use this video exclusively when the script explicitly references older users, mature adults, or hard workers and highlights deep, intense, therapeutic massages. It is particularly suitable for discussing targeted pain relief, overcoming muscular tension, or demonstrating the product's efficacy for older adults experiencing chronic muscle stiffness. It can also be used to show the product is used by a diverse group of people.",The product is visible. ,50 out of 100.
18
+ https://storage.googleapis.com/somira/PersonShowingButtons.mp4,"This brief, focused video prominently shows a person’s face and hands as they hold a specific part of the Somira massager's armrest close to the camera, clearly displaying the four navigation buttons. The setting is a casual home environment, conveying a personal and authentic feel. The buttons are held prominently at chest-level, highlighting ease of access and control. The person has a calm, pleasant expression, enhancing the product's approachable and user-friendly impression.",3 seconds,"Use this video specifically when the TTS script references the Somira massager's navigation buttons, ease of control, or various available functions. It's particularly suitable for emphasizing product usability, demonstrating ease of switching modes, or when explaining the convenience of accessing multiple massage options through simple controls.",The product is visible. ,40 out of 100.
19
+ https://storage.googleapis.com/somira/TurningOnMassager.mp4,"This short video clearly illustrates the Somira massager’s primary massage function. The video captures the person's face and upper body, with hands holding the massager closely at chest-level. After pressing the top navigation button to turn on the device, the focus shifts directly onto the massage knots, clearly visible as they begin gently moving in a rhythmic motion. The scene is calm, warm, and personal, showcasing the massager’s core massage functionality without activating the heat option.",3 seconds,"Ideal for TTS scripts that explicitly mention activating the massage function, general massaging capabilities, product functionality demonstrations, or emphasizing the soothing movement of massage knots. Avoid this clip when specifically highlighting the heat or red-light function. This is also a great general video of the massage product that can be used in a large number of circumstances or as fallback. ",The product is visible. ,40 out of 100.
20
+ https://storage.googleapis.com/somira/ShowingMassageFunctionWithHeatRedLight.mp4,"In this concise video, the person clearly demonstrates the Somira massager’s combined massage and heat functions. The frame prominently features the individual's calm, satisfied expression and their hands holding the massager. The massage knots visibly move, illuminated distinctly by the red lights indicating the heat therapy function. The setting remains warm and personal, emphasizing comfort and product effectiveness in delivering heated massage therapy. The red lights and heat are turned on.",3 seconds,"Use this video when the script explicitly mentions the combination of massage with heat therapy, red-light features, or the massager's multifunctional capabilities. It is a perfect fit when the script talks about the red-light and heat functions. It’s especially suitable for highlighting therapeutic warmth, enhanced relaxation, or the effectiveness of heat in relieving tension and soothing muscles. This is also a great general video of the massage product that can be used in a large number of circumstances or as fallback. ",The product is visible. ,35 out of 100.
21
+ https://storage.googleapis.com/somira/PersonWearingTheSomiraMassager.mp4,"This straightforward video features a person wearing the Somira massager around their neck in a comfortable home setting. The medium-shot frames the person from the waist up, presenting a calm, content facial expression to convey ease and satisfaction. The massager is worn naturally and visibly, without the heat or red-light function activated, emphasizing comfort, wearability, and everyday usage.",3 seconds,"Use this video whenever the TTS script discusses comfort, ease of use, or wearable convenience of the Somira massager. It's especially appropriate for general descriptions of product usage, daily wellness routines, or emphasizing the natural comfort and simplicity of incorporating the massager into everyday life. Avoid using when specifically mentioning the heat or red-light features.",The product is visible. ,35 out of 100.
22
+ https://storage.googleapis.com/somira/PersonWearingMassagerWithRedLightHeat.mp4,"This medium-shot video captures a person comfortably wearing the Somira massager around their neck in a relaxed home environment. The person's facial expression indicates satisfaction and tranquility. The massager is visibly activated, showcasing the glowing red-light feature signifying the therapeutic heat function. The video effectively communicates relaxation, enhanced comfort, and the added benefits of warmth for tension relief.",3 seconds,"Ideal for scripts specifically highlighting the red-light heat therapy feature, relaxation through warmth, or enhanced comfort and stress relief provided by the massager. Perfect for emphasizing scenarios involving deeper muscle relaxation, therapeutic heat benefits, or soothing, end-of-day wellness routines.",The product is visible. ,40 out of 100.
23
+ https://storage.googleapis.com/somira/Short3DanimationSomiraMassager.mp4,"This concise yet elegant video provides a professional 3D animated visualization of the Somira massager, displayed against a single attractive and calm background. The product slowly rotates in a smooth, slow-motion animation, effectively showcasing its key design elements and refined aesthetic.",3 seconds,"Ideal for scripts that introduce the product, emphasize its attractive design, or describe general benefits and features. Because it shows the entire product, it's suitable for many use cases, particularly as an introductory visual or a clean, professional representation of the Somira massager.",The product is fully visible. ,40 out of 100.
24
+ https://storage.googleapis.com/somira/Long3DanimationSomiraMassagerr.mp4,"This video features a polished, professional 3D animation showcasing the Somira massager product. The massager is presented against three distinct, calm, and aesthetically pleasing backgrounds, gently rotating in slow-motion to display its sleek design and features comprehensively. The animation effectively highlights the product's shape, texture, and overall appeal without distractions.",1 second,"Use this video when the script emphasizes product design, aesthetic appeal, general features, or introduces the Somira massager to viewers. It's particularly suitable for visually engaging presentations or as a versatile fallback clip due to its clean and professional appearance.",The product is fully visible. ,40 out of 100.
25
+ https://storage.googleapis.com/somira/MassagerMassageAndHeatFeatureDarkBackgroundd.mp4,"The video provides a close-up view of the Somira Massager, clearly showing it powered on with both the massage function and the red-light heat therapy feature activated. It gradually moves closer towards the massage knots to highlight key product details. The original background has been digitally replaced with a dark blue backdrop, prominently showcasing the massager without distraction. This handheld mobile shot is smooth but engaging, focusing exclusively on the product with no visible people. The video should be trimmed in length for optimal engagement. It is ideal as a general or fallback clip for clearly demonstrating product features.",1 second,"Use this video when the TTS script explicitly mentions or implies ""massage knots,"" ""heat therapy,"" ""red-light,"" ""product features,"" ""massager functionality,"" or ""product close-up."" Additionally, it's suitable when discussing detailed product benefits, highlighting massage effectiveness, or referring generally to the Somira Massager. Due to its clarity, versatility, and focus on specific product functions, it also works effectively as a fallback or filler video in many scenarios.",The product is visible. ,40 out of 100.
26
+ https://storage.googleapis.com/somira/3D1.mp4,"This animation presents the Somira neck massager in a slow, floating rotation against a neutral grey background. The product is fully visible and remains centered throughout the clip, allowing for a clean, uninterrupted view. The darker tones give the scene a polished, professional atmosphere that feels serious and refined. The subtle floating motion highlights the product’s design and shape without distraction, creating a premium visual experience that communicates sophistication.",2 seconds,"This clip is best used when the TTS script introduces the product in a professional tone, emphasizes its premium design, or highlights general product features. Its clear full-frame view of the massager makes it a versatile option when showcasing the product itself. It works particularly well in moments where the narration is focused on credibility, quality, or premium appeal.",The product is fully visible. ,35 out of 100.
27
+ https://storage.googleapis.com/somira/3D3.mp4,"This stylish 3D animation showcases the Somira neck massager as it floats into view, rotating gently on a warm beige background that reflects the brand’s primary color palette. The product is visible from behind and at an angle, creating a more dynamic and engaging perspective compared to a static front view. The floating entrance from the top adds energy and intrigue, while the soft beige tones keep the presentation aligned with brand aesthetics.",2 seconds,"This clip works best when the narration focuses on brand style, product design identity, or when adding variety to a sequence of product animations. Since the product is only partially shown, it’s not ideal for general showcases but is effective for moments emphasizing mood, brand sophistication, or visual flair. Use it to create engaging transitions or to highlight design language in a more subtle, stylish way.",The product is partially visible. ,35 out of 100.
28
+ https://storage.googleapis.com/somira/3D4.mp4,"In this animation, the Somira massager floats down from above and moves smoothly into the center of the frame, fully visible against a dark brown background. The reveal effect adds an engaging element, as the product appears with motion rather than remaining static. The darker color scheme lends a sense of richness and depth, while the centered product presentation ensures a clear focus on design and shape.",2 seconds,"This clip is ideal when the narration highlights a reveal, discovery, or introduction to the product. It pairs effectively with lines that position the massager as a solution or innovation being presented to the viewer. Its smooth floating reveal makes it a good choice for energetic or engaging script moments that call for a touch of drama while keeping the product clearly visible.",The product is visible. ,50 out of 100.
29
+ https://storage.googleapis.com/somira/3dAnimationControls.mp4,"This close-up 3D animation highlights the Somira neck massager’s smart control buttons with precision and clarity. Against a sleek black background, the camera smoothly pans across the armrest, focusing exclusively on the navigation buttons. The darker backdrop and close framing create a professional, technical mood, emphasizing the usability and sophistication of the controls.",3 seconds,"This video is best aligned with TTS narration that discusses smart controls, ease of navigation, or multiple massage modes. The serious, professional aesthetic works well when emphasizing advanced product functionality, precision, or innovation. It’s particularly effective when highlighting technical aspects of usability or showcasing the quality of design.",The product is partially visible. ,40 out of 100.
30
+ https://storage.googleapis.com/somira/3Dsmartcontrols.mp4,"This 3D animation highlights the Somira neck massager’s smart control buttons in a premium, cinematic style. Set against a beige-golden background, the camera smoothly zooms in with a soft, lens-blur effect that creates depth and a realistic sense of focus. The lighting and movement emphasize the design quality of the controls while maintaining a bright, modern atmosphere. The polished visuals communicate luxury, sophistication, and intuitive usability.",2 seconds,"This video is best suited for narration that highlights the massager’s smart controls, ease of operation, or advanced functionality. The warm tones and cinematic blur create a lighter, inviting mood, making it especially effective when emphasizing convenience, accessibility, and lifestyle benefits. It reinforces the premium design of the product while showing how simple and elegant the control functions are to use.",The product is partially visible. ,40 out of 100.
31
+ https://storage.googleapis.com/somira/3DlightEffectReveal.mp4,"This elegant animation showcases the Somira neck massager as it remains fully centered and still, while light sweeps gracefully across the product. The shifting illumination reveals its contours, textures, and refined design in dramatic fashion. The interplay of light and shadow creates a luxurious reveal effect that positions the product as premium and aspirational.",2 seconds,"This video pairs well with narration that emphasizes luxury, design, craftsmanship, or premium quality. It is especially suitable for introducing the product in a dramatic or stylish way, or when highlighting elegance and sophistication. Use it to create visual impact during moments where the TTS positions the Somira massager as a high-end, beautifully designed product.",The product is visible. ,40 out of 100.
32
+ https://storage.googleapis.com/somira/Animation.mp4,"This animation presents the Somira massager at a slight front-facing angle, slowly zooming in to provide a full, unobstructed view of the product. Against a sleek black background, the entire product remains clearly visible throughout the clip. The lack of flashy effects or distractions makes it a clean, versatile, and highly usable animation, with a premium yet straightforward style.",2 seconds,"This clip is highly flexible and can be used in a wide range of scenarios whenever the narration discusses the product itself. Its clarity and simplicity make it a strong fallback option, ideal for moments that require a direct product showcase. It works well with general mentions of the Somira massager, as well as segments that highlight design, usability, or overall appeal.",The product is fully visible. ,35 out of 100.
33
+ https://storage.googleapis.com/somira/GirlWearingSomiraNeckMassagerCalmSettingWarm.mp4,"This video shows a woman seated comfortably with the Somira neck massager around her neck. The front view captures her calm, content expression as she leans her head slightly, reinforcing a sense of ease and satisfaction. The background features a warm, elegant night scene with the Eiffel Tower in view, paired with beige furniture that adds to the cozy, stylish atmosphere. The soft lighting and warm tones highlight both the product and the luxurious setting, creating a polished, inviting mood.",2 seconds,"This video is best aligned with narration that emphasizes personal comfort, stylish relaxation, or the feeling of satisfaction when using the Somira massager. The premium background and calm demeanor make it ideal when the script highlights lifestyle benefits, wellness, or the elegance of incorporating the massager into daily routines. It visually reinforces comfort in a high-end, aspirational context.",The product is visible. ,40 out of 100.
34
+ https://storage.googleapis.com/somira/HappyPersonWearingSomiraMassager.mp4,"This video captures a person seated at home with the Somira neck massager worn naturally around the neck. The individual looks directly toward the camera with a bright, joyful smile and visible teeth, radiating happiness and satisfaction. The home setting feels casual yet inviting, creating a relatable, everyday environment. The combination of genuine expression and the clear view of the product emphasizes both enjoyment and authenticity.",2 seconds,"This video works perfectly when the narration highlights customer happiness, satisfaction, or the positive emotional impact of using the Somira massager. The clear smile conveys authenticity, making it highly effective for lines that reinforce trust, customer experience, or overall joy from the product. It is especially strong for emphasizing real, happy results.",The product is visible. ,40 out of 100.
35
+ https://storage.googleapis.com/somira/ModelGirlWearingSomira.mp4,"This video presents a model standing confidently with the Somira massager worn around her neck. The stylish background features the Eiffel Tower in a luxurious urban setting, elevating the overall presentation. The model’s composed posture and fashionable appearance frame the product as a premium lifestyle accessory. The combination of professional lighting, elegant scenery, and centered focus communicates refinement and exclusivity.",2 seconds,"This video is best suited for narration that highlights elegance, premium lifestyle, or the fashionable appeal of the Somira massager. It pairs well with lines that connect the product to luxury, sophistication, or high-end self-care. The stylish setting makes it effective for aspirational messaging, where the product is positioned not just as a tool but as part of an elevated lifestyle.",The product is visible. ,40 out of 100.
36
+ https://storage.googleapis.com/somira/PersonWearingSomiraInSpace.mp4,"This entertaining video shows a person inside a futuristic spaceship environment while wearing the Somira neck massager. The setting is vibrant and imaginative, contrasting advanced technology with personal relaxation. The individual’s calm demeanor while seated highlights the ease of use and enjoyment of the massager, even in a highly unconventional location. The combination of the sci-fi background and the product in use creates a unique and playful presentation.",2 seconds,"This video works best when the narration leans into futuristic, innovative, or entertaining themes. It is highly effective for highlighting the idea that the Somira massager can bring comfort and relaxation anywhere, even in unexpected environments. Use it to add a fun, eye-catching twist to the message while still showcasing the product in use.",The product is visible. ,40 out of 100.
37
+ https://storage.googleapis.com/somira/BusinessPersonExperiencingNeckPain.mp4,"This video captures a stylish business professional in an office setting, visibly experiencing neck pain. Shot from a medium close-up that moves smoothly into a close-up, the handheld camera adds a dynamic, authentic feel. The person looks stressed yet composed, maintaining a professional charm while placing a hand on their neck to clearly signal discomfort. The modern office environment reinforces the relatability of workplace strain, while the body language makes the pain immediately recognizable.",1 second,"This clip works best when the narration addresses relatable daily struggles such as work-related stress, neck stiffness, or discomfort from long hours at a desk. The professional setting and expressive gesture make it highly effective for illustrating common lifestyle challenges that the Somira massager helps resolve. It is ideal for connecting with viewers who experience tension from work and need practical, stylish solutions.",The product is not visible.,45 out of 100.
38
+ https://storage.googleapis.com/somira/neckpain.mp4,"This high-end 3D animation presents a technical and professional visualization of neck pain. The animation depicts a semi-transparent human figure in shades of blue, showing both skeletal and muscular structures. Against a sleek black background, the camera zooms in on the neck area to emphasize tension and discomfort. The polished, anatomical rendering provides a scientific and precise way of communicating pain, stiffness, or strain in the neck.",3 seconds,"This video aligns best when the TTS script mentions neck pain and muscle stiffness. The high-end 3D anatomical style makes it different from live-action clips, giving it a credible and highly professional tone. It is most effective when the TTS highlights neck pain discomfort and needs a clear, accurate visualization of neck pain before presenting the Somira massager as the solution.",The product is not visible.,45 out of 100.
39
+ https://storage.googleapis.com/somira/StrongFaceExpressionNeckPain.mp4,"This video portrays a person working on a laptop while visibly struggling with neck pain. The individual’s exaggerated facial expressions and hand placed firmly on the neck create a playful yet instantly recognizable depiction of discomfort. The casual home or work setting keeps the scene relatable, while the strong acting style ensures that the pain is clear and attention-grabbing. The lighthearted exaggeration gives the clip an entertaining edge while still communicating the problem.",2 seconds,"This clip is highly effective when the narration emphasizes relatable tension, stress, or pain from everyday activities such as working at a computer. Its exaggerated style makes it ideal for more playful or entertaining edits while still conveying the key issue of neck discomfort. Use it when the script aims to quickly grab attention, connect with viewers through humor, and set the stage for showing how the Somira massager delivers relief.",The product is not visible.,50 out of 100.
40
+ https://storage.googleapis.com/somira/PersonExperiencingStressOffice.mp4,"This video shows a stylish business professional in an office setting, dealing with visible stress from work. Captured from a medium close-up that moves toward a close-up, the lighting uses strong contrasts to highlight the mood while keeping a polished and modern look. The individual’s expression and gestures convey fatigue and pressure but still maintain a sense of style and composure. The overall tone is serious yet relatable, presenting the challenges of office life in a way that feels authentic and professional.",2 seconds,"This video aligns best when the TTS script mentions office stress, long hours, or the strain of demanding work. The balanced tone of stress combined with the individual’s composed presence makes it effective for connecting with viewers in relatable professional scenarios. ",The product is not visible.,45 out of 100.
41
+ https://storage.googleapis.com/somira/DogPosingAsHumanInOfficeWorkingDesk.mp4,"This playful video features a dog dressed in a shirt and tie, seated at an office desk and typing on a laptop as if he were a human professional. The warm tone and lighting add charm while highlighting the humorous contrast of a dog acting in a role that is normally filled only by people. The dog’s serious, focused expression mimics the stress and concentration of office work, turning a common theme of workplace pressure into an entertaining, impossible, and eye-catching scenario.",2 seconds,"This video aligns best when the TTS script talks about stress, office life, or workplace responsibilities in a fun and relatable way. The unique humor of a dog posing as a human professional makes it effective for playful edits that still connect to the themes of long hours, desk jobs, and the pressures of modern work.",The product is not visible.,45 out of 100.
42
+ https://storage.googleapis.com/somira/TiredDogPosingAsHomanInOfficeJob.mp4,"This entertaining video shows a dog wearing a suit and seated at an office desk, surrounded by multiple computer screens, working like a human professional. The cooler lighting tone emphasizes a slightly more serious and tired atmosphere, contrasting the humor of the setup. The dog’s concentrated, weary expression mimics the exhaustion of office workers, creating a funny and exaggerated take on stress and overwork in a corporate environment.",1 second,"This video aligns best when the TTS script emphasizes stress, long hours, or the strain of demanding office jobs. The humor of a dog posing as a human worker makes it memorable and engaging, while the cooler tone underscores the tiredness and intensity of workplace stress in an entertaining way.",The product is not visible.,45 out of 100.
43
+ https://storage.googleapis.com/somira/BackFlop.mp4,"This short, entertaining clip shows a parkour athlete attempting a backflip but landing flat on his back on the concrete. The video cuts right before the impact, creating a funny, exaggerated fail moment that hooks attention instantly. The sudden cutoff adds suspense and humor, making it highly engaging and shareable.",1 second,"This video aligns best when the TTS script references sudden pain, unexpected mistakes, or back and neck discomfort in a humorous way. Its exaggerated fail makes it an ideal viral-style hook to grab attention and inject energy.",The product is not visible.,90 out of 100.
44
+ https://storage.googleapis.com/somira/BumpySlide.mp4,"This playful video shows an adult sliding down a children’s slide at high speed. The oversized rider bumps along the way and the clip cuts just before hitting the ground, leaving the viewer laughing at the exaggerated, chaotic outcome. The comedic timing and relatable playground setting make it instantly engaging.",2 seconds,"This video aligns best when the TTS script mentions relatable discomforts, funny struggles, or moments of tension such as neck pain, back strain, or daily stress. The exaggerated slide fail makes it versatile as a hook or an entertaining insert that keeps attention high and adds humor, no matter where it appears in the edit.",The product is not visible.,90 out of 100.
45
+ https://storage.googleapis.com/somira/Car-Flip-Over.mp4,"This shocking short clip shows a person struck at the legs by a moving car and flipping into the air. The video cuts before the person lands, creating suspense and making the exaggerated fail more attention-grabbing. The dramatic action makes it both funny and unforgettable as a viral-style moment.",2 seconds,"This video aligns best when the TTS script mentions discomfort, unexpected strain, or exaggerated situations like muscle pain or body stress. The dramatic action makes it a versatile choice as a hook or entertaining insert, keeping the audience engaged and adding humor and surprise at different points in the edit.",The product is not visible.,90 out of 100.
46
+ https://storage.googleapis.com/somira/Construction-Water.mp4,"This entertaining video shows two construction workers handling a large pipe when water suddenly bursts out uncontrollably. One of them slips during the chaos, adding slapstick humor to the scene. The surprise and exaggerated reaction create a funny, lighthearted viral moment.",3 seconds,"This video aligns best when the TTS script highlights unexpected challenges, stressful moments, or chaotic situations. Its playful style makes it a versatile insert to add humor, boost engagement, and keep the audience entertained throughout the edit.",The product is not visible.,85 out of 100.
47
+ https://storage.googleapis.com/somira/DonkeyTakesTumble.mp4,"This goofy clip shows a donkey attempting to hop over a fence but getting stuck in a hilariously awkward way. The clumsy and unexpected result creates laughter and surprise, making it a memorable and entertaining viral-style fail.",1 second,"This video aligns best when the TTS script refers to awkward moments, clumsy mistakes, or humorous discomforts. Its exaggerated tumble makes it a versatile choice for injecting humor, keeping energy high, and grabbing attention in playful edits.",The product is not visible.,90 out of 100.
48
+ https://storage.googleapis.com/somira/FunnyPersonDoingHulaHoop.mp4,This lively clip shows a person in a gym exaggerating their hula hoop moves in a funny and playful way. The over-the-top energy and unexpected expressions make the video entertaining and instantly engaging for viewers.,1 second,"This video aligns best when the TTS script emphasizes playful exaggeration, bursts of energy, or lighthearted moments. Its humor and upbeat tone make it a flexible option to keep the edit fun, engaging, and entertaining for the audience. It also fits when the script talks about positive themes such as success, relief, relaxation, wellness, or similar.",The product is not visible.,85 out of 100.
49
+ https://storage.googleapis.com/somira/PeopleTurningLookingEngagingFace.mp4,"This playful video shows three people dramatically turning their heads with wide-eyed, exaggerated expressions of surprise and amazement. The synchronized reactions are humorous and create intrigue, making viewers curious about what they are looking at.",2 seconds,"This video aligns best when the TTS script introduces something impressive, highlights a surprising benefit, or builds curiosity. The exaggerated reactions make it ideal for playful reveals, adding humor and keeping the audience engaged.",The product is not visible.,85 out of 100.
50
+ https://storage.googleapis.com/somira/Tube-Launch.mp4,"This high-energy clip shows a person on an inflatable tube being launched at high speed across a lake. The video cuts before the landing, amplifying suspense and leaving viewers laughing at the chaotic ride. The extreme motion and timing make it instantly engaging and attention-grabbing.",2 seconds,"This video aligns best when the TTS script highlights problems or pain. Its fast-paced style makes it a versatile mid-edit hook insert, adding humor, surprise, and entertainment to keep the video lively.",The product is not visible.,100 out of 100.
51
+ https://storage.googleapis.com/somira/PersonWearingMassagerRelaxedd.mp4,"This video shows a male person sitting on the couch with their eyes closed, deeply relaxed, while the Somira neck massager is turned on and resting behind the person's neck. The setting is a calm, natural at-home environment.",2 seconds,"Use this clip specifically when the TTS script references therapeutic massage effects, deep-tissue relief, or the general relaxation benefits of using the Somira neck massager. It is particularly suitable for conveying strong comfort and relaxation when the massager is being used by a person.",The product is visible. ,35 out of 100.
src/utils.py CHANGED
@@ -77,132 +77,3 @@ def setup_logger(name='ContentAutomation', level=logging.INFO, log_file=None):
77
 
78
  # Create global logger instance
79
  logger = setup_logger()
80
-
81
-
82
- def format_duration(seconds: float) -> str:
83
- """
84
- Format duration in seconds to human-readable string
85
-
86
- Args:
87
- seconds: Duration in seconds
88
-
89
- Returns:
90
- Formatted string (e.g., "1m 23s" or "45s")
91
- """
92
- if seconds < 60:
93
- return f"{seconds:.1f}s"
94
-
95
- minutes = int(seconds // 60)
96
- remaining_seconds = seconds % 60
97
-
98
- if minutes < 60:
99
- return f"{minutes}m {remaining_seconds:.0f}s"
100
-
101
- hours = int(minutes // 60)
102
- remaining_minutes = minutes % 60
103
- return f"{hours}h {remaining_minutes}m"
104
-
105
-
106
- def format_file_size(size_bytes: int) -> str:
107
- """
108
- Format file size in bytes to human-readable string
109
-
110
- Args:
111
- size_bytes: Size in bytes
112
-
113
- Returns:
114
- Formatted string (e.g., "1.5 MB")
115
- """
116
- for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
117
- if size_bytes < 1024.0:
118
- return f"{size_bytes:.1f} {unit}"
119
- size_bytes /= 1024.0
120
- return f"{size_bytes:.1f} PB"
121
-
122
-
123
- def validate_video_config(config: dict) -> bool:
124
- """
125
- Validate video configuration parameters
126
-
127
- Args:
128
- config: Video configuration dictionary
129
-
130
- Returns:
131
- True if valid, False otherwise
132
- """
133
- valid_aspect_ratios = ['16:9', '9:16', '1:1', '4:5']
134
- valid_styles = ['commercial', 'minimal', 'cinematic', 'social']
135
-
136
- if 'aspect_ratio' in config:
137
- if config['aspect_ratio'] not in valid_aspect_ratios:
138
- logger.warning(f"Invalid aspect ratio: {config['aspect_ratio']}")
139
- return False
140
-
141
- if 'style' in config:
142
- if config['style'] not in valid_styles:
143
- logger.warning(f"Invalid style: {config['style']}")
144
- return False
145
-
146
- if 'duration' in config:
147
- if not (1 <= config['duration'] <= 60):
148
- logger.warning(f"Invalid duration: {config['duration']}s (must be 1-60)")
149
- return False
150
-
151
- return True
152
-
153
-
154
- def sanitize_filename(filename: str) -> str:
155
- """
156
- Sanitize filename by removing invalid characters
157
-
158
- Args:
159
- filename: Original filename
160
-
161
- Returns:
162
- Sanitized filename
163
- """
164
- import re
165
- # Remove invalid characters
166
- filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
167
- # Remove leading/trailing spaces and dots
168
- filename = filename.strip('. ')
169
- return filename
170
-
171
-
172
- def generate_video_id() -> str:
173
- """
174
- Generate unique video ID based on timestamp
175
-
176
- Returns:
177
- Unique video ID string
178
- """
179
- timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
180
- return f"video_{timestamp}"
181
-
182
-
183
- class ProgressTracker:
184
- """Track progress of multi-step operations"""
185
-
186
- def __init__(self, total_steps: int, description: str = "Processing"):
187
- self.total_steps = total_steps
188
- self.current_step = 0
189
- self.description = description
190
- self.start_time = datetime.now()
191
-
192
- def update(self, step_name: str):
193
- """Update progress to next step"""
194
- self.current_step += 1
195
- progress = (self.current_step / self.total_steps) * 100
196
- elapsed = (datetime.now() - self.start_time).total_seconds()
197
-
198
- logger.info(
199
- f"[{progress:.0f}%] Step {self.current_step}/{self.total_steps}: "
200
- f"{step_name} (Elapsed: {format_duration(elapsed)})"
201
- )
202
-
203
- def complete(self):
204
- """Mark progress as complete"""
205
- elapsed = (datetime.now() - self.start_time).total_seconds()
206
- logger.info(
207
- f"✓ {self.description} completed in {format_duration(elapsed)}"
208
- )
 
77
 
78
  # Create global logger instance
79
  logger = setup_logger()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/video_renderer.py CHANGED
@@ -8,6 +8,7 @@ if not hasattr(PIL.Image, 'ANTIALIAS'):
8
 
9
  import os
10
  import tempfile
 
11
  from typing import List, Dict, Optional
12
  from pathlib import Path
13
 
@@ -16,7 +17,7 @@ from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, con
16
  import numpy as np
17
  import textwrap
18
 
19
- from utils import logger, format_duration
20
 
21
 
22
  class VideoRenderer:
@@ -43,17 +44,21 @@ class VideoRenderer:
43
  if not self._validate_assets(assets):
44
  raise ValueError("Invalid assets provided for video rendering")
45
 
46
- # Load and prepare all assets
47
- video_clips = await self._prepare_video_clips(assets)
48
- audio_clips = await self._prepare_audio_clips(assets)
49
 
50
- # Create video sequence
51
- final_video = await self._create_video_sequence(video_clips, video_config)
 
52
 
53
- # Add audio
 
 
 
54
  final_video = await self._add_audio_track(final_video, audio_clips)
55
 
56
- # Add subtitles if script provided
57
  if assets.get('tts_script'):
58
  final_video = await self._add_subtitles(final_video, assets['tts_script'])
59
 
@@ -70,17 +75,51 @@ class VideoRenderer:
70
  logger.error(f"❌ Video rendering failed: {e}")
71
  raise
72
 
73
- async def _prepare_video_clips(self, assets: Dict) -> List[VideoFileClip]:
74
- """Load and prepare all video clips"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  clips = []
76
 
77
  try:
78
- # Load RunwayML hook video
79
  if assets.get('hook_video'):
80
  hook_clip = VideoFileClip(assets['hook_video']['local_path'])
 
81
  hook_clip = hook_clip.without_audio()
82
- clips.append(('hook', hook_clip))
83
- logger.info(f"✓ Loaded hook video: {hook_clip.duration:.2f}s")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  # Load library videos
86
  for i, lib_video in enumerate(assets.get('selected_videos', [])):
@@ -99,35 +138,38 @@ class VideoRenderer:
99
  clip.close()
100
  raise
101
 
102
- async def _prepare_audio_clips(self, assets: Dict) -> List[AudioFileClip]:
103
- """Load and prepare all audio clips with proper error handling"""
104
  clips = []
105
 
106
  try:
107
- # Load TTS audio
108
  if assets.get('tts_audio') and assets['tts_audio'].get('local_path'):
109
  try:
110
  tts_clip = AudioFileClip(assets['tts_audio']['local_path'])
111
- # Ensure the clip has proper duration
112
  if tts_clip.duration > 0:
 
113
  clips.append(('tts', tts_clip))
114
- logger.info(f"✓ Loaded TTS audio: {tts_clip.duration:.2f}s")
115
  else:
116
  logger.warning("⚠️ TTS audio has zero duration")
117
  tts_clip.close()
118
  except Exception as e:
119
  logger.error(f"❌ Failed to load TTS audio: {e}")
120
 
121
- # Load background music
122
  if assets.get('background_music_local'):
123
  try:
124
  bg_clip = AudioFileClip(assets['background_music_local'])
125
- # Ensure the clip has proper duration
126
  if bg_clip.duration > 0:
127
- # Reduce volume using volumex instead of custom function
128
- bg_clip = bg_clip.volumex(0.3)
 
 
 
 
129
  clips.append(('background', bg_clip))
130
- logger.info(f"✓ Loaded background music: {bg_clip.duration:.2f}s")
131
  else:
132
  logger.warning("⚠️ Background music has zero duration")
133
  bg_clip.close()
@@ -146,28 +188,133 @@ class VideoRenderer:
146
  pass
147
  raise
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  async def _create_video_sequence(self, video_clips: List[VideoFileClip],
150
- video_config: Optional[Dict]) -> VideoFileClip:
151
- """Create the final video sequence with proper timing"""
152
  try:
153
  if not video_clips:
154
  raise ValueError("No video clips available for sequence")
155
 
156
- # Calculate total available duration (max 15 seconds)
157
- max_duration = 15.0
158
- current_duration = sum(clip.duration for clip in video_clips)
 
159
 
160
- if current_duration > max_duration:
161
- logger.warning(f"⚠️ Video sequence too long ({current_duration:.1f}s), will trim to {max_duration}s")
162
- video_clips = self._trim_clips_to_fit(video_clips, max_duration)
163
-
164
- # Resize all clips to target aspect ratio (9:16 vertical)
165
- target_size = (1080, 1920) # 9:16 vertical
166
- resized_clips = [self._resize_for_vertical(clip, target_size) for clip in video_clips]
167
-
168
- # Create sequence
169
- final_sequence = concatenate_videoclips(resized_clips)
170
- logger.info(f"✓ Created video sequence: {final_sequence.duration:.2f}s")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  return final_sequence
173
 
@@ -177,6 +324,83 @@ class VideoRenderer:
177
  clip.close()
178
  raise
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  def _resize_for_vertical(self, clip: VideoFileClip, target_size: tuple) -> VideoFileClip:
181
  """Resize clip to fit vertical 9:16 aspect ratio"""
182
  target_w, target_h = target_size
@@ -190,9 +414,8 @@ class VideoRenderer:
190
  # Clip is taller, fit to width and crop height
191
  new_clip = clip.resize(width=target_w)
192
 
193
- # Center crop to exact size using a more compatible method
194
  try:
195
- # Try the new method first
196
  new_clip = new_clip.crop(
197
  x_center=new_clip.w / 2,
198
  y_center=new_clip.h / 2,
@@ -200,41 +423,20 @@ class VideoRenderer:
200
  height=target_h
201
  )
202
  except Exception:
203
- # Fallback method for cropping
204
  x1 = (new_clip.w - target_w) // 2
205
  y1 = (new_clip.h - target_h) // 2
206
  new_clip = new_clip.crop(x1=x1, y1=y1, x2=x1+target_w, y2=y1+target_h)
207
 
208
  return new_clip
209
 
210
- def _trim_clips_to_fit(self, clips: List[VideoFileClip], max_duration: float) -> List[VideoFileClip]:
211
- """Trim video clips to fit within max duration"""
212
- trimmed_clips = []
213
- remaining_duration = max_duration
214
-
215
- for clip in clips:
216
- if remaining_duration <= 0:
217
- break
218
-
219
- use_duration = min(clip.duration, remaining_duration)
220
- if use_duration < clip.duration:
221
- trimmed_clip = clip.subclip(0, use_duration)
222
- trimmed_clips.append(trimmed_clip)
223
- logger.info(f"Trimmed clip from {clip.duration:.1f}s to {use_duration:.1f}s")
224
- else:
225
- trimmed_clips.append(clip)
226
-
227
- remaining_duration -= use_duration
228
-
229
- return trimmed_clips
230
-
231
  async def _add_audio_track(self, video_clip: VideoFileClip, audio_clips: List[AudioFileClip]) -> VideoFileClip:
232
- """Add audio track to video with proper timing"""
233
  if not audio_clips:
234
  return video_clip
235
 
236
  try:
237
- # Filter out invalid audio clips
238
  valid_audio_clips = []
239
  for clip in audio_clips:
240
  if clip.duration > 0:
@@ -246,24 +448,26 @@ class VideoRenderer:
246
  if not valid_audio_clips:
247
  return video_clip
248
 
249
- # Mix all valid audio clips
250
  mixed_audio = CompositeAudioClip(valid_audio_clips)
251
 
252
- # Ensure audio doesn't exceed video duration
253
  video_duration = video_clip.duration
254
- if mixed_audio.duration > video_duration:
255
- logger.info(f"Trimming audio from {mixed_audio.duration:.2f}s to {video_duration:.2f}s")
256
- mixed_audio = mixed_audio.subclip(0, video_duration)
 
 
 
 
257
 
258
  # Add audio to video
259
  video_with_audio = video_clip.set_audio(mixed_audio)
260
- logger.info(f"✓ Added audio track: {mixed_audio.duration:.2f}s")
261
 
262
  return video_with_audio
263
 
264
  except Exception as e:
265
  logger.error(f"❌ Failed to add audio track: {e}")
266
- # Cleanup audio clips
267
  for clip in audio_clips:
268
  try:
269
  clip.close()
@@ -272,82 +476,206 @@ class VideoRenderer:
272
  return video_clip
273
 
274
  async def _add_subtitles(self, video_clip: VideoFileClip, script: str) -> CompositeVideoClip:
275
- """Add animated subtitles to video"""
276
  try:
277
- phrases = self._split_script_into_phrases(script)
 
278
  text_clips = []
279
 
280
  total_duration = video_clip.duration
 
 
 
 
 
 
 
 
 
 
 
 
281
  duration_per_phrase = total_duration / len(phrases)
282
- fade_duration = 0.3
283
 
284
- target_width, target_height = video_clip.size
285
 
286
  for i, phrase in enumerate(phrases):
287
  start_time = i * duration_per_phrase
 
288
 
289
- # Word wrapping for vertical format
290
- max_chars_per_line = 25
291
- wrapped_text = '\n'.join(textwrap.wrap(phrase, width=max_chars_per_line))
292
-
293
- # Create text clip
294
- text_clip = TextClip(
295
- txt=wrapped_text,
296
- fontsize=65,
297
- color='yellow' if i % 2 == 1 else 'white',
298
- font='Helvetica',
299
- stroke_color='black',
300
- stroke_width=4,
301
- method='caption',
302
- size=(int(target_width * 0.85), None)
303
- )
304
 
305
- # Position in center-upper area (safe zone for vertical video)
306
- vertical_position = int(target_height * 0.40)
307
- text_clip = text_clip.set_position(('center', vertical_position))
308
- text_clip = text_clip.set_start(start_time)
309
- text_clip = text_clip.set_duration(duration_per_phrase)
310
 
311
- # Add fade effects manually
312
- text_clip = text_clip.crossfadein(fade_duration).crossfadeout(fade_duration)
313
-
314
- text_clips.append(text_clip)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
  # Combine video with subtitles
317
  final_video = CompositeVideoClip([video_clip] + text_clips)
318
- logger.info(f"βœ“ Added {len(text_clips)} subtitle segments")
319
 
320
  return final_video
321
 
322
  except Exception as e:
323
  logger.error(f"❌ Failed to add subtitles: {e}")
 
 
324
  return video_clip
325
 
326
- def _split_script_into_phrases(self, script: str) -> List[str]:
327
- """Split script into subtitle phrases"""
328
- # Simple sentence splitting - can be enhanced with NLP
329
- sentences = [s.strip() + '.' for s in script.split('.') if s.strip()]
330
- return sentences[:6] # Limit to 6 phrases max
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
  async def _render_final_video(self, video_clip: VideoFileClip) -> str:
333
- """Render final video to file"""
334
- output_path = self.temp_dir / "final_video.mp4"
 
 
 
335
 
336
  try:
337
- logger.info("πŸ“Ή Rendering final video file...")
 
 
 
 
338
 
339
- video_clip.write_videofile(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  str(output_path),
341
  codec='libx264',
342
  audio_codec='aac',
343
- temp_audiofile=str(self.temp_dir / 'temp_audio.m4a'),
344
  remove_temp=True,
345
  fps=24,
346
  verbose=False,
347
- logger=None # Suppress moviepy progress bars
348
  )
349
 
350
- logger.info(f"βœ“ Final video rendered: {output_path}")
351
  return str(output_path)
352
 
353
  except Exception as e:
@@ -358,7 +686,7 @@ class VideoRenderer:
358
 
359
  def _validate_assets(self, assets: Dict) -> bool:
360
  """Validate that required assets are present"""
361
- required = ['selected_videos', 'tts_audio']
362
 
363
  for req in required:
364
  if not assets.get(req):
 
8
 
9
  import os
10
  import tempfile
11
+ import uuid
12
  from typing import List, Dict, Optional
13
  from pathlib import Path
14
 
 
17
  import numpy as np
18
  import textwrap
19
 
20
+ from utils import logger
21
 
22
 
23
  class VideoRenderer:
 
44
  if not self._validate_assets(assets):
45
  raise ValueError("Invalid assets provided for video rendering")
46
 
47
+ # Get TTS audio duration as the base duration
48
+ tts_duration = await self._get_audio_duration(assets.get('tts_audio', {}).get('local_path'))
49
+ logger.info(f"πŸ“Š TTS audio duration: {tts_duration:.2f}s - this will be our target video duration")
50
 
51
+ # Load and prepare all assets with TTS duration as target
52
+ video_clips = await self._prepare_video_clips(assets, tts_duration)
53
+ audio_clips = await self._prepare_audio_clips(assets, tts_duration)
54
 
55
+ # Create video sequence that matches TTS duration exactly
56
+ final_video = await self._create_video_sequence(video_clips, tts_duration, video_config)
57
+
58
+ # Add audio without any trimming
59
  final_video = await self._add_audio_track(final_video, audio_clips)
60
 
61
+ # Add improved subtitles if script provided
62
  if assets.get('tts_script'):
63
  final_video = await self._add_subtitles(final_video, assets['tts_script'])
64
 
 
75
  logger.error(f"❌ Video rendering failed: {e}")
76
  raise
77
 
78
+ async def _get_audio_duration(self, audio_path: str) -> float:
79
+ """Get the duration of the TTS audio file"""
80
+ try:
81
+ if not audio_path or not os.path.exists(audio_path):
82
+ logger.warning("⚠️ TTS audio path not found, using default duration")
83
+ return 12.0 # Fallback duration
84
+
85
+ audio_clip = AudioFileClip(audio_path)
86
+ duration = audio_clip.duration
87
+ audio_clip.close()
88
+ return duration
89
+ except Exception as e:
90
+ logger.error(f"❌ Failed to get audio duration: {e}")
91
+ return 12.0 # Fallback duration
92
+
93
+ async def _prepare_video_clips(self, assets: Dict, target_duration: float) -> List[VideoFileClip]:
94
+ """Load and prepare all video clips - create seamless loop from hook video"""
95
  clips = []
96
 
97
  try:
98
+ # Load hook video for seamless looping
99
  if assets.get('hook_video'):
100
  hook_clip = VideoFileClip(assets['hook_video']['local_path'])
101
+ hook_duration = hook_clip.duration
102
  hook_clip = hook_clip.without_audio()
103
+
104
+ logger.info(f"πŸ”„ Creating seamless loop from {hook_duration:.2f}s hook video")
105
+
106
+ # For seamless loop: Use SECOND HALF at start, FIRST HALF at end
107
+ # This creates: [second_half] -> [library videos] -> [first_half]
108
+ # When looped: [first_half][second_half] appears continuous
109
+
110
+ mid_point = hook_duration / 2
111
+
112
+ # Second half for beginning (e.g., 4-8s of an 8s video)
113
+ hook_start = hook_clip.subclip(mid_point, hook_duration)
114
+ clips.append(('hook_start', hook_start))
115
+ logger.info(f"βœ“ Hook start (second half): {hook_start.duration:.2f}s ({mid_point:.2f}s - {hook_duration:.2f}s)")
116
+
117
+ # First half for ending (e.g., 0-4s of an 8s video)
118
+ hook_end = hook_clip.subclip(0, mid_point)
119
+ clips.append(('hook_end', hook_end))
120
+ logger.info(f"βœ“ Hook end (first half): {hook_end.duration:.2f}s (0s - {mid_point:.2f}s)")
121
+
122
+ hook_clip.close()
123
 
124
  # Load library videos
125
  for i, lib_video in enumerate(assets.get('selected_videos', [])):
 
138
  clip.close()
139
  raise
140
 
141
+ async def _prepare_audio_clips(self, assets: Dict, target_duration: float) -> List[AudioFileClip]:
142
+ """Load audio clips and prepare for speed adjustment"""
143
  clips = []
144
 
145
  try:
146
+ # Load TTS audio - KEEP ORIGINAL VOLUME (no reduction)
147
  if assets.get('tts_audio') and assets['tts_audio'].get('local_path'):
148
  try:
149
  tts_clip = AudioFileClip(assets['tts_audio']['local_path'])
 
150
  if tts_clip.duration > 0:
151
+ # Keep TTS at full volume (1.0x) - no volumex applied
152
  clips.append(('tts', tts_clip))
153
+ logger.info(f"βœ“ Loaded TTS audio at FULL volume: {tts_clip.duration:.2f}s")
154
  else:
155
  logger.warning("⚠️ TTS audio has zero duration")
156
  tts_clip.close()
157
  except Exception as e:
158
  logger.error(f"❌ Failed to load TTS audio: {e}")
159
 
160
+ # Load background music - VERY LOW volume to not compete with TTS
161
  if assets.get('background_music_local'):
162
  try:
163
  bg_clip = AudioFileClip(assets['background_music_local'])
 
164
  if bg_clip.duration > 0:
165
+ # Trim background music to match TTS duration
166
+ if bg_clip.duration > target_duration:
167
+ bg_clip = bg_clip.subclip(0, target_duration)
168
+ logger.info(f"βœ“ Trimmed background music to {target_duration:.2f}s")
169
+ # Reduce volume significantly - TTS should be dominant
170
+ bg_clip = bg_clip.volumex(0.08) # Reduced from 0.15 to 0.08 (very subtle)
171
  clips.append(('background', bg_clip))
172
+ logger.info(f"βœ“ Loaded background music at 8% volume: {bg_clip.duration:.2f}s")
173
  else:
174
  logger.warning("⚠️ Background music has zero duration")
175
  bg_clip.close()
 
188
  pass
189
  raise
190
 
191
async def _speed_up_audio_with_pitch_correction(self, audio_clip, speed_factor: float) -> AudioFileClip:
    """Time-stretch ``audio_clip`` by ``speed_factor`` while keeping its pitch.

    The clip is exported to a temporary WAV file under ``self.temp_dir``,
    stretched with ``librosa.effects.time_stretch`` and loaded back as a new
    ``AudioFileClip``.  This is best effort: when librosa/soundfile cannot
    be imported, or any step fails, the original ``audio_clip`` is returned
    unchanged.

    Args:
        audio_clip: Audio clip to speed up.
        speed_factor: Stretch rate handed to librosa.

    Returns:
        The stretched clip, or ``audio_clip`` itself when stretching was not
        possible.
    """
    temp_input = None
    temp_output = None

    try:
        try:
            import librosa
            import soundfile as sf
        except ImportError:
            # No pitch-preserving backend available: the audio is handed
            # back untouched, so say exactly that in the log.
            logger.warning("⚠️ librosa not available, returning original audio without speed adjustment")
            return audio_clip

        # Create temp paths
        temp_input = str(self.temp_dir / f"audio_input_{uuid.uuid4().hex[:8]}.wav")
        temp_output = str(self.temp_dir / f"audio_output_{uuid.uuid4().hex[:8]}.wav")

        # Write original audio to temp file
        logger.info("🎡 Exporting audio for pitch-preserving speed adjustment...")
        audio_clip.write_audiofile(
            temp_input,
            fps=44100,
            nbytes=2,
            codec='pcm_s16le',
            verbose=False,
            logger=None
        )

        # NOTE(review): librosa.load appears to down-mix to mono unless
        # mono=False is passed - confirm that is acceptable for this mix.
        y, sr = librosa.load(temp_input, sr=44100)

        # Time-stretch without changing pitch
        logger.info(f"🎡 Applying pitch-preserving time stretch {speed_factor}x...")
        y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)

        # Save the stretched audio and load it back as a clip.  The output
        # file must stay on disk while the returned clip is in use.
        sf.write(temp_output, y_stretched, sr)
        stretched_clip = AudioFileClip(temp_output)

        logger.info(f"βœ… Audio sped up {speed_factor}x with preserved pitch using librosa")
        return stretched_clip

    except Exception as e:
        logger.error(f"❌ Failed to speed up audio with pitch correction: {e}")
        import traceback
        logger.error(traceback.format_exc())

        # A partially written output file is of no use to the fallback.
        if temp_output and os.path.exists(temp_output):
            try:
                os.remove(temp_output)
            except OSError:
                pass

        # Return original audio as fallback
        logger.warning("⚠️ Returning original audio without speed adjustment")
        return audio_clip

    finally:
        # The exported input is only an intermediate; remove it on every
        # path, including failures.
        if temp_input and os.path.exists(temp_input):
            try:
                os.remove(temp_input)
            except OSError:
                pass
252
+
253
  async def _create_video_sequence(self, video_clips: List[VideoFileClip],
254
+ target_duration: float, video_config: Optional[Dict]) -> VideoFileClip:
255
+ """Create video sequence matching TTS audio duration exactly"""
256
  try:
257
  if not video_clips:
258
  raise ValueError("No video clips available for sequence")
259
 
260
+ # Identify clips by matching against expected structure
261
+ hook_start = None
262
+ hook_end = None
263
+ library_clips = []
264
 
265
+ # First pass: identify hook clips (they should be equal duration, around 4s each for 8s video)
266
+ hook_candidates = []
267
+ for clip in video_clips:
268
+ if 3.0 <= clip.duration <= 5.0: # Hook segments are typically 3-5 seconds
269
+ hook_candidates.append(clip)
270
+ else:
271
+ library_clips.append(clip)
272
+
273
+ # Assign hook clips if we found exactly 2
274
+ if len(hook_candidates) == 2:
275
+ hook_start = hook_candidates[0]
276
+ hook_end = hook_candidates[1]
277
+ logger.info(f"βœ“ Identified hook clips: start={hook_start.duration:.2f}s, end={hook_end.duration:.2f}s")
278
+ elif len(hook_candidates) > 0:
279
+ # Use what we have
280
+ hook_start = hook_candidates[0]
281
+ if len(hook_candidates) > 1:
282
+ hook_end = hook_candidates[1]
283
+ logger.info(f"⚠️ Found {len(hook_candidates)} hook candidates, using available")
284
+
285
+ # Build sequence: [hook_start] -> [library_clips] -> [hook_end]
286
+ sequence_clips = []
287
+
288
+ if hook_start:
289
+ sequence_clips.append(hook_start)
290
+ logger.info(f" Added hook_start: {hook_start.duration:.2f}s")
291
+
292
+ for i, clip in enumerate(library_clips):
293
+ sequence_clips.append(clip)
294
+ logger.info(f" Added library_{i}: {clip.duration:.2f}s")
295
+
296
+ if hook_end:
297
+ sequence_clips.append(hook_end)
298
+ logger.info(f" Added hook_end: {hook_end.duration:.2f}s")
299
+
300
+ # Calculate current total
301
+ current_duration = sum(clip.duration for clip in sequence_clips)
302
+ logger.info(f"πŸ“Š Current sequence: {current_duration:.2f}s, Target: {target_duration:.2f}s")
303
+
304
+ # Adjust to exact target duration
305
+ sequence_clips = self._adjust_clips_to_duration(sequence_clips, target_duration)
306
+
307
+ # Verify adjusted duration
308
+ adjusted_duration = sum(clip.duration for clip in sequence_clips)
309
+ logger.info(f"πŸ“Š Adjusted sequence: {adjusted_duration:.2f}s")
310
+
311
+ # Resize all clips to 9:16 vertical
312
+ target_size = (1080, 1920)
313
+ resized_clips = [self._resize_for_vertical(clip, target_size) for clip in sequence_clips]
314
+
315
+ # Concatenate with no gap (method='compose' ensures smooth transitions)
316
+ final_sequence = concatenate_videoclips(resized_clips, method="compose")
317
+ logger.info(f"βœ… Created seamless video sequence: {final_sequence.duration:.2f}s")
318
 
319
  return final_sequence
320
 
 
324
  clip.close()
325
  raise
326
 
327
+ def _adjust_clips_to_duration(self, clips: List[VideoFileClip], target_duration: float) -> List[VideoFileClip]:
328
+ """Adjust video clips to match target duration by speeding up or extending library clips"""
329
+ current_duration = sum(clip.duration for clip in clips)
330
+ duration_diff = target_duration - current_duration
331
+
332
+ logger.info(f"βš™οΈ Adjusting: {current_duration:.2f}s -> {target_duration:.2f}s (diff: {duration_diff:.2f}s)")
333
+
334
+ if abs(duration_diff) < 0.1: # Close enough
335
+ return clips
336
+
337
+ # Identify which clips are library clips (not hook clips)
338
+ # Hook clips are typically shorter (3-5s), library clips are longer
339
+ library_indices = []
340
+ for i, clip in enumerate(clips):
341
+ if clip.duration > 5.0: # Likely a library clip
342
+ library_indices.append(i)
343
+
344
+ if not library_indices:
345
+ # If no library clips identified, adjust all clips proportionally
346
+ library_indices = list(range(len(clips)))
347
+
348
+ if duration_diff > 0:
349
+ # Need to extend - slow down or loop library clips
350
+ return self._extend_clips(clips, library_indices, duration_diff)
351
+ else:
352
+ # Need to shorten - speed up or trim library clips
353
+ return self._shorten_clips(clips, library_indices, abs(duration_diff))
354
+
355
+ def _extend_clips(self, clips: List[VideoFileClip], library_indices: List[int], extra_duration: float) -> List[VideoFileClip]:
356
+ """Extend duration by slowing down library clips"""
357
+ if not library_indices:
358
+ return clips
359
+
360
+ adjusted_clips = []
361
+ duration_per_clip = extra_duration / len(library_indices)
362
+
363
+ for i, clip in enumerate(clips):
364
+ if i in library_indices:
365
+ # Calculate speed factor to extend this clip
366
+ target_clip_duration = clip.duration + duration_per_clip
367
+ speed_factor = clip.duration / target_clip_duration
368
+
369
+ # Slow down the clip (speed < 1.0 means slower)
370
+ slowed_clip = clip.fx(lambda c: c.speedx(speed_factor))
371
+ adjusted_clips.append(slowed_clip)
372
+ logger.info(f" Extended clip {i}: {clip.duration:.2f}s -> {slowed_clip.duration:.2f}s (speed: {speed_factor:.2f}x)")
373
+ else:
374
+ adjusted_clips.append(clip)
375
+
376
+ return adjusted_clips
377
+
378
+ def _shorten_clips(self, clips: List[VideoFileClip], library_indices: List[int], reduce_duration: float) -> List[VideoFileClip]:
379
+ """Shorten duration by speeding up library clips"""
380
+ if not library_indices:
381
+ return clips
382
+
383
+ adjusted_clips = []
384
+ duration_per_clip = reduce_duration / len(library_indices)
385
+
386
+ for i, clip in enumerate(clips):
387
+ if i in library_indices:
388
+ # Calculate speed factor to shorten this clip
389
+ target_clip_duration = clip.duration - duration_per_clip
390
+ if target_clip_duration < 1.0:
391
+ target_clip_duration = 1.0 # Minimum duration
392
+
393
+ speed_factor = clip.duration / target_clip_duration
394
+
395
+ # Speed up the clip (speed > 1.0 means faster)
396
+ sped_clip = clip.fx(lambda c: c.speedx(speed_factor))
397
+ adjusted_clips.append(sped_clip)
398
+ logger.info(f" Shortened clip {i}: {clip.duration:.2f}s -> {sped_clip.duration:.2f}s (speed: {speed_factor:.2f}x)")
399
+ else:
400
+ adjusted_clips.append(clip)
401
+
402
+ return adjusted_clips
403
+
404
  def _resize_for_vertical(self, clip: VideoFileClip, target_size: tuple) -> VideoFileClip:
405
  """Resize clip to fit vertical 9:16 aspect ratio"""
406
  target_w, target_h = target_size
 
414
  # Clip is taller, fit to width and crop height
415
  new_clip = clip.resize(width=target_w)
416
 
417
+ # Center crop to exact size
418
  try:
 
419
  new_clip = new_clip.crop(
420
  x_center=new_clip.w / 2,
421
  y_center=new_clip.h / 2,
 
423
  height=target_h
424
  )
425
  except Exception:
426
+ # Fallback cropping method
427
  x1 = (new_clip.w - target_w) // 2
428
  y1 = (new_clip.h - target_h) // 2
429
  new_clip = new_clip.crop(x1=x1, y1=y1, x2=x1+target_w, y2=y1+target_h)
430
 
431
  return new_clip
432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  async def _add_audio_track(self, video_clip: VideoFileClip, audio_clips: List[AudioFileClip]) -> VideoFileClip:
434
+ """Add full audio track - video duration matches TTS, so no trimming needed"""
435
  if not audio_clips:
436
  return video_clip
437
 
438
  try:
439
+ # Filter valid audio clips
440
  valid_audio_clips = []
441
  for clip in audio_clips:
442
  if clip.duration > 0:
 
448
  if not valid_audio_clips:
449
  return video_clip
450
 
451
+ # Mix all audio clips
452
  mixed_audio = CompositeAudioClip(valid_audio_clips)
453
 
 
454
  video_duration = video_clip.duration
455
+ audio_duration = mixed_audio.duration
456
+
457
+ logger.info(f"πŸ”Š Audio: {audio_duration:.2f}s, Video: {video_duration:.2f}s")
458
+
459
+ # Video should already match audio duration, but verify
460
+ if abs(video_duration - audio_duration) > 0.5:
461
+ logger.warning(f"⚠️ Duration mismatch: Video={video_duration:.2f}s, Audio={audio_duration:.2f}s")
462
 
463
  # Add audio to video
464
  video_with_audio = video_clip.set_audio(mixed_audio)
465
+ logger.info(f"βœ… Added full audio track (no trimming)")
466
 
467
  return video_with_audio
468
 
469
  except Exception as e:
470
  logger.error(f"❌ Failed to add audio track: {e}")
 
471
  for clip in audio_clips:
472
  try:
473
  clip.close()
 
476
  return video_clip
477
 
478
  async def _add_subtitles(self, video_clip: VideoFileClip, script: str) -> CompositeVideoClip:
479
+ """Add word-by-word animated subtitles synced with TTS timing"""
480
  try:
481
+ # Split script into words for better timing sync
482
+ words = self._split_script_into_words(script)
483
  text_clips = []
484
 
485
  total_duration = video_clip.duration
486
+ target_width, target_height = video_clip.size
487
+
488
+ logger.info(f"πŸ“ Script has {len(words)} words, video duration: {total_duration:.2f}s")
489
+
490
+ # Group words into small phrases (2-4 words) for better readability
491
+ phrases = self._group_words_into_phrases(words, max_words=3)
492
+
493
+ logger.info(f"πŸ“ Grouped into {len(phrases)} phrases:")
494
+ for idx, p in enumerate(phrases):
495
+ logger.info(f" Phrase {idx}: '{p}'")
496
+
497
+ # Calculate equal timing for all phrases to ensure full coverage
498
  duration_per_phrase = total_duration / len(phrases)
 
499
 
500
+ logger.info(f"⏱️ Each phrase gets {duration_per_phrase:.2f}s")
501
 
502
  for i, phrase in enumerate(phrases):
503
  start_time = i * duration_per_phrase
504
+ phrase_duration = duration_per_phrase
505
 
506
+ logger.info(f" Phrase {i} at {start_time:.2f}s for {phrase_duration:.2f}s: '{phrase}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
 
508
+ # Smart word wrapping - ensure text fits screen
509
+ max_chars_per_line = 18
510
+ wrapped_lines = textwrap.wrap(phrase, width=max_chars_per_line)
 
 
511
 
512
+ # If wrapping creates too many lines, split into smaller phrases
513
+ if len(wrapped_lines) > 3:
514
+ # Split phrase in half and create two separate text clips
515
+ mid_point = len(phrase.split()) // 2
516
+ phrase_words = phrase.split()
517
+ first_half = ' '.join(phrase_words[:mid_point])
518
+ second_half = ' '.join(phrase_words[mid_point:])
519
+
520
+ half_duration = phrase_duration / 2
521
+
522
+ # Process first half
523
+ self._add_single_subtitle(
524
+ first_half, start_time, half_duration,
525
+ target_width, target_height, text_clips
526
+ )
527
+
528
+ # Process second half
529
+ self._add_single_subtitle(
530
+ second_half, start_time + half_duration, half_duration,
531
+ target_width, target_height, text_clips
532
+ )
533
+ else:
534
+ # Normal single subtitle
535
+ self._add_single_subtitle(
536
+ phrase, start_time, phrase_duration,
537
+ target_width, target_height, text_clips
538
+ )
539
+
540
+ logger.info(f"πŸ“Š Created {len(text_clips)} subtitle clips covering full {total_duration:.2f}s")
541
 
542
  # Combine video with subtitles
543
  final_video = CompositeVideoClip([video_clip] + text_clips)
544
+ logger.info(f"βœ… Added {len(text_clips)} synced subtitle segments")
545
 
546
  return final_video
547
 
548
  except Exception as e:
549
  logger.error(f"❌ Failed to add subtitles: {e}")
550
+ import traceback
551
+ logger.error(traceback.format_exc())
552
  return video_clip
553
 
554
def _add_single_subtitle(self, text: str, start_time: float, duration: float,
                         target_width: int, target_height: int, text_clips: List):
    """Build one styled subtitle clip and append it to ``text_clips``.

    The text is wrapped at 18 characters per line and drawn in white with a
    black outline; the font size shrinks as the number of lines grows.  The
    clip is placed at 72% of the frame height, starts at ``start_time`` and
    lasts ``duration`` seconds with a short fade in and out.

    Any failure is logged and swallowed so that one bad subtitle does not
    stop the remaining ones; in that case nothing is appended.
    """
    try:
        caption = '\n'.join(textwrap.wrap(text, width=18))

        # More lines means a smaller font.
        rows = caption.count('\n') + 1
        if rows > 2:
            font_px = 70
        elif rows > 1:
            font_px = 75
        else:
            font_px = 85

        subtitle = TextClip(
            txt=caption,
            fontsize=font_px,
            color='white',
            font='Arial-Bold',
            stroke_color='black',
            stroke_width=4,
            method='caption',
            size=(int(target_width * 0.85), None),
            align='center'
        )

        # Lower part of the frame.
        top = int(target_height * 0.72)
        subtitle = (
            subtitle
            .set_position(('center', top))
            .set_start(start_time)
            .set_duration(duration)
        )

        # The fade never takes more than a quarter of the clip.
        fade = min(0.2, duration / 4)
        subtitle = subtitle.crossfadein(fade).crossfadeout(fade)

        text_clips.append(subtitle)

    except Exception as e:
        logger.error(f"❌ Failed to create subtitle: {e}")
598
+
599
+ def _split_script_into_words(self, script: str) -> List[str]:
600
+ """Split script into individual words"""
601
+ # Remove extra punctuation but keep sentence structure
602
+ import re
603
+ # Remove multiple spaces and clean up
604
+ script = re.sub(r'\s+', ' ', script).strip()
605
+ words = script.split()
606
+ return words
607
+
608
+ def _group_words_into_phrases(self, words: List[str], max_words: int = 3) -> List[str]:
609
+ """Group words into small readable phrases"""
610
+ phrases = []
611
+ current_phrase = []
612
+
613
+ for word in words:
614
+ current_phrase.append(word)
615
+
616
+ # Create phrase break at punctuation or max word count
617
+ has_punctuation = any(p in word for p in ['.', ',', '!', '?', ';'])
618
+
619
+ if len(current_phrase) >= max_words or has_punctuation:
620
+ phrases.append(' '.join(current_phrase))
621
+ current_phrase = []
622
+
623
+ # Add remaining words
624
+ if current_phrase:
625
+ phrases.append(' '.join(current_phrase))
626
+
627
+ return phrases
628
 
629
  async def _render_final_video(self, video_clip: VideoFileClip) -> str:
630
+ """Render final video with 1.3x speed - video sped up, audio pitch-preserved"""
631
+ # Generate unique filename using UUID
632
+ unique_id = uuid.uuid4().hex[:8]
633
+ filename = f"final_video_{unique_id}.mp4"
634
+ output_path = self.temp_dir / filename
635
 
636
  try:
637
+ original_duration = video_clip.duration
638
+ speed_factor = 1.3
639
+
640
+ logger.info(f"πŸ“Ή Rendering final video: {filename}")
641
+ logger.info(f"⚑ Speeding up: {original_duration:.2f}s -> {original_duration/speed_factor:.2f}s")
642
 
643
+ # Extract audio for pitch-preserving speed up
644
+ audio_clip = video_clip.audio
645
+
646
+ if audio_clip:
647
+ # Speed up audio with pitch correction using librosa
648
+ sped_audio_clip = await self._speed_up_audio_with_pitch_correction(audio_clip, speed_factor)
649
+
650
+ # Speed up video only (no audio yet)
651
+ video_only = video_clip.without_audio()
652
+ sped_video = video_only.fx(lambda c: c.speedx(speed_factor))
653
+
654
+ # Combine sped-up video with pitch-corrected audio
655
+ final_clip = sped_video.set_audio(sped_audio_clip)
656
+
657
+ # Ensure audio and video match in duration
658
+ final_duration = min(sped_video.duration, sped_audio_clip.duration)
659
+ final_clip = final_clip.set_duration(final_duration)
660
+
661
+ logger.info(f"βœ… Video with pitch-preserved audio: {final_duration:.2f}s")
662
+ else:
663
+ # No audio - just speed up video
664
+ final_clip = video_clip.fx(lambda c: c.speedx(speed_factor))
665
+
666
+ # Render final video
667
+ final_clip.write_videofile(
668
  str(output_path),
669
  codec='libx264',
670
  audio_codec='aac',
671
+ temp_audiofile=str(self.temp_dir / f'temp_audio_{unique_id}.m4a'),
672
  remove_temp=True,
673
  fps=24,
674
  verbose=False,
675
+ logger=None
676
  )
677
 
678
+ logger.info(f"βœ… Final video rendered: {output_path}")
679
  return str(output_path)
680
 
681
  except Exception as e:
 
686
 
687
  def _validate_assets(self, assets: Dict) -> bool:
688
  """Validate that required assets are present"""
689
+ required = ['selected_videos', 'tts_audio', 'hook_video']
690
 
691
  for req in required:
692
  if not assets.get(req):