jebin2 commited on
Commit
439da91
Β·
1 Parent(s): 8dd22d6

random video based on beats

Browse files
requirements.txt CHANGED
@@ -14,6 +14,7 @@ aiofiles==23.2.1
14
  google-cloud-speech==2.34.0
15
  google-api-python-client==2.184.0
16
  google-auth-oauthlib==1.2.3
 
17
 
18
  # aiosignal==1.4.0
19
  # annotated-types==0.7.0
 
14
  google-cloud-speech==2.34.0
15
  google-api-python-client==2.184.0
16
  google-auth-oauthlib==1.2.3
17
+ librosa==0.11.0
18
 
19
  # aiosignal==1.4.0
20
  # annotated-types==0.7.0
src/asset_selector.py CHANGED
@@ -282,3 +282,16 @@ Video Options: {video_context}
282
  """Reset audio index to start from beginning (useful for batch processing)"""
283
  self.current_audio_index = 0
284
  logger.info("πŸ”„ Reset background music index to 0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  """Reset audio index to start from beginning (useful for batch processing)"""
283
  self.current_audio_index = 0
284
  logger.info("πŸ”„ Reset background music index to 0")
285
+
286
+ def select_random_videos(self, count: int) -> List[str]:
287
+ import random
288
+
289
+ all_videos = self.data_holder.visual_assets.get("all_videos", [])
290
+ available_videos = [v for v in all_videos if v.get("local_path")]
291
+
292
+ if len(available_videos) < count:
293
+ raise ValueError(f"Not enough videos to select {count} random videos.")
294
+
295
+ selected_videos = random.sample(available_videos, count)
296
+
297
+ return [v["local_path"] for v in selected_videos]
src/automation.py CHANGED
@@ -19,6 +19,8 @@ from a2e_avatar import create_greenscreen_video_workflow
19
  import remove_green_bg
20
  import hashlib
21
  from onscreebcta import add_cta
 
 
22
 
23
  class ContentAutomation:
24
  def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
@@ -39,10 +41,14 @@ class ContentAutomation:
39
  try:
40
  await self.api_clients.list_gcs_files()
41
  self.data_holder.hash_tts_script = hashlib.sha256(tts_script.encode('utf-8')).hexdigest()
 
42
  # STEP 1: clean tts_script for better TTS
43
  logger.info("\n🎭 STEP 1: Clean TTS Script")
44
  self.data_holder.tts_script = utils.clean_tts_script(tts_script)
45
 
 
 
 
46
  prompt_refer = content_strategy.get("gemini_prompt", "")
47
  if os.getenv("USE_VEO", "false").lower() == "true":
48
  prompt_refer = content_strategy.get("runway_veo_prompt", "")
@@ -185,6 +191,82 @@ class ContentAutomation:
185
  logger.error(traceback.format_exc())
186
  return {"success": False, "error": str(e), "duration": elapsed_time}
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  async def create_audio(self):
189
  try_again = False
190
  tts_audio, timed_words = await self.process_audio()
@@ -515,3 +597,40 @@ class ContentAutomation:
515
 
516
  logger.error(f"πŸ“‹ Debug: {traceback.format_exc()}")
517
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  import remove_green_bg
20
  import hashlib
21
  from onscreebcta import add_cta
22
+ import numpy as np
23
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
24
 
25
  class ContentAutomation:
26
  def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
 
41
  try:
42
  await self.api_clients.list_gcs_files()
43
  self.data_holder.hash_tts_script = hashlib.sha256(tts_script.encode('utf-8')).hexdigest()
44
+
45
  # STEP 1: clean tts_script for better TTS
46
  logger.info("\n🎭 STEP 1: Clean TTS Script")
47
  self.data_holder.tts_script = utils.clean_tts_script(tts_script)
48
 
49
+ if os.getenv("ONLY_RANDOM_VIDEOS", "false").lower() == "true":
50
+ return await self.execute_random_pipeline(content_strategy, tts_script)
51
+
52
  prompt_refer = content_strategy.get("gemini_prompt", "")
53
  if os.getenv("USE_VEO", "false").lower() == "true":
54
  prompt_refer = content_strategy.get("runway_veo_prompt", "")
 
191
  logger.error(traceback.format_exc())
192
  return {"success": False, "error": str(e), "duration": elapsed_time}
193
 
194
+ async def execute_random_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
195
+ try:
196
+ self._download_all_video()
197
+ logger.info("\n🎡 STEP 1: Background Music")
198
+ self.data_holder.visual_assets["background_music_url"] = self.asset_selector.select_background_music()
199
+ await self._download_to_local(
200
+ self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
201
+ )
202
+
203
+ # Get music duration
204
+ # from moviepy.editor import AudioFileClip
205
+ # music_clip = AudioFileClip(self.data_holder.visual_assets["background_music_local"])
206
+ music_duration = 15
207
+ # music_clip.close()
208
+
209
+ beat_times = self.get_beat_times(self.data_holder.visual_assets["background_music_local"])
210
+
211
+ # Filter beats to be at least 1 second apart
212
+ filtered_beat_times = self._filter_beats_by_min_interval(beat_times, min_interval=1.0)
213
+
214
+ logger.info(f"Original beats: {len(beat_times)}, Filtered beats: {len(filtered_beat_times)}")
215
+ logger.info(f"Music duration: {music_duration:.2f}s")
216
+ logger.info(f"Filtered beat times: {filtered_beat_times}")
217
+
218
+ # Select enough videos (one for each beat interval + intro/outro)
219
+ num_videos_needed = len(filtered_beat_times) + 2
220
+ self.data_holder.visual_assets["selected_videos"] = self.asset_selector.select_random_videos(num_videos_needed)
221
+ logger.info(self.data_holder.visual_assets["selected_videos"])
222
+
223
+ for v in self.data_holder.visual_assets["selected_videos"]: utils.resize_video(v, overwrite=True)
224
+
225
+ # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
226
+ video_no_audio_path = await self.video_renderer.render_random_video(
227
+ filtered_beat_times, # <-- Pass the beat times, not intervals
228
+ music_duration
229
+ )
230
+
231
+ # STEP 7: Add audio to video
232
+ logger.info("\nπŸ”Š STEP 2: Add Audio to Video")
233
+ final_video_path = await self.video_renderer.add_audio_to_video(video_no_audio_path)
234
+
235
+ # final_video_path = loudness_normalize.normalize_loudness(final_video_path)
236
+
237
+ # STEP 8: Upload to cloud storage
238
+ final_url = None
239
+ if os.getenv("DO_NOT_PUBLISH", "false").lower() != "true":
240
+ logger.info("\n☁️ STEP 9: Cloud Storage Upload")
241
+ final_url = await self.api_clients.store_in_gcs(final_video_path, "video")
242
+ await self.api_clients.upload_to_temp_gcs(final_video_path, "video")
243
+
244
+ # Pipeline completion
245
+ elapsed_time = time.time() - self.pipeline_start_time
246
+ logger.info(f"\nβœ… Enhanced pipeline completed in {elapsed_time:.2f}s")
247
+
248
+ return {
249
+ "success": True,
250
+ "final_url": final_url,
251
+ "tts_script": self.data_holder.tts_script,
252
+ "local_path": final_video_path,
253
+ "duration": elapsed_time,
254
+ "voice_used": self.data_holder.selected_voice,
255
+ "assets_metadata": {
256
+ "hook_video": self.data_holder.visual_assets.get("hook_video", {}).get("task_id"),
257
+ "selected_videos_count": len(self.data_holder.visual_assets.get("selected_videos", [])),
258
+ "natural_speed": True, # Indicate no slow-motion
259
+ },
260
+ }
261
+
262
+ except Exception as e:
263
+ elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
264
+ logger.error(f"\n❌ Pipeline failed after {elapsed_time:.2f}s: {e}")
265
+ import traceback
266
+
267
+ logger.error(traceback.format_exc())
268
+ return {"success": False, "error": str(e), "duration": elapsed_time}
269
+
270
  async def create_audio(self):
271
  try_again = False
272
  tts_audio, timed_words = await self.process_audio()
 
597
 
598
  logger.error(f"πŸ“‹ Debug: {traceback.format_exc()}")
599
  return False
600
+
601
+ def get_beat_times(self, audio_path: str) -> List[float]:
602
+ import librosa
603
+
604
+ y, sr = librosa.load(audio_path)
605
+
606
+ tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
607
+
608
+ beat_times = librosa.frames_to_time(beat_frames, sr=sr)
609
+
610
+ logger.info(f"Tempo: {tempo} BPM")
611
+
612
+ logger.info(f"Beat times (seconds): {beat_times}")
613
+
614
+ return beat_times.tolist()
615
+
616
+ def _filter_beats_by_min_interval(self, beat_times: np.ndarray, min_interval: float = 1.0) -> np.ndarray:
617
+ """
618
+ Filter beat times to ensure minimum interval between beats.
619
+
620
+ Args:
621
+ beat_times: Array of beat timestamps in seconds
622
+ min_interval: Minimum time interval between beats (default 1.0 second)
623
+
624
+ Returns:
625
+ Filtered array of beat times
626
+ """
627
+ if len(beat_times) == 0:
628
+ return beat_times
629
+
630
+ filtered = [beat_times[0]] # Always keep the first beat
631
+
632
+ for beat in beat_times[1:]:
633
+ if beat - filtered[-1] >= min_interval:
634
+ filtered.append(beat)
635
+
636
+ return np.array(filtered)
src/gemini_sdk.py CHANGED
@@ -75,7 +75,7 @@ def generate_video(prompt: str, output_path: str, image: str = None) -> str | No
75
  return None
76
  generated_video = operation.response.generated_videos[0]
77
  client.files.download(file=generated_video.video)
78
- unique_id = uuid.uuid4().hex[:8]
79
  generated_video.video.save(output_path)
80
  utils.remove_black_padding(output_path, overwrite=True)
81
  utils.resize_video(output_path, overwrite=True)
 
75
  return None
76
  generated_video = operation.response.generated_videos[0]
77
  client.files.download(file=generated_video.video)
78
+
79
  generated_video.video.save(output_path)
80
  utils.remove_black_padding(output_path, overwrite=True)
81
  utils.resize_video(output_path, overwrite=True)
src/process_csv.py CHANGED
@@ -14,6 +14,7 @@ from data_holder import DataHolder
14
  from asset_selector import AssetSelector
15
  import argparse
16
  import random
 
17
 
18
  DATA_DIR = Path("data")
19
  ALL_VIDEO_FILE_INFO = None
@@ -21,6 +22,8 @@ ALL_VIDEO_FILE_INFO = None
21
  def get_progress_file(job_index=None):
22
  """Get the appropriate progress file for this job."""
23
  if job_index is not None:
 
 
24
  return DATA_DIR / f"executed_lines_job{job_index}.txt"
25
  return DATA_DIR / "executed_lines.txt"
26
 
@@ -259,6 +262,36 @@ async def process_all_csvs(config, commit=False, job_index=None, total_jobs=None
259
 
260
  logger.info(f"🏁 Job {job_index} finished: {success_count}/{processed_count} successful")
261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  async def main():
264
  """Parse command-line arguments."""
@@ -319,6 +352,9 @@ Examples:
319
  os.environ.pop("VERTEX_AI_CREDENTIALS_JSON", None)
320
 
321
  await download_all_video(config)
 
 
 
322
  await process_all_csvs(config, commit=args.commit, job_index=job_index, total_jobs=total_jobs)
323
 
324
 
 
14
  from asset_selector import AssetSelector
15
  import argparse
16
  import random
17
+ import uuid
18
 
19
  DATA_DIR = Path("data")
20
  ALL_VIDEO_FILE_INFO = None
 
22
  def get_progress_file(job_index=None):
23
  """Get the appropriate progress file for this job."""
24
  if job_index is not None:
25
+ if job_index == "create_plain_videos":
26
+ return DATA_DIR / "create_plain_videos_executed_count.txt"
27
  return DATA_DIR / f"executed_lines_job{job_index}.txt"
28
  return DATA_DIR / "executed_lines.txt"
29
 
 
262
 
263
  logger.info(f"🏁 Job {job_index} finished: {success_count}/{processed_count} successful")
264
 
265
+ async def create_plain_videos(config, commit=False):
266
+ """Create N random videos for testing purposes."""
267
+ n = int(os.getenv("PLAIN_VIDEO_COUNT", 100))
268
+ logger.info(f"Creating {n} random videos for testing...")
269
+ progress_file = get_progress_file("create_plain_videos")
270
+ skip_upto = -1
271
+ if progress_file.exists():
272
+ with progress_file.open("r") as pf:
273
+ try: skip_upto = int(pf.read().strip())
274
+ except (ValueError, OSError): skip_upto = -1
275
+
276
+ logger.info(f"Skipping first {skip_upto} videos already created.")
277
+ for i in range(n):
278
+ if i <= skip_upto:
279
+ continue
280
+ row = {
281
+ "TTS Script (AI Avatar)": uuid.uuid4().hex[:8],
282
+ }
283
+ config["current_audio_index"] = i
284
+ result = await process_row(row, config)
285
+ # Mark as executed
286
+ if commit and result.get("success", False):
287
+ with progress_file.open("w") as pf:
288
+ pf.write(str(i))
289
+ git_commit_progress("create_plain_videos", commit)
290
+
291
+ if os.getenv("DO_NOT_PUBLISH", "false").lower() == "true":
292
+ break
293
+
294
+ logger.info(f"Finished creating {n} test videos.")
295
 
296
  async def main():
297
  """Parse command-line arguments."""
 
352
  os.environ.pop("VERTEX_AI_CREDENTIALS_JSON", None)
353
 
354
  await download_all_video(config)
355
+ if os.getenv("ONLY_RANDOM_VIDEOS", "false").lower() == "true":
356
+ await create_plain_videos(config, commit=args.commit)
357
+
358
  await process_all_csvs(config, commit=args.commit, job_index=job_index, total_jobs=total_jobs)
359
 
360
 
src/video_renderer.py CHANGED
@@ -967,6 +967,7 @@ class VideoRenderer:
967
  return video_clip
968
 
969
  mixed_audio = CompositeAudioClip(valid_audio_clips)
 
970
  video_with_audio = video_clip.set_audio(mixed_audio)
971
 
972
  logger.info(f"βœ… Added audio track")
@@ -1069,6 +1070,85 @@ class VideoRenderer:
1069
 
1070
  return self.data_holder.current_caption_style
1071
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1072
 
1073
  def _validate_assets_for_video_only(self) -> bool:
1074
  """Validate assets for video-only rendering"""
 
967
  return video_clip
968
 
969
  mixed_audio = CompositeAudioClip(valid_audio_clips)
970
+ mixed_audio = mixed_audio.subclip(0, min(video_clip.duration, mixed_audio.duration))
971
  video_with_audio = video_clip.set_audio(mixed_audio)
972
 
973
  logger.info(f"βœ… Added audio track")
 
1070
 
1071
  return self.data_holder.current_caption_style
1072
 
1073
+ async def render_random_video(self, beat_times, music_duration):
1074
+ """
1075
+ Render video that syncs perfectly with music beats.
1076
+
1077
+ Args:
1078
+ beat_times: Array of beat timestamps (NOT intervals)
1079
+ music_duration: Total duration of the background music
1080
+ """
1081
+ clips = []
1082
+
1083
+ if len(beat_times) == 0:
1084
+ raise ValueError("No beat times detected")
1085
+
1086
+ logger.info(f"Creating video synced to {len(beat_times)} beats")
1087
+ logger.info(f"Beat times: {beat_times}")
1088
+
1089
+ # Handle the segment BEFORE the first beat (if any)
1090
+ if beat_times[0] > 0.1: # If first beat doesn't start immediately
1091
+ first_video = self.data_holder.visual_assets["selected_videos"][0]
1092
+ clip = VideoFileClip(first_video)
1093
+ intro_duration = min(beat_times[0], clip.duration)
1094
+ first_clip = clip.subclip(0, intro_duration)
1095
+ clips.append(first_clip)
1096
+ logger.info(f"Intro clip: 0.00s to {beat_times[0]:.2f}s (duration: {intro_duration:.2f}s)")
1097
+ video_start_idx = 1
1098
+ else:
1099
+ video_start_idx = 0
1100
+
1101
+ # Create clips for each beat interval
1102
+ for i in range(len(beat_times) - 1):
1103
+ video_idx = video_start_idx + i
1104
+ if video_idx >= len(self.data_holder.visual_assets["selected_videos"]):
1105
+ break
1106
+
1107
+ video_path = self.data_holder.visual_assets["selected_videos"][video_idx]
1108
+
1109
+ # Duration = time until next beat
1110
+ duration = beat_times[i + 1] - beat_times[i]
1111
+
1112
+ try:
1113
+ clip = VideoFileClip(video_path)
1114
+ trim_duration = min(duration, clip.duration)
1115
+
1116
+ trimmed_clip = clip.subclip(0, trim_duration)
1117
+ clips.append(trimmed_clip)
1118
+
1119
+ logger.info(f"Clip {i+1}: from {beat_times[i]:.2f}s to {beat_times[i+1]:.2f}s (duration: {duration:.2f}s)")
1120
+
1121
+ except Exception as e:
1122
+ logger.error(f"Error processing video {video_idx}: {e}")
1123
+ continue
1124
+
1125
+ # Handle the last segment (from last beat to end of music)
1126
+ last_duration = music_duration - beat_times[-1]
1127
+ if last_duration > 0.5: # If there's significant time left
1128
+ video_idx = video_start_idx + len(beat_times) - 1
1129
+ if video_idx < len(self.data_holder.visual_assets["selected_videos"]):
1130
+ video_path = self.data_holder.visual_assets["selected_videos"][video_idx]
1131
+ try:
1132
+ clip = VideoFileClip(video_path)
1133
+ final_clip = clip.subclip(0, min(last_duration, clip.duration))
1134
+ clips.append(final_clip)
1135
+ logger.info(f"Outro clip: from {beat_times[-1]:.2f}s to {music_duration:.2f}s (duration: {last_duration:.2f}s)")
1136
+ except Exception as e:
1137
+ logger.error(f"Error processing final video: {e}")
1138
+
1139
+ if not clips:
1140
+ raise ValueError("No valid video clips created")
1141
+
1142
+ final_video = concatenate_videoclips(clips, method="compose")
1143
+ final_video = final_video.without_audio()
1144
+
1145
+ # Ensure final video matches music duration
1146
+ if final_video.duration < music_duration:
1147
+ # Pad with black frame if needed
1148
+ logger.warning(f"Video duration {final_video.duration:.2f}s < music duration {music_duration:.2f}s")
1149
+
1150
+ final_video = final_video.subclip(0, music_duration)
1151
+ return await self._render_video_only(final_video)
1152
 
1153
  def _validate_assets_for_video_only(self) -> bool:
1154
  """Validate assets for video-only rendering"""