jebin2 commited on
Commit
439da91
Β·
1 Parent(s): 8dd22d6

random video based on beats

Browse files
requirements.txt CHANGED
@@ -14,6 +14,7 @@ aiofiles==23.2.1
14
  google-cloud-speech==2.34.0
15
  google-api-python-client==2.184.0
16
  google-auth-oauthlib==1.2.3
 
17
 
18
  # aiosignal==1.4.0
19
  # annotated-types==0.7.0
 
14
  google-cloud-speech==2.34.0
15
  google-api-python-client==2.184.0
16
  google-auth-oauthlib==1.2.3
17
+ librosa==0.11.0
18
 
19
  # aiosignal==1.4.0
20
  # annotated-types==0.7.0
src/asset_selector.py CHANGED
@@ -282,3 +282,16 @@ Video Options: {video_context}
282
  """Reset audio index to start from beginning (useful for batch processing)"""
283
  self.current_audio_index = 0
284
  logger.info("πŸ”„ Reset background music index to 0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  """Reset audio index to start from beginning (useful for batch processing)"""
283
  self.current_audio_index = 0
284
  logger.info("πŸ”„ Reset background music index to 0")
285
+
286
+ def select_random_videos(self, count: int) -> List[str]:
287
+ import random
288
+
289
+ all_videos = self.data_holder.visual_assets.get("all_videos", [])
290
+ available_videos = [v for v in all_videos if v.get("local_path")]
291
+
292
+ if len(available_videos) < count:
293
+ raise ValueError(f"Not enough videos to select {count} random videos.")
294
+
295
+ selected_videos = random.sample(available_videos, count)
296
+
297
+ return [v["local_path"] for v in selected_videos]
src/automation.py CHANGED
@@ -19,6 +19,8 @@ from a2e_avatar import create_greenscreen_video_workflow
19
  import remove_green_bg
20
  import hashlib
21
  from onscreebcta import add_cta
 
 
22
 
23
  class ContentAutomation:
24
  def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
@@ -39,10 +41,14 @@ class ContentAutomation:
39
  try:
40
  await self.api_clients.list_gcs_files()
41
  self.data_holder.hash_tts_script = hashlib.sha256(tts_script.encode('utf-8')).hexdigest()
 
42
  # STEP 1: clean tts_script for better TTS
43
  logger.info("\n🎭 STEP 1: Clean TTS Script")
44
  self.data_holder.tts_script = utils.clean_tts_script(tts_script)
45
 
 
 
 
46
  prompt_refer = content_strategy.get("gemini_prompt", "")
47
  if os.getenv("USE_VEO", "false").lower() == "true":
48
  prompt_refer = content_strategy.get("runway_veo_prompt", "")
@@ -185,6 +191,82 @@ class ContentAutomation:
185
  logger.error(traceback.format_exc())
186
  return {"success": False, "error": str(e), "duration": elapsed_time}
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  async def create_audio(self):
189
  try_again = False
190
  tts_audio, timed_words = await self.process_audio()
@@ -515,3 +597,40 @@ class ContentAutomation:
515
 
516
  logger.error(f"πŸ“‹ Debug: {traceback.format_exc()}")
517
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  import remove_green_bg
20
  import hashlib
21
  from onscreebcta import add_cta
22
+ import numpy as np
23
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
24
 
25
  class ContentAutomation:
26
  def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
 
41
  try:
42
  await self.api_clients.list_gcs_files()
43
  self.data_holder.hash_tts_script = hashlib.sha256(tts_script.encode('utf-8')).hexdigest()
44
+
45
  # STEP 1: clean tts_script for better TTS
46
  logger.info("\n🎭 STEP 1: Clean TTS Script")
47
  self.data_holder.tts_script = utils.clean_tts_script(tts_script)
48
 
49
+ if os.getenv("ONLY_RANDOM_VIDEOS", "false").lower() == "true":
50
+ return await self.execute_random_pipeline(content_strategy, tts_script)
51
+
52
  prompt_refer = content_strategy.get("gemini_prompt", "")
53
  if os.getenv("USE_VEO", "false").lower() == "true":
54
  prompt_refer = content_strategy.get("runway_veo_prompt", "")
 
191
  logger.error(traceback.format_exc())
192
  return {"success": False, "error": str(e), "duration": elapsed_time}
193
 
194
+ async def execute_random_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
195
+ try:
196
+ self._download_all_video()
197
+ logger.info("\n🎡 STEP 1: Background Music")
198
+ self.data_holder.visual_assets["background_music_url"] = self.asset_selector.select_background_music()
199
+ await self._download_to_local(
200
+ self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
201
+ )
202
+
203
+ # Get music duration
204
+ # from moviepy.editor import AudioFileClip
205
+ # music_clip = AudioFileClip(self.data_holder.visual_assets["background_music_local"])
206
+ music_duration = 15
207
+ # music_clip.close()
208
+
209
+ beat_times = self.get_beat_times(self.data_holder.visual_assets["background_music_local"])
210
+
211
+ # Filter beats to be at least 1 second apart
212
+ filtered_beat_times = self._filter_beats_by_min_interval(beat_times, min_interval=1.0)
213
+
214
+ logger.info(f"Original beats: {len(beat_times)}, Filtered beats: {len(filtered_beat_times)}")
215
+ logger.info(f"Music duration: {music_duration:.2f}s")
216
+ logger.info(f"Filtered beat times: {filtered_beat_times}")
217
+
218
+ # Select enough videos (one for each beat interval + intro/outro)
219
+ num_videos_needed = len(filtered_beat_times) + 2
220
+ self.data_holder.visual_assets["selected_videos"] = self.asset_selector.select_random_videos(num_videos_needed)
221
+ logger.info(self.data_holder.visual_assets["selected_videos"])
222
+
223
+ for v in self.data_holder.visual_assets["selected_videos"]: utils.resize_video(v, overwrite=True)
224
+
225
+ # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
226
+ video_no_audio_path = await self.video_renderer.render_random_video(
227
+ filtered_beat_times, # <-- Pass the beat times, not intervals
228
+ music_duration
229
+ )
230
+
231
+ # STEP 7: Add audio to video
232
+ logger.info("\nπŸ”Š STEP 2: Add Audio to Video")
233
+ final_video_path = await self.video_renderer.add_audio_to_video(video_no_audio_path)
234
+
235
+ # final_video_path = loudness_normalize.normalize_loudness(final_video_path)
236
+
237
+ # STEP 8: Upload to cloud storage
238
+ final_url = None
239
+ if os.getenv("DO_NOT_PUBLISH", "false").lower() != "true":
240
+ logger.info("\n☁️ STEP 9: Cloud Storage Upload")
241
+ final_url = await self.api_clients.store_in_gcs(final_video_path, "video")
242
+ await self.api_clients.upload_to_temp_gcs(final_video_path, "video")
243
+
244
+ # Pipeline completion
245
+ elapsed_time = time.time() - self.pipeline_start_time
246
+ logger.info(f"\nβœ… Enhanced pipeline completed in {elapsed_time:.2f}s")
247
+
248
+ return {
249
+ "success": True,
250
+ "final_url": final_url,
251
+ "tts_script": self.data_holder.tts_script,
252
+ "local_path": final_video_path,
253
+ "duration": elapsed_time,
254
+ "voice_used": self.data_holder.selected_voice,
255
+ "assets_metadata": {
256
+ "hook_video": self.data_holder.visual_assets.get("hook_video", {}).get("task_id"),
257
+ "selected_videos_count": len(self.data_holder.visual_assets.get("selected_videos", [])),
258
+ "natural_speed": True, # Indicate no slow-motion
259
+ },
260
+ }
261
+
262
+ except Exception as e:
263
+ elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
264
+ logger.error(f"\n❌ Pipeline failed after {elapsed_time:.2f}s: {e}")
265
+ import traceback
266
+
267
+ logger.error(traceback.format_exc())
268
+ return {"success": False, "error": str(e), "duration": elapsed_time}
269
+
270
  async def create_audio(self):
271
  try_again = False
272
  tts_audio, timed_words = await self.process_audio()
 
597
 
598
  logger.error(f"πŸ“‹ Debug: {traceback.format_exc()}")
599
  return False
600
+
601
+ def get_beat_times(self, audio_path: str) -> List[float]:
602
+ import librosa
603
+
604
+ y, sr = librosa.load(audio_path)
605
+
606
+ tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
607
+
608
+ beat_times = librosa.frames_to_time(beat_frames, sr=sr)
609
+
610
+ logger.info(f"Tempo: {tempo} BPM")
611
+
612
+ logger.info(f"Beat times (seconds): {beat_times}")
613
+
614
+ return beat_times.tolist()
615
+
616
+ def _filter_beats_by_min_interval(self, beat_times: np.ndarray, min_interval: float = 1.0) -> np.ndarray:
617
+ """
618
+ Filter beat times to ensure minimum interval between beats.
619
+
620
+ Args:
621
+ beat_times: Array of beat timestamps in seconds
622
+ min_interval: Minimum time interval between beats (default 1.0 second)
623
+
624
+ Returns:
625
+ Filtered array of beat times
626
+ """
627
+ if len(beat_times) == 0:
628
+ return beat_times
629
+
630
+ filtered = [beat_times[0]] # Always keep the first beat
631
+
632
+ for beat in beat_times[1:]:
633
+ if beat - filtered[-1] >= min_interval:
634
+ filtered.append(beat)
635
+
636
+ return np.array(filtered)
src/gemini_sdk.py CHANGED
@@ -75,7 +75,7 @@ def generate_video(prompt: str, output_path: str, image: str = None) -> str | No
75
  return None
76
  generated_video = operation.response.generated_videos[0]
77
  client.files.download(file=generated_video.video)
78
- unique_id = uuid.uuid4().hex[:8]
79
  generated_video.video.save(output_path)
80
  utils.remove_black_padding(output_path, overwrite=True)
81
  utils.resize_video(output_path, overwrite=True)
 
75
  return None
76
  generated_video = operation.response.generated_videos[0]
77
  client.files.download(file=generated_video.video)
78
+
79
  generated_video.video.save(output_path)
80
  utils.remove_black_padding(output_path, overwrite=True)
81
  utils.resize_video(output_path, overwrite=True)
src/process_csv.py CHANGED
@@ -14,6 +14,7 @@ from data_holder import DataHolder
14
  from asset_selector import AssetSelector
15
  import argparse
16
  import random
 
17
 
18
  DATA_DIR = Path("data")
19
  ALL_VIDEO_FILE_INFO = None
@@ -21,6 +22,8 @@ ALL_VIDEO_FILE_INFO = None
21
  def get_progress_file(job_index=None):
22
  """Get the appropriate progress file for this job."""
23
  if job_index is not None:
 
 
24
  return DATA_DIR / f"executed_lines_job{job_index}.txt"
25
  return DATA_DIR / "executed_lines.txt"
26
 
@@ -259,6 +262,36 @@ async def process_all_csvs(config, commit=False, job_index=None, total_jobs=None
259
 
260
  logger.info(f"🏁 Job {job_index} finished: {success_count}/{processed_count} successful")
261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  async def main():
264
  """Parse command-line arguments."""
@@ -319,6 +352,9 @@ Examples:
319
  os.environ.pop("VERTEX_AI_CREDENTIALS_JSON", None)
320
 
321
  await download_all_video(config)
 
 
 
322
  await process_all_csvs(config, commit=args.commit, job_index=job_index, total_jobs=total_jobs)
323
 
324
 
 
14
  from asset_selector import AssetSelector
15
  import argparse
16
  import random
17
+ import uuid
18
 
19
  DATA_DIR = Path("data")
20
  ALL_VIDEO_FILE_INFO = None
 
22
  def get_progress_file(job_index=None):
23
  """Get the appropriate progress file for this job."""
24
  if job_index is not None:
25
+ if job_index == "create_plain_videos":
26
+ return DATA_DIR / "create_plain_videos_executed_count.txt"
27
  return DATA_DIR / f"executed_lines_job{job_index}.txt"
28
  return DATA_DIR / "executed_lines.txt"
29
 
 
262
 
263
  logger.info(f"🏁 Job {job_index} finished: {success_count}/{processed_count} successful")
264
 
265
+ async def create_plain_videos(config, commit=False):
266
+ """Create N random videos for testing purposes."""
267
+ n = int(os.getenv("PLAIN_VIDEO_COUNT", 100))
268
+ logger.info(f"Creating {n} random videos for testing...")
269
+ progress_file = get_progress_file("create_plain_videos")
270
+ skip_upto = -1
271
+ if progress_file.exists():
272
+ with progress_file.open("r") as pf:
273
+ try: skip_upto = int(pf.read().strip())
274
+ except (ValueError, OSError): skip_upto = -1
275
+
276
+ logger.info(f"Skipping first {skip_upto} videos already created.")
277
+ for i in range(n):
278
+ if i <= skip_upto:
279
+ continue
280
+ row = {
281
+ "TTS Script (AI Avatar)": uuid.uuid4().hex[:8],
282
+ }
283
+ config["current_audio_index"] = i
284
+ result = await process_row(row, config)
285
+ # Mark as executed
286
+ if commit and result.get("success", False):
287
+ with progress_file.open("w") as pf:
288
+ pf.write(str(i))
289
+ git_commit_progress("create_plain_videos", commit)
290
+
291
+ if os.getenv("DO_NOT_PUBLISH", "false").lower() == "true":
292
+ break
293
+
294
+ logger.info(f"Finished creating {n} test videos.")
295
 
296
  async def main():
297
  """Parse command-line arguments."""
 
352
  os.environ.pop("VERTEX_AI_CREDENTIALS_JSON", None)
353
 
354
  await download_all_video(config)
355
+ if os.getenv("ONLY_RANDOM_VIDEOS", "false").lower() == "true":
356
+ await create_plain_videos(config, commit=args.commit)
357
+
358
  await process_all_csvs(config, commit=args.commit, job_index=job_index, total_jobs=total_jobs)
359
 
360
 
src/video_renderer.py CHANGED
@@ -967,6 +967,7 @@ class VideoRenderer:
967
  return video_clip
968
 
969
  mixed_audio = CompositeAudioClip(valid_audio_clips)
 
970
  video_with_audio = video_clip.set_audio(mixed_audio)
971
 
972
  logger.info(f"βœ… Added audio track")
@@ -1069,6 +1070,85 @@ class VideoRenderer:
1069
 
1070
  return self.data_holder.current_caption_style
1071
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1072
 
1073
  def _validate_assets_for_video_only(self) -> bool:
1074
  """Validate assets for video-only rendering"""
 
967
  return video_clip
968
 
969
  mixed_audio = CompositeAudioClip(valid_audio_clips)
970
+ mixed_audio = mixed_audio.subclip(0, min(video_clip.duration, mixed_audio.duration))
971
  video_with_audio = video_clip.set_audio(mixed_audio)
972
 
973
  logger.info(f"βœ… Added audio track")
 
1070
 
1071
  return self.data_holder.current_caption_style
1072
 
1073
+ async def render_random_video(self, beat_times, music_duration):
1074
+ """
1075
+ Render video that syncs perfectly with music beats.
1076
+
1077
+ Args:
1078
+ beat_times: Array of beat timestamps (NOT intervals)
1079
+ music_duration: Total duration of the background music
1080
+ """
1081
+ clips = []
1082
+
1083
+ if len(beat_times) == 0:
1084
+ raise ValueError("No beat times detected")
1085
+
1086
+ logger.info(f"Creating video synced to {len(beat_times)} beats")
1087
+ logger.info(f"Beat times: {beat_times}")
1088
+
1089
+ # Handle the segment BEFORE the first beat (if any)
1090
+ if beat_times[0] > 0.1: # If first beat doesn't start immediately
1091
+ first_video = self.data_holder.visual_assets["selected_videos"][0]
1092
+ clip = VideoFileClip(first_video)
1093
+ intro_duration = min(beat_times[0], clip.duration)
1094
+ first_clip = clip.subclip(0, intro_duration)
1095
+ clips.append(first_clip)
1096
+ logger.info(f"Intro clip: 0.00s to {beat_times[0]:.2f}s (duration: {intro_duration:.2f}s)")
1097
+ video_start_idx = 1
1098
+ else:
1099
+ video_start_idx = 0
1100
+
1101
+ # Create clips for each beat interval
1102
+ for i in range(len(beat_times) - 1):
1103
+ video_idx = video_start_idx + i
1104
+ if video_idx >= len(self.data_holder.visual_assets["selected_videos"]):
1105
+ break
1106
+
1107
+ video_path = self.data_holder.visual_assets["selected_videos"][video_idx]
1108
+
1109
+ # Duration = time until next beat
1110
+ duration = beat_times[i + 1] - beat_times[i]
1111
+
1112
+ try:
1113
+ clip = VideoFileClip(video_path)
1114
+ trim_duration = min(duration, clip.duration)
1115
+
1116
+ trimmed_clip = clip.subclip(0, trim_duration)
1117
+ clips.append(trimmed_clip)
1118
+
1119
+ logger.info(f"Clip {i+1}: from {beat_times[i]:.2f}s to {beat_times[i+1]:.2f}s (duration: {duration:.2f}s)")
1120
+
1121
+ except Exception as e:
1122
+ logger.error(f"Error processing video {video_idx}: {e}")
1123
+ continue
1124
+
1125
+ # Handle the last segment (from last beat to end of music)
1126
+ last_duration = music_duration - beat_times[-1]
1127
+ if last_duration > 0.5: # If there's significant time left
1128
+ video_idx = video_start_idx + len(beat_times) - 1
1129
+ if video_idx < len(self.data_holder.visual_assets["selected_videos"]):
1130
+ video_path = self.data_holder.visual_assets["selected_videos"][video_idx]
1131
+ try:
1132
+ clip = VideoFileClip(video_path)
1133
+ final_clip = clip.subclip(0, min(last_duration, clip.duration))
1134
+ clips.append(final_clip)
1135
+ logger.info(f"Outro clip: from {beat_times[-1]:.2f}s to {music_duration:.2f}s (duration: {last_duration:.2f}s)")
1136
+ except Exception as e:
1137
+ logger.error(f"Error processing final video: {e}")
1138
+
1139
+ if not clips:
1140
+ raise ValueError("No valid video clips created")
1141
+
1142
+ final_video = concatenate_videoclips(clips, method="compose")
1143
+ final_video = final_video.without_audio()
1144
+
1145
+ # Ensure final video matches music duration
1146
+ if final_video.duration < music_duration:
1147
+ # Pad with black frame if needed
1148
+ logger.warning(f"Video duration {final_video.duration:.2f}s < music duration {music_duration:.2f}s")
1149
+
1150
+ final_video = final_video.subclip(0, music_duration)
1151
+ return await self._render_video_only(final_video)
1152
 
1153
  def _validate_assets_for_video_only(self) -> bool:
1154
  """Validate assets for video-only rendering"""