Spaces:

Elvoro
/

Tools

Running

App Files Community

jebin2 committed on Jan 9

Commit

e7fbeb6

1 Parent(s): c20adb3

integrate gsheet and send setup

Browse files

Files changed (17) hide show

.gitignore +2 -1
src/api_clients.py +1 -1
src/asset_selector.py +52 -14
src/automation.py +48 -16
src/file_downloader.py +106 -24
src/google_sheet_reader.py +0 -44
src/instagram_publisher.py +1 -1
src/load_config.py +214 -0
src/main.py +1 -88
src/onscreebcta.py +1 -1
src/process_csv.py +1 -1
src/publisher.py +2 -1
src/text_clip.py +1 -1
src/tiktok_publisher.py +1 -1
src/utils.py +54 -2
src/video_renderer.py +4 -7
src/youtube_publisher.py +1 -1

.gitignore CHANGED Viewed

@@ -43,4 +43,5 @@ whoa/
 src/temp*.py
 src/temp*.md
 testData/infloxa*
-testData/output/

 src/temp*.py
 src/temp*.md
 testData/infloxa*
+testData/output/
+testData/ref/

src/api_clients.py CHANGED Viewed

@@ -96,7 +96,7 @@ class APIClients:
         # Track current voice index for sequential selection
         self.current_voice_indices = {category: 0 for category in self.voice_profiles.keys()}
         self.file_names = None
-        self.init_temp_gcs()
     async def get_from_cache(self, method_type, duration=0):
         try:

         # Track current voice index for sequential selection
         self.current_voice_indices = {category: 0 for category in self.voice_profiles.keys()}
         self.file_names = None
+        # self.init_temp_gcs()
     async def get_from_cache(self, method_type, duration=0):
         try:

src/asset_selector.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import pandas as pd
-import aiohttp
 import json
 from typing import List, Dict, Optional, Tuple
 from utils import logger
@@ -99,26 +99,64 @@ class AssetSelector:
             audios = ["testData/infloxa/audiopulse.mp3"]
         return audios
-    def audio_beats_map(self, audio_path: str) -> Optional[List[float]]:
-        """Load or compute audio beats map from local file"""
         try:
-            audio_map = {
-                "testData/infloxa/audiopulse.mp3": [1.01, 1.17, 2.24, 4.06, 5.14, 6.21, 8.03, 9.11],
-            }
-            if audio_path in audio_map:
-                return audio_map[audio_path]
-        except Exception as e:
-            logger.error(f"Failed to compute audio beats map for {audio_path}: {e}")
-        return None
     def _load_audio_library_from_gsheet(self) -> pd.DataFrame:
         """Load audio library from Google Sheet (if needed)"""
         try:
             googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("AUDIO_LIBRARY_GSHEET_WORKSHEET"))
             audio_df = googleSheetReader.get_filtered_dataframe()
-            return googleSheetReader.clean_and_drop_empty(audio_df, "AUDIO_LINK")
         except Exception as e:
             logger.error(f"Failed to load audio library from Google Sheet: {e}")
             return pd.DataFrame()
@@ -128,7 +166,7 @@ class AssetSelector:
         try:
             googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("VIDEO_LIBRARY_GSHEET_WORKSHEET"))
             video_df = googleSheetReader.get_filtered_dataframe()
-            return googleSheetReader.clean_and_drop_empty(video_df, "VIDEO_LINK")
         except Exception as e:
             logger.error(f"Failed to load video library from Google Sheet: {e}")
             return pd.DataFrame()
@@ -311,7 +349,7 @@ Video Options: {video_context}
         Select background music SEQUENTIALLY (not random)
         Each call increments the index to ensure different music for each video
         """
-        if not self.audio_library:
             logger.error("❌ Audio library is empty")
             return ""

 import pandas as pd
+import utils
 import json
 from typing import List, Dict, Optional, Tuple
 from utils import logger
             audios = ["testData/infloxa/audiopulse.mp3"]
         return audios
+    def get_audio_beats(self, audio_link: str) -> Optional[List[float]]:
+        """
+        Load audio beats timing from audio_library and convert
+        SS:FF (25 FPS) → seconds (float)
+        Example:
+            "01:12" → 1 + 12/25 = 1.48
+        """
         try:
+            if self.audio_library.empty:
+                logger.error("Audio library is empty")
+                return None
+            # Find matching row
+            row = self.audio_library.loc[
+                self.audio_library["AUDIO_LINK"] == audio_link
+            ]
+            if row.empty:
+                logger.error(f"No audio entry found for: {audio_link}")
+                return None
+            beats_raw = row.iloc[0]["Beats Timing(SS:FF) AT 25FPS"]
+            if pd.isna(beats_raw) or not str(beats_raw).strip():
+                logger.warning(f"No beat data for audio: {audio_link}")
+                return None
+            beats: List[float] = []
+            for token in str(beats_raw).split(","):
+                token = token.strip()
+                if ":" not in token:
+                    continue
+                sec, frame = token.split(":", 1)
+                beats.append(
+                    round(int(sec) + (int(frame) / 25.0), 2)
+                )
+            return beats if beats else None
+        except Exception as e:
+            logger.error(
+                f"Failed to compute audio beats map for {audio_link}: {e}"
+            )
+            return None
     def _load_audio_library_from_gsheet(self) -> pd.DataFrame:
         """Load audio library from Google Sheet (if needed)"""
         try:
             googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("AUDIO_LIBRARY_GSHEET_WORKSHEET"))
             audio_df = googleSheetReader.get_filtered_dataframe()
+            if os.getenv("HARD_CUT_RANDOM_VIDEOS", "false").lower() == "false":
+                audio_df = utils.clean_and_drop_empty(audio_df, "Beats Timing(SS:FF) AT 25FPS")
+            return utils.clean_and_drop_empty(audio_df, "AUDIO_LINK")
         except Exception as e:
             logger.error(f"Failed to load audio library from Google Sheet: {e}")
             return pd.DataFrame()
         try:
             googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("VIDEO_LIBRARY_GSHEET_WORKSHEET"))
             video_df = googleSheetReader.get_filtered_dataframe()
+            return utils.clean_and_drop_empty(video_df, "VIDEO_LINK")
         except Exception as e:
             logger.error(f"Failed to load video library from Google Sheet: {e}")
             return pd.DataFrame()
         Select background music SEQUENTIALLY (not random)
         Each call increments the index to ensure different music for each video
         """
+        if self.audio_library.empty:
             logger.error("❌ Audio library is empty")
             return ""

src/automation.py CHANGED Viewed

@@ -21,9 +21,9 @@ import hashlib
 from onscreebcta import add_cta
 import numpy as np
 from moviepy.editor import VideoFileClip, concatenate_videoclips
-import librosa
 import numpy as np
-from scipy import signal
 class ContentAutomation:
     def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
@@ -32,6 +32,7 @@ class ContentAutomation:
         self.api_clients = APIClients(config, self.data_holder)
         self.video_renderer = VideoRenderer(config, self.data_holder)
         self.asset_selector = AssetSelector(config, self.data_holder)
         self.pipeline_start_time = None
     async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
@@ -210,8 +211,13 @@ class ContentAutomation:
                     music_duration = audio_clip.duration - 0.5
-                if self.asset_selector.audio_beats_map().get(self.data_holder.visual_assets.get("background_music_url", ""), None):
-                    beat_times = self.asset_selector.audio_beats_map()[self.data_holder.visual_assets.get("background_music_url", "")]
                     method_used = "cached"
                     logger.info("Using cached beat times.")
                     break
@@ -225,7 +231,9 @@ class ContentAutomation:
                 if beat_times is None:
                     logger.warning("No beats detected, trying alternative method...")
                     try_next = True
             logger.info(f"Using '{method_used}' method: {len(beat_times)} beats detected")
             logger.info(f"Music duration: {music_duration:.2f}s")
             logger.info(f"Beat times: {beat_times}")
@@ -255,7 +263,8 @@ class ContentAutomation:
                 # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
                 video_no_audio_path = await self.video_renderer.render_random_video(
                     beat_times,
-                    music_duration
                 )
             if os.getenv("USE_1X1_RATIO", "false").lower() == "true":
@@ -322,6 +331,7 @@ class ContentAutomation:
         logger.info("\n🎵 STEP 1: Background Music")
         if try_next:
             self.asset_selector.inc_audio_index()
         self.data_holder.visual_assets["background_music_url"] = self.asset_selector.select_background_music()
         await self._download_to_local(
             self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
@@ -379,23 +389,25 @@ class ContentAutomation:
             return
         if os.getenv("INFLOXA", "false").lower() == "true":
-            from video_downloader import VideoDownloader
             download_path="testData/infloxa"
             Path(download_path).mkdir(parents=True, exist_ok=True)
             allowed_videos = []
-            self.data_holder.visual_assets["all_videos"] = [
                 {
-                    "url": row.get("VIDEO_FILENAME", "").strip(),
-                    "local_path": VideoDownloader().download_video(
-                        video_filename=row.get("VIDEO_FILENAME", "").strip(),
-                        download_path=download_path
-                    )
                 }
                 for _, row in self.asset_selector.video_library.iterrows()
-                if row.get("VIDEO_FILENAME", "").strip() in allowed_videos
             ]
         else:
             self.data_holder.visual_assets["all_videos"] = [
                 {"url": row.get("Video URL (No Audio)", "").strip()}
@@ -609,6 +621,26 @@ class ContentAutomation:
         return health_status
     async def simple_demo(self):
         """Simple demo with proper audio handling"""
         logger.info("🎬 Starting Simple Demo with Audio Fix...")
@@ -618,13 +650,13 @@ class ContentAutomation:
             # Create simple color videos
             clip1 = ColorClip(size=(640, 480), color=(255, 0, 0), duration=2)
-            clip1 = clip1.set_fps(24)
             clip1_path = "/tmp/simple_red.mp4"
             clip1.write_videofile(clip1_path, verbose=False, logger=None)
             clip1.close()
             clip2 = ColorClip(size=(640, 480), color=(0, 255, 0), duration=2)
-            clip2 = clip2.set_fps(24)
             clip2_path = "/tmp/simple_green.mp4"
             clip2.write_videofile(clip2_path, verbose=False, logger=None)
             clip2.close()

 from onscreebcta import add_cta
 import numpy as np
 from moviepy.editor import VideoFileClip, concatenate_videoclips
+import math
 import numpy as np
+from file_downloader import FileDownloader
 class ContentAutomation:
     def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
         self.api_clients = APIClients(config, self.data_holder)
         self.video_renderer = VideoRenderer(config, self.data_holder)
         self.asset_selector = AssetSelector(config, self.data_holder)
+        self.file_downloader = FileDownloader()
         self.pipeline_start_time = None
     async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
                     music_duration = audio_clip.duration - 0.5
+                beat_times = self.asset_selector.get_audio_beats(self.data_holder.visual_assets["background_music_url"])
+                if beat_times:
+                    beat_times = self.extend_beats_to_audio_end(
+                        beat_times,
+                        self.data_holder.visual_assets["background_music_local"],
+                        fps=25
+                    )
                     method_used = "cached"
                     logger.info("Using cached beat times.")
                     break
                 if beat_times is None:
                     logger.warning("No beats detected, trying alternative method...")
                     try_next = True
+            music_duration = music_duration if music_duration < beat_times[-1] else beat_times[-1]
             logger.info(f"Using '{method_used}' method: {len(beat_times)} beats detected")
             logger.info(f"Music duration: {music_duration:.2f}s")
             logger.info(f"Beat times: {beat_times}")
                 # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
                 video_no_audio_path = await self.video_renderer.render_random_video(
                     beat_times,
+                    music_duration,
+                    min_clip_duration=0
                 )
             if os.getenv("USE_1X1_RATIO", "false").lower() == "true":
         logger.info("\n🎵 STEP 1: Background Music")
         if try_next:
             self.asset_selector.inc_audio_index()
         self.data_holder.visual_assets["background_music_url"] = self.asset_selector.select_background_music()
         await self._download_to_local(
             self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
             return
         if os.getenv("INFLOXA", "false").lower() == "true":
             download_path="testData/infloxa"
             Path(download_path).mkdir(parents=True, exist_ok=True)
             allowed_videos = []
+            videos = [
                 {
+                    "url": url,
+                    "local_path": str(local_path),
                 }
                 for _, row in self.asset_selector.video_library.iterrows()
+                if (
+                    (url := str(row.get("VIDEO_LINK", "")).strip())
+                    and (local_path := self.file_downloader.safe_download(url=url))
+                    and utils.is_valid_video(local_path)
+                )
             ]
+            self.data_holder.visual_assets["all_videos"] = videos
         else:
             self.data_holder.visual_assets["all_videos"] = [
                 {"url": row.get("Video URL (No Audio)", "").strip()}
         return health_status
+    def extend_beats_to_audio_end(
+        self,
+        beats: List[float],
+        audio_path: str,
+        fps: int = 25
+    ) -> List[float]:
+        if not beats:
+            return beats
+        with AudioFileClip(audio_path) as audio:
+            duration = audio.duration
+        frame_duration = math.floor(duration * fps) / fps
+        if beats[-1] < frame_duration:
+            return beats + [frame_duration]
+        return beats
     async def simple_demo(self):
         """Simple demo with proper audio handling"""
         logger.info("🎬 Starting Simple Demo with Audio Fix...")
             # Create simple color videos
             clip1 = ColorClip(size=(640, 480), color=(255, 0, 0), duration=2)
+            clip1 = clip1.set_fps(25)
             clip1_path = "/tmp/simple_red.mp4"
             clip1.write_videofile(clip1_path, verbose=False, logger=None)
             clip1.close()
             clip2 = ColorClip(size=(640, 480), color=(0, 255, 0), duration=2)
+            clip2 = clip2.set_fps(25)
             clip2_path = "/tmp/simple_green.mp4"
             clip2.write_videofile(clip2_path, verbose=False, logger=None)
             clip2.close()

src/file_downloader.py CHANGED Viewed

@@ -25,13 +25,16 @@ class FileDownloader:
         "https://www.googleapis.com/auth/drive.file",
     ]
-    def __init__(self):
         logger.info("Initializing FileDownloader")
         # -------- Temp directory handling --------
         self.temp_dir = self._init_temp_dir()
         logger.info("Using temp directory: %s", self.temp_dir)
         # Lazy initialization for clients
         self._drive_service = None
         self._storage_client = None
@@ -48,6 +51,8 @@ class FileDownloader:
             if not base_dir:
                 raise RuntimeError("TEST_DATA_DIRECTORY must be set when TEST_AUTOMATION=true")
             path = Path(base_dir) / "downloads"
             path.mkdir(parents=True, exist_ok=True)
             return path
@@ -150,39 +155,102 @@ class FileDownloader:
         Returns:
             Path to the downloaded file
         """
         logger.info("Downloading from Google Drive | file_id=%s", file_id)
         service = self._get_drive_service()
-        # Get file metadata to determine filename
         if filename is None and output_path is None:
             try:
-                file_metadata = service.files().get(fileId=file_id, fields="name").execute()
-                filename = file_metadata.get("name", f"drive_file_{file_id}")
             except Exception as e:
-                logger.warning("Could not fetch file metadata: %s", e)
                 filename = f"drive_file_{file_id}"
         # Determine output path
         if output_path is None:
             output_path = self.temp_dir / filename
         output_path.parent.mkdir(parents=True, exist_ok=True)
         # Download file
-        request = service.files().get_media(fileId=file_id)
-        with output_path.open("wb") as fh:
-            downloader = MediaIoBaseDownload(fh, request)
-            done = False
-            while not done:
-                status, done = downloader.next_chunk()
-                if status:
-                    logger.debug("Download progress: %d%%", int(status.progress() * 100))
-        logger.info("Downloaded from Drive to %s", output_path)
-        return output_path
     def download_from_gcs(
         self,
@@ -210,6 +278,11 @@ class FileDownloader:
             filename = Path(blob_name).name
             output_path = self.temp_dir / filename
         output_path.parent.mkdir(parents=True, exist_ok=True)
         if public:
@@ -233,6 +306,17 @@ class FileDownloader:
         logger.info("Downloaded from GCS to %s", output_path)
         return output_path
     def download(
         self,
         url: str,
@@ -333,6 +417,7 @@ def main():
     Examples controlled via env variables:
     - DOWNLOAD_URL: Single file to download
     - DOWNLOAD_URLS: Comma-separated list of URLs to download
     """
     try:
@@ -344,15 +429,13 @@ def main():
     downloader = FileDownloader()
     # ------------------ EXAMPLE 1: SINGLE FILE DOWNLOAD ------------------
-    download_url = "https://storage.googleapis.com/somira/audiopulse2.mp3"
     if download_url:
         logger.info("Downloading single file")
-        output_path = os.getenv("OUTPUT_PATH")
         try:
             downloaded_file = downloader.download(
-                url=download_url,
-                output_path=output_path,
             )
             logger.info("File downloaded to: %s", downloaded_file)
         except Exception as e:
@@ -364,12 +447,10 @@ def main():
     if download_urls:
         logger.info("Downloading multiple files")
         urls = [url.strip() for url in download_urls.split(",")]
-        output_dir = os.getenv("OUTPUT_DIR")
         try:
             downloaded_files = downloader.download_multiple(
-                urls=urls,
-                output_dir=output_dir,
             )
             logger.info("Downloaded %d files:", len(downloaded_files))
             for path in downloaded_files:
@@ -385,6 +466,7 @@ def main():
         logger.info("  DOWNLOAD_URL='https://drive.google.com/...' python file_downloader.py")
         logger.info("  DOWNLOAD_URL='gs://bucket/path/file' python file_downloader.py")
         logger.info("  DOWNLOAD_URLS='url1,url2,url3' python file_downloader.py")
 if __name__ == "__main__":

         "https://www.googleapis.com/auth/drive.file",
     ]
+    def __init__(self, skip_existing: bool = True):
         logger.info("Initializing FileDownloader")
         # -------- Temp directory handling --------
         self.temp_dir = self._init_temp_dir()
         logger.info("Using temp directory: %s", self.temp_dir)
+        # Control whether to skip existing files
+        self.skip_existing = skip_existing
         # Lazy initialization for clients
         self._drive_service = None
         self._storage_client = None
             if not base_dir:
                 raise RuntimeError("TEST_DATA_DIRECTORY must be set when TEST_AUTOMATION=true")
+            Path(base_dir).mkdir(parents=True, exist_ok=True)
             path = Path(base_dir) / "downloads"
             path.mkdir(parents=True, exist_ok=True)
             return path
         Returns:
             Path to the downloaded file
+        Raises:
+            Exception: If file cannot be accessed or downloaded
         """
         logger.info("Downloading from Google Drive | file_id=%s", file_id)
         service = self._get_drive_service()
+        # Get file metadata to determine filename with extension
+        metadata_error = None
         if filename is None and output_path is None:
             try:
+                file_metadata = service.files().get(fileId=file_id, fields="name,mimeType,fileExtension").execute()
+                # Use the original filename from Drive
+                filename = file_metadata.get("name")
+                logger.info("Retrieved filename from Drive: %s", filename)
+                # If no name, construct one with proper extension
+                if not filename:
+                    file_extension = file_metadata.get("fileExtension", "")
+                    mime_type = file_metadata.get("mimeType", "")
+                    logger.info("No filename found, mimeType: %s, fileExtension: %s", mime_type, file_extension)
+                    # Map common MIME types to extensions if fileExtension not available
+                    mime_to_ext = {
+                        "application/pdf": "pdf",
+                        "image/jpeg": "jpg",
+                        "image/png": "png",
+                        "image/gif": "gif",
+                        "text/plain": "txt",
+                        "application/json": "json",
+                        "text/csv": "csv",
+                        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
+                        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
+                        "application/zip": "zip",
+                    }
+                    if not file_extension and mime_type in mime_to_ext:
+                        file_extension = mime_to_ext[mime_type]
+                    filename = f"drive_file_{file_id}"
+                    if file_extension:
+                        filename = f"{filename}.{file_extension}"
             except Exception as e:
+                logger.error("Could not fetch file metadata: %s", e)
+                metadata_error = e
+                # Check if it's a 404 or permission error - these are fatal
+                if hasattr(e, 'resp') and hasattr(e.resp, 'status'):
+                    if e.resp.status in [403, 404]:
+                        raise Exception(f"Cannot access file {file_id}: {str(e)}") from e
                 filename = f"drive_file_{file_id}"
+        # If still no filename, use default
+        if filename is None:
+            filename = f"drive_file_{file_id}"
         # Determine output path
         if output_path is None:
             output_path = self.temp_dir / filename
+        logger.info("Final output path: %s", output_path)
+        # Check if file already exists
+        if self.skip_existing and output_path.exists():
+            logger.info("File already exists, skipping download: %s", output_path)
+            return output_path
         output_path.parent.mkdir(parents=True, exist_ok=True)
         # Download file
+        try:
+            request = service.files().get_media(fileId=file_id)
+            with output_path.open("wb") as fh:
+                downloader = MediaIoBaseDownload(fh, request)
+                done = False
+                while not done:
+                    status, done = downloader.next_chunk()
+                    if status:
+                        logger.debug("Download progress: %d%%", int(status.progress() * 100))
+            logger.info("Downloaded from Drive to %s", output_path)
+            return output_path
+        except Exception as e:
+            # Clean up failed download
+            if output_path.exists():
+                output_path.unlink()
+                logger.info("Cleaned up failed download: %s", output_path)
+            error_msg = f"Failed to download file {file_id}: {str(e)}"
+            logger.error(error_msg)
+            raise Exception(error_msg) from e
     def download_from_gcs(
         self,
             filename = Path(blob_name).name
             output_path = self.temp_dir / filename
+        # Check if file already exists
+        if self.skip_existing and output_path.exists():
+            logger.info("File already exists, skipping download: %s", output_path)
+            return output_path
         output_path.parent.mkdir(parents=True, exist_ok=True)
         if public:
         logger.info("Downloaded from GCS to %s", output_path)
         return output_path
+    def safe_download(self, url: str, output_path: Path | None = None) -> Path | None:
+        """
+        Safe download wrapper to handle exceptions.
+        Returns None if download fails.
+        """
+        try:
+            return self.download(url, output_path=output_path)
+        except Exception as e:
+            logger.error("Download failed for %s: %s", url, e)
+            return None
     def download(
         self,
         url: str,
     Examples controlled via env variables:
     - DOWNLOAD_URL: Single file to download
     - DOWNLOAD_URLS: Comma-separated list of URLs to download
+    - SKIP_EXISTING: Set to 'false' to force re-download (default: 'true')
     """
     try:
     downloader = FileDownloader()
     # ------------------ EXAMPLE 1: SINGLE FILE DOWNLOAD ------------------
+    download_url = "https://drive.google.com/file/d/1jXqLjEDrFzR9858po7BenqKdx3cIm-4Q/view"
     if download_url:
         logger.info("Downloading single file")
         try:
             downloaded_file = downloader.download(
+                url=download_url
             )
             logger.info("File downloaded to: %s", downloaded_file)
         except Exception as e:
     if download_urls:
         logger.info("Downloading multiple files")
         urls = [url.strip() for url in download_urls.split(",")]
         try:
             downloaded_files = downloader.download_multiple(
+                urls=urls
             )
             logger.info("Downloaded %d files:", len(downloaded_files))
             for path in downloaded_files:
         logger.info("  DOWNLOAD_URL='https://drive.google.com/...' python file_downloader.py")
         logger.info("  DOWNLOAD_URL='gs://bucket/path/file' python file_downloader.py")
         logger.info("  DOWNLOAD_URLS='url1,url2,url3' python file_downloader.py")
+        logger.info("  SKIP_EXISTING='false' python file_downloader.py  # Force re-download")
 if __name__ == "__main__":

src/google_sheet_reader.py CHANGED Viewed

@@ -3,7 +3,6 @@ import csv
 import tempfile
 from pathlib import Path
 import pandas as pd
-import numpy as np
 import gspread
 from google.auth import default
@@ -354,49 +353,6 @@ class GoogleSheetReader:
         logger.info("CSV export completed | rows=%d", len(rows) - 1)
         return output_path
-    def clean_and_drop_empty(
-        df: pd.DataFrame,
-        column: str,
-        extra_nulls: list[str] | None = None,
-    ) -> pd.DataFrame:
-        """
-        Normalize Google Sheets empty values and drop rows
-        where `column` is effectively empty.
-        Handles:
-        - NaN
-        - ""
-        - " "
-        - "nan", "None", "NULL", "N/A"
-        Args:
-            df: Input DataFrame
-            column: Column to validate (e.g. "VIDEO_LINK")
-            extra_nulls: Optional extra string values to treat as null
-        Returns:
-            Cleaned DataFrame with valid rows only
-        """
-        if column not in df.columns:
-            raise KeyError(f"Column '{column}' not found in DataFrame")
-        null_values = ["", "nan", "none", "null", "n/a"]
-        if extra_nulls:
-            null_values.extend([v.lower() for v in extra_nulls])
-        df = df.copy()
-        df[column] = (
-            df[column]
-            .astype(str)
-            .str.strip()
-            .str.lower()
-            .replace(null_values, np.nan)
-        )
-        return df.dropna(subset=[column])
 # ------------------ CLI entrypoint ------------------

 import tempfile
 from pathlib import Path
 import pandas as pd
 import gspread
 from google.auth import default
         logger.info("CSV export completed | rows=%d", len(rows) - 1)
         return output_path
 # ------------------ CLI entrypoint ------------------

src/instagram_publisher.py CHANGED Viewed

@@ -13,8 +13,8 @@ import pandas as pd
 from datetime import datetime
 from dotenv import load_dotenv
 from pathlib import Path
 from main import (
-    load_configuration,
     load_content_strategies
 )
 from api_clients import APIClients

 from datetime import datetime
 from dotenv import load_dotenv
 from pathlib import Path
+from load_config import load_configuration
 from main import (
     load_content_strategies
 )
 from api_clients import APIClients

src/load_config.py ADDED Viewed

	@@ -0,0 +1,214 @@

+import os
+import json
+from pathlib import Path
+from typing import Dict
+from dotenv import load_dotenv
+from google.auth import default
+from utils import logger
+def load_configuration() -> Dict:
+    """
+    Load configuration from environment variables with validation.
+    The GCP project ID is resolved by trying, in order:
+    1. Service account JSON (file path or raw JSON string) taken from
+       GOOGLE_GHA_CREDS_PATH, CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE or
+       GOOGLE_APPLICATION_CREDENTIALS
+    2. Workload Identity Federation via google.auth.default(), attempted when
+       WORKLOAD_IDENTITY_PROVIDER and SERVICE_ACCOUNT_EMAIL are both set
+    3. Application Default Credentials (ADC) via google.auth.default()
+    4. Project environment variables (GOOGLE_CLOUD_PROJECT, GCP_PROJECT, ...)
+    5. `gcloud config get-value project`
+    Returns:
+        Dict with the keys gemini_api_key, runwayml_api_key, gcs_bucket_name,
+        gcp_project_id, default_voice and auth_method (the name of the step
+        that produced the project ID).
+    Raises:
+        ValueError: If gemini_api_key, runwayml_api_key, gcs_bucket_name or
+            gcp_project_id could not be resolved.
+    """
+    load_dotenv()
+    gcp_project_id = None
+    creds_data = None
+    auth_method = None
+    # Try multiple possible credential paths (CI/CD environments)
+    gcp_creds_path = (
+        os.getenv("GOOGLE_GHA_CREDS_PATH") or
+        os.getenv("CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE") or
+        os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+    )
+    # Method 1: Try to load from service account JSON file/string
+    if gcp_creds_path:
+        try:
+            # Guarantee MY_TEMP_GCS_BUCKET exists in os.environ (empty string when unset)
+            os.environ["MY_TEMP_GCS_BUCKET"] = os.getenv("MY_TEMP_GCS_BUCKET", "")
+            # os.path.isfile() returns False instead of raising when the value is
+            # not a usable path (e.g. a long raw JSON string), so the JSON-string
+            # fallback below stays reachable.
+            if os.path.isfile(gcp_creds_path):
+                logger.info(f"Loading GCP credentials from file: {gcp_creds_path}")
+                with open(gcp_creds_path, "r", encoding="utf-8") as f:
+                    creds_data = json.load(f)
+                creds_source = "service_account_file"
+            else:
+                # Try to parse as raw JSON string
+                logger.info("Attempting to parse GCP credentials as JSON string")
+                creds_data = json.loads(gcp_creds_path)
+                creds_source = "service_account_json"
+            if creds_data:
+                gcp_project_id = creds_data.get("project_id")
+            # Only report success (and record the auth method) when a project ID was actually found
+            if gcp_project_id:
+                auth_method = creds_source
+                logger.info(f"✓ GCP Project ID loaded from service account: {gcp_project_id}")
+            else:
+                logger.warning("GCP credentials were loaded but contain no project_id")
+        except json.JSONDecodeError as e:
+            logger.warning(f"Could not parse GCP credentials as JSON. Error: {e}")
+        except FileNotFoundError as e:
+            logger.warning(f"GCP credentials file not found: {e}")
+        except Exception as e:
+            logger.error(f"Unexpected error loading GCP credentials: {e}")
+    # Method 2: Check for Workload Identity Federation (GitHub Actions)
+    if not gcp_project_id:
+        wif_provider = os.getenv("WORKLOAD_IDENTITY_PROVIDER")
+        wif_service_account = os.getenv("SERVICE_ACCOUNT_EMAIL")
+        if wif_provider and wif_service_account:
+            try:
+                logger.info("Attempting to load project from Workload Identity Federation")
+                # WIF credentials are automatically handled by google.auth.default()
+                # when GOOGLE_APPLICATION_CREDENTIALS is not set
+                _, project = default()
+                if project:
+                    gcp_project_id = project
+                    auth_method = "workload_identity_federation"
+                    logger.info(f"✓ GCP Project ID loaded from WIF: {gcp_project_id}")
+                else:
+                    logger.debug("WIF credentials found but no project set")
+            except Exception as e:
+                logger.debug(f"Could not load from WIF: {e}")
+        else:
+            logger.debug("WIF environment variables not found")
+    # Method 3: Try to get project from Application Default Credentials (ADC)
+    if not gcp_project_id:
+        try:
+            logger.info("Attempting to load project from Application Default Credentials (ADC)")
+            _, project = default()
+            if project:
+                gcp_project_id = project
+                auth_method = "adc"
+                logger.info(f"✓ GCP Project ID loaded from ADC: {gcp_project_id}")
+            else:
+                logger.debug("ADC credentials found but no project set")
+        except Exception as e:
+            logger.debug(f"Could not load from ADC: {e}")
+    # Method 4: Try environment variables
+    if not gcp_project_id:
+        gcp_project_id = (
+            os.getenv("GOOGLE_CLOUD_PROJECT") or
+            os.getenv("GCP_PROJECT") or
+            os.getenv("GCLOUD_PROJECT") or
+            os.getenv("CLOUDSDK_CORE_PROJECT") or
+            os.getenv("CLOUDSDK_PROJECT") or
+            os.getenv("GCP_PROJECT_ID")
+        )
+        if gcp_project_id:
+            auth_method = "environment_variable"
+            logger.info(f"✓ GCP Project ID loaded from environment: {gcp_project_id}")
+    # Method 5: Try gcloud config as last resort
+    if not gcp_project_id:
+        try:
+            import subprocess
+            result = subprocess.run(
+                ["gcloud", "config", "get-value", "project"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+            if result.returncode == 0:
+                gcp_project_id = result.stdout.strip()
+                if gcp_project_id and gcp_project_id != "(unset)":
+                    auth_method = "gcloud_config"
+                    logger.info(f"✓ GCP Project ID loaded from gcloud config: {gcp_project_id}")
+                else:
+                    gcp_project_id = None
+        except Exception as e:
+            logger.debug(f"Could not load from gcloud config: {e}")
+    # Build configuration dictionary
+    config = {
+        "gemini_api_key": os.getenv("GEMINI_API_KEY"),
+        "runwayml_api_key": os.getenv("RUNWAYML_API_KEY"),
+        "gcs_bucket_name": os.getenv("GCS_BUCKET_NAME"),
+        "gcp_project_id": gcp_project_id,
+        "default_voice": os.getenv("DEFAULT_VOICE", "en-US-Neural2-F"),
+        "auth_method": auth_method,  # Track how project was loaded
+    }
+    # Validate required keys
+    required_keys = ["gemini_api_key", "runwayml_api_key", "gcs_bucket_name", "gcp_project_id"]
+    missing_keys = [key for key in required_keys if not config.get(key)]
+    if missing_keys:
+        logger.error(f"Missing required configuration: {', '.join(missing_keys)}")
+        logger.error("Configuration loading attempted via:")
+        logger.error("  1. Service account JSON file/string")
+        logger.error("  2. Workload Identity Federation (GitHub Actions)")
+        logger.error("  3. Application Default Credentials (ADC)")
+        logger.error("  4. Environment variables")
+        logger.error("  5. gcloud config")
+        logger.error("")
+        logger.error("Available environment variables:")
+        # These variables may hold a raw service-account JSON document (Method 1
+        # accepts one), so only report whether they are set, never their value.
+        credential_env_keys = {
+            "GOOGLE_GHA_CREDS_PATH",
+            "CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE",
+            "GOOGLE_APPLICATION_CREDENTIALS",
+        }
+        for key in [
+            "GOOGLE_GHA_CREDS_PATH",
+            "CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE",
+            "GOOGLE_APPLICATION_CREDENTIALS",
+            "WORKLOAD_IDENTITY_PROVIDER",
+            "SERVICE_ACCOUNT_EMAIL",
+            "GOOGLE_CLOUD_PROJECT",
+            "GCP_PROJECT",
+            "GCP_PROJECT_ID",
+        ]:
+            env_value = os.getenv(key)
+            if env_value is None:
+                shown = "NOT SET"
+            elif key in credential_env_keys:
+                shown = "SET (value hidden)"
+            else:
+                shown = env_value
+            logger.error(f"  {key}: {shown}")
+        logger.error("")
+        logger.error("For local development with ADC:")
+        logger.error("  1. Run: gcloud config set project YOUR_PROJECT_ID")
+        logger.error("  2. Or set: export GCP_PROJECT_ID=YOUR_PROJECT_ID")
+        logger.error("  3. Ensure ADC is set up: gcloud auth application-default login")
+        logger.error("")
+        logger.error("For GitHub Actions with Workload Identity Federation:")
+        logger.error("  1. Set WORKLOAD_IDENTITY_PROVIDER in your workflow")
+        logger.error("  2. Set SERVICE_ACCOUNT_EMAIL in your workflow")
+        logger.error("  3. Or set GCP_PROJECT_ID directly in secrets")
+        raise ValueError(
+            f"Missing required configuration: {', '.join(missing_keys)}.\n"
+            f"Please check your .env file, gcloud config, or GitHub secrets."
+        )
+    logger.info(f"✓ Configuration loaded successfully (auth method: {auth_method})")
+    return config
+def get_gcp_project_id() -> str:
+    """
+    Return only the GCP project ID.
+    Runs the full load_configuration() lookup (including its validation of the
+    other required settings) and picks the "gcp_project_id" entry from the result.
+    """
+    return load_configuration()["gcp_project_id"]
+# ------------------ Usage Examples ------------------
+if __name__ == "__main__":
+    # Manual smoke test: load the configuration and print it with secrets masked.
+    try:
+        # load_configuration() calls load_dotenv() itself, so no separate call is needed
+        config = load_configuration()
+        print("\n✓ Configuration loaded successfully!\n")
+        print("Configuration:")
+        for key, value in config.items():
+            if "key" in key.lower() and value:
+                # Mask API keys: show at most the last 4 characters, and nothing
+                # at all for short values, so the mask can never reveal the whole secret
+                visible_tail = value[-4:] if len(value) > 8 else ""
+                print(f"  {key}: ***{visible_tail}")
+            else:
+                print(f"  {key}: {value}")
+    except ValueError as e:
+        print(f"\n✗ Configuration error:\n{e}")

src/main.py CHANGED Viewed

@@ -10,13 +10,11 @@ import argparse
 import json
 from pathlib import Path
 from typing import Dict, Optional
-from dotenv import load_dotenv
 from automation import ContentAutomation
 from utils import logger
 import pandas as pd
-import random
 import warnings
-import threading
 def load_content_strategies(csv_file: Optional[str] = None) -> pd.DataFrame:
@@ -112,91 +110,6 @@ def select_random_strategy(df: pd.DataFrame, index: Optional[int] = None) -> Dic
         "brand": "Somira",
     }
-def load_configuration() -> Dict:
-    """
-    Load configuration from environment variables with validation.
-    Automatically extracts GCP project ID from service account JSON,
-    whether the env var is a path or the raw JSON content.
-    """
-    load_dotenv()
-    # Try multiple possible credential paths in GitHub Actions
-    gcp_creds_path = (
-        os.getenv("GOOGLE_GHA_CREDS_PATH") or
-        os.getenv("CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE") or
-        os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
-    )
-    gcp_project_id = None
-    creds_data = None
-    if not gcp_creds_path:
-        logger.warning("No GCP credentials path found in environment variables.")
-    else:
-        try:
-            os.environ["VERTEX_TEMP_AI_CREDENTIALS_JSON"] = os.getenv("VERTEX_AI_CREDENTIALS_JSON", "")
-            os.environ["MY_TEMP_GCS_BUCKET"] = os.getenv("MY_TEMP_GCS_BUCKET", "")
-            # Check if it's a file path that exists
-            if Path(gcp_creds_path).exists():
-                logger.info(f"Loading GCP credentials from file: {gcp_creds_path}")
-                with open(gcp_creds_path, "r") as f:
-                    creds_data = json.load(f)
-            else:
-                # Try to parse as raw JSON string
-                logger.info("Attempting to parse GCP credentials as JSON string")
-                creds_data = json.loads(gcp_creds_path)
-            if creds_data:
-                gcp_project_id = creds_data.get("project_id")
-                logger.info(f"✓ GCP Project ID loaded: {gcp_project_id}")
-        except json.JSONDecodeError as e:
-            logger.warning(f"Could not parse GCP credentials as JSON. Error: {e}")
-        except FileNotFoundError as e:
-            logger.warning(f"GCP credentials file not found: {e}")
-        except Exception as e:
-            logger.error(f"Unexpected error loading GCP credentials: {e}")
-    # Fallback: try to get project_id from other environment variables
-    if not gcp_project_id:
-        gcp_project_id = (
-            os.getenv("GOOGLE_CLOUD_PROJECT") or
-            os.getenv("GCP_PROJECT") or
-            os.getenv("GCLOUD_PROJECT") or
-            os.getenv("CLOUDSDK_CORE_PROJECT") or
-            os.getenv("CLOUDSDK_PROJECT") or
-            os.getenv("GCP_PROJECT_ID")
-        )
-        if gcp_project_id:
-            logger.info(f"✓ GCP Project ID loaded from environment: {gcp_project_id}")
-    config = {
-        "gemini_api_key": os.getenv("GEMINI_API_KEY"),
-        "runwayml_api_key": os.getenv("RUNWAYML_API_KEY"),
-        "gcs_bucket_name": os.getenv("GCS_BUCKET_NAME"),
-        "gcp_project_id": gcp_project_id,
-        "default_voice": os.getenv("DEFAULT_VOICE", "en-US-Neural2-F"),
-    }
-    # Validate required keys
-    required_keys = ["gemini_api_key", "runwayml_api_key", "gcs_bucket_name", "gcp_project_id"]
-    missing_keys = [key for key in required_keys if not config.get(key)]
-    if missing_keys:
-        logger.error(f"Missing required configuration: {', '.join(missing_keys)}")
-        logger.error("Available environment variables:")
-        for key in ["GOOGLE_GHA_CREDS_PATH", "CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE",
-                    "GOOGLE_APPLICATION_CREDENTIALS", "GOOGLE_CLOUD_PROJECT", "GCP_PROJECT"]:
-            logger.error(f"  {key}: {os.getenv(key, 'NOT SET')}")
-        raise ValueError(
-            f"Missing required configuration: {', '.join(missing_keys)}.\n"
-            f"Please check your .env file or GitHub secrets."
-        )
-    return config
 async def run_pipeline(
     automation: ContentAutomation, content_strategy: Dict, tts_script: str, output_dir: Optional[str] = None
 ) -> Dict:

 import json
 from pathlib import Path
 from typing import Dict, Optional
 from automation import ContentAutomation
 from utils import logger
 import pandas as pd
 import warnings
+from load_config import load_configuration
 def load_content_strategies(csv_file: Optional[str] = None) -> pd.DataFrame:
         "brand": "Somira",
     }
 async def run_pipeline(
     automation: ContentAutomation, content_strategy: Dict, tts_script: str, output_dir: Optional[str] = None
 ) -> Dict:

src/onscreebcta.py CHANGED Viewed

@@ -332,7 +332,7 @@ def add_cta(input_video_path: str, cta_text: str, above_caption: bool = True, pa
         output_video_path,
         codec="libx264",
         audio_codec="aac",
-        fps=24,
     )
     base_video.close()

         output_video_path,
         codec="libx264",
         audio_codec="aac",
+        fps=25,
     )
     base_video.close()

src/process_csv.py CHANGED Viewed

@@ -3,8 +3,8 @@ import csv
 import subprocess
 import os, time
 from pathlib import Path
 from main import (
-    load_configuration,
     load_content_strategies,
     run_pipeline,
 )

 import subprocess
 import os, time
 from pathlib import Path
+from load_config import load_configuration
 from main import (
     load_content_strategies,
     run_pipeline,
 )

src/publisher.py CHANGED Viewed

@@ -13,7 +13,8 @@ import time
 from pathlib import Path
 import hashlib
-from main import load_configuration, load_content_strategies
 from api_clients import APIClients
 # Import individual platform publishers

 from pathlib import Path
 import hashlib
+from load_config import load_configuration
+from main import load_content_strategies
 from api_clients import APIClients
 # Import individual platform publishers

src/text_clip.py CHANGED Viewed

@@ -760,7 +760,7 @@ if __name__ == "__main__":
                 background = ColorClip(size=(VIDEO_WIDTH, VIDEO_HEIGHT), color=(255, 255, 255), duration=total_duration + 1.0)
                 final_video = CompositeVideoClip([background] + text_clips, size=(VIDEO_WIDTH, VIDEO_HEIGHT))
                 print(f"   🎥 Rendering to: {output_filename}")
-                final_video.write_videofile(f"{output_filename}", fps=30, codec='libx264', preset='medium', logger=None, threads=8)
                 print(f"   ✨ Done!\n")
             else:
                 print(f"   ❌ Failed to create caption clip for '{config['name']}'\n")

                 background = ColorClip(size=(VIDEO_WIDTH, VIDEO_HEIGHT), color=(255, 255, 255), duration=total_duration + 1.0)
                 final_video = CompositeVideoClip([background] + text_clips, size=(VIDEO_WIDTH, VIDEO_HEIGHT))
                 print(f"   🎥 Rendering to: {output_filename}")
+                final_video.write_videofile(f"{output_filename}", fps=25, codec='libx264', preset='medium', logger=None, threads=8)
                 print(f"   ✨ Done!\n")
             else:
                 print(f"   ❌ Failed to create caption clip for '{config['name']}'\n")

src/tiktok_publisher.py CHANGED Viewed

@@ -14,8 +14,8 @@ import pandas as pd
 from datetime import datetime
 from dotenv import load_dotenv
 from pathlib import Path
 from main import (
-    load_configuration,
     load_content_strategies
 )
 from api_clients import APIClients

 from datetime import datetime
 from dotenv import load_dotenv
 from pathlib import Path
+from load_config import load_configuration
 from main import (
     load_content_strategies
 )
 from api_clients import APIClients

src/utils.py CHANGED Viewed

@@ -15,6 +15,7 @@ import uuid
 import re
 import shutil
 import librosa
 import numpy as np
 import tempfile
@@ -590,7 +591,8 @@ def reverse_clip(path_or_clip) -> str:
             codec="libx264",
             audio_codec="aac",
             verbose=False,
-            logger=None
         )
     elif isinstance(path_or_clip, str):
@@ -1136,4 +1138,54 @@ def repeat_audio_ffmpeg(input_audio, output_audio, repeat: int):
     finally:
         # Clean up temporary file
         if os.path.exists(temp_trimmed):
-            os.remove(temp_trimmed)

 import re
 import shutil
 import librosa
+import pandas as pd
 import numpy as np
 import tempfile
             codec="libx264",
             audio_codec="aac",
             verbose=False,
+            logger=None,
+            fps=25
         )
     elif isinstance(path_or_clip, str):
     finally:
         # Clean up temporary file
         if os.path.exists(temp_trimmed):
+            os.remove(temp_trimmed)
+def clean_and_drop_empty(
+    df: pd.DataFrame,
+    column: str,
+    extra_nulls: list[str] | None = None,
+) -> pd.DataFrame:
+    """
+    Normalize Google Sheets empty values and drop rows
+    where `column` is effectively empty.
+    Handles (case-insensitively, after trimming whitespace):
+    - NaN
+    - ""
+    - " "
+    - "nan", "None", "NULL", "N/A"
+    Surviving values in `column` are returned as whitespace-stripped strings
+    with their original casing preserved (URLs and IDs are case-sensitive).
+    Args:
+        df: Input DataFrame (not modified; a copy is returned)
+        column: Column to validate (e.g. "VIDEO_LINK")
+        extra_nulls: Optional extra string values to treat as null
+    Returns:
+        Cleaned DataFrame with valid rows only
+    Raises:
+        KeyError: If `column` is not a column of `df`
+    """
+    if column not in df.columns:
+        raise KeyError(f"Column '{column}' not found in DataFrame")
+    null_values = ["", "nan", "none", "null", "n/a"]
+    if extra_nulls:
+        null_values.extend(v.strip().lower() for v in extra_nulls)
+    stripped = df[column].astype(str).str.strip()
+    # Compare on a lowercased view only, so "None"/"NULL"/"N/A" match the
+    # lowercase null list without lowercasing the values that are kept
+    is_null = stripped.str.lower().isin(null_values)
+    df = df.copy()
+    df[column] = stripped.mask(is_null, np.nan)
+    return df.dropna(subset=[column])
+def is_valid_video(path: str, min_size_bytes: int = 100 * 1024) -> bool:
+    """
+    Cheap sanity check that `path` is a file large enough to be a real video.
+    Only existence and size are checked; the file content is never decoded.
+    Args:
+        path: Path of the file to check
+        min_size_bytes: Smallest size accepted as valid. Defaults to 100KB;
+            anything smaller is almost certainly an invalid download.
+    Returns:
+        True if `path` is a regular file of at least `min_size_bytes` bytes
+    """
+    try:
+        # isfile() rejects directories, which exists() would have accepted
+        return os.path.isfile(path) and os.path.getsize(path) >= min_size_bytes
+    except OSError:
+        # File disappeared or became unreadable between the two checks
+        return False

src/video_renderer.py CHANGED Viewed

@@ -989,7 +989,7 @@ class VideoRenderer:
             safe_name = "".join(c for c in self.data_holder.tts_script[:50] if c.isalnum())
             output_path = self.temp_dir / f"{os.getenv('SETUP_TYPE', 'final_video')}_{safe_name}_{int(time.time())}.mp4"
-            video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", verbose=False, logger=None)
             video_clip.close()
             return str(output_path)
@@ -1010,7 +1010,7 @@ class VideoRenderer:
             logger.info(f"📹 Rendering video (no audio): {filename}")
             video_clip.write_videofile(
-                str(output_path), codec="libx264", fps=24, verbose=False, logger=None
             )
             return str(output_path)
@@ -1077,7 +1077,7 @@ class VideoRenderer:
         return self.data_holder.current_caption_style
-    async def render_random_video(self, beat_times, music_duration):
         """
         Render video that syncs perfectly with music beats.
         Skip very early first beats to avoid ultra-short intro clips.
@@ -1095,10 +1095,7 @@ class VideoRenderer:
         # Track accumulated time deficit to maintain beat sync
         accumulated_deficit = 0.0
-        # Minimum clip duration to avoid glitchy cuts
-        min_clip_duration = 1  # seconds
         # SMART FIX: If first beat is not at 0, insert virtual beat at 0
         # This handles intro in the same loop as regular beats
         if beat_times[0] > 0.0001:

             safe_name = "".join(c for c in self.data_holder.tts_script[:50] if c.isalnum())
             output_path = self.temp_dir / f"{os.getenv('SETUP_TYPE', 'final_video')}_{safe_name}_{int(time.time())}.mp4"
+            video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", fps=25, verbose=False, logger=None)
             video_clip.close()
             return str(output_path)
             logger.info(f"📹 Rendering video (no audio): {filename}")
             video_clip.write_videofile(
+                str(output_path), codec="libx264", fps=25, verbose=False, logger=None
             )
             return str(output_path)
         return self.data_holder.current_caption_style
+    async def render_random_video(self, beat_times, music_duration, min_clip_duration=1) -> VideoFileClip:
         """
         Render video that syncs perfectly with music beats.
         Skip very early first beats to avoid ultra-short intro clips.
         # Track accumulated time deficit to maintain beat sync
         accumulated_deficit = 0.0
         # SMART FIX: If first beat is not at 0, insert virtual beat at 0
         # This handles intro in the same loop as regular beats
         if beat_times[0] > 0.0001:

src/youtube_publisher.py CHANGED Viewed

@@ -16,8 +16,8 @@ import os
 import sys
 import json
 from datetime import datetime, timedelta
 from main import (
-    load_configuration,
     load_content_strategies
 )
 from pathlib import Path

 import sys
 import json
 from datetime import datetime, timedelta
+from load_config import load_configuration
 from main import (
     load_content_strategies
 )
 from pathlib import Path