jebin2 committed on
Commit
e7fbeb6
·
1 Parent(s): c20adb3

integrate gsheet and send setup

Browse files
.gitignore CHANGED
@@ -43,4 +43,5 @@ whoa/
43
  src/temp*.py
44
  src/temp*.md
45
  testData/infloxa*
46
- testData/output/
 
 
43
  src/temp*.py
44
  src/temp*.md
45
  testData/infloxa*
46
+ testData/output/
47
+ testData/ref/
src/api_clients.py CHANGED
@@ -96,7 +96,7 @@ class APIClients:
96
  # Track current voice index for sequential selection
97
  self.current_voice_indices = {category: 0 for category in self.voice_profiles.keys()}
98
  self.file_names = None
99
- self.init_temp_gcs()
100
 
101
  async def get_from_cache(self, method_type, duration=0):
102
  try:
 
96
  # Track current voice index for sequential selection
97
  self.current_voice_indices = {category: 0 for category in self.voice_profiles.keys()}
98
  self.file_names = None
99
+ # self.init_temp_gcs()
100
 
101
  async def get_from_cache(self, method_type, duration=0):
102
  try:
src/asset_selector.py CHANGED
@@ -1,5 +1,5 @@
1
  import pandas as pd
2
- import aiohttp
3
  import json
4
  from typing import List, Dict, Optional, Tuple
5
  from utils import logger
@@ -99,26 +99,64 @@ class AssetSelector:
99
  audios = ["testData/infloxa/audiopulse.mp3"]
100
  return audios
101
 
102
- def audio_beats_map(self, audio_path: str) -> Optional[List[float]]:
103
- """Load or compute audio beats map from local file"""
 
 
 
 
 
 
104
  try:
105
- audio_map = {
106
- "testData/infloxa/audiopulse.mp3": [1.01, 1.17, 2.24, 4.06, 5.14, 6.21, 8.03, 9.11],
107
- }
108
- if audio_path in audio_map:
109
- return audio_map[audio_path]
110
 
111
- except Exception as e:
112
- logger.error(f"Failed to compute audio beats map for {audio_path}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  def _load_audio_library_from_gsheet(self) -> pd.DataFrame:
117
  """Load audio library from Google Sheet (if needed)"""
118
  try:
119
  googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("AUDIO_LIBRARY_GSHEET_WORKSHEET"))
120
  audio_df = googleSheetReader.get_filtered_dataframe()
121
- return googleSheetReader.clean_and_drop_empty(audio_df, "AUDIO_LINK")
 
 
122
  except Exception as e:
123
  logger.error(f"Failed to load audio library from Google Sheet: {e}")
124
  return pd.DataFrame()
@@ -128,7 +166,7 @@ class AssetSelector:
128
  try:
129
  googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("VIDEO_LIBRARY_GSHEET_WORKSHEET"))
130
  video_df = googleSheetReader.get_filtered_dataframe()
131
- return googleSheetReader.clean_and_drop_empty(video_df, "VIDEO_LINK")
132
  except Exception as e:
133
  logger.error(f"Failed to load video library from Google Sheet: {e}")
134
  return pd.DataFrame()
@@ -311,7 +349,7 @@ Video Options: {video_context}
311
  Select background music SEQUENTIALLY (not random)
312
  Each call increments the index to ensure different music for each video
313
  """
314
- if not self.audio_library:
315
  logger.error("❌ Audio library is empty")
316
  return ""
317
 
 
1
  import pandas as pd
2
+ import utils
3
  import json
4
  from typing import List, Dict, Optional, Tuple
5
  from utils import logger
 
99
  audios = ["testData/infloxa/audiopulse.mp3"]
100
  return audios
101
 
102
+ def get_audio_beats(self, audio_link: str) -> Optional[List[float]]:
103
+ """
104
+ Load audio beats timing from audio_library and convert
105
+ SS:FF (25 FPS) → seconds (float)
106
+
107
+ Example:
108
+ "01:12" → 1 + 12/25 = 1.48
109
+ """
110
  try:
111
+ if self.audio_library.empty:
112
+ logger.error("Audio library is empty")
113
+ return None
 
 
114
 
115
+ # Find matching row
116
+ row = self.audio_library.loc[
117
+ self.audio_library["AUDIO_LINK"] == audio_link
118
+ ]
119
+
120
+ if row.empty:
121
+ logger.error(f"No audio entry found for: {audio_link}")
122
+ return None
123
+
124
+ beats_raw = row.iloc[0]["Beats Timing(SS:FF) AT 25FPS"]
125
+
126
+ if pd.isna(beats_raw) or not str(beats_raw).strip():
127
+ logger.warning(f"No beat data for audio: {audio_link}")
128
+ return None
129
 
130
+ beats: List[float] = []
131
+
132
+ for token in str(beats_raw).split(","):
133
+ token = token.strip()
134
+
135
+ if ":" not in token:
136
+ continue
137
+
138
+ sec, frame = token.split(":", 1)
139
+
140
+ beats.append(
141
+ round(int(sec) + (int(frame) / 25.0), 2)
142
+ )
143
+
144
+ return beats if beats else None
145
+
146
+ except Exception as e:
147
+ logger.error(
148
+ f"Failed to compute audio beats map for {audio_link}: {e}"
149
+ )
150
+ return None
151
 
152
  def _load_audio_library_from_gsheet(self) -> pd.DataFrame:
153
  """Load audio library from Google Sheet (if needed)"""
154
  try:
155
  googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("AUDIO_LIBRARY_GSHEET_WORKSHEET"))
156
  audio_df = googleSheetReader.get_filtered_dataframe()
157
+ if os.getenv("HARD_CUT_RANDOM_VIDEOS", "false").lower() == "false":
158
+ audio_df = utils.clean_and_drop_empty(audio_df, "Beats Timing(SS:FF) AT 25FPS")
159
+ return utils.clean_and_drop_empty(audio_df, "AUDIO_LINK")
160
  except Exception as e:
161
  logger.error(f"Failed to load audio library from Google Sheet: {e}")
162
  return pd.DataFrame()
 
166
  try:
167
  googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("VIDEO_LIBRARY_GSHEET_WORKSHEET"))
168
  video_df = googleSheetReader.get_filtered_dataframe()
169
+ return utils.clean_and_drop_empty(video_df, "VIDEO_LINK")
170
  except Exception as e:
171
  logger.error(f"Failed to load video library from Google Sheet: {e}")
172
  return pd.DataFrame()
 
349
  Select background music SEQUENTIALLY (not random)
350
  Each call increments the index to ensure different music for each video
351
  """
352
+ if self.audio_library.empty:
353
  logger.error("❌ Audio library is empty")
354
  return ""
355
 
src/automation.py CHANGED
@@ -21,9 +21,9 @@ import hashlib
21
  from onscreebcta import add_cta
22
  import numpy as np
23
  from moviepy.editor import VideoFileClip, concatenate_videoclips
24
- import librosa
25
  import numpy as np
26
- from scipy import signal
27
 
28
  class ContentAutomation:
29
  def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
@@ -32,6 +32,7 @@ class ContentAutomation:
32
  self.api_clients = APIClients(config, self.data_holder)
33
  self.video_renderer = VideoRenderer(config, self.data_holder)
34
  self.asset_selector = AssetSelector(config, self.data_holder)
 
35
  self.pipeline_start_time = None
36
 
37
  async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
@@ -210,8 +211,13 @@ class ContentAutomation:
210
  music_duration = audio_clip.duration - 0.5
211
 
212
 
213
- if self.asset_selector.audio_beats_map().get(self.data_holder.visual_assets.get("background_music_url", ""), None):
214
- beat_times = self.asset_selector.audio_beats_map()[self.data_holder.visual_assets.get("background_music_url", "")]
 
 
 
 
 
215
  method_used = "cached"
216
  logger.info("Using cached beat times.")
217
  break
@@ -225,7 +231,9 @@ class ContentAutomation:
225
  if beat_times is None:
226
  logger.warning("No beats detected, trying alternative method...")
227
  try_next = True
228
-
 
 
229
  logger.info(f"Using '{method_used}' method: {len(beat_times)} beats detected")
230
  logger.info(f"Music duration: {music_duration:.2f}s")
231
  logger.info(f"Beat times: {beat_times}")
@@ -255,7 +263,8 @@ class ContentAutomation:
255
  # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
256
  video_no_audio_path = await self.video_renderer.render_random_video(
257
  beat_times,
258
- music_duration
 
259
  )
260
 
261
  if os.getenv("USE_1X1_RATIO", "false").lower() == "true":
@@ -322,6 +331,7 @@ class ContentAutomation:
322
  logger.info("\n🎵 STEP 1: Background Music")
323
  if try_next:
324
  self.asset_selector.inc_audio_index()
 
325
  self.data_holder.visual_assets["background_music_url"] = self.asset_selector.select_background_music()
326
  await self._download_to_local(
327
  self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
@@ -379,23 +389,25 @@ class ContentAutomation:
379
  return
380
 
381
  if os.getenv("INFLOXA", "false").lower() == "true":
382
- from video_downloader import VideoDownloader
383
  download_path="testData/infloxa"
384
  Path(download_path).mkdir(parents=True, exist_ok=True)
385
 
386
  allowed_videos = []
387
 
388
- self.data_holder.visual_assets["all_videos"] = [
389
  {
390
- "url": row.get("VIDEO_FILENAME", "").strip(),
391
- "local_path": VideoDownloader().download_video(
392
- video_filename=row.get("VIDEO_FILENAME", "").strip(),
393
- download_path=download_path
394
- )
395
  }
396
  for _, row in self.asset_selector.video_library.iterrows()
397
- if row.get("VIDEO_FILENAME", "").strip() in allowed_videos
 
 
 
 
398
  ]
 
 
399
  else:
400
  self.data_holder.visual_assets["all_videos"] = [
401
  {"url": row.get("Video URL (No Audio)", "").strip()}
@@ -609,6 +621,26 @@ class ContentAutomation:
609
 
610
  return health_status
611
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  async def simple_demo(self):
613
  """Simple demo with proper audio handling"""
614
  logger.info("🎬 Starting Simple Demo with Audio Fix...")
@@ -618,13 +650,13 @@ class ContentAutomation:
618
 
619
  # Create simple color videos
620
  clip1 = ColorClip(size=(640, 480), color=(255, 0, 0), duration=2)
621
- clip1 = clip1.set_fps(24)
622
  clip1_path = "/tmp/simple_red.mp4"
623
  clip1.write_videofile(clip1_path, verbose=False, logger=None)
624
  clip1.close()
625
 
626
  clip2 = ColorClip(size=(640, 480), color=(0, 255, 0), duration=2)
627
- clip2 = clip2.set_fps(24)
628
  clip2_path = "/tmp/simple_green.mp4"
629
  clip2.write_videofile(clip2_path, verbose=False, logger=None)
630
  clip2.close()
 
21
  from onscreebcta import add_cta
22
  import numpy as np
23
  from moviepy.editor import VideoFileClip, concatenate_videoclips
24
+ import math
25
  import numpy as np
26
+ from file_downloader import FileDownloader
27
 
28
  class ContentAutomation:
29
  def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
 
32
  self.api_clients = APIClients(config, self.data_holder)
33
  self.video_renderer = VideoRenderer(config, self.data_holder)
34
  self.asset_selector = AssetSelector(config, self.data_holder)
35
+ self.file_downloader = FileDownloader()
36
  self.pipeline_start_time = None
37
 
38
  async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
 
211
  music_duration = audio_clip.duration - 0.5
212
 
213
 
214
+ beat_times = self.asset_selector.get_audio_beats(self.data_holder.visual_assets["background_music_url"])
215
+ if beat_times:
216
+ beat_times = self.extend_beats_to_audio_end(
217
+ beat_times,
218
+ self.data_holder.visual_assets["background_music_local"],
219
+ fps=25
220
+ )
221
  method_used = "cached"
222
  logger.info("Using cached beat times.")
223
  break
 
231
  if beat_times is None:
232
  logger.warning("No beats detected, trying alternative method...")
233
  try_next = True
234
+
235
+ music_duration = music_duration if music_duration < beat_times[-1] else beat_times[-1]
236
+
237
  logger.info(f"Using '{method_used}' method: {len(beat_times)} beats detected")
238
  logger.info(f"Music duration: {music_duration:.2f}s")
239
  logger.info(f"Beat times: {beat_times}")
 
263
  # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
264
  video_no_audio_path = await self.video_renderer.render_random_video(
265
  beat_times,
266
+ music_duration,
267
+ min_clip_duration=0
268
  )
269
 
270
  if os.getenv("USE_1X1_RATIO", "false").lower() == "true":
 
331
  logger.info("\n🎵 STEP 1: Background Music")
332
  if try_next:
333
  self.asset_selector.inc_audio_index()
334
+
335
  self.data_holder.visual_assets["background_music_url"] = self.asset_selector.select_background_music()
336
  await self._download_to_local(
337
  self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
 
389
  return
390
 
391
  if os.getenv("INFLOXA", "false").lower() == "true":
 
392
  download_path="testData/infloxa"
393
  Path(download_path).mkdir(parents=True, exist_ok=True)
394
 
395
  allowed_videos = []
396
 
397
+ videos = [
398
  {
399
+ "url": url,
400
+ "local_path": str(local_path),
 
 
 
401
  }
402
  for _, row in self.asset_selector.video_library.iterrows()
403
+ if (
404
+ (url := str(row.get("VIDEO_LINK", "")).strip())
405
+ and (local_path := self.file_downloader.safe_download(url=url))
406
+ and utils.is_valid_video(local_path)
407
+ )
408
  ]
409
+ self.data_holder.visual_assets["all_videos"] = videos
410
+
411
  else:
412
  self.data_holder.visual_assets["all_videos"] = [
413
  {"url": row.get("Video URL (No Audio)", "").strip()}
 
621
 
622
  return health_status
623
 
624
+ def extend_beats_to_audio_end(
625
+ self,
626
+ beats: List[float],
627
+ audio_path: str,
628
+ fps: int = 25
629
+ ) -> List[float]:
630
+ if not beats:
631
+ return beats
632
+
633
+ with AudioFileClip(audio_path) as audio:
634
+ duration = audio.duration
635
+
636
+ frame_duration = math.floor(duration * fps) / fps
637
+
638
+ if beats[-1] < frame_duration:
639
+ return beats + [frame_duration]
640
+
641
+ return beats
642
+
643
+
644
  async def simple_demo(self):
645
  """Simple demo with proper audio handling"""
646
  logger.info("🎬 Starting Simple Demo with Audio Fix...")
 
650
 
651
  # Create simple color videos
652
  clip1 = ColorClip(size=(640, 480), color=(255, 0, 0), duration=2)
653
+ clip1 = clip1.set_fps(25)
654
  clip1_path = "/tmp/simple_red.mp4"
655
  clip1.write_videofile(clip1_path, verbose=False, logger=None)
656
  clip1.close()
657
 
658
  clip2 = ColorClip(size=(640, 480), color=(0, 255, 0), duration=2)
659
+ clip2 = clip2.set_fps(25)
660
  clip2_path = "/tmp/simple_green.mp4"
661
  clip2.write_videofile(clip2_path, verbose=False, logger=None)
662
  clip2.close()
src/file_downloader.py CHANGED
@@ -25,13 +25,16 @@ class FileDownloader:
25
  "https://www.googleapis.com/auth/drive.file",
26
  ]
27
 
28
- def __init__(self):
29
  logger.info("Initializing FileDownloader")
30
 
31
  # -------- Temp directory handling --------
32
  self.temp_dir = self._init_temp_dir()
33
  logger.info("Using temp directory: %s", self.temp_dir)
34
 
 
 
 
35
  # Lazy initialization for clients
36
  self._drive_service = None
37
  self._storage_client = None
@@ -48,6 +51,8 @@ class FileDownloader:
48
  if not base_dir:
49
  raise RuntimeError("TEST_DATA_DIRECTORY must be set when TEST_AUTOMATION=true")
50
 
 
 
51
  path = Path(base_dir) / "downloads"
52
  path.mkdir(parents=True, exist_ok=True)
53
  return path
@@ -150,39 +155,102 @@ class FileDownloader:
150
 
151
  Returns:
152
  Path to the downloaded file
 
 
 
153
  """
154
  logger.info("Downloading from Google Drive | file_id=%s", file_id)
155
 
156
  service = self._get_drive_service()
157
 
158
- # Get file metadata to determine filename
 
159
  if filename is None and output_path is None:
160
  try:
161
- file_metadata = service.files().get(fileId=file_id, fields="name").execute()
162
- filename = file_metadata.get("name", f"drive_file_{file_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  except Exception as e:
164
- logger.warning("Could not fetch file metadata: %s", e)
 
 
 
 
 
165
  filename = f"drive_file_{file_id}"
166
 
 
 
 
 
167
  # Determine output path
168
  if output_path is None:
169
  output_path = self.temp_dir / filename
 
 
 
 
 
 
 
170
 
171
  output_path.parent.mkdir(parents=True, exist_ok=True)
172
 
173
  # Download file
174
- request = service.files().get_media(fileId=file_id)
175
-
176
- with output_path.open("wb") as fh:
177
- downloader = MediaIoBaseDownload(fh, request)
178
- done = False
179
- while not done:
180
- status, done = downloader.next_chunk()
181
- if status:
182
- logger.debug("Download progress: %d%%", int(status.progress() * 100))
183
-
184
- logger.info("Downloaded from Drive to %s", output_path)
185
- return output_path
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  def download_from_gcs(
188
  self,
@@ -210,6 +278,11 @@ class FileDownloader:
210
  filename = Path(blob_name).name
211
  output_path = self.temp_dir / filename
212
 
 
 
 
 
 
213
  output_path.parent.mkdir(parents=True, exist_ok=True)
214
 
215
  if public:
@@ -233,6 +306,17 @@ class FileDownloader:
233
  logger.info("Downloaded from GCS to %s", output_path)
234
  return output_path
235
 
 
 
 
 
 
 
 
 
 
 
 
236
  def download(
237
  self,
238
  url: str,
@@ -333,6 +417,7 @@ def main():
333
  Examples controlled via env variables:
334
  - DOWNLOAD_URL: Single file to download
335
  - DOWNLOAD_URLS: Comma-separated list of URLs to download
 
336
  """
337
 
338
  try:
@@ -344,15 +429,13 @@ def main():
344
  downloader = FileDownloader()
345
 
346
  # ------------------ EXAMPLE 1: SINGLE FILE DOWNLOAD ------------------
347
- download_url = "https://storage.googleapis.com/somira/audiopulse2.mp3"
348
  if download_url:
349
  logger.info("Downloading single file")
350
- output_path = os.getenv("OUTPUT_PATH")
351
 
352
  try:
353
  downloaded_file = downloader.download(
354
- url=download_url,
355
- output_path=output_path,
356
  )
357
  logger.info("File downloaded to: %s", downloaded_file)
358
  except Exception as e:
@@ -364,12 +447,10 @@ def main():
364
  if download_urls:
365
  logger.info("Downloading multiple files")
366
  urls = [url.strip() for url in download_urls.split(",")]
367
- output_dir = os.getenv("OUTPUT_DIR")
368
 
369
  try:
370
  downloaded_files = downloader.download_multiple(
371
- urls=urls,
372
- output_dir=output_dir,
373
  )
374
  logger.info("Downloaded %d files:", len(downloaded_files))
375
  for path in downloaded_files:
@@ -385,6 +466,7 @@ def main():
385
  logger.info(" DOWNLOAD_URL='https://drive.google.com/...' python file_downloader.py")
386
  logger.info(" DOWNLOAD_URL='gs://bucket/path/file' python file_downloader.py")
387
  logger.info(" DOWNLOAD_URLS='url1,url2,url3' python file_downloader.py")
 
388
 
389
 
390
  if __name__ == "__main__":
 
25
  "https://www.googleapis.com/auth/drive.file",
26
  ]
27
 
28
+ def __init__(self, skip_existing: bool = True):
29
  logger.info("Initializing FileDownloader")
30
 
31
  # -------- Temp directory handling --------
32
  self.temp_dir = self._init_temp_dir()
33
  logger.info("Using temp directory: %s", self.temp_dir)
34
 
35
+ # Control whether to skip existing files
36
+ self.skip_existing = skip_existing
37
+
38
  # Lazy initialization for clients
39
  self._drive_service = None
40
  self._storage_client = None
 
51
  if not base_dir:
52
  raise RuntimeError("TEST_DATA_DIRECTORY must be set when TEST_AUTOMATION=true")
53
 
54
+ Path(base_dir).mkdir(parents=True, exist_ok=True)
55
+
56
  path = Path(base_dir) / "downloads"
57
  path.mkdir(parents=True, exist_ok=True)
58
  return path
 
155
 
156
  Returns:
157
  Path to the downloaded file
158
+
159
+ Raises:
160
+ Exception: If file cannot be accessed or downloaded
161
  """
162
  logger.info("Downloading from Google Drive | file_id=%s", file_id)
163
 
164
  service = self._get_drive_service()
165
 
166
+ # Get file metadata to determine filename with extension
167
+ metadata_error = None
168
  if filename is None and output_path is None:
169
  try:
170
+ file_metadata = service.files().get(fileId=file_id, fields="name,mimeType,fileExtension").execute()
171
+
172
+ # Use the original filename from Drive
173
+ filename = file_metadata.get("name")
174
+ logger.info("Retrieved filename from Drive: %s", filename)
175
+
176
+ # If no name, construct one with proper extension
177
+ if not filename:
178
+ file_extension = file_metadata.get("fileExtension", "")
179
+ mime_type = file_metadata.get("mimeType", "")
180
+
181
+ logger.info("No filename found, mimeType: %s, fileExtension: %s", mime_type, file_extension)
182
+
183
+ # Map common MIME types to extensions if fileExtension not available
184
+ mime_to_ext = {
185
+ "application/pdf": "pdf",
186
+ "image/jpeg": "jpg",
187
+ "image/png": "png",
188
+ "image/gif": "gif",
189
+ "text/plain": "txt",
190
+ "application/json": "json",
191
+ "text/csv": "csv",
192
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
193
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
194
+ "application/zip": "zip",
195
+ }
196
+
197
+ if not file_extension and mime_type in mime_to_ext:
198
+ file_extension = mime_to_ext[mime_type]
199
+
200
+ filename = f"drive_file_{file_id}"
201
+ if file_extension:
202
+ filename = f"{filename}.{file_extension}"
203
+
204
  except Exception as e:
205
+ logger.error("Could not fetch file metadata: %s", e)
206
+ metadata_error = e
207
+ # Check if it's a 404 or permission error - these are fatal
208
+ if hasattr(e, 'resp') and hasattr(e.resp, 'status'):
209
+ if e.resp.status in [403, 404]:
210
+ raise Exception(f"Cannot access file {file_id}: {str(e)}") from e
211
  filename = f"drive_file_{file_id}"
212
 
213
+ # If still no filename, use default
214
+ if filename is None:
215
+ filename = f"drive_file_{file_id}"
216
+
217
  # Determine output path
218
  if output_path is None:
219
  output_path = self.temp_dir / filename
220
+
221
+ logger.info("Final output path: %s", output_path)
222
+
223
+ # Check if file already exists
224
+ if self.skip_existing and output_path.exists():
225
+ logger.info("File already exists, skipping download: %s", output_path)
226
+ return output_path
227
 
228
  output_path.parent.mkdir(parents=True, exist_ok=True)
229
 
230
  # Download file
231
+ try:
232
+ request = service.files().get_media(fileId=file_id)
233
+
234
+ with output_path.open("wb") as fh:
235
+ downloader = MediaIoBaseDownload(fh, request)
236
+ done = False
237
+ while not done:
238
+ status, done = downloader.next_chunk()
239
+ if status:
240
+ logger.debug("Download progress: %d%%", int(status.progress() * 100))
241
+
242
+ logger.info("Downloaded from Drive to %s", output_path)
243
+ return output_path
244
+
245
+ except Exception as e:
246
+ # Clean up failed download
247
+ if output_path.exists():
248
+ output_path.unlink()
249
+ logger.info("Cleaned up failed download: %s", output_path)
250
+
251
+ error_msg = f"Failed to download file {file_id}: {str(e)}"
252
+ logger.error(error_msg)
253
+ raise Exception(error_msg) from e
254
 
255
  def download_from_gcs(
256
  self,
 
278
  filename = Path(blob_name).name
279
  output_path = self.temp_dir / filename
280
 
281
+ # Check if file already exists
282
+ if self.skip_existing and output_path.exists():
283
+ logger.info("File already exists, skipping download: %s", output_path)
284
+ return output_path
285
+
286
  output_path.parent.mkdir(parents=True, exist_ok=True)
287
 
288
  if public:
 
306
  logger.info("Downloaded from GCS to %s", output_path)
307
  return output_path
308
 
309
+ def safe_download(self, url: str, output_path: Path | None = None) -> Path | None:
310
+ """
311
+ Safe download wrapper to handle exceptions.
312
+ Returns None if download fails.
313
+ """
314
+ try:
315
+ return self.download(url, output_path=output_path)
316
+ except Exception as e:
317
+ logger.error("Download failed for %s: %s", url, e)
318
+ return None
319
+
320
  def download(
321
  self,
322
  url: str,
 
417
  Examples controlled via env variables:
418
  - DOWNLOAD_URL: Single file to download
419
  - DOWNLOAD_URLS: Comma-separated list of URLs to download
420
+ - SKIP_EXISTING: Set to 'false' to force re-download (default: 'true')
421
  """
422
 
423
  try:
 
429
  downloader = FileDownloader()
430
 
431
  # ------------------ EXAMPLE 1: SINGLE FILE DOWNLOAD ------------------
432
+ download_url = "https://drive.google.com/file/d/1jXqLjEDrFzR9858po7BenqKdx3cIm-4Q/view"
433
  if download_url:
434
  logger.info("Downloading single file")
 
435
 
436
  try:
437
  downloaded_file = downloader.download(
438
+ url=download_url
 
439
  )
440
  logger.info("File downloaded to: %s", downloaded_file)
441
  except Exception as e:
 
447
  if download_urls:
448
  logger.info("Downloading multiple files")
449
  urls = [url.strip() for url in download_urls.split(",")]
 
450
 
451
  try:
452
  downloaded_files = downloader.download_multiple(
453
+ urls=urls
 
454
  )
455
  logger.info("Downloaded %d files:", len(downloaded_files))
456
  for path in downloaded_files:
 
466
  logger.info(" DOWNLOAD_URL='https://drive.google.com/...' python file_downloader.py")
467
  logger.info(" DOWNLOAD_URL='gs://bucket/path/file' python file_downloader.py")
468
  logger.info(" DOWNLOAD_URLS='url1,url2,url3' python file_downloader.py")
469
+ logger.info(" SKIP_EXISTING='false' python file_downloader.py # Force re-download")
470
 
471
 
472
  if __name__ == "__main__":
src/google_sheet_reader.py CHANGED
@@ -3,7 +3,6 @@ import csv
3
  import tempfile
4
  from pathlib import Path
5
  import pandas as pd
6
- import numpy as np
7
 
8
  import gspread
9
  from google.auth import default
@@ -354,49 +353,6 @@ class GoogleSheetReader:
354
  logger.info("CSV export completed | rows=%d", len(rows) - 1)
355
  return output_path
356
 
357
- def clean_and_drop_empty(
358
- df: pd.DataFrame,
359
- column: str,
360
- extra_nulls: list[str] | None = None,
361
- ) -> pd.DataFrame:
362
- """
363
- Normalize Google Sheets empty values and drop rows
364
- where `column` is effectively empty.
365
-
366
- Handles:
367
- - NaN
368
- - ""
369
- - " "
370
- - "nan", "None", "NULL", "N/A"
371
-
372
- Args:
373
- df: Input DataFrame
374
- column: Column to validate (e.g. "VIDEO_LINK")
375
- extra_nulls: Optional extra string values to treat as null
376
-
377
- Returns:
378
- Cleaned DataFrame with valid rows only
379
- """
380
-
381
- if column not in df.columns:
382
- raise KeyError(f"Column '{column}' not found in DataFrame")
383
-
384
- null_values = ["", "nan", "none", "null", "n/a"]
385
- if extra_nulls:
386
- null_values.extend([v.lower() for v in extra_nulls])
387
-
388
- df = df.copy()
389
-
390
- df[column] = (
391
- df[column]
392
- .astype(str)
393
- .str.strip()
394
- .str.lower()
395
- .replace(null_values, np.nan)
396
- )
397
-
398
- return df.dropna(subset=[column])
399
-
400
 
401
  # ------------------ CLI entrypoint ------------------
402
 
 
3
  import tempfile
4
  from pathlib import Path
5
  import pandas as pd
 
6
 
7
  import gspread
8
  from google.auth import default
 
353
  logger.info("CSV export completed | rows=%d", len(rows) - 1)
354
  return output_path
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
  # ------------------ CLI entrypoint ------------------
358
 
src/instagram_publisher.py CHANGED
@@ -13,8 +13,8 @@ import pandas as pd
13
  from datetime import datetime
14
  from dotenv import load_dotenv
15
  from pathlib import Path
 
16
  from main import (
17
- load_configuration,
18
  load_content_strategies
19
  )
20
  from api_clients import APIClients
 
13
  from datetime import datetime
14
  from dotenv import load_dotenv
15
  from pathlib import Path
16
+ from load_config import load_configuration
17
  from main import (
 
18
  load_content_strategies
19
  )
20
  from api_clients import APIClients
src/load_config.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from pathlib import Path
4
+ from typing import Dict
5
+
6
+ from dotenv import load_dotenv
7
+ from google.auth import default
8
+
9
+ from utils import logger
10
+
11
+
12
+ def load_configuration() -> Dict:
13
+ """
14
+ Load configuration from environment variables with validation.
15
+
16
+ Supports two authentication methods:
17
+ 1. Service Account JSON (CI/CD): Extracts project ID from JSON file or string
18
+ 2. Application Default Credentials (Local): Uses ADC and gcloud config
19
+ """
20
+ load_dotenv()
21
+
22
+ gcp_project_id = None
23
+ creds_data = None
24
+ auth_method = None
25
+
26
+ # Try multiple possible credential paths (CI/CD environments)
27
+ gcp_creds_path = (
28
+ os.getenv("GOOGLE_GHA_CREDS_PATH") or
29
+ os.getenv("CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE") or
30
+ os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
31
+ )
32
+
33
+ # Method 1: Try to load from service account JSON file/string
34
+ if gcp_creds_path:
35
+ try:
36
+ os.environ["MY_TEMP_GCS_BUCKET"] = os.getenv("MY_TEMP_GCS_BUCKET", "")
37
+
38
+ # Check if it's a file path that exists
39
+ if Path(gcp_creds_path).exists():
40
+ logger.info(f"Loading GCP credentials from file: {gcp_creds_path}")
41
+ with open(gcp_creds_path, "r") as f:
42
+ creds_data = json.load(f)
43
+ auth_method = "service_account_file"
44
+ else:
45
+ # Try to parse as raw JSON string
46
+ logger.info("Attempting to parse GCP credentials as JSON string")
47
+ creds_data = json.loads(gcp_creds_path)
48
+ auth_method = "service_account_json"
49
+
50
+ if creds_data:
51
+ gcp_project_id = creds_data.get("project_id")
52
+ logger.info(f"✓ GCP Project ID loaded from service account: {gcp_project_id}")
53
+
54
+ except json.JSONDecodeError as e:
55
+ logger.warning(f"Could not parse GCP credentials as JSON. Error: {e}")
56
+ except FileNotFoundError as e:
57
+ logger.warning(f"GCP credentials file not found: {e}")
58
+ except Exception as e:
59
+ logger.error(f"Unexpected error loading GCP credentials: {e}")
60
+
61
+ # Method 2: Check for Workload Identity Federation (GitHub Actions)
62
+ if not gcp_project_id:
63
+ wif_provider = os.getenv("WORKLOAD_IDENTITY_PROVIDER")
64
+ wif_service_account = os.getenv("SERVICE_ACCOUNT_EMAIL")
65
+
66
+ if wif_provider and wif_service_account:
67
+ try:
68
+ logger.info("Attempting to load project from Workload Identity Federation")
69
+ # WIF credentials are automatically handled by google.auth.default()
70
+ # when GOOGLE_APPLICATION_CREDENTIALS is not set
71
+ creds, project = default()
72
+
73
+ if project:
74
+ gcp_project_id = project
75
+ auth_method = "workload_identity_federation"
76
+ logger.info(f"✓ GCP Project ID loaded from WIF: {gcp_project_id}")
77
+ else:
78
+ logger.debug("WIF credentials found but no project set")
79
+ except Exception as e:
80
+ logger.debug(f"Could not load from WIF: {e}")
81
+ else:
82
+ logger.debug("WIF environment variables not found")
83
+
84
+ # Method 3: Try to get project from Application Default Credentials (ADC)
85
+ if not gcp_project_id:
86
+ try:
87
+ logger.info("Attempting to load project from Application Default Credentials (ADC)")
88
+ creds, project = default()
89
+
90
+ if project:
91
+ gcp_project_id = project
92
+ auth_method = "adc"
93
+ logger.info(f"✓ GCP Project ID loaded from ADC: {gcp_project_id}")
94
+ else:
95
+ logger.debug("ADC credentials found but no project set")
96
+ except Exception as e:
97
+ logger.debug(f"Could not load from ADC: {e}")
98
+
99
+ # Method 4: Try environment variables
100
+ if not gcp_project_id:
101
+ gcp_project_id = (
102
+ os.getenv("GOOGLE_CLOUD_PROJECT") or
103
+ os.getenv("GCP_PROJECT") or
104
+ os.getenv("GCLOUD_PROJECT") or
105
+ os.getenv("CLOUDSDK_CORE_PROJECT") or
106
+ os.getenv("CLOUDSDK_PROJECT") or
107
+ os.getenv("GCP_PROJECT_ID")
108
+ )
109
+ if gcp_project_id:
110
+ auth_method = "environment_variable"
111
+ logger.info(f"✓ GCP Project ID loaded from environment: {gcp_project_id}")
112
+
113
+ # Method 5: Try gcloud config as last resort
114
+ if not gcp_project_id:
115
+ try:
116
+ import subprocess
117
+ result = subprocess.run(
118
+ ["gcloud", "config", "get-value", "project"],
119
+ capture_output=True,
120
+ text=True,
121
+ timeout=5,
122
+ )
123
+ if result.returncode == 0:
124
+ gcp_project_id = result.stdout.strip()
125
+ if gcp_project_id and gcp_project_id != "(unset)":
126
+ auth_method = "gcloud_config"
127
+ logger.info(f"✓ GCP Project ID loaded from gcloud config: {gcp_project_id}")
128
+ else:
129
+ gcp_project_id = None
130
+ except Exception as e:
131
+ logger.debug(f"Could not load from gcloud config: {e}")
132
+
133
+ # Build configuration dictionary
134
+ config = {
135
+ "gemini_api_key": os.getenv("GEMINI_API_KEY"),
136
+ "runwayml_api_key": os.getenv("RUNWAYML_API_KEY"),
137
+ "gcs_bucket_name": os.getenv("GCS_BUCKET_NAME"),
138
+ "gcp_project_id": gcp_project_id,
139
+ "default_voice": os.getenv("DEFAULT_VOICE", "en-US-Neural2-F"),
140
+ "auth_method": auth_method, # Track how project was loaded
141
+ }
142
+
143
+ # Validate required keys
144
+ required_keys = ["gemini_api_key", "runwayml_api_key", "gcs_bucket_name", "gcp_project_id"]
145
+ missing_keys = [key for key in required_keys if not config.get(key)]
146
+
147
+ if missing_keys:
148
+ logger.error(f"Missing required configuration: {', '.join(missing_keys)}")
149
+ logger.error("Configuration loading attempted via:")
150
+ logger.error(" 1. Service account JSON file/string")
151
+ logger.error(" 2. Workload Identity Federation (GitHub Actions)")
152
+ logger.error(" 3. Application Default Credentials (ADC)")
153
+ logger.error(" 4. Environment variables")
154
+ logger.error(" 5. gcloud config")
155
+ logger.error("")
156
+ logger.error("Available environment variables:")
157
+ for key in [
158
+ "GOOGLE_GHA_CREDS_PATH",
159
+ "CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE",
160
+ "GOOGLE_APPLICATION_CREDENTIALS",
161
+ "WORKLOAD_IDENTITY_PROVIDER",
162
+ "SERVICE_ACCOUNT_EMAIL",
163
+ "GOOGLE_CLOUD_PROJECT",
164
+ "GCP_PROJECT",
165
+ "GCP_PROJECT_ID",
166
+ ]:
167
+ logger.error(f" {key}: {os.getenv(key, 'NOT SET')}")
168
+
169
+ logger.error("")
170
+ logger.error("For local development with ADC:")
171
+ logger.error(" 1. Run: gcloud config set project YOUR_PROJECT_ID")
172
+ logger.error(" 2. Or set: export GCP_PROJECT_ID=YOUR_PROJECT_ID")
173
+ logger.error(" 3. Ensure ADC is set up: gcloud auth application-default login")
174
+ logger.error("")
175
+ logger.error("For GitHub Actions with Workload Identity Federation:")
176
+ logger.error(" 1. Set WORKLOAD_IDENTITY_PROVIDER in your workflow")
177
+ logger.error(" 2. Set SERVICE_ACCOUNT_EMAIL in your workflow")
178
+ logger.error(" 3. Or set GCP_PROJECT_ID directly in secrets")
179
+
180
+ raise ValueError(
181
+ f"Missing required configuration: {', '.join(missing_keys)}.\n"
182
+ f"Please check your .env file, gcloud config, or GitHub secrets."
183
+ )
184
+
185
+ logger.info(f"✓ Configuration loaded successfully (auth method: {auth_method})")
186
+ return config
187
+
188
+
189
+ def get_gcp_project_id() -> str:
190
+ """
191
+ Quick helper to get just the GCP project ID.
192
+ Useful when you only need the project ID without loading full config.
193
+ """
194
+ config = load_configuration()
195
+ return config["gcp_project_id"]
196
+
197
+
198
+ # ------------------ Usage Examples ------------------
199
+
200
+ if __name__ == "__main__":
201
+ try:
202
+ from dotenv import load_dotenv
203
+ load_dotenv()
204
+ config = load_configuration()
205
+ print("\n✓ Configuration loaded successfully!\n")
206
+ print("Configuration:")
207
+ for key, value in config.items():
208
+ if "key" in key.lower() and value:
209
+ # Mask API keys
210
+ print(f" {key}: {value[:10]}...{value[-4:]}")
211
+ else:
212
+ print(f" {key}: {value}")
213
+ except ValueError as e:
214
+ print(f"\n✗ Configuration error:\n{e}")
src/main.py CHANGED
@@ -10,13 +10,11 @@ import argparse
10
  import json
11
  from pathlib import Path
12
  from typing import Dict, Optional
13
- from dotenv import load_dotenv
14
  from automation import ContentAutomation
15
  from utils import logger
16
  import pandas as pd
17
- import random
18
  import warnings
19
- import threading
20
 
21
 
22
  def load_content_strategies(csv_file: Optional[str] = None) -> pd.DataFrame:
@@ -112,91 +110,6 @@ def select_random_strategy(df: pd.DataFrame, index: Optional[int] = None) -> Dic
112
  "brand": "Somira",
113
  }
114
 
115
-
116
- def load_configuration() -> Dict:
117
- """
118
- Load configuration from environment variables with validation.
119
- Automatically extracts GCP project ID from service account JSON,
120
- whether the env var is a path or the raw JSON content.
121
- """
122
- load_dotenv()
123
-
124
- # Try multiple possible credential paths in GitHub Actions
125
- gcp_creds_path = (
126
- os.getenv("GOOGLE_GHA_CREDS_PATH") or
127
- os.getenv("CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE") or
128
- os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
129
- )
130
-
131
- gcp_project_id = None
132
- creds_data = None
133
-
134
- if not gcp_creds_path:
135
- logger.warning("No GCP credentials path found in environment variables.")
136
- else:
137
- try:
138
- os.environ["VERTEX_TEMP_AI_CREDENTIALS_JSON"] = os.getenv("VERTEX_AI_CREDENTIALS_JSON", "")
139
- os.environ["MY_TEMP_GCS_BUCKET"] = os.getenv("MY_TEMP_GCS_BUCKET", "")
140
- # Check if it's a file path that exists
141
- if Path(gcp_creds_path).exists():
142
- logger.info(f"Loading GCP credentials from file: {gcp_creds_path}")
143
- with open(gcp_creds_path, "r") as f:
144
- creds_data = json.load(f)
145
- else:
146
- # Try to parse as raw JSON string
147
- logger.info("Attempting to parse GCP credentials as JSON string")
148
- creds_data = json.loads(gcp_creds_path)
149
-
150
- if creds_data:
151
- gcp_project_id = creds_data.get("project_id")
152
- logger.info(f"✓ GCP Project ID loaded: {gcp_project_id}")
153
-
154
- except json.JSONDecodeError as e:
155
- logger.warning(f"Could not parse GCP credentials as JSON. Error: {e}")
156
- except FileNotFoundError as e:
157
- logger.warning(f"GCP credentials file not found: {e}")
158
- except Exception as e:
159
- logger.error(f"Unexpected error loading GCP credentials: {e}")
160
-
161
- # Fallback: try to get project_id from other environment variables
162
- if not gcp_project_id:
163
- gcp_project_id = (
164
- os.getenv("GOOGLE_CLOUD_PROJECT") or
165
- os.getenv("GCP_PROJECT") or
166
- os.getenv("GCLOUD_PROJECT") or
167
- os.getenv("CLOUDSDK_CORE_PROJECT") or
168
- os.getenv("CLOUDSDK_PROJECT") or
169
- os.getenv("GCP_PROJECT_ID")
170
- )
171
- if gcp_project_id:
172
- logger.info(f"✓ GCP Project ID loaded from environment: {gcp_project_id}")
173
-
174
- config = {
175
- "gemini_api_key": os.getenv("GEMINI_API_KEY"),
176
- "runwayml_api_key": os.getenv("RUNWAYML_API_KEY"),
177
- "gcs_bucket_name": os.getenv("GCS_BUCKET_NAME"),
178
- "gcp_project_id": gcp_project_id,
179
- "default_voice": os.getenv("DEFAULT_VOICE", "en-US-Neural2-F"),
180
- }
181
-
182
- # Validate required keys
183
- required_keys = ["gemini_api_key", "runwayml_api_key", "gcs_bucket_name", "gcp_project_id"]
184
- missing_keys = [key for key in required_keys if not config.get(key)]
185
-
186
- if missing_keys:
187
- logger.error(f"Missing required configuration: {', '.join(missing_keys)}")
188
- logger.error("Available environment variables:")
189
- for key in ["GOOGLE_GHA_CREDS_PATH", "CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE",
190
- "GOOGLE_APPLICATION_CREDENTIALS", "GOOGLE_CLOUD_PROJECT", "GCP_PROJECT"]:
191
- logger.error(f" {key}: {os.getenv(key, 'NOT SET')}")
192
- raise ValueError(
193
- f"Missing required configuration: {', '.join(missing_keys)}.\n"
194
- f"Please check your .env file or GitHub secrets."
195
- )
196
-
197
- return config
198
-
199
-
200
  async def run_pipeline(
201
  automation: ContentAutomation, content_strategy: Dict, tts_script: str, output_dir: Optional[str] = None
202
  ) -> Dict:
 
10
  import json
11
  from pathlib import Path
12
  from typing import Dict, Optional
 
13
  from automation import ContentAutomation
14
  from utils import logger
15
  import pandas as pd
 
16
  import warnings
17
+ from load_config import load_configuration
18
 
19
 
20
  def load_content_strategies(csv_file: Optional[str] = None) -> pd.DataFrame:
 
110
  "brand": "Somira",
111
  }
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  async def run_pipeline(
114
  automation: ContentAutomation, content_strategy: Dict, tts_script: str, output_dir: Optional[str] = None
115
  ) -> Dict:
src/onscreebcta.py CHANGED
@@ -332,7 +332,7 @@ def add_cta(input_video_path: str, cta_text: str, above_caption: bool = True, pa
332
  output_video_path,
333
  codec="libx264",
334
  audio_codec="aac",
335
- fps=24,
336
  )
337
 
338
  base_video.close()
 
332
  output_video_path,
333
  codec="libx264",
334
  audio_codec="aac",
335
+ fps=25,
336
  )
337
 
338
  base_video.close()
src/process_csv.py CHANGED
@@ -3,8 +3,8 @@ import csv
3
  import subprocess
4
  import os, time
5
  from pathlib import Path
 
6
  from main import (
7
- load_configuration,
8
  load_content_strategies,
9
  run_pipeline,
10
  )
 
3
  import subprocess
4
  import os, time
5
  from pathlib import Path
6
+ from load_config import load_configuration
7
  from main import (
 
8
  load_content_strategies,
9
  run_pipeline,
10
  )
src/publisher.py CHANGED
@@ -13,7 +13,8 @@ import time
13
  from pathlib import Path
14
  import hashlib
15
 
16
- from main import load_configuration, load_content_strategies
 
17
  from api_clients import APIClients
18
 
19
  # Import individual platform publishers
 
13
  from pathlib import Path
14
  import hashlib
15
 
16
+ from load_config import load_configuration
17
+ from main import load_content_strategies
18
  from api_clients import APIClients
19
 
20
  # Import individual platform publishers
src/text_clip.py CHANGED
@@ -760,7 +760,7 @@ if __name__ == "__main__":
760
  background = ColorClip(size=(VIDEO_WIDTH, VIDEO_HEIGHT), color=(255, 255, 255), duration=total_duration + 1.0)
761
  final_video = CompositeVideoClip([background] + text_clips, size=(VIDEO_WIDTH, VIDEO_HEIGHT))
762
  print(f" 🎥 Rendering to: {output_filename}")
763
- final_video.write_videofile(f"{output_filename}", fps=30, codec='libx264', preset='medium', logger=None, threads=8)
764
  print(f" ✨ Done!\n")
765
  else:
766
  print(f" ❌ Failed to create caption clip for '{config['name']}'\n")
 
760
  background = ColorClip(size=(VIDEO_WIDTH, VIDEO_HEIGHT), color=(255, 255, 255), duration=total_duration + 1.0)
761
  final_video = CompositeVideoClip([background] + text_clips, size=(VIDEO_WIDTH, VIDEO_HEIGHT))
762
  print(f" 🎥 Rendering to: {output_filename}")
763
+ final_video.write_videofile(f"{output_filename}", fps=25, codec='libx264', preset='medium', logger=None, threads=8)
764
  print(f" ✨ Done!\n")
765
  else:
766
  print(f" ❌ Failed to create caption clip for '{config['name']}'\n")
src/tiktok_publisher.py CHANGED
@@ -14,8 +14,8 @@ import pandas as pd
14
  from datetime import datetime
15
  from dotenv import load_dotenv
16
  from pathlib import Path
 
17
  from main import (
18
- load_configuration,
19
  load_content_strategies
20
  )
21
  from api_clients import APIClients
 
14
  from datetime import datetime
15
  from dotenv import load_dotenv
16
  from pathlib import Path
17
+ from load_config import load_configuration
18
  from main import (
 
19
  load_content_strategies
20
  )
21
  from api_clients import APIClients
src/utils.py CHANGED
@@ -15,6 +15,7 @@ import uuid
15
  import re
16
  import shutil
17
  import librosa
 
18
  import numpy as np
19
  import tempfile
20
 
@@ -590,7 +591,8 @@ def reverse_clip(path_or_clip) -> str:
590
  codec="libx264",
591
  audio_codec="aac",
592
  verbose=False,
593
- logger=None
 
594
  )
595
 
596
  elif isinstance(path_or_clip, str):
@@ -1136,4 +1138,54 @@ def repeat_audio_ffmpeg(input_audio, output_audio, repeat: int):
1136
  finally:
1137
  # Clean up temporary file
1138
  if os.path.exists(temp_trimmed):
1139
- os.remove(temp_trimmed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  import re
16
  import shutil
17
  import librosa
18
+ import pandas as pd
19
  import numpy as np
20
  import tempfile
21
 
 
591
  codec="libx264",
592
  audio_codec="aac",
593
  verbose=False,
594
+ logger=None,
595
+ fps=25
596
  )
597
 
598
  elif isinstance(path_or_clip, str):
 
1138
  finally:
1139
  # Clean up temporary file
1140
  if os.path.exists(temp_trimmed):
1141
+ os.remove(temp_trimmed)
1142
+
1143
+ def clean_and_drop_empty(
1144
+ df: pd.DataFrame,
1145
+ column: str,
1146
+ extra_nulls: list[str] | None = None,
1147
+ ) -> pd.DataFrame:
1148
+ """
1149
+ Normalize Google Sheets empty values and drop rows
1150
+ where `column` is effectively empty.
1151
+
1152
+ Handles:
1153
+ - NaN
1154
+ - ""
1155
+ - " "
1156
+ - "nan", "None", "NULL", "N/A"
1157
+
1158
+ Args:
1159
+ df: Input DataFrame
1160
+ column: Column to validate (e.g. "VIDEO_LINK")
1161
+ extra_nulls: Optional extra string values to treat as null
1162
+
1163
+ Returns:
1164
+ Cleaned DataFrame with valid rows only
1165
+ """
1166
+
1167
+ if column not in df.columns:
1168
+ raise KeyError(f"Column '{column}' not found in DataFrame")
1169
+
1170
+ null_values = ["", "nan", "none", "null", "n/a"]
1171
+ if extra_nulls:
1172
+ null_values.extend([v.lower() for v in extra_nulls])
1173
+
1174
+ df = df.copy()
1175
+
1176
+ df[column] = (
1177
+ df[column]
1178
+ .astype(str)
1179
+ .str.strip()
1180
+ # .str.lower()
1181
+ .replace(null_values, np.nan)
1182
+ )
1183
+
1184
+ return df.dropna(subset=[column])
1185
+
1186
+ def is_valid_video(path: str) -> bool:
1187
+ if not os.path.exists(path):
1188
+ return False
1189
+ if os.path.getsize(path) < 100 * 1024: # <100KB = almost certainly invalid
1190
+ return False
1191
+ return True
src/video_renderer.py CHANGED
@@ -989,7 +989,7 @@ class VideoRenderer:
989
  safe_name = "".join(c for c in self.data_holder.tts_script[:50] if c.isalnum())
990
  output_path = self.temp_dir / f"{os.getenv('SETUP_TYPE', 'final_video')}_{safe_name}_{int(time.time())}.mp4"
991
 
992
- video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", verbose=False, logger=None)
993
 
994
  video_clip.close()
995
  return str(output_path)
@@ -1010,7 +1010,7 @@ class VideoRenderer:
1010
  logger.info(f"📹 Rendering video (no audio): {filename}")
1011
 
1012
  video_clip.write_videofile(
1013
- str(output_path), codec="libx264", fps=24, verbose=False, logger=None
1014
  )
1015
 
1016
  return str(output_path)
@@ -1077,7 +1077,7 @@ class VideoRenderer:
1077
 
1078
  return self.data_holder.current_caption_style
1079
 
1080
- async def render_random_video(self, beat_times, music_duration):
1081
  """
1082
  Render video that syncs perfectly with music beats.
1083
  Skip very early first beats to avoid ultra-short intro clips.
@@ -1095,10 +1095,7 @@ class VideoRenderer:
1095
 
1096
  # Track accumulated time deficit to maintain beat sync
1097
  accumulated_deficit = 0.0
1098
-
1099
- # Minimum clip duration to avoid glitchy cuts
1100
- min_clip_duration = 1 # seconds
1101
-
1102
  # SMART FIX: If first beat is not at 0, insert virtual beat at 0
1103
  # This handles intro in the same loop as regular beats
1104
  if beat_times[0] > 0.0001:
 
989
  safe_name = "".join(c for c in self.data_holder.tts_script[:50] if c.isalnum())
990
  output_path = self.temp_dir / f"{os.getenv('SETUP_TYPE', 'final_video')}_{safe_name}_{int(time.time())}.mp4"
991
 
992
+ video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", fps=25, verbose=False, logger=None)
993
 
994
  video_clip.close()
995
  return str(output_path)
 
1010
  logger.info(f"📹 Rendering video (no audio): {filename}")
1011
 
1012
  video_clip.write_videofile(
1013
+ str(output_path), codec="libx264", fps=25, verbose=False, logger=None
1014
  )
1015
 
1016
  return str(output_path)
 
1077
 
1078
  return self.data_holder.current_caption_style
1079
 
1080
+ async def render_random_video(self, beat_times, music_duration, min_clip_duration=1) -> VideoFileClip:
1081
  """
1082
  Render video that syncs perfectly with music beats.
1083
  Skip very early first beats to avoid ultra-short intro clips.
 
1095
 
1096
  # Track accumulated time deficit to maintain beat sync
1097
  accumulated_deficit = 0.0
1098
+
 
 
 
1099
  # SMART FIX: If first beat is not at 0, insert virtual beat at 0
1100
  # This handles intro in the same loop as regular beats
1101
  if beat_times[0] > 0.0001:
src/youtube_publisher.py CHANGED
@@ -16,8 +16,8 @@ import os
16
  import sys
17
  import json
18
  from datetime import datetime, timedelta
 
19
  from main import (
20
- load_configuration,
21
  load_content_strategies
22
  )
23
  from pathlib import Path
 
16
  import sys
17
  import json
18
  from datetime import datetime, timedelta
19
+ from load_config import load_configuration
20
  from main import (
 
21
  load_content_strategies
22
  )
23
  from pathlib import Path