integrate gsheet and send setup
Browse files
- .gitignore +2 -1
- src/api_clients.py +1 -1
- src/asset_selector.py +52 -14
- src/automation.py +48 -16
- src/file_downloader.py +106 -24
- src/google_sheet_reader.py +0 -44
- src/instagram_publisher.py +1 -1
- src/load_config.py +214 -0
- src/main.py +1 -88
- src/onscreebcta.py +1 -1
- src/process_csv.py +1 -1
- src/publisher.py +2 -1
- src/text_clip.py +1 -1
- src/tiktok_publisher.py +1 -1
- src/utils.py +54 -2
- src/video_renderer.py +4 -7
- src/youtube_publisher.py +1 -1
.gitignore
CHANGED
|
@@ -43,4 +43,5 @@ whoa/
|
|
| 43 |
src/temp*.py
|
| 44 |
src/temp*.md
|
| 45 |
testData/infloxa*
|
| 46 |
-
testData/output/
|
|
|
|
|
|
| 43 |
src/temp*.py
|
| 44 |
src/temp*.md
|
| 45 |
testData/infloxa*
|
| 46 |
+
testData/output/
|
| 47 |
+
testData/ref/
|
src/api_clients.py
CHANGED
|
@@ -96,7 +96,7 @@ class APIClients:
|
|
| 96 |
# Track current voice index for sequential selection
|
| 97 |
self.current_voice_indices = {category: 0 for category in self.voice_profiles.keys()}
|
| 98 |
self.file_names = None
|
| 99 |
-
self.init_temp_gcs()
|
| 100 |
|
| 101 |
async def get_from_cache(self, method_type, duration=0):
|
| 102 |
try:
|
|
|
|
| 96 |
# Track current voice index for sequential selection
|
| 97 |
self.current_voice_indices = {category: 0 for category in self.voice_profiles.keys()}
|
| 98 |
self.file_names = None
|
| 99 |
+
# self.init_temp_gcs()
|
| 100 |
|
| 101 |
async def get_from_cache(self, method_type, duration=0):
|
| 102 |
try:
|
src/asset_selector.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
-
import
|
| 3 |
import json
|
| 4 |
from typing import List, Dict, Optional, Tuple
|
| 5 |
from utils import logger
|
|
@@ -99,26 +99,64 @@ class AssetSelector:
|
|
| 99 |
audios = ["testData/infloxa/audiopulse.mp3"]
|
| 100 |
return audios
|
| 101 |
|
| 102 |
-
def
|
| 103 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
try:
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
if audio_path in audio_map:
|
| 109 |
-
return audio_map[audio_path]
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
def _load_audio_library_from_gsheet(self) -> pd.DataFrame:
|
| 117 |
"""Load audio library from Google Sheet (if needed)"""
|
| 118 |
try:
|
| 119 |
googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("AUDIO_LIBRARY_GSHEET_WORKSHEET"))
|
| 120 |
audio_df = googleSheetReader.get_filtered_dataframe()
|
| 121 |
-
|
|
|
|
|
|
|
| 122 |
except Exception as e:
|
| 123 |
logger.error(f"Failed to load audio library from Google Sheet: {e}")
|
| 124 |
return pd.DataFrame()
|
|
@@ -128,7 +166,7 @@ class AssetSelector:
|
|
| 128 |
try:
|
| 129 |
googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("VIDEO_LIBRARY_GSHEET_WORKSHEET"))
|
| 130 |
video_df = googleSheetReader.get_filtered_dataframe()
|
| 131 |
-
return
|
| 132 |
except Exception as e:
|
| 133 |
logger.error(f"Failed to load video library from Google Sheet: {e}")
|
| 134 |
return pd.DataFrame()
|
|
@@ -311,7 +349,7 @@ Video Options: {video_context}
|
|
| 311 |
Select background music SEQUENTIALLY (not random)
|
| 312 |
Each call increments the index to ensure different music for each video
|
| 313 |
"""
|
| 314 |
-
if
|
| 315 |
logger.error("❌ Audio library is empty")
|
| 316 |
return ""
|
| 317 |
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
+
import utils
|
| 3 |
import json
|
| 4 |
from typing import List, Dict, Optional, Tuple
|
| 5 |
from utils import logger
|
|
|
|
| 99 |
audios = ["testData/infloxa/audiopulse.mp3"]
|
| 100 |
return audios
|
| 101 |
|
| 102 |
+
def get_audio_beats(self, audio_link: str) -> Optional[List[float]]:
|
| 103 |
+
"""
|
| 104 |
+
Load audio beats timing from audio_library and convert
|
| 105 |
+
SS:FF (25 FPS) → seconds (float)
|
| 106 |
+
|
| 107 |
+
Example:
|
| 108 |
+
"01:12" → 1 + 12/25 = 1.48
|
| 109 |
+
"""
|
| 110 |
try:
|
| 111 |
+
if self.audio_library.empty:
|
| 112 |
+
logger.error("Audio library is empty")
|
| 113 |
+
return None
|
|
|
|
|
|
|
| 114 |
|
| 115 |
+
# Find matching row
|
| 116 |
+
row = self.audio_library.loc[
|
| 117 |
+
self.audio_library["AUDIO_LINK"] == audio_link
|
| 118 |
+
]
|
| 119 |
+
|
| 120 |
+
if row.empty:
|
| 121 |
+
logger.error(f"No audio entry found for: {audio_link}")
|
| 122 |
+
return None
|
| 123 |
+
|
| 124 |
+
beats_raw = row.iloc[0]["Beats Timing(SS:FF) AT 25FPS"]
|
| 125 |
+
|
| 126 |
+
if pd.isna(beats_raw) or not str(beats_raw).strip():
|
| 127 |
+
logger.warning(f"No beat data for audio: {audio_link}")
|
| 128 |
+
return None
|
| 129 |
|
| 130 |
+
beats: List[float] = []
|
| 131 |
+
|
| 132 |
+
for token in str(beats_raw).split(","):
|
| 133 |
+
token = token.strip()
|
| 134 |
+
|
| 135 |
+
if ":" not in token:
|
| 136 |
+
continue
|
| 137 |
+
|
| 138 |
+
sec, frame = token.split(":", 1)
|
| 139 |
+
|
| 140 |
+
beats.append(
|
| 141 |
+
round(int(sec) + (int(frame) / 25.0), 2)
|
| 142 |
+
)
|
| 143 |
+
|
| 144 |
+
return beats if beats else None
|
| 145 |
+
|
| 146 |
+
except Exception as e:
|
| 147 |
+
logger.error(
|
| 148 |
+
f"Failed to compute audio beats map for {audio_link}: {e}"
|
| 149 |
+
)
|
| 150 |
+
return None
|
| 151 |
|
| 152 |
def _load_audio_library_from_gsheet(self) -> pd.DataFrame:
|
| 153 |
"""Load audio library from Google Sheet (if needed)"""
|
| 154 |
try:
|
| 155 |
googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("AUDIO_LIBRARY_GSHEET_WORKSHEET"))
|
| 156 |
audio_df = googleSheetReader.get_filtered_dataframe()
|
| 157 |
+
if os.getenv("HARD_CUT_RANDOM_VIDEOS", "false").lower() == "false":
|
| 158 |
+
audio_df = utils.clean_and_drop_empty(audio_df, "Beats Timing(SS:FF) AT 25FPS")
|
| 159 |
+
return utils.clean_and_drop_empty(audio_df, "AUDIO_LINK")
|
| 160 |
except Exception as e:
|
| 161 |
logger.error(f"Failed to load audio library from Google Sheet: {e}")
|
| 162 |
return pd.DataFrame()
|
|
|
|
| 166 |
try:
|
| 167 |
googleSheetReader = GoogleSheetReader(worksheet_name=os.getenv("VIDEO_LIBRARY_GSHEET_WORKSHEET"))
|
| 168 |
video_df = googleSheetReader.get_filtered_dataframe()
|
| 169 |
+
return utils.clean_and_drop_empty(video_df, "VIDEO_LINK")
|
| 170 |
except Exception as e:
|
| 171 |
logger.error(f"Failed to load video library from Google Sheet: {e}")
|
| 172 |
return pd.DataFrame()
|
|
|
|
| 349 |
Select background music SEQUENTIALLY (not random)
|
| 350 |
Each call increments the index to ensure different music for each video
|
| 351 |
"""
|
| 352 |
+
if self.audio_library.empty:
|
| 353 |
logger.error("❌ Audio library is empty")
|
| 354 |
return ""
|
| 355 |
|
src/automation.py
CHANGED
|
@@ -21,9 +21,9 @@ import hashlib
|
|
| 21 |
from onscreebcta import add_cta
|
| 22 |
import numpy as np
|
| 23 |
from moviepy.editor import VideoFileClip, concatenate_videoclips
|
| 24 |
-
import
|
| 25 |
import numpy as np
|
| 26 |
-
from
|
| 27 |
|
| 28 |
class ContentAutomation:
|
| 29 |
def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
|
|
@@ -32,6 +32,7 @@ class ContentAutomation:
|
|
| 32 |
self.api_clients = APIClients(config, self.data_holder)
|
| 33 |
self.video_renderer = VideoRenderer(config, self.data_holder)
|
| 34 |
self.asset_selector = AssetSelector(config, self.data_holder)
|
|
|
|
| 35 |
self.pipeline_start_time = None
|
| 36 |
|
| 37 |
async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
|
|
@@ -210,8 +211,13 @@ class ContentAutomation:
|
|
| 210 |
music_duration = audio_clip.duration - 0.5
|
| 211 |
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
method_used = "cached"
|
| 216 |
logger.info("Using cached beat times.")
|
| 217 |
break
|
|
@@ -225,7 +231,9 @@ class ContentAutomation:
|
|
| 225 |
if beat_times is None:
|
| 226 |
logger.warning("No beats detected, trying alternative method...")
|
| 227 |
try_next = True
|
| 228 |
-
|
|
|
|
|
|
|
| 229 |
logger.info(f"Using '{method_used}' method: {len(beat_times)} beats detected")
|
| 230 |
logger.info(f"Music duration: {music_duration:.2f}s")
|
| 231 |
logger.info(f"Beat times: {beat_times}")
|
|
@@ -255,7 +263,8 @@ class ContentAutomation:
|
|
| 255 |
# IMPORTANT: Pass filtered_beat_times, not beat_intervals!
|
| 256 |
video_no_audio_path = await self.video_renderer.render_random_video(
|
| 257 |
beat_times,
|
| 258 |
-
music_duration
|
|
|
|
| 259 |
)
|
| 260 |
|
| 261 |
if os.getenv("USE_1X1_RATIO", "false").lower() == "true":
|
|
@@ -322,6 +331,7 @@ class ContentAutomation:
|
|
| 322 |
logger.info("\n🎵 STEP 1: Background Music")
|
| 323 |
if try_next:
|
| 324 |
self.asset_selector.inc_audio_index()
|
|
|
|
| 325 |
self.data_holder.visual_assets["background_music_url"] = self.asset_selector.select_background_music()
|
| 326 |
await self._download_to_local(
|
| 327 |
self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
|
|
@@ -379,23 +389,25 @@ class ContentAutomation:
|
|
| 379 |
return
|
| 380 |
|
| 381 |
if os.getenv("INFLOXA", "false").lower() == "true":
|
| 382 |
-
from video_downloader import VideoDownloader
|
| 383 |
download_path="testData/infloxa"
|
| 384 |
Path(download_path).mkdir(parents=True, exist_ok=True)
|
| 385 |
|
| 386 |
allowed_videos = []
|
| 387 |
|
| 388 |
-
|
| 389 |
{
|
| 390 |
-
"url":
|
| 391 |
-
"local_path":
|
| 392 |
-
video_filename=row.get("VIDEO_FILENAME", "").strip(),
|
| 393 |
-
download_path=download_path
|
| 394 |
-
)
|
| 395 |
}
|
| 396 |
for _, row in self.asset_selector.video_library.iterrows()
|
| 397 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
]
|
|
|
|
|
|
|
| 399 |
else:
|
| 400 |
self.data_holder.visual_assets["all_videos"] = [
|
| 401 |
{"url": row.get("Video URL (No Audio)", "").strip()}
|
|
@@ -609,6 +621,26 @@ class ContentAutomation:
|
|
| 609 |
|
| 610 |
return health_status
|
| 611 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
async def simple_demo(self):
|
| 613 |
"""Simple demo with proper audio handling"""
|
| 614 |
logger.info("🎬 Starting Simple Demo with Audio Fix...")
|
|
@@ -618,13 +650,13 @@ class ContentAutomation:
|
|
| 618 |
|
| 619 |
# Create simple color videos
|
| 620 |
clip1 = ColorClip(size=(640, 480), color=(255, 0, 0), duration=2)
|
| 621 |
-
clip1 = clip1.set_fps(
|
| 622 |
clip1_path = "/tmp/simple_red.mp4"
|
| 623 |
clip1.write_videofile(clip1_path, verbose=False, logger=None)
|
| 624 |
clip1.close()
|
| 625 |
|
| 626 |
clip2 = ColorClip(size=(640, 480), color=(0, 255, 0), duration=2)
|
| 627 |
-
clip2 = clip2.set_fps(
|
| 628 |
clip2_path = "/tmp/simple_green.mp4"
|
| 629 |
clip2.write_videofile(clip2_path, verbose=False, logger=None)
|
| 630 |
clip2.close()
|
|
|
|
| 21 |
from onscreebcta import add_cta
|
| 22 |
import numpy as np
|
| 23 |
from moviepy.editor import VideoFileClip, concatenate_videoclips
|
| 24 |
+
import math
|
| 25 |
import numpy as np
|
| 26 |
+
from file_downloader import FileDownloader
|
| 27 |
|
| 28 |
class ContentAutomation:
|
| 29 |
def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
|
|
|
|
| 32 |
self.api_clients = APIClients(config, self.data_holder)
|
| 33 |
self.video_renderer = VideoRenderer(config, self.data_holder)
|
| 34 |
self.asset_selector = AssetSelector(config, self.data_holder)
|
| 35 |
+
self.file_downloader = FileDownloader()
|
| 36 |
self.pipeline_start_time = None
|
| 37 |
|
| 38 |
async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
|
|
|
|
| 211 |
music_duration = audio_clip.duration - 0.5
|
| 212 |
|
| 213 |
|
| 214 |
+
beat_times = self.asset_selector.get_audio_beats(self.data_holder.visual_assets["background_music_url"])
|
| 215 |
+
if beat_times:
|
| 216 |
+
beat_times = self.extend_beats_to_audio_end(
|
| 217 |
+
beat_times,
|
| 218 |
+
self.data_holder.visual_assets["background_music_local"],
|
| 219 |
+
fps=25
|
| 220 |
+
)
|
| 221 |
method_used = "cached"
|
| 222 |
logger.info("Using cached beat times.")
|
| 223 |
break
|
|
|
|
| 231 |
if beat_times is None:
|
| 232 |
logger.warning("No beats detected, trying alternative method...")
|
| 233 |
try_next = True
|
| 234 |
+
|
| 235 |
+
music_duration = music_duration if music_duration < beat_times[-1] else beat_times[-1]
|
| 236 |
+
|
| 237 |
logger.info(f"Using '{method_used}' method: {len(beat_times)} beats detected")
|
| 238 |
logger.info(f"Music duration: {music_duration:.2f}s")
|
| 239 |
logger.info(f"Beat times: {beat_times}")
|
|
|
|
| 263 |
# IMPORTANT: Pass filtered_beat_times, not beat_intervals!
|
| 264 |
video_no_audio_path = await self.video_renderer.render_random_video(
|
| 265 |
beat_times,
|
| 266 |
+
music_duration,
|
| 267 |
+
min_clip_duration=0
|
| 268 |
)
|
| 269 |
|
| 270 |
if os.getenv("USE_1X1_RATIO", "false").lower() == "true":
|
|
|
|
| 331 |
logger.info("\n🎵 STEP 1: Background Music")
|
| 332 |
if try_next:
|
| 333 |
self.asset_selector.inc_audio_index()
|
| 334 |
+
|
| 335 |
self.data_holder.visual_assets["background_music_url"] = self.asset_selector.select_background_music()
|
| 336 |
await self._download_to_local(
|
| 337 |
self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
|
|
|
|
| 389 |
return
|
| 390 |
|
| 391 |
if os.getenv("INFLOXA", "false").lower() == "true":
|
|
|
|
| 392 |
download_path="testData/infloxa"
|
| 393 |
Path(download_path).mkdir(parents=True, exist_ok=True)
|
| 394 |
|
| 395 |
allowed_videos = []
|
| 396 |
|
| 397 |
+
videos = [
|
| 398 |
{
|
| 399 |
+
"url": url,
|
| 400 |
+
"local_path": str(local_path),
|
|
|
|
|
|
|
|
|
|
| 401 |
}
|
| 402 |
for _, row in self.asset_selector.video_library.iterrows()
|
| 403 |
+
if (
|
| 404 |
+
(url := str(row.get("VIDEO_LINK", "")).strip())
|
| 405 |
+
and (local_path := self.file_downloader.safe_download(url=url))
|
| 406 |
+
and utils.is_valid_video(local_path)
|
| 407 |
+
)
|
| 408 |
]
|
| 409 |
+
self.data_holder.visual_assets["all_videos"] = videos
|
| 410 |
+
|
| 411 |
else:
|
| 412 |
self.data_holder.visual_assets["all_videos"] = [
|
| 413 |
{"url": row.get("Video URL (No Audio)", "").strip()}
|
|
|
|
| 621 |
|
| 622 |
return health_status
|
| 623 |
|
| 624 |
+
def extend_beats_to_audio_end(
|
| 625 |
+
self,
|
| 626 |
+
beats: List[float],
|
| 627 |
+
audio_path: str,
|
| 628 |
+
fps: int = 25
|
| 629 |
+
) -> List[float]:
|
| 630 |
+
if not beats:
|
| 631 |
+
return beats
|
| 632 |
+
|
| 633 |
+
with AudioFileClip(audio_path) as audio:
|
| 634 |
+
duration = audio.duration
|
| 635 |
+
|
| 636 |
+
frame_duration = math.floor(duration * fps) / fps
|
| 637 |
+
|
| 638 |
+
if beats[-1] < frame_duration:
|
| 639 |
+
return beats + [frame_duration]
|
| 640 |
+
|
| 641 |
+
return beats
|
| 642 |
+
|
| 643 |
+
|
| 644 |
async def simple_demo(self):
|
| 645 |
"""Simple demo with proper audio handling"""
|
| 646 |
logger.info("🎬 Starting Simple Demo with Audio Fix...")
|
|
|
|
| 650 |
|
| 651 |
# Create simple color videos
|
| 652 |
clip1 = ColorClip(size=(640, 480), color=(255, 0, 0), duration=2)
|
| 653 |
+
clip1 = clip1.set_fps(25)
|
| 654 |
clip1_path = "/tmp/simple_red.mp4"
|
| 655 |
clip1.write_videofile(clip1_path, verbose=False, logger=None)
|
| 656 |
clip1.close()
|
| 657 |
|
| 658 |
clip2 = ColorClip(size=(640, 480), color=(0, 255, 0), duration=2)
|
| 659 |
+
clip2 = clip2.set_fps(25)
|
| 660 |
clip2_path = "/tmp/simple_green.mp4"
|
| 661 |
clip2.write_videofile(clip2_path, verbose=False, logger=None)
|
| 662 |
clip2.close()
|
src/file_downloader.py
CHANGED
|
@@ -25,13 +25,16 @@ class FileDownloader:
|
|
| 25 |
"https://www.googleapis.com/auth/drive.file",
|
| 26 |
]
|
| 27 |
|
| 28 |
-
def __init__(self):
|
| 29 |
logger.info("Initializing FileDownloader")
|
| 30 |
|
| 31 |
# -------- Temp directory handling --------
|
| 32 |
self.temp_dir = self._init_temp_dir()
|
| 33 |
logger.info("Using temp directory: %s", self.temp_dir)
|
| 34 |
|
|
|
|
|
|
|
|
|
|
| 35 |
# Lazy initialization for clients
|
| 36 |
self._drive_service = None
|
| 37 |
self._storage_client = None
|
|
@@ -48,6 +51,8 @@ class FileDownloader:
|
|
| 48 |
if not base_dir:
|
| 49 |
raise RuntimeError("TEST_DATA_DIRECTORY must be set when TEST_AUTOMATION=true")
|
| 50 |
|
|
|
|
|
|
|
| 51 |
path = Path(base_dir) / "downloads"
|
| 52 |
path.mkdir(parents=True, exist_ok=True)
|
| 53 |
return path
|
|
@@ -150,39 +155,102 @@ class FileDownloader:
|
|
| 150 |
|
| 151 |
Returns:
|
| 152 |
Path to the downloaded file
|
|
|
|
|
|
|
|
|
|
| 153 |
"""
|
| 154 |
logger.info("Downloading from Google Drive | file_id=%s", file_id)
|
| 155 |
|
| 156 |
service = self._get_drive_service()
|
| 157 |
|
| 158 |
-
# Get file metadata to determine filename
|
|
|
|
| 159 |
if filename is None and output_path is None:
|
| 160 |
try:
|
| 161 |
-
file_metadata = service.files().get(fileId=file_id, fields="name").execute()
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
except Exception as e:
|
| 164 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
filename = f"drive_file_{file_id}"
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
# Determine output path
|
| 168 |
if output_path is None:
|
| 169 |
output_path = self.temp_dir / filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
output_path.parent.mkdir(parents=True, exist_ok=True)
|
| 172 |
|
| 173 |
# Download file
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
def download_from_gcs(
|
| 188 |
self,
|
|
@@ -210,6 +278,11 @@ class FileDownloader:
|
|
| 210 |
filename = Path(blob_name).name
|
| 211 |
output_path = self.temp_dir / filename
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
output_path.parent.mkdir(parents=True, exist_ok=True)
|
| 214 |
|
| 215 |
if public:
|
|
@@ -233,6 +306,17 @@ class FileDownloader:
|
|
| 233 |
logger.info("Downloaded from GCS to %s", output_path)
|
| 234 |
return output_path
|
| 235 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
def download(
|
| 237 |
self,
|
| 238 |
url: str,
|
|
@@ -333,6 +417,7 @@ def main():
|
|
| 333 |
Examples controlled via env variables:
|
| 334 |
- DOWNLOAD_URL: Single file to download
|
| 335 |
- DOWNLOAD_URLS: Comma-separated list of URLs to download
|
|
|
|
| 336 |
"""
|
| 337 |
|
| 338 |
try:
|
|
@@ -344,15 +429,13 @@ def main():
|
|
| 344 |
downloader = FileDownloader()
|
| 345 |
|
| 346 |
# ------------------ EXAMPLE 1: SINGLE FILE DOWNLOAD ------------------
|
| 347 |
-
download_url = "https://
|
| 348 |
if download_url:
|
| 349 |
logger.info("Downloading single file")
|
| 350 |
-
output_path = os.getenv("OUTPUT_PATH")
|
| 351 |
|
| 352 |
try:
|
| 353 |
downloaded_file = downloader.download(
|
| 354 |
-
url=download_url
|
| 355 |
-
output_path=output_path,
|
| 356 |
)
|
| 357 |
logger.info("File downloaded to: %s", downloaded_file)
|
| 358 |
except Exception as e:
|
|
@@ -364,12 +447,10 @@ def main():
|
|
| 364 |
if download_urls:
|
| 365 |
logger.info("Downloading multiple files")
|
| 366 |
urls = [url.strip() for url in download_urls.split(",")]
|
| 367 |
-
output_dir = os.getenv("OUTPUT_DIR")
|
| 368 |
|
| 369 |
try:
|
| 370 |
downloaded_files = downloader.download_multiple(
|
| 371 |
-
urls=urls
|
| 372 |
-
output_dir=output_dir,
|
| 373 |
)
|
| 374 |
logger.info("Downloaded %d files:", len(downloaded_files))
|
| 375 |
for path in downloaded_files:
|
|
@@ -385,6 +466,7 @@ def main():
|
|
| 385 |
logger.info(" DOWNLOAD_URL='https://drive.google.com/...' python file_downloader.py")
|
| 386 |
logger.info(" DOWNLOAD_URL='gs://bucket/path/file' python file_downloader.py")
|
| 387 |
logger.info(" DOWNLOAD_URLS='url1,url2,url3' python file_downloader.py")
|
|
|
|
| 388 |
|
| 389 |
|
| 390 |
if __name__ == "__main__":
|
|
|
|
| 25 |
"https://www.googleapis.com/auth/drive.file",
|
| 26 |
]
|
| 27 |
|
| 28 |
+
def __init__(self, skip_existing: bool = True):
|
| 29 |
logger.info("Initializing FileDownloader")
|
| 30 |
|
| 31 |
# -------- Temp directory handling --------
|
| 32 |
self.temp_dir = self._init_temp_dir()
|
| 33 |
logger.info("Using temp directory: %s", self.temp_dir)
|
| 34 |
|
| 35 |
+
# Control whether to skip existing files
|
| 36 |
+
self.skip_existing = skip_existing
|
| 37 |
+
|
| 38 |
# Lazy initialization for clients
|
| 39 |
self._drive_service = None
|
| 40 |
self._storage_client = None
|
|
|
|
| 51 |
if not base_dir:
|
| 52 |
raise RuntimeError("TEST_DATA_DIRECTORY must be set when TEST_AUTOMATION=true")
|
| 53 |
|
| 54 |
+
Path(base_dir).mkdir(parents=True, exist_ok=True)
|
| 55 |
+
|
| 56 |
path = Path(base_dir) / "downloads"
|
| 57 |
path.mkdir(parents=True, exist_ok=True)
|
| 58 |
return path
|
|
|
|
| 155 |
|
| 156 |
Returns:
|
| 157 |
Path to the downloaded file
|
| 158 |
+
|
| 159 |
+
Raises:
|
| 160 |
+
Exception: If file cannot be accessed or downloaded
|
| 161 |
"""
|
| 162 |
logger.info("Downloading from Google Drive | file_id=%s", file_id)
|
| 163 |
|
| 164 |
service = self._get_drive_service()
|
| 165 |
|
| 166 |
+
# Get file metadata to determine filename with extension
|
| 167 |
+
metadata_error = None
|
| 168 |
if filename is None and output_path is None:
|
| 169 |
try:
|
| 170 |
+
file_metadata = service.files().get(fileId=file_id, fields="name,mimeType,fileExtension").execute()
|
| 171 |
+
|
| 172 |
+
# Use the original filename from Drive
|
| 173 |
+
filename = file_metadata.get("name")
|
| 174 |
+
logger.info("Retrieved filename from Drive: %s", filename)
|
| 175 |
+
|
| 176 |
+
# If no name, construct one with proper extension
|
| 177 |
+
if not filename:
|
| 178 |
+
file_extension = file_metadata.get("fileExtension", "")
|
| 179 |
+
mime_type = file_metadata.get("mimeType", "")
|
| 180 |
+
|
| 181 |
+
logger.info("No filename found, mimeType: %s, fileExtension: %s", mime_type, file_extension)
|
| 182 |
+
|
| 183 |
+
# Map common MIME types to extensions if fileExtension not available
|
| 184 |
+
mime_to_ext = {
|
| 185 |
+
"application/pdf": "pdf",
|
| 186 |
+
"image/jpeg": "jpg",
|
| 187 |
+
"image/png": "png",
|
| 188 |
+
"image/gif": "gif",
|
| 189 |
+
"text/plain": "txt",
|
| 190 |
+
"application/json": "json",
|
| 191 |
+
"text/csv": "csv",
|
| 192 |
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
|
| 193 |
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
|
| 194 |
+
"application/zip": "zip",
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
if not file_extension and mime_type in mime_to_ext:
|
| 198 |
+
file_extension = mime_to_ext[mime_type]
|
| 199 |
+
|
| 200 |
+
filename = f"drive_file_{file_id}"
|
| 201 |
+
if file_extension:
|
| 202 |
+
filename = f"{filename}.{file_extension}"
|
| 203 |
+
|
| 204 |
except Exception as e:
|
| 205 |
+
logger.error("Could not fetch file metadata: %s", e)
|
| 206 |
+
metadata_error = e
|
| 207 |
+
# Check if it's a 404 or permission error - these are fatal
|
| 208 |
+
if hasattr(e, 'resp') and hasattr(e.resp, 'status'):
|
| 209 |
+
if e.resp.status in [403, 404]:
|
| 210 |
+
raise Exception(f"Cannot access file {file_id}: {str(e)}") from e
|
| 211 |
filename = f"drive_file_{file_id}"
|
| 212 |
|
| 213 |
+
# If still no filename, use default
|
| 214 |
+
if filename is None:
|
| 215 |
+
filename = f"drive_file_{file_id}"
|
| 216 |
+
|
| 217 |
# Determine output path
|
| 218 |
if output_path is None:
|
| 219 |
output_path = self.temp_dir / filename
|
| 220 |
+
|
| 221 |
+
logger.info("Final output path: %s", output_path)
|
| 222 |
+
|
| 223 |
+
# Check if file already exists
|
| 224 |
+
if self.skip_existing and output_path.exists():
|
| 225 |
+
logger.info("File already exists, skipping download: %s", output_path)
|
| 226 |
+
return output_path
|
| 227 |
|
| 228 |
output_path.parent.mkdir(parents=True, exist_ok=True)
|
| 229 |
|
| 230 |
# Download file
|
| 231 |
+
try:
|
| 232 |
+
request = service.files().get_media(fileId=file_id)
|
| 233 |
+
|
| 234 |
+
with output_path.open("wb") as fh:
|
| 235 |
+
downloader = MediaIoBaseDownload(fh, request)
|
| 236 |
+
done = False
|
| 237 |
+
while not done:
|
| 238 |
+
status, done = downloader.next_chunk()
|
| 239 |
+
if status:
|
| 240 |
+
logger.debug("Download progress: %d%%", int(status.progress() * 100))
|
| 241 |
+
|
| 242 |
+
logger.info("Downloaded from Drive to %s", output_path)
|
| 243 |
+
return output_path
|
| 244 |
+
|
| 245 |
+
except Exception as e:
|
| 246 |
+
# Clean up failed download
|
| 247 |
+
if output_path.exists():
|
| 248 |
+
output_path.unlink()
|
| 249 |
+
logger.info("Cleaned up failed download: %s", output_path)
|
| 250 |
+
|
| 251 |
+
error_msg = f"Failed to download file {file_id}: {str(e)}"
|
| 252 |
+
logger.error(error_msg)
|
| 253 |
+
raise Exception(error_msg) from e
|
| 254 |
|
| 255 |
def download_from_gcs(
|
| 256 |
self,
|
|
|
|
| 278 |
filename = Path(blob_name).name
|
| 279 |
output_path = self.temp_dir / filename
|
| 280 |
|
| 281 |
+
# Check if file already exists
|
| 282 |
+
if self.skip_existing and output_path.exists():
|
| 283 |
+
logger.info("File already exists, skipping download: %s", output_path)
|
| 284 |
+
return output_path
|
| 285 |
+
|
| 286 |
output_path.parent.mkdir(parents=True, exist_ok=True)
|
| 287 |
|
| 288 |
if public:
|
|
|
|
| 306 |
logger.info("Downloaded from GCS to %s", output_path)
|
| 307 |
return output_path
|
| 308 |
|
| 309 |
+
def safe_download(self, url: str, output_path: Path | None = None) -> Path | None:
|
| 310 |
+
"""
|
| 311 |
+
Safe download wrapper to handle exceptions.
|
| 312 |
+
Returns None if download fails.
|
| 313 |
+
"""
|
| 314 |
+
try:
|
| 315 |
+
return self.download(url, output_path=output_path)
|
| 316 |
+
except Exception as e:
|
| 317 |
+
logger.error("Download failed for %s: %s", url, e)
|
| 318 |
+
return None
|
| 319 |
+
|
| 320 |
def download(
|
| 321 |
self,
|
| 322 |
url: str,
|
|
|
|
| 417 |
Examples controlled via env variables:
|
| 418 |
- DOWNLOAD_URL: Single file to download
|
| 419 |
- DOWNLOAD_URLS: Comma-separated list of URLs to download
|
| 420 |
+
- SKIP_EXISTING: Set to 'false' to force re-download (default: 'true')
|
| 421 |
"""
|
| 422 |
|
| 423 |
try:
|
|
|
|
| 429 |
downloader = FileDownloader()
|
| 430 |
|
| 431 |
# ------------------ EXAMPLE 1: SINGLE FILE DOWNLOAD ------------------
|
| 432 |
+
download_url = "https://drive.google.com/file/d/1jXqLjEDrFzR9858po7BenqKdx3cIm-4Q/view"
|
| 433 |
if download_url:
|
| 434 |
logger.info("Downloading single file")
|
|
|
|
| 435 |
|
| 436 |
try:
|
| 437 |
downloaded_file = downloader.download(
|
| 438 |
+
url=download_url
|
|
|
|
| 439 |
)
|
| 440 |
logger.info("File downloaded to: %s", downloaded_file)
|
| 441 |
except Exception as e:
|
|
|
|
| 447 |
if download_urls:
|
| 448 |
logger.info("Downloading multiple files")
|
| 449 |
urls = [url.strip() for url in download_urls.split(",")]
|
|
|
|
| 450 |
|
| 451 |
try:
|
| 452 |
downloaded_files = downloader.download_multiple(
|
| 453 |
+
urls=urls
|
|
|
|
| 454 |
)
|
| 455 |
logger.info("Downloaded %d files:", len(downloaded_files))
|
| 456 |
for path in downloaded_files:
|
|
|
|
| 466 |
logger.info(" DOWNLOAD_URL='https://drive.google.com/...' python file_downloader.py")
|
| 467 |
logger.info(" DOWNLOAD_URL='gs://bucket/path/file' python file_downloader.py")
|
| 468 |
logger.info(" DOWNLOAD_URLS='url1,url2,url3' python file_downloader.py")
|
| 469 |
+
logger.info(" SKIP_EXISTING='false' python file_downloader.py # Force re-download")
|
| 470 |
|
| 471 |
|
| 472 |
if __name__ == "__main__":
|
src/google_sheet_reader.py
CHANGED
|
@@ -3,7 +3,6 @@ import csv
|
|
| 3 |
import tempfile
|
| 4 |
from pathlib import Path
|
| 5 |
import pandas as pd
|
| 6 |
-
import numpy as np
|
| 7 |
|
| 8 |
import gspread
|
| 9 |
from google.auth import default
|
|
@@ -354,49 +353,6 @@ class GoogleSheetReader:
|
|
| 354 |
logger.info("CSV export completed | rows=%d", len(rows) - 1)
|
| 355 |
return output_path
|
| 356 |
|
| 357 |
-
def clean_and_drop_empty(
|
| 358 |
-
df: pd.DataFrame,
|
| 359 |
-
column: str,
|
| 360 |
-
extra_nulls: list[str] | None = None,
|
| 361 |
-
) -> pd.DataFrame:
|
| 362 |
-
"""
|
| 363 |
-
Normalize Google Sheets empty values and drop rows
|
| 364 |
-
where `column` is effectively empty.
|
| 365 |
-
|
| 366 |
-
Handles:
|
| 367 |
-
- NaN
|
| 368 |
-
- ""
|
| 369 |
-
- " "
|
| 370 |
-
- "nan", "None", "NULL", "N/A"
|
| 371 |
-
|
| 372 |
-
Args:
|
| 373 |
-
df: Input DataFrame
|
| 374 |
-
column: Column to validate (e.g. "VIDEO_LINK")
|
| 375 |
-
extra_nulls: Optional extra string values to treat as null
|
| 376 |
-
|
| 377 |
-
Returns:
|
| 378 |
-
Cleaned DataFrame with valid rows only
|
| 379 |
-
"""
|
| 380 |
-
|
| 381 |
-
if column not in df.columns:
|
| 382 |
-
raise KeyError(f"Column '{column}' not found in DataFrame")
|
| 383 |
-
|
| 384 |
-
null_values = ["", "nan", "none", "null", "n/a"]
|
| 385 |
-
if extra_nulls:
|
| 386 |
-
null_values.extend([v.lower() for v in extra_nulls])
|
| 387 |
-
|
| 388 |
-
df = df.copy()
|
| 389 |
-
|
| 390 |
-
df[column] = (
|
| 391 |
-
df[column]
|
| 392 |
-
.astype(str)
|
| 393 |
-
.str.strip()
|
| 394 |
-
.str.lower()
|
| 395 |
-
.replace(null_values, np.nan)
|
| 396 |
-
)
|
| 397 |
-
|
| 398 |
-
return df.dropna(subset=[column])
|
| 399 |
-
|
| 400 |
|
| 401 |
# ------------------ CLI entrypoint ------------------
|
| 402 |
|
|
|
|
| 3 |
import tempfile
|
| 4 |
from pathlib import Path
|
| 5 |
import pandas as pd
|
|
|
|
| 6 |
|
| 7 |
import gspread
|
| 8 |
from google.auth import default
|
|
|
|
| 353 |
logger.info("CSV export completed | rows=%d", len(rows) - 1)
|
| 354 |
return output_path
|
| 355 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
|
| 357 |
# ------------------ CLI entrypoint ------------------
|
| 358 |
|
src/instagram_publisher.py
CHANGED
|
@@ -13,8 +13,8 @@ import pandas as pd
|
|
| 13 |
from datetime import datetime
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
from pathlib import Path
|
|
|
|
| 16 |
from main import (
|
| 17 |
-
load_configuration,
|
| 18 |
load_content_strategies
|
| 19 |
)
|
| 20 |
from api_clients import APIClients
|
|
|
|
| 13 |
from datetime import datetime
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
from pathlib import Path
|
| 16 |
+
from load_config import load_configuration
|
| 17 |
from main import (
|
|
|
|
| 18 |
load_content_strategies
|
| 19 |
)
|
| 20 |
from api_clients import APIClients
|
src/load_config.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Dict
|
| 5 |
+
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
from google.auth import default
|
| 8 |
+
|
| 9 |
+
from utils import logger
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def load_configuration() -> Dict:
    """
    Load configuration from environment variables with validation.

    The GCP project ID is resolved by trying the following sources in order
    and stopping at the first one that yields a value:

    1. Service account JSON named by GOOGLE_GHA_CREDS_PATH,
       CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE or GOOGLE_APPLICATION_CREDENTIALS
       (the variable may hold a file path or the raw JSON text).
    2. google.auth.default(), attempted when WORKLOAD_IDENTITY_PROVIDER and
       SERVICE_ACCOUNT_EMAIL are both set.
    3. google.auth.default() (Application Default Credentials).
    4. Project environment variables (GOOGLE_CLOUD_PROJECT, GCP_PROJECT,
       GCLOUD_PROJECT, CLOUDSDK_CORE_PROJECT, CLOUDSDK_PROJECT, GCP_PROJECT_ID).
    5. `gcloud config get-value project`.

    Returns:
        Dict with the keys "gemini_api_key", "runwayml_api_key",
        "gcs_bucket_name", "gcp_project_id", "default_voice" and
        "auth_method" (the name of the source that supplied the project ID).

    Raises:
        ValueError: If any of gemini_api_key, runwayml_api_key,
            gcs_bucket_name or gcp_project_id could not be resolved.
    """
    load_dotenv()

    gcp_project_id = None
    creds_data = None
    auth_method = None

    # Try multiple possible credential variables (CI/CD environments)
    gcp_creds_path = (
        os.getenv("GOOGLE_GHA_CREDS_PATH")
        or os.getenv("CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE")
        or os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    )

    # Method 1: Try to load from service account JSON file/string
    if gcp_creds_path:
        try:
            # Guarantees the variable exists in os.environ (empty string when unset).
            os.environ["MY_TEMP_GCS_BUCKET"] = os.getenv("MY_TEMP_GCS_BUCKET", "")

            # os.path.exists() returns False instead of raising when the value is
            # not a usable path (e.g. raw JSON that is too long for a file name),
            # so the raw-JSON fallback below stays reachable.
            if os.path.exists(gcp_creds_path):
                logger.info(f"Loading GCP credentials from file: {gcp_creds_path}")
                with open(gcp_creds_path, "r") as f:
                    creds_data = json.load(f)
                auth_method = "service_account_file"
            else:
                # Try to parse as raw JSON string
                logger.info("Attempting to parse GCP credentials as JSON string")
                creds_data = json.loads(gcp_creds_path)
                auth_method = "service_account_json"

            if creds_data:
                gcp_project_id = creds_data.get("project_id")
                # Only report success when the credentials actually carry a project ID.
                if gcp_project_id:
                    logger.info(f"✓ GCP Project ID loaded from service account: {gcp_project_id}")
                else:
                    logger.debug("Service account credentials contain no project_id")

        except json.JSONDecodeError as e:
            logger.warning(f"Could not parse GCP credentials as JSON. Error: {e}")
        except FileNotFoundError as e:
            logger.warning(f"GCP credentials file not found: {e}")
        except Exception as e:
            # Best-effort: fall through to the remaining methods.
            logger.error(f"Unexpected error loading GCP credentials: {e}")

    # Method 2: Check for Workload Identity Federation (GitHub Actions)
    if not gcp_project_id:
        wif_provider = os.getenv("WORKLOAD_IDENTITY_PROVIDER")
        wif_service_account = os.getenv("SERVICE_ACCOUNT_EMAIL")

        if wif_provider and wif_service_account:
            try:
                logger.info("Attempting to load project from Workload Identity Federation")
                # NOTE(review): relies on google.auth.default() discovering the
                # federated credentials on its own - confirm for the CI setup in use.
                _, project = default()

                if project:
                    gcp_project_id = project
                    auth_method = "workload_identity_federation"
                    logger.info(f"✓ GCP Project ID loaded from WIF: {gcp_project_id}")
                else:
                    logger.debug("WIF credentials found but no project set")
            except Exception as e:
                logger.debug(f"Could not load from WIF: {e}")
        else:
            logger.debug("WIF environment variables not found")

    # Method 3: Try to get project from Application Default Credentials (ADC)
    if not gcp_project_id:
        try:
            logger.info("Attempting to load project from Application Default Credentials (ADC)")
            _, project = default()

            if project:
                gcp_project_id = project
                auth_method = "adc"
                logger.info(f"✓ GCP Project ID loaded from ADC: {gcp_project_id}")
            else:
                logger.debug("ADC credentials found but no project set")
        except Exception as e:
            logger.debug(f"Could not load from ADC: {e}")

    # Method 4: Try environment variables
    if not gcp_project_id:
        gcp_project_id = (
            os.getenv("GOOGLE_CLOUD_PROJECT")
            or os.getenv("GCP_PROJECT")
            or os.getenv("GCLOUD_PROJECT")
            or os.getenv("CLOUDSDK_CORE_PROJECT")
            or os.getenv("CLOUDSDK_PROJECT")
            or os.getenv("GCP_PROJECT_ID")
        )
        if gcp_project_id:
            auth_method = "environment_variable"
            logger.info(f"✓ GCP Project ID loaded from environment: {gcp_project_id}")

    # Method 5: Try gcloud config as last resort
    if not gcp_project_id:
        try:
            import subprocess

            result = subprocess.run(
                ["gcloud", "config", "get-value", "project"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode == 0:
                gcloud_project = result.stdout.strip()
                # gcloud prints "(unset)" when no project is configured.
                if gcloud_project and gcloud_project != "(unset)":
                    gcp_project_id = gcloud_project
                    auth_method = "gcloud_config"
                    logger.info(f"✓ GCP Project ID loaded from gcloud config: {gcp_project_id}")
                else:
                    gcp_project_id = None
        except Exception as e:
            # gcloud may be missing or slow; this is only a last-resort lookup.
            logger.debug(f"Could not load from gcloud config: {e}")

    # Build configuration dictionary
    config = {
        "gemini_api_key": os.getenv("GEMINI_API_KEY"),
        "runwayml_api_key": os.getenv("RUNWAYML_API_KEY"),
        "gcs_bucket_name": os.getenv("GCS_BUCKET_NAME"),
        "gcp_project_id": gcp_project_id,
        "default_voice": os.getenv("DEFAULT_VOICE", "en-US-Neural2-F"),
        "auth_method": auth_method,  # Track how project was loaded
    }

    # Validate required keys
    required_keys = ["gemini_api_key", "runwayml_api_key", "gcs_bucket_name", "gcp_project_id"]
    missing_keys = [key for key in required_keys if not config.get(key)]

    if missing_keys:
        logger.error(f"Missing required configuration: {', '.join(missing_keys)}")
        logger.error("Configuration loading attempted via:")
        logger.error(" 1. Service account JSON file/string")
        logger.error(" 2. Workload Identity Federation (GitHub Actions)")
        logger.error(" 3. Application Default Credentials (ADC)")
        logger.error(" 4. Environment variables")
        logger.error(" 5. gcloud config")
        logger.error("")
        logger.error("Available environment variables:")
        # These variables may hold the raw service-account JSON (private key
        # included), so only their presence is reported, never their value.
        credential_vars = (
            "GOOGLE_GHA_CREDS_PATH",
            "CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE",
            "GOOGLE_APPLICATION_CREDENTIALS",
        )
        for key in credential_vars:
            status = "SET (value hidden)" if os.getenv(key) else "NOT SET"
            logger.error(f" {key}: {status}")
        for key in [
            "WORKLOAD_IDENTITY_PROVIDER",
            "SERVICE_ACCOUNT_EMAIL",
            "GOOGLE_CLOUD_PROJECT",
            "GCP_PROJECT",
            "GCP_PROJECT_ID",
        ]:
            logger.error(f" {key}: {os.getenv(key, 'NOT SET')}")

        logger.error("")
        logger.error("For local development with ADC:")
        logger.error(" 1. Run: gcloud config set project YOUR_PROJECT_ID")
        logger.error(" 2. Or set: export GCP_PROJECT_ID=YOUR_PROJECT_ID")
        logger.error(" 3. Ensure ADC is set up: gcloud auth application-default login")
        logger.error("")
        logger.error("For GitHub Actions with Workload Identity Federation:")
        logger.error(" 1. Set WORKLOAD_IDENTITY_PROVIDER in your workflow")
        logger.error(" 2. Set SERVICE_ACCOUNT_EMAIL in your workflow")
        logger.error(" 3. Or set GCP_PROJECT_ID directly in secrets")

        raise ValueError(
            f"Missing required configuration: {', '.join(missing_keys)}.\n"
            f"Please check your .env file, gcloud config, or GitHub secrets."
        )

    logger.info(f"✓ Configuration loaded successfully (auth method: {auth_method})")
    return config
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def get_gcp_project_id() -> str:
    """
    Return only the GCP project ID.

    Runs the full load_configuration() resolution and validation, so it
    raises ValueError under the same conditions as that function.
    """
    return load_configuration()["gcp_project_id"]
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# ------------------ Usage Examples ------------------
|
| 199 |
+
|
| 200 |
+
if __name__ == "__main__":
    # Manual smoke test: resolve the configuration and print it with secrets masked.
    # load_configuration() already calls load_dotenv(), so no separate call is needed.
    try:
        config = load_configuration()
    except ValueError as e:
        print(f"\n✗ Configuration error:\n{e}")
    else:
        print("\n✓ Configuration loaded successfully!\n")
        print("Configuration:")
        for key, value in config.items():
            if "key" in key.lower() and value:
                # Mask API keys: reveal at most the last 4 characters, and nothing
                # at all for short values that a prefix/suffix would fully disclose.
                secret = str(value)
                tail = secret[-4:] if len(secret) > 8 else ""
                print(f" {key}: ********{tail}")
            else:
                print(f" {key}: {value}")
|
src/main.py
CHANGED
|
@@ -10,13 +10,11 @@ import argparse
|
|
| 10 |
import json
|
| 11 |
from pathlib import Path
|
| 12 |
from typing import Dict, Optional
|
| 13 |
-
from dotenv import load_dotenv
|
| 14 |
from automation import ContentAutomation
|
| 15 |
from utils import logger
|
| 16 |
import pandas as pd
|
| 17 |
-
import random
|
| 18 |
import warnings
|
| 19 |
-
import
|
| 20 |
|
| 21 |
|
| 22 |
def load_content_strategies(csv_file: Optional[str] = None) -> pd.DataFrame:
|
|
@@ -112,91 +110,6 @@ def select_random_strategy(df: pd.DataFrame, index: Optional[int] = None) -> Dic
|
|
| 112 |
"brand": "Somira",
|
| 113 |
}
|
| 114 |
|
| 115 |
-
|
| 116 |
-
def load_configuration() -> Dict:
|
| 117 |
-
"""
|
| 118 |
-
Load configuration from environment variables with validation.
|
| 119 |
-
Automatically extracts GCP project ID from service account JSON,
|
| 120 |
-
whether the env var is a path or the raw JSON content.
|
| 121 |
-
"""
|
| 122 |
-
load_dotenv()
|
| 123 |
-
|
| 124 |
-
# Try multiple possible credential paths in GitHub Actions
|
| 125 |
-
gcp_creds_path = (
|
| 126 |
-
os.getenv("GOOGLE_GHA_CREDS_PATH") or
|
| 127 |
-
os.getenv("CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE") or
|
| 128 |
-
os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
|
| 129 |
-
)
|
| 130 |
-
|
| 131 |
-
gcp_project_id = None
|
| 132 |
-
creds_data = None
|
| 133 |
-
|
| 134 |
-
if not gcp_creds_path:
|
| 135 |
-
logger.warning("No GCP credentials path found in environment variables.")
|
| 136 |
-
else:
|
| 137 |
-
try:
|
| 138 |
-
os.environ["VERTEX_TEMP_AI_CREDENTIALS_JSON"] = os.getenv("VERTEX_AI_CREDENTIALS_JSON", "")
|
| 139 |
-
os.environ["MY_TEMP_GCS_BUCKET"] = os.getenv("MY_TEMP_GCS_BUCKET", "")
|
| 140 |
-
# Check if it's a file path that exists
|
| 141 |
-
if Path(gcp_creds_path).exists():
|
| 142 |
-
logger.info(f"Loading GCP credentials from file: {gcp_creds_path}")
|
| 143 |
-
with open(gcp_creds_path, "r") as f:
|
| 144 |
-
creds_data = json.load(f)
|
| 145 |
-
else:
|
| 146 |
-
# Try to parse as raw JSON string
|
| 147 |
-
logger.info("Attempting to parse GCP credentials as JSON string")
|
| 148 |
-
creds_data = json.loads(gcp_creds_path)
|
| 149 |
-
|
| 150 |
-
if creds_data:
|
| 151 |
-
gcp_project_id = creds_data.get("project_id")
|
| 152 |
-
logger.info(f"✓ GCP Project ID loaded: {gcp_project_id}")
|
| 153 |
-
|
| 154 |
-
except json.JSONDecodeError as e:
|
| 155 |
-
logger.warning(f"Could not parse GCP credentials as JSON. Error: {e}")
|
| 156 |
-
except FileNotFoundError as e:
|
| 157 |
-
logger.warning(f"GCP credentials file not found: {e}")
|
| 158 |
-
except Exception as e:
|
| 159 |
-
logger.error(f"Unexpected error loading GCP credentials: {e}")
|
| 160 |
-
|
| 161 |
-
# Fallback: try to get project_id from other environment variables
|
| 162 |
-
if not gcp_project_id:
|
| 163 |
-
gcp_project_id = (
|
| 164 |
-
os.getenv("GOOGLE_CLOUD_PROJECT") or
|
| 165 |
-
os.getenv("GCP_PROJECT") or
|
| 166 |
-
os.getenv("GCLOUD_PROJECT") or
|
| 167 |
-
os.getenv("CLOUDSDK_CORE_PROJECT") or
|
| 168 |
-
os.getenv("CLOUDSDK_PROJECT") or
|
| 169 |
-
os.getenv("GCP_PROJECT_ID")
|
| 170 |
-
)
|
| 171 |
-
if gcp_project_id:
|
| 172 |
-
logger.info(f"✓ GCP Project ID loaded from environment: {gcp_project_id}")
|
| 173 |
-
|
| 174 |
-
config = {
|
| 175 |
-
"gemini_api_key": os.getenv("GEMINI_API_KEY"),
|
| 176 |
-
"runwayml_api_key": os.getenv("RUNWAYML_API_KEY"),
|
| 177 |
-
"gcs_bucket_name": os.getenv("GCS_BUCKET_NAME"),
|
| 178 |
-
"gcp_project_id": gcp_project_id,
|
| 179 |
-
"default_voice": os.getenv("DEFAULT_VOICE", "en-US-Neural2-F"),
|
| 180 |
-
}
|
| 181 |
-
|
| 182 |
-
# Validate required keys
|
| 183 |
-
required_keys = ["gemini_api_key", "runwayml_api_key", "gcs_bucket_name", "gcp_project_id"]
|
| 184 |
-
missing_keys = [key for key in required_keys if not config.get(key)]
|
| 185 |
-
|
| 186 |
-
if missing_keys:
|
| 187 |
-
logger.error(f"Missing required configuration: {', '.join(missing_keys)}")
|
| 188 |
-
logger.error("Available environment variables:")
|
| 189 |
-
for key in ["GOOGLE_GHA_CREDS_PATH", "CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE",
|
| 190 |
-
"GOOGLE_APPLICATION_CREDENTIALS", "GOOGLE_CLOUD_PROJECT", "GCP_PROJECT"]:
|
| 191 |
-
logger.error(f" {key}: {os.getenv(key, 'NOT SET')}")
|
| 192 |
-
raise ValueError(
|
| 193 |
-
f"Missing required configuration: {', '.join(missing_keys)}.\n"
|
| 194 |
-
f"Please check your .env file or GitHub secrets."
|
| 195 |
-
)
|
| 196 |
-
|
| 197 |
-
return config
|
| 198 |
-
|
| 199 |
-
|
| 200 |
async def run_pipeline(
|
| 201 |
automation: ContentAutomation, content_strategy: Dict, tts_script: str, output_dir: Optional[str] = None
|
| 202 |
) -> Dict:
|
|
|
|
| 10 |
import json
|
| 11 |
from pathlib import Path
|
| 12 |
from typing import Dict, Optional
|
|
|
|
| 13 |
from automation import ContentAutomation
|
| 14 |
from utils import logger
|
| 15 |
import pandas as pd
|
|
|
|
| 16 |
import warnings
|
| 17 |
+
from load_config import load_configuration
|
| 18 |
|
| 19 |
|
| 20 |
def load_content_strategies(csv_file: Optional[str] = None) -> pd.DataFrame:
|
|
|
|
| 110 |
"brand": "Somira",
|
| 111 |
}
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
async def run_pipeline(
|
| 114 |
automation: ContentAutomation, content_strategy: Dict, tts_script: str, output_dir: Optional[str] = None
|
| 115 |
) -> Dict:
|
src/onscreebcta.py
CHANGED
|
@@ -332,7 +332,7 @@ def add_cta(input_video_path: str, cta_text: str, above_caption: bool = True, pa
|
|
| 332 |
output_video_path,
|
| 333 |
codec="libx264",
|
| 334 |
audio_codec="aac",
|
| 335 |
-
fps=
|
| 336 |
)
|
| 337 |
|
| 338 |
base_video.close()
|
|
|
|
| 332 |
output_video_path,
|
| 333 |
codec="libx264",
|
| 334 |
audio_codec="aac",
|
| 335 |
+
fps=25,
|
| 336 |
)
|
| 337 |
|
| 338 |
base_video.close()
|
src/process_csv.py
CHANGED
|
@@ -3,8 +3,8 @@ import csv
|
|
| 3 |
import subprocess
|
| 4 |
import os, time
|
| 5 |
from pathlib import Path
|
|
|
|
| 6 |
from main import (
|
| 7 |
-
load_configuration,
|
| 8 |
load_content_strategies,
|
| 9 |
run_pipeline,
|
| 10 |
)
|
|
|
|
| 3 |
import subprocess
|
| 4 |
import os, time
|
| 5 |
from pathlib import Path
|
| 6 |
+
from load_config import load_configuration
|
| 7 |
from main import (
|
|
|
|
| 8 |
load_content_strategies,
|
| 9 |
run_pipeline,
|
| 10 |
)
|
src/publisher.py
CHANGED
|
@@ -13,7 +13,8 @@ import time
|
|
| 13 |
from pathlib import Path
|
| 14 |
import hashlib
|
| 15 |
|
| 16 |
-
from
|
|
|
|
| 17 |
from api_clients import APIClients
|
| 18 |
|
| 19 |
# Import individual platform publishers
|
|
|
|
| 13 |
from pathlib import Path
|
| 14 |
import hashlib
|
| 15 |
|
| 16 |
+
from load_config import load_configuration
|
| 17 |
+
from main import load_content_strategies
|
| 18 |
from api_clients import APIClients
|
| 19 |
|
| 20 |
# Import individual platform publishers
|
src/text_clip.py
CHANGED
|
@@ -760,7 +760,7 @@ if __name__ == "__main__":
|
|
| 760 |
background = ColorClip(size=(VIDEO_WIDTH, VIDEO_HEIGHT), color=(255, 255, 255), duration=total_duration + 1.0)
|
| 761 |
final_video = CompositeVideoClip([background] + text_clips, size=(VIDEO_WIDTH, VIDEO_HEIGHT))
|
| 762 |
print(f" 🎥 Rendering to: {output_filename}")
|
| 763 |
-
final_video.write_videofile(f"{output_filename}", fps=
|
| 764 |
print(f" ✨ Done!\n")
|
| 765 |
else:
|
| 766 |
print(f" ❌ Failed to create caption clip for '{config['name']}'\n")
|
|
|
|
| 760 |
background = ColorClip(size=(VIDEO_WIDTH, VIDEO_HEIGHT), color=(255, 255, 255), duration=total_duration + 1.0)
|
| 761 |
final_video = CompositeVideoClip([background] + text_clips, size=(VIDEO_WIDTH, VIDEO_HEIGHT))
|
| 762 |
print(f" 🎥 Rendering to: {output_filename}")
|
| 763 |
+
final_video.write_videofile(f"{output_filename}", fps=25, codec='libx264', preset='medium', logger=None, threads=8)
|
| 764 |
print(f" ✨ Done!\n")
|
| 765 |
else:
|
| 766 |
print(f" ❌ Failed to create caption clip for '{config['name']}'\n")
|
src/tiktok_publisher.py
CHANGED
|
@@ -14,8 +14,8 @@ import pandas as pd
|
|
| 14 |
from datetime import datetime
|
| 15 |
from dotenv import load_dotenv
|
| 16 |
from pathlib import Path
|
|
|
|
| 17 |
from main import (
|
| 18 |
-
load_configuration,
|
| 19 |
load_content_strategies
|
| 20 |
)
|
| 21 |
from api_clients import APIClients
|
|
|
|
| 14 |
from datetime import datetime
|
| 15 |
from dotenv import load_dotenv
|
| 16 |
from pathlib import Path
|
| 17 |
+
from load_config import load_configuration
|
| 18 |
from main import (
|
|
|
|
| 19 |
load_content_strategies
|
| 20 |
)
|
| 21 |
from api_clients import APIClients
|
src/utils.py
CHANGED
|
@@ -15,6 +15,7 @@ import uuid
|
|
| 15 |
import re
|
| 16 |
import shutil
|
| 17 |
import librosa
|
|
|
|
| 18 |
import numpy as np
|
| 19 |
import tempfile
|
| 20 |
|
|
@@ -590,7 +591,8 @@ def reverse_clip(path_or_clip) -> str:
|
|
| 590 |
codec="libx264",
|
| 591 |
audio_codec="aac",
|
| 592 |
verbose=False,
|
| 593 |
-
logger=None
|
|
|
|
| 594 |
)
|
| 595 |
|
| 596 |
elif isinstance(path_or_clip, str):
|
|
@@ -1136,4 +1138,54 @@ def repeat_audio_ffmpeg(input_audio, output_audio, repeat: int):
|
|
| 1136 |
finally:
|
| 1137 |
# Clean up temporary file
|
| 1138 |
if os.path.exists(temp_trimmed):
|
| 1139 |
-
os.remove(temp_trimmed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
import re
|
| 16 |
import shutil
|
| 17 |
import librosa
|
| 18 |
+
import pandas as pd
|
| 19 |
import numpy as np
|
| 20 |
import tempfile
|
| 21 |
|
|
|
|
| 591 |
codec="libx264",
|
| 592 |
audio_codec="aac",
|
| 593 |
verbose=False,
|
| 594 |
+
logger=None,
|
| 595 |
+
fps=25
|
| 596 |
)
|
| 597 |
|
| 598 |
elif isinstance(path_or_clip, str):
|
|
|
|
| 1138 |
finally:
|
| 1139 |
# Clean up temporary file
|
| 1140 |
if os.path.exists(temp_trimmed):
|
| 1141 |
+
os.remove(temp_trimmed)
|
| 1142 |
+
|
| 1143 |
+
def clean_and_drop_empty(
|
| 1144 |
+
df: pd.DataFrame,
|
| 1145 |
+
column: str,
|
| 1146 |
+
extra_nulls: list[str] | None = None,
|
| 1147 |
+
) -> pd.DataFrame:
|
| 1148 |
+
"""
|
| 1149 |
+
Normalize Google Sheets empty values and drop rows
|
| 1150 |
+
where `column` is effectively empty.
|
| 1151 |
+
|
| 1152 |
+
Handles:
|
| 1153 |
+
- NaN
|
| 1154 |
+
- ""
|
| 1155 |
+
- " "
|
| 1156 |
+
- "nan", "None", "NULL", "N/A"
|
| 1157 |
+
|
| 1158 |
+
Args:
|
| 1159 |
+
df: Input DataFrame
|
| 1160 |
+
column: Column to validate (e.g. "VIDEO_LINK")
|
| 1161 |
+
extra_nulls: Optional extra string values to treat as null
|
| 1162 |
+
|
| 1163 |
+
Returns:
|
| 1164 |
+
Cleaned DataFrame with valid rows only
|
| 1165 |
+
"""
|
| 1166 |
+
|
| 1167 |
+
if column not in df.columns:
|
| 1168 |
+
raise KeyError(f"Column '{column}' not found in DataFrame")
|
| 1169 |
+
|
| 1170 |
+
null_values = ["", "nan", "none", "null", "n/a"]
|
| 1171 |
+
if extra_nulls:
|
| 1172 |
+
null_values.extend([v.lower() for v in extra_nulls])
|
| 1173 |
+
|
| 1174 |
+
df = df.copy()
|
| 1175 |
+
|
| 1176 |
+
df[column] = (
|
| 1177 |
+
df[column]
|
| 1178 |
+
.astype(str)
|
| 1179 |
+
.str.strip()
|
| 1180 |
+
# .str.lower()
|
| 1181 |
+
.replace(null_values, np.nan)
|
| 1182 |
+
)
|
| 1183 |
+
|
| 1184 |
+
return df.dropna(subset=[column])
|
| 1185 |
+
|
| 1186 |
+
def is_valid_video(path: str) -> bool:
    """
    Cheap sanity check for a video file on disk.

    Returns True only when `path` exists and is at least 100 KB in size;
    anything smaller is treated as almost certainly invalid.
    """
    min_size_bytes = 100 * 1024
    return os.path.exists(path) and os.path.getsize(path) >= min_size_bytes
|
src/video_renderer.py
CHANGED
|
@@ -989,7 +989,7 @@ class VideoRenderer:
|
|
| 989 |
safe_name = "".join(c for c in self.data_holder.tts_script[:50] if c.isalnum())
|
| 990 |
output_path = self.temp_dir / f"{os.getenv('SETUP_TYPE', 'final_video')}_{safe_name}_{int(time.time())}.mp4"
|
| 991 |
|
| 992 |
-
video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", verbose=False, logger=None)
|
| 993 |
|
| 994 |
video_clip.close()
|
| 995 |
return str(output_path)
|
|
@@ -1010,7 +1010,7 @@ class VideoRenderer:
|
|
| 1010 |
logger.info(f"📹 Rendering video (no audio): {filename}")
|
| 1011 |
|
| 1012 |
video_clip.write_videofile(
|
| 1013 |
-
str(output_path), codec="libx264", fps=
|
| 1014 |
)
|
| 1015 |
|
| 1016 |
return str(output_path)
|
|
@@ -1077,7 +1077,7 @@ class VideoRenderer:
|
|
| 1077 |
|
| 1078 |
return self.data_holder.current_caption_style
|
| 1079 |
|
| 1080 |
-
async def render_random_video(self, beat_times, music_duration):
|
| 1081 |
"""
|
| 1082 |
Render video that syncs perfectly with music beats.
|
| 1083 |
Skip very early first beats to avoid ultra-short intro clips.
|
|
@@ -1095,10 +1095,7 @@ class VideoRenderer:
|
|
| 1095 |
|
| 1096 |
# Track accumulated time deficit to maintain beat sync
|
| 1097 |
accumulated_deficit = 0.0
|
| 1098 |
-
|
| 1099 |
-
# Minimum clip duration to avoid glitchy cuts
|
| 1100 |
-
min_clip_duration = 1 # seconds
|
| 1101 |
-
|
| 1102 |
# SMART FIX: If first beat is not at 0, insert virtual beat at 0
|
| 1103 |
# This handles intro in the same loop as regular beats
|
| 1104 |
if beat_times[0] > 0.0001:
|
|
|
|
| 989 |
safe_name = "".join(c for c in self.data_holder.tts_script[:50] if c.isalnum())
|
| 990 |
output_path = self.temp_dir / f"{os.getenv('SETUP_TYPE', 'final_video')}_{safe_name}_{int(time.time())}.mp4"
|
| 991 |
|
| 992 |
+
video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", fps=25, verbose=False, logger=None)
|
| 993 |
|
| 994 |
video_clip.close()
|
| 995 |
return str(output_path)
|
|
|
|
| 1010 |
logger.info(f"📹 Rendering video (no audio): {filename}")
|
| 1011 |
|
| 1012 |
video_clip.write_videofile(
|
| 1013 |
+
str(output_path), codec="libx264", fps=25, verbose=False, logger=None
|
| 1014 |
)
|
| 1015 |
|
| 1016 |
return str(output_path)
|
|
|
|
| 1077 |
|
| 1078 |
return self.data_holder.current_caption_style
|
| 1079 |
|
| 1080 |
+
async def render_random_video(self, beat_times, music_duration, min_clip_duration=1) -> VideoFileClip:
|
| 1081 |
"""
|
| 1082 |
Render video that syncs perfectly with music beats.
|
| 1083 |
Skip very early first beats to avoid ultra-short intro clips.
|
|
|
|
| 1095 |
|
| 1096 |
# Track accumulated time deficit to maintain beat sync
|
| 1097 |
accumulated_deficit = 0.0
|
| 1098 |
+
|
|
|
|
|
|
|
|
|
|
| 1099 |
# SMART FIX: If first beat is not at 0, insert virtual beat at 0
|
| 1100 |
# This handles intro in the same loop as regular beats
|
| 1101 |
if beat_times[0] > 0.0001:
|
src/youtube_publisher.py
CHANGED
|
@@ -16,8 +16,8 @@ import os
|
|
| 16 |
import sys
|
| 17 |
import json
|
| 18 |
from datetime import datetime, timedelta
|
|
|
|
| 19 |
from main import (
|
| 20 |
-
load_configuration,
|
| 21 |
load_content_strategies
|
| 22 |
)
|
| 23 |
from pathlib import Path
|
|
|
|
| 16 |
import sys
|
| 17 |
import json
|
| 18 |
from datetime import datetime, timedelta
|
| 19 |
+
from load_config import load_configuration
|
| 20 |
from main import (
|
|
|
|
| 21 |
load_content_strategies
|
| 22 |
)
|
| 23 |
from pathlib import Path
|