jebin2 committed on
Commit
659fbdb
·
1 Parent(s): 6887a80

Refactor: Integrate setup_config and cleanup asset handling

Browse files

- Remove hardcoded audio/video lists from `src/asset_selector.py`
- Update `src/automation.py` and `src/process_csv.py` to use `setup_config` for checks
- Integrate `setup_config` loading into `src/load_config.py`
- Simplify video download logic and remove unused paths

.github/workflows/process_csv.yml CHANGED
@@ -105,7 +105,6 @@ jobs:
105
  IS_ONSCREEN_CTA: ${{ vars.IS_ONSCREEN_CTA }}
106
  DELETE_ALL_A2E_VIDEOS: ${{ vars.DELETE_ALL_A2E_VIDEOS }}
107
  USE_GEMIMI_VIDEO: ${{ vars.USE_GEMIMI_VIDEO }}
108
- ONLY_RANDOM_VIDEOS: ${{ vars.ONLY_RANDOM_VIDEOS }}
109
  PlAIN_VIDEO_COUNT: ${{ vars.PlAIN_VIDEO_COUNT }}
110
  USE_1X1_RATIO: ${{ vars.USE_1X1_RATIO }}
111
  ON_SCREEN_TEXT: ${{ vars.ON_SCREEN_TEXT }}
 
105
  IS_ONSCREEN_CTA: ${{ vars.IS_ONSCREEN_CTA }}
106
  DELETE_ALL_A2E_VIDEOS: ${{ vars.DELETE_ALL_A2E_VIDEOS }}
107
  USE_GEMIMI_VIDEO: ${{ vars.USE_GEMIMI_VIDEO }}
 
108
  PlAIN_VIDEO_COUNT: ${{ vars.PlAIN_VIDEO_COUNT }}
109
  USE_1X1_RATIO: ${{ vars.USE_1X1_RATIO }}
110
  ON_SCREEN_TEXT: ${{ vars.ON_SCREEN_TEXT }}
setup/beats_cut/config.toml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Beats Cut Random Videos Setup
2
+ # Uses beat detection to sync video cuts with music
3
+
4
+ [general]
5
+ setup_type = "beats_cut"
6
+ description = "Random videos with cuts synced to music beats"
7
+
8
+ [video]
9
+ beat_method = "downbeat"
10
+
11
+ [gsheet]
12
+ name = "Infloxa Data for Elvoro"
13
+ id = "1djnE1u_QCveGlhjNnZRfXiY-3NLzO0V-04tZ9P2mVcs"
14
+ video_library_worksheet = "Video Library"
15
+ audio_library_worksheet = "Audio Library"
16
+ logs_worksheet = "Infloxa Data for Elvoro LOGS"
setup/hard_cut/config.toml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hard Cut Random Videos Setup
2
+ # Uses fixed intervals instead of beat detection for video cuts
3
+
4
+ [general]
5
+ setup_type = "hard_cut"
6
+ description = "Random videos with hard cuts at fixed intervals"
7
+
8
+ [video]
9
+ hard_cut_random_videos_interval = "0.5"
10
+
11
+ [gsheet]
12
+ name = "Infloxa Data for Elvoro"
13
+ id = "1djnE1u_QCveGlhjNnZRfXiY-3NLzO0V-04tZ9P2mVcs"
14
+ video_library_worksheet = "Video Library"
15
+ audio_library_worksheet = "Audio Library"
16
+ logs_worksheet = "Infloxa Data for Elvoro LOGS"
setup/setup_4_no_ai_video/README.md CHANGED
@@ -25,13 +25,6 @@ CSV file containing content strategy entries.
25
  ```
26
  - Full path: `data/content_strategies_on_screen_text.csv`
27
 
28
- #### **ONLY_RANDOM_VIDEOS**
29
- Enables random library clip selection (bypasses metadata-driven selection).
30
-
31
- ```
32
- ONLY_RANDOM_VIDEOS=true
33
- ```
34
-
35
  #### **BEAT_METHOD**
36
  Controls beat-sync behavior.
37
 
 
25
  ```
26
  - Full path: `data/content_strategies_on_screen_text.csv`
27
 
 
 
 
 
 
 
 
28
  #### **BEAT_METHOD**
29
  Controls beat-sync behavior.
30
 
src/api_clients.py CHANGED
@@ -867,7 +867,7 @@ class APIClients:
867
  """
868
  try:
869
  if os.getenv("TEST_AUTOMATION", "").lower() == "true":
870
- return f"{os.getenv('TEST_DATA_DIRECTORY')}/final_video_Ifeltexh_1762093574_compressed.mp4"
871
 
872
  # Prepare a safe pattern to search by
873
  safe_name_10 = "".join(c for c in tts_script[:10] if c.isalnum())
 
867
  """
868
  try:
869
  if os.getenv("TEST_AUTOMATION", "").lower() == "true":
870
+ return f"{os.getenv('TEST_DATA_DIRECTORY')}/{uuid.uuid4().hex}.mp4"
871
 
872
  # Prepare a safe pattern to search by
873
  safe_name_10 = "".join(c for c in tts_script[:10] if c.isalnum())
src/asset_selector.py CHANGED
@@ -12,6 +12,7 @@ from data_holder import DataHolder
12
  import gemini_sdk
13
  from google_sheet_reader import GoogleSheetReader
14
  from google_src import GCloudWrapper, GCloudAccount, get_default_wrapper
 
15
 
16
 
17
  class AssetSelector:
@@ -40,37 +41,6 @@ class AssetSelector:
40
  self.current_audio_index = (self.current_audio_index + 1) % len(self.audio_library)
41
  self.config["current_audio_index"] = self.current_audio_index
42
 
43
- def _load_video_library(self) -> pd.DataFrame:
44
- """Load video library from specific CSV file"""
45
- try:
46
- if os.getenv("INFLOXA", "false").lower() == "true":
47
- csv_filename = "data/infloxa_video_library100.csv"
48
- elif os.getenv("USE_VEO", "false").lower() == "true":
49
- csv_filename = "data/somira_video_library_veo.csv"
50
- else:
51
- csv_filename = "data/somira_video_library.csv"
52
-
53
- if not os.path.exists(csv_filename):
54
- logger.error(f"CSV file not found: {csv_filename}")
55
- return pd.DataFrame()
56
-
57
- df = pd.read_csv(csv_filename)
58
-
59
- if "Energy Score (0-100)" in df.columns:
60
- df["energy_score"] = df["Energy Score (0-100)"].apply(self._parse_energy_score)
61
-
62
- if "Duration" in df.columns:
63
- df["duration"] = df["Duration"].apply(self._parse_duration)
64
- elif "duration" in df.columns:
65
- df["duration"] = df["duration"].apply(self._parse_duration)
66
-
67
- logger.info(f"Successfully loaded video library with {len(df)} entries")
68
- return df
69
-
70
- except Exception as e:
71
- logger.error(f"Failed to load video library from CSV: {e}")
72
- raise
73
-
74
  def _parse_duration(self, duration_str: str) -> int:
75
  """Parse duration from various string formats to integer seconds"""
76
  try:
@@ -87,30 +57,6 @@ class AssetSelector:
87
  logger.warning(f"Failed to parse duration '{duration_str}': {e}")
88
  return 0
89
 
90
- def _load_audio_library(self) -> List[str]:
91
- """Load audio library URLs"""
92
- audios = [f"https://storage.googleapis.com/somira/{i}.mp3" for i in range(1, 27)]
93
- for adio in [
94
- "https://storage.googleapis.com/somira/ssstik.io_1762269951926.mp3",
95
- "https://storage.googleapis.com/somira/Runaway_musicaldown.com_1762637229.mp3",
96
- "https://storage.googleapis.com/somira/Relaxed%20(Sped%20Up)_musicaldown.com_1762641321.mp3",
97
- "https://storage.googleapis.com/somira/original%20sound%20-%20vienna.visitas_musicaldown.com_1762639795.mp3",
98
- "https://storage.googleapis.com/somira/original%20sound%20-%20lxuissoundz_musicaldown.com_1762636022.mp3",
99
- "https://storage.googleapis.com/somira/LUNA%20BALA%20(Slowed)_musicaldown.com_1762637032.mp3",
100
- "https://storage.googleapis.com/somira/Funny_musicaldown.com_1762641235.mp3",
101
- "https://storage.googleapis.com/somira/7AM%20-%20Slowed%20%2B%20Reverb_musicaldown.com_1762638022.mp3",
102
- "https://storage.googleapis.com/somira/original%20sound%20-%20yzwlk_musicaldown.com_1762641762.mp3"
103
- ]:
104
- audios.append(adio)
105
-
106
- if os.getenv("SETUP_TYPE", "").lower() == "onlyrandomvideo_onscreen_text":
107
- audios.remove("https://storage.googleapis.com/somira/23.mp3")
108
- audios.remove("https://storage.googleapis.com/somira/ssstik.io_1762269951926.mp3")
109
-
110
- if os.getenv("INFLOXA", "false").lower() == "true":
111
- audios = ["testData/infloxa/audiopulse.mp3"]
112
- return audios
113
-
114
  def get_audio_beats(self, audio_link: str) -> Optional[List[float]]:
115
  """
116
  Load audio beats timing from audio_library and convert
@@ -181,7 +127,7 @@ class AssetSelector:
181
  account_id=account_id,
182
  )
183
  audio_df = googleSheetReader.get_filtered_dataframe()
184
- if os.getenv("HARD_CUT_RANDOM_VIDEOS", "false").lower() == "false":
185
  audio_df = utils.clean_and_drop_empty(audio_df, "Beats Timing(SS:FF) AT 25FPS")
186
  return utils.clean_and_drop_empty(audio_df, "AUDIO_LINK")
187
  except Exception as e:
 
12
  import gemini_sdk
13
  from google_sheet_reader import GoogleSheetReader
14
  from google_src import GCloudWrapper, GCloudAccount, get_default_wrapper
15
+ import setup_config
16
 
17
 
18
  class AssetSelector:
 
41
  self.current_audio_index = (self.current_audio_index + 1) % len(self.audio_library)
42
  self.config["current_audio_index"] = self.current_audio_index
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def _parse_duration(self, duration_str: str) -> int:
45
  """Parse duration from various string formats to integer seconds"""
46
  try:
 
57
  logger.warning(f"Failed to parse duration '{duration_str}': {e}")
58
  return 0
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  def get_audio_beats(self, audio_link: str) -> Optional[List[float]]:
61
  """
62
  Load audio beats timing from audio_library and convert
 
127
  account_id=account_id,
128
  )
129
  audio_df = googleSheetReader.get_filtered_dataframe()
130
+ if setup_config.get_str("setup_type") == "beats_cut":
131
  audio_df = utils.clean_and_drop_empty(audio_df, "Beats Timing(SS:FF) AT 25FPS")
132
  return utils.clean_and_drop_empty(audio_df, "AUDIO_LINK")
133
  except Exception as e:
src/automation.py CHANGED
@@ -26,6 +26,7 @@ import math
26
  import numpy as np
27
  from file_downloader import FileDownloader
28
  from data_holder import DataHolder
 
29
 
30
  class ContentAutomation:
31
  def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None, asset_selector: 'AssetSelector' = None, api_clients: 'APIClients' = None):
@@ -53,7 +54,7 @@ class ContentAutomation:
53
  logger.info("\n🎭 STEP 1: Clean TTS Script")
54
  self.data_holder.tts_script = utils.clean_tts_script(tts_script)
55
 
56
- if os.getenv("ONLY_RANDOM_VIDEOS", "false").lower() == "true":
57
  return await self.execute_random_pipeline(content_strategy, tts_script)
58
 
59
  prompt_refer = content_strategy.get("gemini_prompt", "")
@@ -199,14 +200,12 @@ class ContentAutomation:
199
  try:
200
  await self._download_all_video()
201
 
202
- music_duration = 10
203
- if os.getenv("INFLOXA", "false").lower() == "true":
204
- music_duration = 15
205
 
206
  beat_times = None
207
  try_next = False
208
- hard_cut_mode = os.getenv("HARD_CUT_RANDOM_VIDEOS", "false").lower() == "true"
209
- hard_cut_mode_interval = os.getenv("HARD_CUT_RANDOM_VIDEOS_INTERVAL", "0.5")
210
 
211
  if hard_cut_mode:
212
  # No beat detection needed, just download music once
@@ -269,7 +268,7 @@ class ContentAutomation:
269
  logger.info(f"[{idx}/{total}] Done")
270
 
271
 
272
- if os.getenv("HARD_CUT_RANDOM_VIDEOS", "false").lower() == "true":
273
  # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
274
  video_no_audio_path = await self.video_renderer.render_interval_video(
275
  float(hard_cut_mode_interval),
@@ -354,10 +353,6 @@ class ContentAutomation:
354
  await self._download_to_local(
355
  self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
356
  )
357
- # if os.getenv("INFLOXA", "false").lower() == "true":
358
- # output_path = "/tmp/repeated_bg_music.mp3"
359
- # output_path = utils.repeat_audio_ffmpeg(self.data_holder.visual_assets["background_music_local"], output_path, 5)
360
- # self.data_holder.visual_assets["background_music_local"] = output_path
361
 
362
  async def create_audio(self):
363
  try_again = False
@@ -406,37 +401,33 @@ class ContentAutomation:
406
  logger.info("✅ All videos already have local_path — skipping download.")
407
  return
408
 
409
- if os.getenv("INFLOXA", "false").lower() == "true":
410
- download_path="testData/infloxa"
411
- Path(download_path).mkdir(parents=True, exist_ok=True)
412
 
413
- allowed_videos = []
414
-
415
- videos = [
416
- {
417
- "url": url,
418
- "local_path": str(local_path),
419
- }
420
- for _, row in self.asset_selector.video_library.iterrows()
421
- if (
422
- (url := str(row.get("VIDEO_LINK", "")).strip())
423
- and (local_path := self.file_downloader.safe_download(url=url))
424
- and utils.is_valid_video(local_path)
425
- )
426
- ]
427
- self.data_holder.visual_assets["all_videos"] = videos
428
-
429
- else:
430
- self.data_holder.visual_assets["all_videos"] = [
431
- {"url": row.get("Video URL (No Audio)", "").strip()}
432
- for _, row in self.asset_selector.video_library.iterrows()
433
- if row.get("Video URL (No Audio)", "").strip()
434
- ]
435
-
436
- # ⬇️ Download them
437
- logger.info(f"📥 Downloading {len(self.data_holder.visual_assets['all_videos'])} videos...")
438
- await self._download_all_visual_assets()
439
- logger.info("✅ All videos downloaded successfully")
440
 
441
  async def _generate_visual_assets_parallel(self, content_strategy: Dict) -> Dict:
442
  """Generate visual assets in parallel (hook video + library videos)"""
 
26
  import numpy as np
27
  from file_downloader import FileDownloader
28
  from data_holder import DataHolder
29
+ import setup_config
30
 
31
  class ContentAutomation:
32
  def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None, asset_selector: 'AssetSelector' = None, api_clients: 'APIClients' = None):
 
54
  logger.info("\n🎭 STEP 1: Clean TTS Script")
55
  self.data_holder.tts_script = utils.clean_tts_script(tts_script)
56
 
57
+ if setup_config.get_str("setup_type") in ["beats_cut", "hard_cut"]:
58
  return await self.execute_random_pipeline(content_strategy, tts_script)
59
 
60
  prompt_refer = content_strategy.get("gemini_prompt", "")
 
200
  try:
201
  await self._download_all_video()
202
 
203
+ music_duration = None
 
 
204
 
205
  beat_times = None
206
  try_next = False
207
+ hard_cut_mode = setup_config.get_str("setup_type") == "hard_cut"
208
+ hard_cut_mode_interval = setup_config.get_str("hard_cut_random_videos_interval", "0.5")
209
 
210
  if hard_cut_mode:
211
  # No beat detection needed, just download music once
 
268
  logger.info(f"[{idx}/{total}] Done")
269
 
270
 
271
+ if setup_config.get_str("setup_type") == "hard_cut":
272
  # IMPORTANT: Pass filtered_beat_times, not beat_intervals!
273
  video_no_audio_path = await self.video_renderer.render_interval_video(
274
  float(hard_cut_mode_interval),
 
353
  await self._download_to_local(
354
  self.data_holder.visual_assets["background_music_url"], "background_music.mp3", self.data_holder.visual_assets, "background_music_local"
355
  )
 
 
 
 
356
 
357
  async def create_audio(self):
358
  try_again = False
 
401
  logger.info("✅ All videos already have local_path — skipping download.")
402
  return
403
 
404
+ download_path = "testData/video_for_workflow"
405
+ Path(download_path).mkdir(parents=True, exist_ok=True)
 
406
 
407
+ videos = []
408
+ for _, row in self.asset_selector.video_library.iterrows():
409
+ url = str(row.get("VIDEO_LINK", "")).strip()
410
+ if not url:
411
+ continue
412
+
413
+ local_path = self.file_downloader.safe_download(url=url)
414
+ if not local_path or not utils.is_valid_video(local_path):
415
+ continue
416
+
417
+ # Resize and remove padding (handle potential errors)
418
+ try:
419
+ utils.resize_video(local_path, overwrite=True)
420
+ utils.remove_black_padding(local_path, overwrite=True)
421
+ except Exception as e:
422
+ logger.warning(f"⚠️ Could not process {local_path}: {e}")
423
+ # Continue anyway - video is still usable
424
+
425
+ videos.append({
426
+ "url": url,
427
+ "local_path": str(local_path),
428
+ })
429
+
430
+ self.data_holder.visual_assets["all_videos"] = videos
 
 
 
431
 
432
  async def _generate_visual_assets_parallel(self, content_strategy: Dict) -> Dict:
433
  """Generate visual assets in parallel (hook video + library videos)"""
src/load_config.py CHANGED
@@ -131,12 +131,23 @@ def load_configuration() -> Dict:
131
  logger.debug(f"Could not load from gcloud config: {e}")
132
 
133
  # Build configuration dictionary
 
 
 
 
 
 
 
 
 
 
134
  config = {
 
135
  "gemini_api_key": os.getenv("GEMINI_API_KEY"),
136
  "runwayml_api_key": os.getenv("RUNWAYML_API_KEY"),
137
  "gcs_bucket_name": os.getenv("GCS_BUCKET_NAME"),
138
  "gcp_project_id": gcp_project_id,
139
- "default_voice": os.getenv("DEFAULT_VOICE", "en-US-Neural2-F"),
140
  "auth_method": auth_method, # Track how project was loaded
141
  }
142
 
 
131
  logger.debug(f"Could not load from gcloud config: {e}")
132
 
133
  # Build configuration dictionary
134
+ # Start with setup config from TOML if available
135
+ try:
136
+ from setup_config import load_setup_config
137
+ setup_config = load_setup_config()
138
+ logger.info(f"✓ Loaded setup config: {setup_config.get('setup_type', 'unknown')}")
139
+ except (ValueError, FileNotFoundError, ImportError) as e:
140
+ logger.debug(f"Setup config not loaded (optional): {e}")
141
+ setup_config = {}
142
+
143
+ # Merge setup config with secrets from environment
144
  config = {
145
+ **setup_config, # TOML config values (can be overridden below)
146
  "gemini_api_key": os.getenv("GEMINI_API_KEY"),
147
  "runwayml_api_key": os.getenv("RUNWAYML_API_KEY"),
148
  "gcs_bucket_name": os.getenv("GCS_BUCKET_NAME"),
149
  "gcp_project_id": gcp_project_id,
150
+ "default_voice": setup_config.get("voice") or os.getenv("DEFAULT_VOICE", "en-US-Neural2-F"),
151
  "auth_method": auth_method, # Track how project was loaded
152
  }
153
 
src/onscreebcta.py CHANGED
@@ -309,9 +309,9 @@ def create_cta_on_strip(
309
 
310
  def add_cta(input_video_path: str, cta_text: str, above_caption: bool = True, padding: int = 20, show_strip: bool = False, bottom_safe_y: int = None) -> str:
311
  if above_caption:
312
- output_video_path = f"/tmp/{uuid.uuid4().hex[:8]}final_video_above_caption.mp4"
313
  else:
314
- output_video_path = f"/tmp/{uuid.uuid4().hex[:8]}final_video_below_caption.mp4"
315
 
316
  logger.info(f"Loading video: '{input_video_path}'...")
317
  base_video = VideoFileClip(input_video_path)
 
309
 
310
  def add_cta(input_video_path: str, cta_text: str, above_caption: bool = True, padding: int = 20, show_strip: bool = False, bottom_safe_y: int = None) -> str:
311
  if above_caption:
312
+ output_video_path = f"/tmp/{uuid.uuid4().hex[:8]}_above_caption.mp4"
313
  else:
314
+ output_video_path = f"/tmp/{uuid.uuid4().hex[:8]}_below_caption.mp4"
315
 
316
  logger.info(f"Loading video: '{input_video_path}'...")
317
  base_video = VideoFileClip(input_video_path)
src/process_csv.py CHANGED
@@ -18,6 +18,7 @@ import argparse
18
  import uuid
19
  from cleanup_manager import process_delete_entries
20
  from google_src.gcs_utils import list_gcs_files
 
21
 
22
  DATA_DIR = Path("data")
23
  ALL_VIDEO_FILE_INFO = None
@@ -391,7 +392,7 @@ Examples:
391
  os.environ.pop("VERTEX_AI_CREDENTIALS_JSON", None)
392
 
393
  await download_all_video(config)
394
- if os.getenv("ON_SCREEN_TEXT", "false").lower() != "true" and os.getenv("ONLY_RANDOM_VIDEOS", "false").lower() == "true":
395
  await create_plain_videos(config, commit=args.commit, job_index=job_index, total_jobs=total_jobs)
396
  else:
397
  await process_all_csvs(config, commit=args.commit, job_index=job_index, total_jobs=total_jobs)
 
18
  import uuid
19
  from cleanup_manager import process_delete_entries
20
  from google_src.gcs_utils import list_gcs_files
21
+ import setup_config
22
 
23
  DATA_DIR = Path("data")
24
  ALL_VIDEO_FILE_INFO = None
 
392
  os.environ.pop("VERTEX_AI_CREDENTIALS_JSON", None)
393
 
394
  await download_all_video(config)
395
+ if os.getenv("ON_SCREEN_TEXT", "false").lower() != "true" and setup_config.get_str("setup_type") in ["beats_cut", "hard_cut"]:
396
  await create_plain_videos(config, commit=args.commit, job_index=job_index, total_jobs=total_jobs)
397
  else:
398
  await process_all_csvs(config, commit=args.commit, job_index=job_index, total_jobs=total_jobs)
src/setup_config.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Setup Configuration Loader
3
+
4
+ Loads TOML configuration from setup/<setup_name>/config.toml files.
5
+ This allows different setups to be selected via SETUP_NAME env var,
6
+ keeping secrets in .env and config in version-controlled TOML files.
7
+ """
8
+
9
+ import logging
10
+ import os
11
+ import sys
12
+ from pathlib import Path
13
+ from typing import Dict, Any, Optional
14
+
15
+ # Use standalone logger to avoid heavy imports from utils
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Use tomllib (Python 3.11+) or fall back to tomli
19
+ if sys.version_info >= (3, 11):
20
+ import tomllib
21
+ else:
22
+ try:
23
+ import tomli as tomllib
24
+ except ImportError:
25
+ tomllib = None
26
+
27
+
28
+ # Cached config singleton
29
+ _cached_config: Optional[Dict[str, Any]] = None
30
+ _cached_setup_name: Optional[str] = None
31
+
32
+
33
+ def get_setup_dir() -> Path:
34
+ """Get the setup directory path."""
35
+ # Go from src/ to project root, then into setup/
36
+ project_root = Path(__file__).parent.parent
37
+ return project_root / "setup"
38
+
39
+
40
+ def list_available_setups() -> list[str]:
41
+ """List all available setup configurations."""
42
+ setup_dir = get_setup_dir()
43
+ setups = []
44
+
45
+ if setup_dir.exists():
46
+ for item in setup_dir.iterdir():
47
+ if item.is_dir() and (item / "config.toml").exists():
48
+ setups.append(item.name)
49
+
50
+ return sorted(setups)
51
+
52
+
53
+ def load_setup_config(setup_name: Optional[str] = None, force_reload: bool = False) -> Dict[str, Any]:
54
+ """
55
+ Load configuration from setup/<setup_name>/config.toml.
56
+
57
+ Args:
58
+ setup_name: Name of the setup folder. If None, uses SETUP_NAME env var.
59
+ force_reload: If True, bypass cache and reload from file.
60
+
61
+ Returns:
62
+ Dictionary with flattened config values, with env var overrides applied.
63
+
64
+ Raises:
65
+ ValueError: If setup_name is not provided and SETUP_NAME env var is not set.
66
+ FileNotFoundError: If the config.toml file doesn't exist.
67
+ """
68
+ global _cached_config, _cached_setup_name
69
+
70
+ # Use env var if setup_name not provided
71
+ if setup_name is None:
72
+ setup_name = os.getenv("SETUP_NAME")
73
+
74
+ if not setup_name:
75
+ available = list_available_setups()
76
+ raise ValueError(
77
+ f"SETUP_NAME environment variable not set. "
78
+ f"Available setups: {', '.join(available) if available else 'none found'}"
79
+ )
80
+
81
+ # Return cached config if same setup and not forcing reload
82
+ if not force_reload and _cached_config is not None and _cached_setup_name == setup_name:
83
+ return _cached_config
84
+
85
+ # Check if tomllib is available
86
+ if tomllib is None:
87
+ raise ImportError(
88
+ "TOML parsing requires Python 3.11+ or the 'tomli' package. "
89
+ "Install with: pip install tomli"
90
+ )
91
+
92
+ # Load the TOML file
93
+ setup_dir = get_setup_dir()
94
+ config_path = setup_dir / setup_name / "config.toml"
95
+
96
+ if not config_path.exists():
97
+ available = list_available_setups()
98
+ raise FileNotFoundError(
99
+ f"Config file not found: {config_path}\n"
100
+ f"Available setups: {', '.join(available) if available else 'none found'}"
101
+ )
102
+
103
+ logger.info(f"Loading setup config from: {config_path}")
104
+
105
+ with open(config_path, "rb") as f:
106
+ raw_config = tomllib.load(f)
107
+
108
+ # Flatten the config and apply env var overrides
109
+ config = _flatten_config(raw_config)
110
+ config = _apply_env_overrides(config)
111
+
112
+ # Cache the result
113
+ _cached_config = config
114
+ _cached_setup_name = setup_name
115
+
116
+ logger.info(f"✓ Loaded setup config: {setup_name} ({len(config)} settings)")
117
+
118
+ return config
119
+
120
+
121
+ def _flatten_config(config: Dict[str, Any], prefix: str = "") -> Dict[str, Any]:
122
+ """
123
+ Flatten nested TOML config into a flat dictionary.
124
+
125
+ Example:
126
+ {"video": {"only_random_videos": true}}
127
+ -> {"video.only_random_videos": true, "only_random_videos": true}
128
+
129
+ Both nested key and flat key are provided for flexibility.
130
+ """
131
+ result = {}
132
+
133
+ for key, value in config.items():
134
+ full_key = f"{prefix}.{key}" if prefix else key
135
+
136
+ if isinstance(value, dict):
137
+ # Recurse into nested dicts
138
+ nested = _flatten_config(value, full_key)
139
+ result.update(nested)
140
+ else:
141
+ # Add both the full nested key and the simple key
142
+ result[full_key] = value
143
+ # Also add simple key if it doesn't conflict
144
+ if key not in result:
145
+ result[key] = value
146
+
147
+ return result
148
+
149
+
150
+ def _apply_env_overrides(config: Dict[str, Any]) -> Dict[str, Any]:
151
+ """
152
+ Apply environment variable overrides to config values.
153
+
154
+ Env var names are mapped from config keys:
155
+ only_random_videos -> ONLY_RANDOM_VIDEOS
156
+ video.hard_cut_random_videos -> HARD_CUT_RANDOM_VIDEOS
157
+ """
158
+ result = config.copy()
159
+
160
+ # Map of config keys to env var names
161
+ env_mappings = {
162
+ "only_random_videos": "ONLY_RANDOM_VIDEOS",
163
+ "hard_cut_random_videos": "HARD_CUT_RANDOM_VIDEOS",
164
+ "use_veo": "USE_VEO",
165
+ "beat_method": "BEAT_METHOD",
166
+ "setup_type": "SETUP_TYPE",
167
+ "caption_style": "CAPTION_STYLE",
168
+ "content_strategy_file": "CONTENT_STRATEGY_FILE",
169
+ "on_screen_text": "ON_SCREEN_TEXT",
170
+ "is_onscreen_cta": "IS_ONSCREEN_CTA",
171
+ "is_a2e_lip_sync": "IS_A2E_LIP_SYNC",
172
+ "use_1x1_ratio": "USE_1X1_RATIO",
173
+ "audio_library_size": "AUDIO_LIBRARY_SIZE",
174
+ "video_library_size": "VIDEO_LIBRARY_SIZE",
175
+ "debug_mode": "DEBUG_MODE",
176
+ "video_quality": "VIDEO_QUALITY",
177
+ "voice": "DEFAULT_VOICE",
178
+ "video_generation_timeout": "VIDEO_GENERATION_TIMEOUT",
179
+ "max_concurrent_requests": "MAX_CONCURRENT_REQUESTS",
180
+ "max_retry_attempts": "MAX_RETRY_ATTEMPTS",
181
+ "output_directory": "OUTPUT_DIRECTORY",
182
+ "temp_directory": "TEMP_DIRECTORY",
183
+ # GSheet mappings
184
+ "gsheet.name": "GSHEET_NAME",
185
+ "gsheet.id": "GSHEET_ID",
186
+ "video_library_worksheet": "VIDEO_LIBRARY_GSHEET_WORKSHEET",
187
+ "audio_library_worksheet": "AUDIO_LIBRARY_GSHEET_WORKSHEET",
188
+ "logs_worksheet": "GSHEET_WORKSHEET_LOGS",
189
+ }
190
+
191
+ for config_key, env_var in env_mappings.items():
192
+ env_value = os.getenv(env_var)
193
+ if env_value is not None:
194
+ # Convert to appropriate type based on existing config value
195
+ if config_key in result:
196
+ original_type = type(result[config_key])
197
+ if original_type == bool:
198
+ result[config_key] = env_value.lower() in ("true", "1", "yes")
199
+ elif original_type == int:
200
+ try:
201
+ result[config_key] = int(env_value)
202
+ except ValueError:
203
+ pass
204
+ else:
205
+ result[config_key] = env_value
206
+ else:
207
+ result[config_key] = env_value
208
+
209
+ return result
210
+
211
+
212
+ def get_config_value(key: str, default: Any = None) -> Any:
213
+ """
214
+ Get a single config value by key.
215
+
216
+ Args:
217
+ key: Config key (e.g., "only_random_videos" or "video.only_random_videos")
218
+ default: Default value if key not found
219
+
220
+ Returns:
221
+ Config value or default
222
+ """
223
+ config = load_setup_config()
224
+ return config.get(key, default)
225
+
226
+
227
+ def get_bool(key: str, default: bool = False) -> bool:
228
+ """Get a boolean config value."""
229
+ value = get_config_value(key, default)
230
+ if isinstance(value, bool):
231
+ return value
232
+ if isinstance(value, str):
233
+ return value.lower() in ("true", "1", "yes")
234
+ return bool(value)
235
+
236
+
237
+ def get_int(key: str, default: int = 0) -> int:
238
+ """Get an integer config value."""
239
+ value = get_config_value(key, default)
240
+ try:
241
+ return int(value)
242
+ except (ValueError, TypeError):
243
+ return default
244
+
245
+
246
+ def get_str(key: str, default: str = "") -> str:
247
+ """Get a string config value."""
248
+ value = get_config_value(key, default)
249
+ return str(value) if value is not None else default
250
+
251
+
252
+ # Convenience function for common pattern
253
+ def is_enabled(key: str) -> bool:
254
+ """Check if a feature flag is enabled."""
255
+ return get_bool(key, False)
256
+
257
+
258
+ # ------------------ CLI Usage ------------------
259
+
260
+ if __name__ == "__main__":
261
+ from dotenv import load_dotenv
262
+ load_dotenv()
263
+
264
+ print("\n=== Available Setups ===")
265
+ for setup in list_available_setups():
266
+ print(f" - {setup}")
267
+
268
+ print("\n=== Loading Config ===")
269
+ try:
270
+ config = load_setup_config()
271
+ print(f"\nLoaded config ({len(config)} keys):")
272
+ for key, value in sorted(config.items()):
273
+ print(f" {key}: {value}")
274
+ except (ValueError, FileNotFoundError) as e:
275
+ print(f"\nError: {e}")
src/video_downloader.py CHANGED
@@ -670,13 +670,13 @@ if __name__ == "__main__":
670
  load_dotenv()
671
 
672
  downloader = VideoDownloader()
673
- downloader._init_drive_downloader(download_path="testData/infloxa")
674
  add_link_column("testData/infloxa_copy/videos.csv", "testData/infloxa_copy/videos_with_links.csv", downloader)
675
 
676
  # Download from Drive folder link
677
  # result = downloader.download_from_drive_link(
678
  # drive_link="https://drive.google.com/drive/folders/1WSrVAyqvPJzpRnoUxkNx0LqK9VlDs432",
679
- # download_root="testData/infloxa",
680
  # file_extensions=['.mp4', '.mov', '.avi', '.mkv'] # Only video files
681
  # )
682
 
 
670
  load_dotenv()
671
 
672
  downloader = VideoDownloader()
673
+ downloader._init_drive_downloader(download_path="testData/video_for_workflow")
674
  add_link_column("testData/infloxa_copy/videos.csv", "testData/infloxa_copy/videos_with_links.csv", downloader)
675
 
676
  # Download from Drive folder link
677
  # result = downloader.download_from_drive_link(
678
  # drive_link="https://drive.google.com/drive/folders/1WSrVAyqvPJzpRnoUxkNx0LqK9VlDs432",
679
+ # download_root="testData/video_for_workflow",
680
  # file_extensions=['.mp4', '.mov', '.avi', '.mkv'] # Only video files
681
  # )
682
 
src/video_renderer.py CHANGED
@@ -986,8 +986,7 @@ class VideoRenderer:
986
  async def render_video_final(self, video_clip) -> str:
987
  """Render final video clip to file"""
988
  try:
989
- safe_name = "".join(c for c in self.data_holder.tts_script[:50] if c.isalnum())
990
- output_path = self.temp_dir / f"{os.getenv('SETUP_TYPE', 'final_video')}_{safe_name}_{int(time.time())}.mp4"
991
 
992
  video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", fps=25, verbose=False, logger=None)
993
 
 
986
  async def render_video_final(self, video_clip) -> str:
987
  """Render final video clip to file"""
988
  try:
989
+ output_path = self.temp_dir / f"{uuid.uuid4().hex}.mp4"
 
990
 
991
  video_clip.write_videofile(str(output_path), codec="libx264", audio_codec="aac", fps=25, verbose=False, logger=None)
992
 
video_analyser/config.yaml CHANGED
@@ -22,7 +22,7 @@ processing:
22
 
23
  # Output Settings
24
  output:
25
- local_video_dir: "infloxa" # Relative to video_analyser/
26
  csv_file: "infloxa_video_analysis.csv" # Relative to video_analyser/
27
 
28
  # Scoring Weights (must sum to 1.0)
 
22
 
23
  # Output Settings
24
  output:
25
+ local_video_dir: "video_for_workflow" # Relative to video_analyser/
26
  csv_file: "infloxa_video_analysis.csv" # Relative to video_analyser/
27
 
28
  # Scoring Weights (must sum to 1.0)