jebin2 commited on
Commit
d524fdc
Β·
1 Parent(s): fd0174a

refactor: Remove APIClients class and migrate to standalone GCS utils

Browse files

- Deleted src/api_clients.py
- Moved download_from_gcs logic to src/google_src/gcs_utils.py as find_and_download_gcs_file
- Refactored all social media publishers to use gcs_utils instead of APIClients
- Removed APIClients usage from a2e_avatar.py, automation.py, and process_csv.py
- Removed cache usage from a2e_avatar.py

src/a2e_avatar.py CHANGED
@@ -67,7 +67,6 @@ class TalkingVideoGenerator:
67
  timeout: int = 100,
68
  max_retries: int = 3,
69
  data_holder: DataHolder = None,
70
- api_clients: APIClients = None
71
  ):
72
  """
73
  Initialize the A2E video generator
@@ -92,7 +91,6 @@ class TalkingVideoGenerator:
92
  self.tmp_dir = Path("/tmp")
93
  self.tmp_dir.mkdir(exist_ok=True)
94
  self.data_holder = data_holder or DataHolder()
95
- self.api_clients = api_clients
96
 
97
  logger.info("TalkingVideoGenerator initialized")
98
 
@@ -439,22 +437,6 @@ Available Avatars with Usage Count: {available_avatar}
439
  os.remove(tmp_path)
440
  return duration
441
 
442
- async def get_from_cache(self, method_type):
443
- try:
444
- with open(await self.api_clients.get_cache_url(method_type, ".txt"), 'r', encoding="utf-8") as file:
445
- logger.info("Using cache a2e_tts.")
446
- return file.read()
447
- except: return None
448
-
449
- async def store_in_cache(self, method_type, content):
450
- try:
451
- unique_id = uuid.uuid4().hex[:8]
452
- file_path = f"/tmp/a2e_tts_{unique_id}.txt"
453
- with open(file_path, "w", encoding="utf-8") as file:
454
- file.write(content)
455
- await self.api_clients.store_in_cache(file_path, method_type, ".txt")
456
- except: pass
457
-
458
  def _get_voice_language_info(self, voice_id: str, voices: List[VoiceInfo]) -> Tuple[str, str]:
459
  """
460
  Get country and region from voice_id by looking up in voices list.
@@ -489,9 +471,6 @@ Available Avatars with Usage Count: {available_avatar}
489
  if os.getenv("TEST_AUTOMATION", "").lower() == "true":
490
  return "testData/Green Screen Avatar Video3.mp3"
491
 
492
- audio_url = await self.get_from_cache("a2e_tts")
493
- if audio_url:
494
- return audio_url
495
 
496
  # --- First attempt ---
497
  logger.info(f"Generating TTS audio (speed: {speed_rate}x)...")
@@ -540,7 +519,6 @@ Available Avatars with Usage Count: {available_avatar}
540
  if duration > 15 or duration < 10:
541
  raise A2EAPIError(f"Audio is longer:({duration}) for the voice: {voice_id}")
542
 
543
- await self.store_in_cache("a2e_tts", audio_url)
544
  logger.info(f"βœ“ Final TTS audio generated ({duration:.2f}s): {audio_url}")
545
  return audio_url
546
 
@@ -582,86 +560,77 @@ Available Avatars with Usage Count: {available_avatar}
582
  "video_url": "testData/Green Screen Avatar Video3.mp4"
583
  }
584
 
585
- content = await self.get_from_cache("a2e_video")
586
- if content:
587
- return json.loads(content)
588
-
589
- task_id = await self.get_from_cache("a2e_video_task_id")
590
- if not task_id:
591
- logger.info(f"Generating talking video with green screen background...")
592
-
593
- # Calculate dimensions based on aspect ratio
594
- if aspect_ratio == "9:16":
595
- if resolution == 1080:
596
- width, height = 1080, 1920
597
- else: # 720
598
- width, height = 405, 720
599
- elif aspect_ratio == "16:9":
600
- if resolution == 1080:
601
- width, height = 1920, 1080
602
- else: # 720
603
- width, height = 1280, 720
604
- else:
605
- # Default to square
606
- width = height = resolution
607
 
608
- # Convert RGB tuple to rgba() format string (alpha = 1 for fully opaque)
609
- bg_color_str = f"rgba({background_color[0]},{background_color[1]},{background_color[2]},1)"
610
- # {
611
- # "title": "11-09-2025 16:53:47",
612
- # "isSkipRs": true,
613
- # "isAllowReverse": true,
614
- # "resolution": 1080,
615
- # "isSubtitleEnabled": false,
616
- # "anchor_id": "67209ea6a9050edd960698c0",
617
- # "anchor_type": 0,
618
- # "msg": "Hello Ther.",
619
- # "tts_id": "66ca504f2732c24634224075",
620
- # "speech_rate": 1.2,
621
- # "audioSrc": "https://1day-tos.a2e.com.cn/tts3party_cache/stable/el/Aria/speed/a15fe669931864094b03fe17b7b2067d.wav",
622
- # "color": "rgba(0,255,0,1)",
623
- # "web_bg_width": 202.5,
624
- # "web_bg_height": 360,
625
- # "web_people_max_width": 202.5,
626
- # "web_people_max_height": 360,
627
- # "web_people_width": 600,
628
- # "web_people_height": 360,
629
- # "web_people_x": -198.75,
630
- # "web_people_y": 0
631
- # }
632
- payload = {
633
- "title": title,
634
- "anchor_id": avatar_id,
635
- "anchor_type": anchor_type,
636
- "audioSrc": audio_url,
637
- # "resolution": resolution,
638
- "color": bg_color_str, # e.g. "rgba(0,255,0,1)" for green screen
639
- # "anchor_background_color": bg_color_str, # e.g. "rgba(0,255,0,1)" for green screen
640
- # "web_bg_width": width,
641
- # "web_bg_height": height,
642
- # "web_people_width": width,
643
- # "web_people_height": height,
644
- # "web_people_x": 0,
645
- # "web_people_y": 0,
646
- "isSkipRs": True # Fast mode
647
- }
648
- data = await self._make_request(
649
- "POST",
650
- "/api/v1/video/generate",
651
- json=payload
652
- )
653
 
654
- task_id = data.get("data", {}).get("_id")
655
- if not task_id:
656
- raise A2EAPIError("No task ID in response")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
 
658
- await self.store_in_cache("a2e_video_task_id", task_id)
 
 
659
 
660
  logger.info(f"βœ“ Video generation started. Task ID: {task_id}")
661
 
662
  # Poll for completion
663
  video_result = await self._poll_video_status(task_id)
664
- await self.store_in_cache("a2e_video", json.dumps(video_result))
665
  return video_result
666
 
667
  except Exception as e:
@@ -946,7 +915,6 @@ async def create_greenscreen_video_workflow(
946
  preferred_language: str = "en-US",
947
  speed_rate: float = 1.2,
948
  data_holder=None,
949
- api_clients=None,
950
  title="Green Screen Avatar Video"
951
  ) -> Path:
952
  """
@@ -967,7 +935,7 @@ async def create_greenscreen_video_workflow(
967
  logger.info("GREEN SCREEN TALKING VIDEO WORKFLOW")
968
  logger.info("=" * 60)
969
 
970
- generator = TalkingVideoGenerator(a2e_api_key=api_key, data_holder=data_holder, api_clients=api_clients)
971
 
972
  try:
973
  # Step 1: Get avatars with background support
 
67
  timeout: int = 100,
68
  max_retries: int = 3,
69
  data_holder: DataHolder = None,
 
70
  ):
71
  """
72
  Initialize the A2E video generator
 
91
  self.tmp_dir = Path("/tmp")
92
  self.tmp_dir.mkdir(exist_ok=True)
93
  self.data_holder = data_holder or DataHolder()
 
94
 
95
  logger.info("TalkingVideoGenerator initialized")
96
 
 
437
  os.remove(tmp_path)
438
  return duration
439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  def _get_voice_language_info(self, voice_id: str, voices: List[VoiceInfo]) -> Tuple[str, str]:
441
  """
442
  Get country and region from voice_id by looking up in voices list.
 
471
  if os.getenv("TEST_AUTOMATION", "").lower() == "true":
472
  return "testData/Green Screen Avatar Video3.mp3"
473
 
 
 
 
474
 
475
  # --- First attempt ---
476
  logger.info(f"Generating TTS audio (speed: {speed_rate}x)...")
 
519
  if duration > 15 or duration < 10:
520
  raise A2EAPIError(f"Audio is longer:({duration}) for the voice: {voice_id}")
521
 
 
522
  logger.info(f"βœ“ Final TTS audio generated ({duration:.2f}s): {audio_url}")
523
  return audio_url
524
 
 
560
  "video_url": "testData/Green Screen Avatar Video3.mp4"
561
  }
562
 
563
+ logger.info(f"Generating talking video with green screen background...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
 
565
+ # Calculate dimensions based on aspect ratio
566
+ if aspect_ratio == "9:16":
567
+ if resolution == 1080:
568
+ width, height = 1080, 1920
569
+ else: # 720
570
+ width, height = 405, 720
571
+ elif aspect_ratio == "16:9":
572
+ if resolution == 1080:
573
+ width, height = 1920, 1080
574
+ else: # 720
575
+ width, height = 1280, 720
576
+ else:
577
+ # Default to square
578
+ width = height = resolution
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
 
580
+ # Convert RGB tuple to rgba() format string (alpha = 1 for fully opaque)
581
+ bg_color_str = f"rgba({background_color[0]},{background_color[1]},{background_color[2]},1)"
582
+ # {
583
+ # "title": "11-09-2025 16:53:47",
584
+ # "isSkipRs": true,
585
+ # "isAllowReverse": true,
586
+ # "resolution": 1080,
587
+ # "isSubtitleEnabled": false,
588
+ # "anchor_id": "67209ea6a9050edd960698c0",
589
+ # "anchor_type": 0,
590
+ # "msg": "Hello Ther.",
591
+ # "tts_id": "66ca504f2732c24634224075",
592
+ # "speech_rate": 1.2,
593
+ # "audioSrc": "https://1day-tos.a2e.com.cn/tts3party_cache/stable/el/Aria/speed/a15fe669931864094b03fe17b7b2067d.wav",
594
+ # "color": "rgba(0,255,0,1)",
595
+ # "web_bg_width": 202.5,
596
+ # "web_bg_height": 360,
597
+ # "web_people_max_width": 202.5,
598
+ # "web_people_max_height": 360,
599
+ # "web_people_width": 600,
600
+ # "web_people_height": 360,
601
+ # "web_people_x": -198.75,
602
+ # "web_people_y": 0
603
+ # }
604
+ payload = {
605
+ "title": title,
606
+ "anchor_id": avatar_id,
607
+ "anchor_type": anchor_type,
608
+ "audioSrc": audio_url,
609
+ # "resolution": resolution,
610
+ "color": bg_color_str, # e.g. "rgba(0,255,0,1)" for green screen
611
+ # "anchor_background_color": bg_color_str, # e.g. "rgba(0,255,0,1)" for green screen
612
+ # "web_bg_width": width,
613
+ # "web_bg_height": height,
614
+ # "web_people_width": width,
615
+ # "web_people_height": height,
616
+ # "web_people_x": 0,
617
+ # "web_people_y": 0,
618
+ "isSkipRs": True # Fast mode
619
+ }
620
+ data = await self._make_request(
621
+ "POST",
622
+ "/api/v1/video/generate",
623
+ json=payload
624
+ )
625
 
626
+ task_id = data.get("data", {}).get("_id")
627
+ if not task_id:
628
+ raise A2EAPIError("No task ID in response")
629
 
630
  logger.info(f"βœ“ Video generation started. Task ID: {task_id}")
631
 
632
  # Poll for completion
633
  video_result = await self._poll_video_status(task_id)
 
634
  return video_result
635
 
636
  except Exception as e:
 
915
  preferred_language: str = "en-US",
916
  speed_rate: float = 1.2,
917
  data_holder=None,
 
918
  title="Green Screen Avatar Video"
919
  ) -> Path:
920
  """
 
935
  logger.info("GREEN SCREEN TALKING VIDEO WORKFLOW")
936
  logger.info("=" * 60)
937
 
938
+ generator = TalkingVideoGenerator(a2e_api_key=api_key, data_holder=data_holder)
939
 
940
  try:
941
  # Step 1: Get avatars with background support
src/api_clients.py DELETED
@@ -1,195 +0,0 @@
1
- """
2
- API clients for external services with full implementations
3
- """
4
-
5
- import aiohttp
6
- import json
7
- import os
8
- from typing import Dict, List, Optional
9
- from google.cloud import storage, texttospeech
10
- import asyncio
11
- from utils import logger
12
- from moviepy.editor import AudioFileClip
13
-
14
- """
15
- API clients for external services with full implementations
16
- """
17
-
18
- import aiohttp
19
- import json
20
- import os
21
- from typing import Dict, List, Optional
22
- from google.cloud import storage, texttospeech
23
- import asyncio
24
- from utils import logger
25
- from data_holder import DataHolder
26
- from file_downloader import FileDownloader
27
-
28
-
29
- # --- NEW IMPORTS ---
30
- from google_src.gcs_utils import get_gcs_client, get_gcs_credentials, upload_file_to_gcs, list_gcs_files
31
-
32
- # --------------------
33
-
34
- import base64
35
- from pathlib import Path
36
-
37
- from google_src import ai_studio_sdk
38
- import uuid
39
-
40
- class APIClients:
41
- def __init__(self, config, data_holder: DataHolder = None):
42
- self.config = config
43
- self.data_holder = data_holder
44
-
45
- logger.info("Initializing GCS and TTS clients using google_src.gcs_utils...")
46
-
47
- self.gcs_client = get_gcs_client("final_data")
48
-
49
- gcs_bucket_name = config.get("gcs_bucket_name")
50
- print(gcs_bucket_name)
51
- self.gcs_bucket = self.gcs_client.bucket(gcs_bucket_name)
52
-
53
- async def get_from_cache(self, method_type, duration=0):
54
- try:
55
- with open(await self.get_cache_url(f"{method_type}_{duration}", ".txt"), 'r', encoding="utf-8") as file:
56
- logger.info("Using cache aoi_client.")
57
- return file.read()
58
- except: return None
59
-
60
- async def store_in_cache_file(self, method_type, content, duration=0):
61
- try:
62
- unique_id = uuid.uuid4().hex[:8]
63
- file_path = f"/tmp/google_tts_{unique_id}.txt"
64
- with open(file_path, "w", encoding="utf-8") as file:
65
- file.write(content)
66
- await self.store_in_cache(file_path, f"{method_type}_{duration}", ".txt")
67
- except: pass
68
-
69
-
70
-
71
-
72
-
73
-
74
- async def store_in_cache(self, file_path: str, method_type: str, file_ext: str = ".mp4") -> str:
75
- """Store file in Google Cloud Storage and return its public URL."""
76
- try:
77
- if os.getenv("TEST_AUTOMATION", "").lower() == "true":
78
- return None
79
-
80
- # βœ… Ensure you're using the right attribute name ("has_tts_script" or "hash_tts_script")
81
- filename = f"{self.data_holder.hash_tts_script}_{method_type}{file_ext}"
82
- blob_name = f"cache_reuse/{filename}"
83
- blob = self.my_temp_gcs_bucket.blob(blob_name)
84
-
85
- # βœ… File extension to MIME mapping
86
- content_types = {
87
- ".mp4": "video/mp4",
88
- ".mp3": "audio/mpeg",
89
- ".wav": "audio/wav",
90
- ".png": "image/png",
91
- ".jpg": "image/jpeg",
92
- ".jpeg": "image/jpeg",
93
- ".json": "application/json",
94
- ".txt": "text/plain",
95
- }
96
-
97
- blob.content_type = content_types.get(file_ext, "application/octet-stream")
98
-
99
- logger.info(f"☁️ Uploading {file_path} to gs://{self.my_temp_gcs_bucket.name}/{blob_name}")
100
- blob.upload_from_filename(file_path)
101
-
102
- public_url = f"https://storage.googleapis.com/{self.my_temp_gcs_bucket.name}/{blob_name}"
103
- logger.info(f"βœ… File uploaded successfully: {public_url}")
104
-
105
- return public_url
106
-
107
- except Exception as e:
108
- logger.error(f"❌ Error storing file in cache: {e}")
109
- return None
110
-
111
- async def get_cache_url(self, method_type: str, file_ext: str = ".mp4") -> str:
112
- """Return public URL if cached file exists in GCS, else None."""
113
- try:
114
- filename = f"{self.data_holder.hash_tts_script}_{method_type}{file_ext}"
115
- blob_name = f"cache_reuse/{filename}"
116
- logger.info(f"Cahce file name: {blob_name}")
117
- blob = self.my_temp_gcs_bucket.blob(blob_name)
118
-
119
- if blob.exists(): # βœ… Check before returning
120
- local_path = f'/tmp/{filename}'
121
- blob.download_to_filename(local_path)
122
- return local_path
123
- else:
124
- logger.warning(f"⚠️ Cache miss: {blob_name} not found in bucket.")
125
- return None
126
-
127
- except Exception as e:
128
- logger.error(f"❌ Error fetching cache URL: {e}")
129
- return None
130
-
131
-
132
-
133
- async def download_from_gcs(self, tts_script: str, local_dir: str = "/tmp") -> Optional[str]:
134
- """
135
- Find and download a file from GCS whose name contains part of the TTS script.
136
- Useful when filenames are generated dynamically based on the script text.
137
-
138
- Args:
139
- tts_script: Text content used to search for the file name in GCS
140
- local_dir: Local directory to save the downloaded file
141
-
142
- Returns:
143
- Local path to downloaded file, or None on failure.
144
- """
145
- try:
146
- if os.getenv("TEST_AUTOMATION", "").lower() == "true":
147
- return f"{os.getenv('TEST_DATA_DIRECTORY')}/{uuid.uuid4().hex}.mp4"
148
-
149
- # Prepare a safe pattern to search by
150
- safe_name_10 = "".join(c for c in tts_script[:10] if c.isalnum())
151
- safe_name_50 = "".join(c for c in tts_script[:50] if c.isalnum())
152
-
153
- # Await the async list function
154
- all_files = list_gcs_files()
155
-
156
- # Try to find a matching file name
157
- blob_name = next((f for f in all_files if self.data_holder.tts_script in f), None)
158
- if not blob_name:
159
- blob_name = next((f for f in all_files if safe_name_50 in f), None)
160
- if not blob_name:
161
- blob_name = next((f for f in all_files if safe_name_10 in f), None)
162
-
163
- if not blob_name:
164
- logger.error(f"❌ No matching file found in GCS for script: {tts_script[:50]}...")
165
- return None
166
-
167
- logger.info(f"☁️ Found matching file: gs://{self.gcs_bucket.name}/{blob_name}")
168
-
169
- # Construct GCS URL
170
- gs_url = f"gs://{self.gcs_bucket.name}/{blob_name}"
171
-
172
- # Use FileDownloader
173
- downloader = FileDownloader()
174
- # We can download mostly anywhere, but let's stick to the default behavior or local_dir if needed.
175
- # safe_download uses temp dir by default. The original code used local_dir="/tmp/tts_downloads".
176
- # FileDownloader uses its own temp dir logic.
177
- # Let's see if we can pass output path. safe_download takes output_path.
178
-
179
- local_path = os.path.join(local_dir, os.path.basename(blob_name))
180
-
181
- downloaded = downloader.safe_download(gs_url, output_path=local_path)
182
-
183
- if downloaded:
184
- file_size = os.path.getsize(downloaded)
185
- logger.info(f"βœ… Downloaded {blob_name} β†’ {downloaded} ({file_size/1024:.1f} KB)")
186
- return str(downloaded)
187
- else:
188
- logger.error(f"❌ FileDownloader failed for {gs_url}")
189
- return None
190
-
191
- except Exception as e:
192
- logger.error(f"❌ GCS download failed: {e}")
193
- return None
194
-
195
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/automation.py CHANGED
@@ -10,7 +10,6 @@ from google_src import ai_studio_sdk
10
  from video_generation_process import generate_video_process
11
  from typing import Dict, List, Optional, Any
12
  from pathlib import Path
13
- from api_clients import APIClients
14
  from video_renderer import VideoRenderer
15
  from utils import logger
16
  import utils
@@ -36,11 +35,9 @@ from asset_manager import get_asset_downloader, get_audio_lib, AssetProcessor
36
  from file_downloader import FileDownloader
37
 
38
  class ContentAutomation:
39
- def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None, api_clients: 'APIClients' = None):
40
  self.config = config
41
  self.data_holder = data_holder or DataHolder()
42
- # Reuse provided api_clients or create new one
43
- self.api_clients = api_clients or APIClients(config, self.data_holder)
44
 
45
  # Initialize Google Services directly
46
  self.tts = GoogleTTS()
@@ -84,7 +81,6 @@ class ContentAutomation:
84
  preferred_language="en-US",
85
  speed_rate=1.2,
86
  data_holder=self.data_holder,
87
- api_clients=self.api_clients,
88
  title=self.data_holder.hash_tts_script[:30]
89
  )
90
  self.data_holder.visual_assets["tts_audio"] = {
 
10
  from video_generation_process import generate_video_process
11
  from typing import Dict, List, Optional, Any
12
  from pathlib import Path
 
13
  from video_renderer import VideoRenderer
14
  from utils import logger
15
  import utils
 
35
  from file_downloader import FileDownloader
36
 
37
  class ContentAutomation:
38
+ def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
39
  self.config = config
40
  self.data_holder = data_holder or DataHolder()
 
 
41
 
42
  # Initialize Google Services directly
43
  self.tts = GoogleTTS()
 
81
  preferred_language="en-US",
82
  speed_rate=1.2,
83
  data_holder=self.data_holder,
 
84
  title=self.data_holder.hash_tts_script[:30]
85
  )
86
  self.data_holder.visual_assets["tts_audio"] = {
src/google_src/gcs_utils.py CHANGED
@@ -228,3 +228,65 @@ def create_bucket_if_not_exists(client, bucket_name: str, location: str = "us-ce
228
  except Exception as e:
229
  logger.error(f"❌ Failed to create bucket {bucket_name}: {e}")
230
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  except Exception as e:
229
  logger.error(f"❌ Failed to create bucket {bucket_name}: {e}")
230
  return False
231
+
232
+ def find_and_download_gcs_file(tts_script: str, local_dir: str = "/tmp", account_name: str = "final_data") -> Optional[str]:
233
+ """
234
+ Find and download a file from GCS whose name contains part of the TTS script.
235
+ Useful when filenames are generated dynamically based on the script text.
236
+ Replaces APIClients.download_from_gcs.
237
+
238
+ Args:
239
+ tts_script: Text content used to search for the file name in GCS
240
+ local_dir: Local directory to save the downloaded file
241
+ account_name: GCS account to use
242
+
243
+ Returns:
244
+ Local path to downloaded file, or None on failure.
245
+ """
246
+ try:
247
+ if os.getenv("TEST_AUTOMATION", "").lower() == "true":
248
+ return f"{os.getenv('TEST_DATA_DIRECTORY')}/{uuid.uuid4().hex}.mp4"
249
+
250
+ # Prepare a safe pattern to search by
251
+ safe_name_50 = "".join(c for c in tts_script[:50] if c.isalnum())
252
+ safe_name_10 = "".join(c for c in tts_script[:10] if c.isalnum())
253
+
254
+ all_files = list_gcs_files(prefix="", account_name=account_name)
255
+
256
+ # Try to find a matching file name
257
+ blob_name = next((f for f in all_files if tts_script in f), None)
258
+ if not blob_name:
259
+ blob_name = next((f for f in all_files if safe_name_50 in f), None)
260
+ if not blob_name:
261
+ blob_name = next((f for f in all_files if safe_name_10 in f), None)
262
+
263
+ if not blob_name:
264
+ logger.error(f"❌ No matching file found in GCS for script: {tts_script[:50]}...")
265
+ return None
266
+
267
+ client = get_gcs_client(account_name)
268
+ bucket_name = os.getenv("GCS_BUCKET_NAME")
269
+
270
+ logger.info(f"☁️ Found matching file: gs://{bucket_name}/{blob_name}")
271
+
272
+ # Construct GCS URL
273
+ gs_url = f"gs://{bucket_name}/{blob_name}"
274
+
275
+ from file_downloader import FileDownloader
276
+ downloader = FileDownloader()
277
+
278
+ local_path = os.path.join(local_dir, os.path.basename(blob_name))
279
+
280
+ downloaded = downloader.safe_download(gs_url, output_path=local_path, account_id=account_name)
281
+
282
+ if downloaded:
283
+ file_size = os.path.getsize(downloaded)
284
+ logger.info(f"βœ… Downloaded {blob_name} β†’ {downloaded} ({file_size/1024:.1f} KB)")
285
+ return str(downloaded)
286
+ else:
287
+ logger.error(f"❌ FileDownloader failed for {gs_url}")
288
+ return None
289
+
290
+ except Exception as e:
291
+ logger.error(f"❌ GCS download failed: {e}")
292
+ return None
src/process_csv.py CHANGED
@@ -8,7 +8,6 @@ from main import (
8
  run_pipeline,
9
  )
10
  from automation import ContentAutomation
11
- from api_clients import APIClients
12
  from utils import logger
13
  from data_holder import DataHolder
14
  from google_src.google_sheet import GoogleSheetReader
@@ -20,7 +19,6 @@ import setup_config
20
  from asset_manager import get_video_lib, get_audio_lib, get_asset_downloader, get_content_strategy_lib
21
 
22
  DATA_DIR = Path("data")
23
- SHARED_API_CLIENTS = None # Shared instance to avoid redundant GCS/TTS client initialization
24
 
25
 
26
  def load_executed_from_gsheet(setup_type=None, job_index=None):
@@ -96,7 +94,6 @@ def log_progress_to_gsheet(tts_script: str, result: dict, job_index: int, commit
96
 
97
  async def process_row(row, config: dict):
98
  """Process one CSV row using the main pipeline."""
99
- global SHARED_API_CLIENTS
100
  tts_script = row.get("TTS Script (AI Avatar)", "")
101
  if os.getenv("ON_SCREEN_TEXT", "false").lower() == "true":
102
  tts_script = row.get("On-Screen Text", "").strip()
@@ -108,14 +105,9 @@ async def process_row(row, config: dict):
108
  asset_downloader = get_asset_downloader()
109
  dataHolder.visual_assets["all_videos"] = asset_downloader.downloaded_videos
110
 
111
- # Update shared APIClients with current dataHolder
112
- if SHARED_API_CLIENTS:
113
- SHARED_API_CLIENTS.data_holder = dataHolder
114
-
115
  # ContentAutomation uses asset_manager singletons directly
116
  automation = ContentAutomation(
117
- config, dataHolder,
118
- api_clients=SHARED_API_CLIENTS
119
  )
120
 
121
  content_strategy = {
@@ -138,7 +130,6 @@ async def process_row(row, config: dict):
138
 
139
  async def download_all_video(config: dict):
140
  """Download all library videos once using singletons."""
141
- global SHARED_API_CLIENTS
142
 
143
  # Get the asset downloader singleton
144
  asset_downloader = get_asset_downloader()
@@ -147,12 +138,6 @@ async def download_all_video(config: dict):
147
  logger.info("πŸ“₯ Pre-downloading all library videos...")
148
  videos = await asset_downloader.download_all_videos()
149
 
150
- # Create the shared APIClients once
151
- if SHARED_API_CLIENTS is None:
152
- dataHolder = DataHolder()
153
- dataHolder.visual_assets["all_videos"] = videos
154
- SHARED_API_CLIENTS = APIClients(config, dataHolder)
155
-
156
  logger.info(f"βœ“ Downloaded {len(videos)} library videos")
157
  return videos
158
 
 
8
  run_pipeline,
9
  )
10
  from automation import ContentAutomation
 
11
  from utils import logger
12
  from data_holder import DataHolder
13
  from google_src.google_sheet import GoogleSheetReader
 
19
  from asset_manager import get_video_lib, get_audio_lib, get_asset_downloader, get_content_strategy_lib
20
 
21
  DATA_DIR = Path("data")
 
22
 
23
 
24
  def load_executed_from_gsheet(setup_type=None, job_index=None):
 
94
 
95
  async def process_row(row, config: dict):
96
  """Process one CSV row using the main pipeline."""
 
97
  tts_script = row.get("TTS Script (AI Avatar)", "")
98
  if os.getenv("ON_SCREEN_TEXT", "false").lower() == "true":
99
  tts_script = row.get("On-Screen Text", "").strip()
 
105
  asset_downloader = get_asset_downloader()
106
  dataHolder.visual_assets["all_videos"] = asset_downloader.downloaded_videos
107
 
 
 
 
 
108
  # ContentAutomation uses asset_manager singletons directly
109
  automation = ContentAutomation(
110
+ config, dataHolder
 
111
  )
112
 
113
  content_strategy = {
 
130
 
131
  async def download_all_video(config: dict):
132
  """Download all library videos once using singletons."""
 
133
 
134
  # Get the asset downloader singleton
135
  asset_downloader = get_asset_downloader()
 
138
  logger.info("πŸ“₯ Pre-downloading all library videos...")
139
  videos = await asset_downloader.download_all_videos()
140
 
 
 
 
 
 
 
141
  logger.info(f"βœ“ Downloaded {len(videos)} library videos")
142
  return videos
143
 
src/social_media_publishers/instagram_publisher.py CHANGED
@@ -21,8 +21,8 @@ from load_config import load_configuration
21
  from main import (
22
  load_content_strategies
23
  )
24
- from api_clients import APIClients
25
  import hashlib
 
26
 
27
  DATA_DIR = Path("data")
28
 
@@ -151,7 +151,6 @@ async def main():
151
  try:
152
  # Load config and initialize API client
153
  config = load_configuration()
154
- api_client = APIClients(config)
155
  scheduler = InstagramPublisher()
156
 
157
  csv_files = sorted(DATA_DIR.glob("content_strategies*.csv"))
@@ -175,17 +174,21 @@ async def main():
175
  print("="*50)
176
 
177
  # Download from GCS via API client
178
- api_client.data_holder.hash_tts_script = hashlib.sha256(tts_script.encode('utf-8')).hexdigest()
179
- local_path = await api_client.download_from_gcs(tts_script)
180
 
181
  if not local_path or not os.path.exists(local_path):
182
  print(f"❌ Skipping β€” file not found: {tts_script}")
183
  continue
184
 
185
- # Upload to Instagram
186
- # NOTE: Requires the file to be accessible by Instagram (public URL or CDN link)
187
- # You can upload your video file to a temporary public bucket or use a CDN-backed URL
188
- public_url = api_client.get_public_url(local_path) # <-- implement this in APIClients
 
 
 
 
189
  result = scheduler.upload_reel(public_url, caption)
190
 
191
  # Print result
 
21
  from main import (
22
  load_content_strategies
23
  )
 
24
  import hashlib
25
+ from google_src.gcs_utils import find_and_download_gcs_file, upload_file_to_gcs
26
 
27
  DATA_DIR = Path("data")
28
 
 
151
  try:
152
  # Load config and initialize API client
153
  config = load_configuration()
 
154
  scheduler = InstagramPublisher()
155
 
156
  csv_files = sorted(DATA_DIR.glob("content_strategies*.csv"))
 
174
  print("="*50)
175
 
176
  # Download from GCS via API client
177
+ # Download from GCS via GCS Utils
178
+ local_path = find_and_download_gcs_file(tts_script)
179
 
180
  if not local_path or not os.path.exists(local_path):
181
  print(f"❌ Skipping β€” file not found: {tts_script}")
182
  continue
183
 
184
+ # Upload to temporary GCS for public URL
185
+ uploaded_file = upload_file_to_gcs(local_path, account_name="final_data", generate_signed_url=False, fallback_to_drive=False)
186
+ public_url = uploaded_file.get("public_url")
187
+ if not public_url:
188
+ print(f"❌ Failed to generate public URL for: {local_path}")
189
+ continue
190
+
191
+ print(f"πŸ”— Public URL for Instagram: {public_url}")
192
  result = scheduler.upload_reel(public_url, caption)
193
 
194
  # Print result
src/social_media_publishers/publisher.py CHANGED
@@ -18,7 +18,7 @@ import hashlib
18
 
19
  from load_config import load_configuration
20
  from main import load_content_strategies
21
- from api_clients import APIClients
22
 
23
  # Import individual platform publishers
24
  from social_media_publishers.youtube_publisher import YouTubePublisher
@@ -111,7 +111,6 @@ async def run_publisher(media: str, commit=False):
111
  """Unified publishing pipeline with CSV read + commit tracking."""
112
  print(f"πŸš€ Starting {media.capitalize()} publishing pipeline...")
113
  config = load_configuration()
114
- api_client = APIClients(config)
115
 
116
  # Pick platform publisher
117
  if media == "youtube":
@@ -155,8 +154,7 @@ async def run_publisher(media: str, commit=False):
155
  print("=" * 50)
156
 
157
  try:
158
- api_client.data_holder.hash_tts_script = hashlib.sha256(tts_script.encode('utf-8')).hexdigest()
159
- local_path = await api_client.download_from_gcs(tts_script)
160
  if not local_path or not os.path.exists(local_path):
161
  print(f"❌ Skipping: File not found for {tts_script}")
162
  continue
@@ -173,7 +171,13 @@ async def run_publisher(media: str, commit=False):
173
  )
174
  elif media == "instagram":
175
  # Instagram needs a public URL, not a local file path
176
- public_url = api_client.get_public_url(local_path)
 
 
 
 
 
 
177
  result = scheduler.upload_reel(public_url, caption=description)
178
  elif media == "tiktok":
179
  result = scheduler.upload_video(
 
18
 
19
  from load_config import load_configuration
20
  from main import load_content_strategies
21
+ from google_src.gcs_utils import find_and_download_gcs_file, upload_file_to_gcs
22
 
23
  # Import individual platform publishers
24
  from social_media_publishers.youtube_publisher import YouTubePublisher
 
111
  """Unified publishing pipeline with CSV read + commit tracking."""
112
  print(f"πŸš€ Starting {media.capitalize()} publishing pipeline...")
113
  config = load_configuration()
 
114
 
115
  # Pick platform publisher
116
  if media == "youtube":
 
154
  print("=" * 50)
155
 
156
  try:
157
+ local_path = find_and_download_gcs_file(tts_script)
 
158
  if not local_path or not os.path.exists(local_path):
159
  print(f"❌ Skipping: File not found for {tts_script}")
160
  continue
 
171
  )
172
  elif media == "instagram":
173
  # Instagram needs a public URL, not a local file path
174
+ uploaded_file = upload_file_to_gcs(local_path, account_name="final_data", generate_signed_url=False, fallback_to_drive=False)
175
+ public_url = uploaded_file.get("public_url")
176
+
177
+ if not public_url:
178
+ print("❌ Failed to generate public URL for Instagram")
179
+ continue
180
+
181
  result = scheduler.upload_reel(public_url, caption=description)
182
  elif media == "tiktok":
183
  result = scheduler.upload_video(
src/social_media_publishers/tiktok_publisher.py CHANGED
@@ -22,8 +22,8 @@ from load_config import load_configuration
22
  from main import (
23
  load_content_strategies
24
  )
25
- from api_clients import APIClients
26
  import hashlib
 
27
 
28
  DATA_DIR = Path("data")
29
 
@@ -183,7 +183,7 @@ class TikTokPublisher:
183
  async def main():
184
  try:
185
  config = load_configuration()
186
- api_client = APIClients(config)
187
  scheduler = TikTokPublisher()
188
 
189
  csv_files = sorted(DATA_DIR.glob("content_strategies*.csv"))
@@ -206,8 +206,7 @@ async def main():
206
  print("="*50)
207
 
208
  # Download from GCS
209
- api_client.data_holder.hash_tts_script = hashlib.sha256(tts_script.encode('utf-8')).hexdigest()
210
- local_path = await api_client.download_from_gcs(tts_script)
211
  if not local_path or not os.path.exists(local_path):
212
  print(f"❌ File not found, skipping: {tts_script}")
213
  continue
 
22
  from main import (
23
  load_content_strategies
24
  )
 
25
  import hashlib
26
+ from google_src.gcs_utils import find_and_download_gcs_file
27
 
28
  DATA_DIR = Path("data")
29
 
 
183
  async def main():
184
  try:
185
  config = load_configuration()
186
+ config = load_configuration()
187
  scheduler = TikTokPublisher()
188
 
189
  csv_files = sorted(DATA_DIR.glob("content_strategies*.csv"))
 
206
  print("="*50)
207
 
208
  # Download from GCS
209
+ local_path = find_and_download_gcs_file(tts_script)
 
210
  if not local_path or not os.path.exists(local_path):
211
  print(f"❌ File not found, skipping: {tts_script}")
212
  continue
src/social_media_publishers/youtube_publisher.py CHANGED
@@ -25,9 +25,9 @@ from main import (
25
  load_content_strategies
26
  )
27
  from pathlib import Path
28
- from api_clients import APIClients
29
  from dotenv import load_dotenv
30
  import hashlib
 
31
 
32
  DATA_DIR = Path("data")
33
 
@@ -53,8 +53,8 @@ class YouTubePublisher:
53
  else:
54
  self.youtube = self._authenticate()
55
 
 
56
  config = load_configuration()
57
- self.api_client = APIClients(config)
58
 
59
  def _authenticate(self):
60
  """Authenticate with YouTube API using environment variable (JSON string, base64, or path)."""
@@ -316,9 +316,8 @@ async def main():
316
 
317
  for idx, (csv_name, row) in enumerate(all_rows):
318
  tts_script = row.get("TTS Script (AI Avatar)", "").strip()
319
- scheduler.api_client.data_holder.hash_tts_script = hashlib.sha256(tts_script.encode('utf-8')).hexdigest()
320
  description = row.get("Captions", "").strip()
321
- local_path = await scheduler.api_client.download_from_gcs(tts_script)
322
  if local_path:
323
  # Parse scheduled time if provided
324
  scheduled_dt = None
 
25
  load_content_strategies
26
  )
27
  from pathlib import Path
 
28
  from dotenv import load_dotenv
29
  import hashlib
30
+ from google_src.gcs_utils import find_and_download_gcs_file
31
 
32
  DATA_DIR = Path("data")
33
 
 
53
  else:
54
  self.youtube = self._authenticate()
55
 
56
+
57
  config = load_configuration()
 
58
 
59
  def _authenticate(self):
60
  """Authenticate with YouTube API using environment variable (JSON string, base64, or path)."""
 
316
 
317
  for idx, (csv_name, row) in enumerate(all_rows):
318
  tts_script = row.get("TTS Script (AI Avatar)", "").strip()
 
319
  description = row.get("Captions", "").strip()
320
+ local_path = find_and_download_gcs_file(tts_script)
321
  if local_path:
322
  # Parse scheduled time if provided
323
  scheduled_dt = None