jebin2 committed on
Commit
2008dd3
·
1 Parent(s): e33c526

fix: eliminate redundant initialization of AssetSelector, APIClients, and logging

Browse files

- Remove duplicate logging.basicConfig() in setup_gcs_permissions.py
- Add SHARED_ASSET_SELECTOR and SHARED_API_CLIENTS globals in process_csv.py
- Modify ContentAutomation to accept optional asset_selector and api_clients params
- Update the data_holder reference on the shared AssetSelector/APIClients instances before each row to maintain state consistency
- Remove duplicate ON_SCREEN_TEXT check in process_row()

This reduces video/audio library loads from 3x to 1x per job and GCS/TTS client
initialization from 2x to 1x per job.

src/automation.py CHANGED
@@ -28,12 +28,14 @@ from file_downloader import FileDownloader
28
  from data_holder import DataHolder
29
 
30
  class ContentAutomation:
31
- def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
32
  self.config = config
33
  self.data_holder = data_holder or DataHolder()
34
- self.api_clients = APIClients(config, self.data_holder)
 
35
  self.video_renderer = VideoRenderer(config, self.data_holder)
36
- self.asset_selector = AssetSelector(config, self.data_holder)
 
37
  self.file_downloader = FileDownloader()
38
  self.pipeline_start_time = None
39
 
 
28
  from data_holder import DataHolder
29
 
30
  class ContentAutomation:
31
+ def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None, asset_selector: 'AssetSelector' = None, api_clients: 'APIClients' = None):
32
  self.config = config
33
  self.data_holder = data_holder or DataHolder()
34
+ # Reuse provided api_clients or create new one
35
+ self.api_clients = api_clients or APIClients(config, self.data_holder)
36
  self.video_renderer = VideoRenderer(config, self.data_holder)
37
+ # Reuse provided asset_selector or create new one
38
+ self.asset_selector = asset_selector or AssetSelector(config, self.data_holder)
39
  self.file_downloader = FileDownloader()
40
  self.pipeline_start_time = None
41
 
src/google_src/setup_gcs_permissions.py CHANGED
@@ -9,9 +9,11 @@ from google.cloud import storage
9
  from google.iam.v1 import policy_pb2
10
  from google_src.gcloud_wrapper import get_default_wrapper
11
 
12
- # Configure logging
13
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
- logger = logging.getLogger(__name__)
 
 
15
 
16
  def setup_bucket_permissions(bucket_name: str, members: list, role: str = "roles/storage.objectViewer", storage_client=None):
17
  """
 
9
  from google.iam.v1 import policy_pb2
10
  from google_src.gcloud_wrapper import get_default_wrapper
11
 
12
+ # Use the project's configured logger to avoid duplicate log output
13
+ try:
14
+ from utils import logger
15
+ except ImportError:
16
+ logger = logging.getLogger(__name__)
17
 
18
  def setup_bucket_permissions(bucket_name: str, members: list, role: str = "roles/storage.objectViewer", storage_client=None):
19
  """
src/process_csv.py CHANGED
@@ -9,6 +9,7 @@ from main import (
9
  run_pipeline,
10
  )
11
  from automation import ContentAutomation
 
12
  from utils import logger
13
  from data_holder import DataHolder
14
  from asset_selector import AssetSelector
@@ -20,6 +21,8 @@ from google_src.gcs_utils import list_gcs_files
20
 
21
  DATA_DIR = Path("data")
22
  ALL_VIDEO_FILE_INFO = None
 
 
23
 
24
 
25
  def load_executed_from_gsheet(setup_type=None, job_index=None):
@@ -97,19 +100,28 @@ def log_progress_to_gsheet(tts_script: str, result: dict, job_index: int, commit
97
 
98
  async def process_row(row, config: dict):
99
  """Process one CSV row using the main pipeline."""
100
- global ALL_VIDEO_FILE_INFO
101
  tts_script = row.get("TTS Script (AI Avatar)", "")
102
  if os.getenv("ON_SCREEN_TEXT", "false").lower() == "true":
103
  tts_script = row.get("On-Screen Text", "").strip()
104
- if os.getenv("ON_SCREEN_TEXT", "false").lower() == "true":
105
- tts_script = row.get("On-Screen Text", "").strip()
106
 
107
  logger.info(f"▶️ Executing: {tts_script}...")
108
 
109
  dataHolder = DataHolder()
110
  dataHolder.visual_assets["all_videos"] = ALL_VIDEO_FILE_INFO
111
 
112
- automation = ContentAutomation(config, dataHolder)
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  content_strategy = {
115
  "gemini_prompt": row.get("Gemini Imagen4 Ultra Prompt (specific)", ""),
@@ -130,23 +142,35 @@ async def process_row(row, config: dict):
130
 
131
 
132
  async def download_all_video(config: dict):
133
- """Download all library videos once and cache them."""
134
- global ALL_VIDEO_FILE_INFO
135
 
136
  if ALL_VIDEO_FILE_INFO is None:
137
  logger.info("📥 Pre-downloading all library videos...")
138
 
139
- asset_selector = AssetSelector(config)
 
 
 
140
  video_urls = [
141
  row.get("Video URL (No Audio)", "").strip()
142
- for _, row in asset_selector.video_library.iterrows()
143
  if row.get("Video URL (No Audio)", "").strip()
144
  ]
145
 
146
  dataHolder = DataHolder()
147
  dataHolder.visual_assets["all_videos"] = [{"url": url} for url in video_urls]
148
 
149
- automation = ContentAutomation(config, dataHolder)
 
 
 
 
 
 
 
 
 
150
  await automation._download_all_visual_assets()
151
 
152
  ALL_VIDEO_FILE_INFO = dataHolder.visual_assets.get("all_videos", [])
 
9
  run_pipeline,
10
  )
11
  from automation import ContentAutomation
12
+ from api_clients import APIClients
13
  from utils import logger
14
  from data_holder import DataHolder
15
  from asset_selector import AssetSelector
 
21
 
22
  DATA_DIR = Path("data")
23
  ALL_VIDEO_FILE_INFO = None
24
+ SHARED_ASSET_SELECTOR = None # Shared instance to avoid redundant sheet loads
25
+ SHARED_API_CLIENTS = None # Shared instance to avoid redundant GCS/TTS client initialization
26
 
27
 
28
  def load_executed_from_gsheet(setup_type=None, job_index=None):
 
100
 
101
  async def process_row(row, config: dict):
102
  """Process one CSV row using the main pipeline."""
103
+ global ALL_VIDEO_FILE_INFO, SHARED_ASSET_SELECTOR, SHARED_API_CLIENTS
104
  tts_script = row.get("TTS Script (AI Avatar)", "")
105
  if os.getenv("ON_SCREEN_TEXT", "false").lower() == "true":
106
  tts_script = row.get("On-Screen Text", "").strip()
 
 
107
 
108
  logger.info(f"▶️ Executing: {tts_script}...")
109
 
110
  dataHolder = DataHolder()
111
  dataHolder.visual_assets["all_videos"] = ALL_VIDEO_FILE_INFO
112
 
113
+ # Update shared instances with current dataHolder before use
114
+ if SHARED_ASSET_SELECTOR:
115
+ SHARED_ASSET_SELECTOR.data_holder = dataHolder
116
+ if SHARED_API_CLIENTS:
117
+ SHARED_API_CLIENTS.data_holder = dataHolder
118
+
119
+ # Reuse shared AssetSelector and APIClients to avoid redundant initialization
120
+ automation = ContentAutomation(
121
+ config, dataHolder,
122
+ asset_selector=SHARED_ASSET_SELECTOR,
123
+ api_clients=SHARED_API_CLIENTS
124
+ )
125
 
126
  content_strategy = {
127
  "gemini_prompt": row.get("Gemini Imagen4 Ultra Prompt (specific)", ""),
 
142
 
143
 
144
  async def download_all_video(config: dict):
145
+ """Download all library videos once and cache them. Creates shared instances."""
146
+ global ALL_VIDEO_FILE_INFO, SHARED_ASSET_SELECTOR, SHARED_API_CLIENTS
147
 
148
  if ALL_VIDEO_FILE_INFO is None:
149
  logger.info("📥 Pre-downloading all library videos...")
150
 
151
+ # Create the shared AssetSelector once - this loads video/audio libraries from sheets
152
+ if SHARED_ASSET_SELECTOR is None:
153
+ SHARED_ASSET_SELECTOR = AssetSelector(config)
154
+
155
  video_urls = [
156
  row.get("Video URL (No Audio)", "").strip()
157
+ for _, row in SHARED_ASSET_SELECTOR.video_library.iterrows()
158
  if row.get("Video URL (No Audio)", "").strip()
159
  ]
160
 
161
  dataHolder = DataHolder()
162
  dataHolder.visual_assets["all_videos"] = [{"url": url} for url in video_urls]
163
 
164
+ # Create the shared APIClients once - this initializes GCS/TTS clients
165
+ if SHARED_API_CLIENTS is None:
166
+ SHARED_API_CLIENTS = APIClients(config, dataHolder)
167
+
168
+ # Pass the shared instances to avoid creating new ones
169
+ automation = ContentAutomation(
170
+ config, dataHolder,
171
+ asset_selector=SHARED_ASSET_SELECTOR,
172
+ api_clients=SHARED_API_CLIENTS
173
+ )
174
  await automation._download_all_visual_assets()
175
 
176
  ALL_VIDEO_FILE_INFO = dataHolder.visual_assets.get("all_videos", [])