fix: eliminate redundant initialization of AssetSelector, APIClients, and logging
Browse files
- Remove duplicate logging.basicConfig() in setup_gcs_permissions.py
- Add SHARED_ASSET_SELECTOR and SHARED_API_CLIENTS globals in process_csv.py
- Modify ContentAutomation to accept optional asset_selector and api_clients params
- Point the shared AssetSelector and APIClients at the current row's DataHolder before each row is processed, so the reused instances stay consistent with per-row state
- Remove duplicate ON_SCREEN_TEXT check in process_row()

This reduces video/audio library loads from 3x to 1x per job, and GCS/TTS client
initialization from 2x to 1x per job.
- src/automation.py +5 -3
- src/google_src/setup_gcs_permissions.py +5 -3
- src/process_csv.py +33 -9
src/automation.py
CHANGED
|
@@ -28,12 +28,14 @@ from file_downloader import FileDownloader
|
|
| 28 |
from data_holder import DataHolder
|
| 29 |
|
| 30 |
class ContentAutomation:
|
| 31 |
-
def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None):
|
| 32 |
self.config = config
|
| 33 |
self.data_holder = data_holder or DataHolder()
|
| 34 |
-
|
|
|
|
| 35 |
self.video_renderer = VideoRenderer(config, self.data_holder)
|
| 36 |
-
|
|
|
|
| 37 |
self.file_downloader = FileDownloader()
|
| 38 |
self.pipeline_start_time = None
|
| 39 |
|
|
|
|
| 28 |
from data_holder import DataHolder
|
| 29 |
|
| 30 |
class ContentAutomation:
|
| 31 |
+
def __init__(self, config: Dict[str, Any], data_holder: DataHolder = None, asset_selector: 'AssetSelector' = None, api_clients: 'APIClients' = None):
|
| 32 |
self.config = config
|
| 33 |
self.data_holder = data_holder or DataHolder()
|
| 34 |
+
# Reuse provided api_clients or create new one
|
| 35 |
+
self.api_clients = api_clients or APIClients(config, self.data_holder)
|
| 36 |
self.video_renderer = VideoRenderer(config, self.data_holder)
|
| 37 |
+
# Reuse provided asset_selector or create new one
|
| 38 |
+
self.asset_selector = asset_selector or AssetSelector(config, self.data_holder)
|
| 39 |
self.file_downloader = FileDownloader()
|
| 40 |
self.pipeline_start_time = None
|
| 41 |
|
src/google_src/setup_gcs_permissions.py
CHANGED
|
@@ -9,9 +9,11 @@ from google.cloud import storage
|
|
| 9 |
from google.iam.v1 import policy_pb2
|
| 10 |
from google_src.gcloud_wrapper import get_default_wrapper
|
| 11 |
|
| 12 |
-
#
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def setup_bucket_permissions(bucket_name: str, members: list, role: str = "roles/storage.objectViewer", storage_client=None):
|
| 17 |
"""
|
|
|
|
| 9 |
from google.iam.v1 import policy_pb2
|
| 10 |
from google_src.gcloud_wrapper import get_default_wrapper
|
| 11 |
|
| 12 |
+
# Use the project's configured logger to avoid duplicate log output
|
| 13 |
+
try:
|
| 14 |
+
from utils import logger
|
| 15 |
+
except ImportError:
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
|
| 18 |
def setup_bucket_permissions(bucket_name: str, members: list, role: str = "roles/storage.objectViewer", storage_client=None):
|
| 19 |
"""
|
src/process_csv.py
CHANGED
|
@@ -9,6 +9,7 @@ from main import (
|
|
| 9 |
run_pipeline,
|
| 10 |
)
|
| 11 |
from automation import ContentAutomation
|
|
|
|
| 12 |
from utils import logger
|
| 13 |
from data_holder import DataHolder
|
| 14 |
from asset_selector import AssetSelector
|
|
@@ -20,6 +21,8 @@ from google_src.gcs_utils import list_gcs_files
|
|
| 20 |
|
| 21 |
DATA_DIR = Path("data")
|
| 22 |
ALL_VIDEO_FILE_INFO = None
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def load_executed_from_gsheet(setup_type=None, job_index=None):
|
|
@@ -97,19 +100,28 @@ def log_progress_to_gsheet(tts_script: str, result: dict, job_index: int, commit
|
|
| 97 |
|
| 98 |
async def process_row(row, config: dict):
|
| 99 |
"""Process one CSV row using the main pipeline."""
|
| 100 |
-
global ALL_VIDEO_FILE_INFO
|
| 101 |
tts_script = row.get("TTS Script (AI Avatar)", "")
|
| 102 |
if os.getenv("ON_SCREEN_TEXT", "false").lower() == "true":
|
| 103 |
tts_script = row.get("On-Screen Text", "").strip()
|
| 104 |
-
if os.getenv("ON_SCREEN_TEXT", "false").lower() == "true":
|
| 105 |
-
tts_script = row.get("On-Screen Text", "").strip()
|
| 106 |
|
| 107 |
logger.info(f"▶️ Executing: {tts_script}...")
|
| 108 |
|
| 109 |
dataHolder = DataHolder()
|
| 110 |
dataHolder.visual_assets["all_videos"] = ALL_VIDEO_FILE_INFO
|
| 111 |
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
content_strategy = {
|
| 115 |
"gemini_prompt": row.get("Gemini Imagen4 Ultra Prompt (specific)", ""),
|
|
@@ -130,23 +142,35 @@ async def process_row(row, config: dict):
|
|
| 130 |
|
| 131 |
|
| 132 |
async def download_all_video(config: dict):
|
| 133 |
-
"""Download all library videos once and cache them."""
|
| 134 |
-
global ALL_VIDEO_FILE_INFO
|
| 135 |
|
| 136 |
if ALL_VIDEO_FILE_INFO is None:
|
| 137 |
logger.info("📥 Pre-downloading all library videos...")
|
| 138 |
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
| 140 |
video_urls = [
|
| 141 |
row.get("Video URL (No Audio)", "").strip()
|
| 142 |
-
for _, row in
|
| 143 |
if row.get("Video URL (No Audio)", "").strip()
|
| 144 |
]
|
| 145 |
|
| 146 |
dataHolder = DataHolder()
|
| 147 |
dataHolder.visual_assets["all_videos"] = [{"url": url} for url in video_urls]
|
| 148 |
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
await automation._download_all_visual_assets()
|
| 151 |
|
| 152 |
ALL_VIDEO_FILE_INFO = dataHolder.visual_assets.get("all_videos", [])
|
|
|
|
| 9 |
run_pipeline,
|
| 10 |
)
|
| 11 |
from automation import ContentAutomation
|
| 12 |
+
from api_clients import APIClients
|
| 13 |
from utils import logger
|
| 14 |
from data_holder import DataHolder
|
| 15 |
from asset_selector import AssetSelector
|
|
|
|
| 21 |
|
| 22 |
DATA_DIR = Path("data")
|
| 23 |
ALL_VIDEO_FILE_INFO = None
|
| 24 |
+
SHARED_ASSET_SELECTOR = None # Shared instance to avoid redundant sheet loads
|
| 25 |
+
SHARED_API_CLIENTS = None # Shared instance to avoid redundant GCS/TTS client initialization
|
| 26 |
|
| 27 |
|
| 28 |
def load_executed_from_gsheet(setup_type=None, job_index=None):
|
|
|
|
| 100 |
|
| 101 |
async def process_row(row, config: dict):
|
| 102 |
"""Process one CSV row using the main pipeline."""
|
| 103 |
+
global ALL_VIDEO_FILE_INFO, SHARED_ASSET_SELECTOR, SHARED_API_CLIENTS
|
| 104 |
tts_script = row.get("TTS Script (AI Avatar)", "")
|
| 105 |
if os.getenv("ON_SCREEN_TEXT", "false").lower() == "true":
|
| 106 |
tts_script = row.get("On-Screen Text", "").strip()
|
|
|
|
|
|
|
| 107 |
|
| 108 |
logger.info(f"▶️ Executing: {tts_script}...")
|
| 109 |
|
| 110 |
dataHolder = DataHolder()
|
| 111 |
dataHolder.visual_assets["all_videos"] = ALL_VIDEO_FILE_INFO
|
| 112 |
|
| 113 |
+
# Update shared instances with current dataHolder before use
|
| 114 |
+
if SHARED_ASSET_SELECTOR:
|
| 115 |
+
SHARED_ASSET_SELECTOR.data_holder = dataHolder
|
| 116 |
+
if SHARED_API_CLIENTS:
|
| 117 |
+
SHARED_API_CLIENTS.data_holder = dataHolder
|
| 118 |
+
|
| 119 |
+
# Reuse shared AssetSelector and APIClients to avoid redundant initialization
|
| 120 |
+
automation = ContentAutomation(
|
| 121 |
+
config, dataHolder,
|
| 122 |
+
asset_selector=SHARED_ASSET_SELECTOR,
|
| 123 |
+
api_clients=SHARED_API_CLIENTS
|
| 124 |
+
)
|
| 125 |
|
| 126 |
content_strategy = {
|
| 127 |
"gemini_prompt": row.get("Gemini Imagen4 Ultra Prompt (specific)", ""),
|
|
|
|
| 142 |
|
| 143 |
|
| 144 |
async def download_all_video(config: dict):
|
| 145 |
+
"""Download all library videos once and cache them. Creates shared instances."""
|
| 146 |
+
global ALL_VIDEO_FILE_INFO, SHARED_ASSET_SELECTOR, SHARED_API_CLIENTS
|
| 147 |
|
| 148 |
if ALL_VIDEO_FILE_INFO is None:
|
| 149 |
logger.info("📥 Pre-downloading all library videos...")
|
| 150 |
|
| 151 |
+
# Create the shared AssetSelector once - this loads video/audio libraries from sheets
|
| 152 |
+
if SHARED_ASSET_SELECTOR is None:
|
| 153 |
+
SHARED_ASSET_SELECTOR = AssetSelector(config)
|
| 154 |
+
|
| 155 |
video_urls = [
|
| 156 |
row.get("Video URL (No Audio)", "").strip()
|
| 157 |
+
for _, row in SHARED_ASSET_SELECTOR.video_library.iterrows()
|
| 158 |
if row.get("Video URL (No Audio)", "").strip()
|
| 159 |
]
|
| 160 |
|
| 161 |
dataHolder = DataHolder()
|
| 162 |
dataHolder.visual_assets["all_videos"] = [{"url": url} for url in video_urls]
|
| 163 |
|
| 164 |
+
# Create the shared APIClients once - this initializes GCS/TTS clients
|
| 165 |
+
if SHARED_API_CLIENTS is None:
|
| 166 |
+
SHARED_API_CLIENTS = APIClients(config, dataHolder)
|
| 167 |
+
|
| 168 |
+
# Pass the shared instances to avoid creating new ones
|
| 169 |
+
automation = ContentAutomation(
|
| 170 |
+
config, dataHolder,
|
| 171 |
+
asset_selector=SHARED_ASSET_SELECTOR,
|
| 172 |
+
api_clients=SHARED_API_CLIENTS
|
| 173 |
+
)
|
| 174 |
await automation._download_all_visual_assets()
|
| 175 |
|
| 176 |
ALL_VIDEO_FILE_INFO = dataHolder.visual_assets.get("all_videos", [])
|