topcoderkz
committed on
Commit
·
0b94fac
1
Parent(s):
b620472
Refactor: add API logic, test with actual credentials
Browse files- .env.example +70 -5
- .gitignore +3 -0
- API_SETUP_GUIDE.md +316 -0
- QUICKSTART.md +313 -0
- README.md +351 -17
- example_script.txt +7 -0
- example_strategy.json +45 -0
- requirements.txt +17 -9
- setup.sh +0 -14
- src/api_clients.py +347 -43
- src/automation.py +369 -54
- src/main.py +306 -24
- src/utils.py +197 -23
.env.example
CHANGED
|
@@ -1,10 +1,75 @@
|
|
| 1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
GEMINI_API_KEY=your_gemini_api_key_here
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
GCS_BUCKET_NAME=your_bucket_name_here
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
| 8 |
AUDIO_LIBRARY_SIZE=27
|
|
|
|
|
|
|
| 9 |
VIDEO_LIBRARY_SIZE=47
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================
|
| 2 |
+
# SOMIRA CONTENT AUTOMATION - CONFIGURATION
|
| 3 |
+
# ============================================
|
| 4 |
+
|
| 5 |
+
# -------------------- API KEYS --------------------
|
| 6 |
+
|
| 7 |
+
# Gemini API (Google AI) - For prompt enhancement and video selection
|
| 8 |
+
# Get yours at: https://aistudio.google.com/app/apikey
|
| 9 |
GEMINI_API_KEY=your_gemini_api_key_here
|
| 10 |
+
|
| 11 |
+
# RunwayML API - For AI video generation
|
| 12 |
+
# Get yours at: https://dev.runwayml.com/
|
| 13 |
+
RUNWAYML_API_KEY=key_your_runwayml_api_key_here
|
| 14 |
+
|
| 15 |
+
# Google Cloud - Service Account for TTS and Storage
|
| 16 |
+
# Path to your service account JSON key file
|
| 17 |
+
GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json
|
| 18 |
+
|
| 19 |
+
# OR use Azure TTS (Alternative to Google TTS)
|
| 20 |
+
# AZURE_SPEECH_KEY=your_azure_speech_key_here
|
| 21 |
+
# AZURE_SPEECH_REGION=eastus
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# -------------------- CLOUD STORAGE --------------------
|
| 25 |
+
|
| 26 |
+
# Google Cloud Storage bucket name for video storage
|
| 27 |
+
# Create bucket at: https://console.cloud.google.com/storage
|
| 28 |
GCS_BUCKET_NAME=your_bucket_name_here
|
| 29 |
|
| 30 |
+
|
| 31 |
+
# -------------------- CONFIGURATION --------------------
|
| 32 |
+
|
| 33 |
+
# Audio library size (number of background music tracks available)
|
| 34 |
AUDIO_LIBRARY_SIZE=27
|
| 35 |
+
|
| 36 |
+
# Video library size (number of product video clips available)
|
| 37 |
VIDEO_LIBRARY_SIZE=47
|
| 38 |
+
|
| 39 |
+
# Default TTS voice (Google Cloud TTS voices)
|
| 40 |
+
# Options: en-US-Neural2-F, en-US-Neural2-C, en-US-Neural2-D, etc. (Azure TTS instead uses names like en-US-AriaNeural, en-US-JennyNeural, en-US-GuyNeural)
|
| 41 |
+
# Full list: https://cloud.google.com/text-to-speech/docs/voices
|
| 42 |
+
DEFAULT_VOICE=en-US-Neural2-F
|
| 43 |
+
|
| 44 |
+
# Video rendering quality (low, medium, high, ultra)
|
| 45 |
+
VIDEO_QUALITY=high
|
| 46 |
+
|
| 47 |
+
# Enable debug logging (true/false)
|
| 48 |
+
DEBUG_MODE=false
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# -------------------- OPTIONAL SETTINGS --------------------
|
| 52 |
+
|
| 53 |
+
# Maximum video generation timeout (seconds)
|
| 54 |
+
VIDEO_GENERATION_TIMEOUT=300
|
| 55 |
+
|
| 56 |
+
# Maximum concurrent API requests
|
| 57 |
+
MAX_CONCURRENT_REQUESTS=4
|
| 58 |
+
|
| 59 |
+
# Retry attempts for failed API calls
|
| 60 |
+
MAX_RETRY_ATTEMPTS=3
|
| 61 |
+
|
| 62 |
+
# Output directory for generated videos
|
| 63 |
+
OUTPUT_DIRECTORY=./output
|
| 64 |
+
|
| 65 |
+
# Temp directory for intermediate files
|
| 66 |
+
TEMP_DIRECTORY=/tmp/somira
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# -------------------- NOTES --------------------
|
| 70 |
+
#
|
| 71 |
+
# 1. Never commit actual API keys to version control - keep real values only in your local .env file
|
| 72 |
+
# 2. Copy this file to .env and fill in your actual values
|
| 73 |
+
# 3. Make sure .env is listed in your .gitignore file
|
| 74 |
+
# 4. See API_SETUP_GUIDE.md for detailed setup instructions
|
| 75 |
+
#
|
.gitignore
CHANGED
|
@@ -27,3 +27,6 @@ __pycache__/
|
|
| 27 |
*.mp3
|
| 28 |
*.wav
|
| 29 |
*.avi
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
*.mp3
|
| 28 |
*.wav
|
| 29 |
*.avi
|
| 30 |
+
|
| 31 |
+
# secrets
|
| 32 |
+
somira-ffa592f2778a.json
|
API_SETUP_GUIDE.md
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# API Setup Guide - Complete Instructions
|
| 2 |
+
|
| 3 |
+
This guide will walk you through obtaining all necessary API keys for your Somira video generation system.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## 1. Google Gemini API (Prompt Enhancement)
|
| 8 |
+
|
| 9 |
+
### Purpose
|
| 10 |
+
Enhances user prompts and analyzes scripts for intelligent video selection.
|
| 11 |
+
|
| 12 |
+
### How to Get Your API Key
|
| 13 |
+
|
| 14 |
+
1. **Go to Google AI Studio**
|
| 15 |
+
- Visit: https://aistudio.google.com/app/apikey
|
| 16 |
+
- Sign in with your Google account
|
| 17 |
+
|
| 18 |
+
2. **Create API Key**
|
| 19 |
+
- Click "Get API key" button (top left)
|
| 20 |
+
- Click "Create API key"
|
| 21 |
+
- Choose "Create API key in new project" (or select existing project)
|
| 22 |
+
- Copy the API key immediately (shown only once!)
|
| 23 |
+
|
| 24 |
+
3. **Add to Your Environment**
|
| 25 |
+
```bash
|
| 26 |
+
export GEMINI_API_KEY="your_api_key_here"
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
### Pricing
|
| 30 |
+
- Free tier available with rate limits
|
| 31 |
+
- Model used: `gemini-2.0-flash-exp` (optimized for speed and cost)
|
| 32 |
+
|
| 33 |
+
### Documentation
|
| 34 |
+
- https://ai.google.dev/gemini-api/docs
|
| 35 |
+
|
| 36 |
+
---
|
| 37 |
+
|
| 38 |
+
## 2. RunwayML API (Video Generation)
|
| 39 |
+
|
| 40 |
+
### Purpose
|
| 41 |
+
Generates AI videos from text prompts using Gen-4 model.
|
| 42 |
+
|
| 43 |
+
### How to Get Your API Key
|
| 44 |
+
|
| 45 |
+
1. **Create Developer Account**
|
| 46 |
+
- Visit: https://dev.runwayml.com/
|
| 47 |
+
- Sign up for a new account
|
| 48 |
+
- Create a new organization (corresponds to your integration)
|
| 49 |
+
|
| 50 |
+
2. **Create API Key**
|
| 51 |
+
- Navigate to "API Keys" tab
|
| 52 |
+
- Click "Create new key"
|
| 53 |
+
- Give it a descriptive name (e.g., "Somira Production")
|
| 54 |
+
- Copy the key immediately and store securely (never shown again)
|
| 55 |
+
|
| 56 |
+
3. **Add Credits**
|
| 57 |
+
- Go to "Billing" tab
|
| 58 |
+
- Add credits to your organization
|
| 59 |
+
- Minimum payment: $10 (at $0.01 per credit)
|
| 60 |
+
|
| 61 |
+
4. **Add to Your Environment**
|
| 62 |
+
```bash
|
| 63 |
+
export RUNWAYML_API_KEY="key_your_api_key_here"
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### Pricing
|
| 67 |
+
- Pay-per-use model with credits
|
| 68 |
+
- Gen-4 Turbo: ~50-100 credits (~$0.50-$1.00 at $0.01 per credit) per 10-second video
|
| 69 |
+
- Minimum: $10 to start
|
| 70 |
+
|
| 71 |
+
### Documentation
|
| 72 |
+
- https://docs.dev.runwayml.com/
|
| 73 |
+
|
| 74 |
+
---
|
| 75 |
+
|
| 76 |
+
## 3. Text-to-Speech (Google Cloud, or Azure as an Alternative)
|
| 77 |
+
|
| 78 |
+
### Purpose
|
| 79 |
+
Converts text scripts to natural-sounding speech with timing data for lip-sync.
|
| 80 |
+
|
| 81 |
+
### Option A: Google Cloud TTS (Recommended)
|
| 82 |
+
|
| 83 |
+
#### How to Get Your API Key
|
| 84 |
+
|
| 85 |
+
1. **Create Google Cloud Project**
|
| 86 |
+
- Visit: https://console.cloud.google.com/
|
| 87 |
+
- Create new project or select existing
|
| 88 |
+
|
| 89 |
+
2. **Enable Text-to-Speech API**
|
| 90 |
+
- Go to "APIs & Services" > "Library"
|
| 91 |
+
- Search "Text-to-Speech API"
|
| 92 |
+
- Click "Enable"
|
| 93 |
+
|
| 94 |
+
3. **Create Service Account**
|
| 95 |
+
- Go to "APIs & Services" > "Credentials"
|
| 96 |
+
- Click "Create Credentials" > "Service Account"
|
| 97 |
+
- Download JSON key file
|
| 98 |
+
|
| 99 |
+
4. **Add to Your Environment**
|
| 100 |
+
```bash
|
| 101 |
+
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account-key.json"
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
#### Pricing
|
| 105 |
+
- Free tier: 4 million characters/month (Standard voices); 1 million characters/month (WaveNet/Neural2 voices)
|
| 106 |
+
- $4 per million characters after (Standard)
|
| 107 |
+
- $16 per million characters (Neural2/Studio voices)
|
| 108 |
+
|
| 109 |
+
### Option B: Azure Cognitive Services TTS
|
| 110 |
+
|
| 111 |
+
#### How to Get Your API Key
|
| 112 |
+
|
| 113 |
+
1. **Create Azure Account**
|
| 114 |
+
- Visit: https://portal.azure.com/
|
| 115 |
+
- Sign up (free tier available)
|
| 116 |
+
|
| 117 |
+
2. **Create Speech Service Resource**
|
| 118 |
+
- Search "Speech Services" in Azure Portal
|
| 119 |
+
- Click "Create"
|
| 120 |
+
- Select subscription, resource group, region
|
| 121 |
+
- Choose pricing tier (F0 for free)
|
| 122 |
+
|
| 123 |
+
3. **Get Keys**
|
| 124 |
+
- Go to your Speech Service resource
|
| 125 |
+
- Navigate to "Keys and Endpoint"
|
| 126 |
+
- Copy Key 1 or Key 2
|
| 127 |
+
- Copy the Region (e.g., eastus)
|
| 128 |
+
|
| 129 |
+
4. **Add to Your Environment**
|
| 130 |
+
```bash
|
| 131 |
+
export AZURE_SPEECH_KEY="your_key_here"
|
| 132 |
+
export AZURE_SPEECH_REGION="eastus"
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
#### Pricing
|
| 136 |
+
- Free tier: 5 audio hours/month
|
| 137 |
+
- Standard: $1 per audio hour
|
| 138 |
+
- Neural: $16 per million characters
|
| 139 |
+
|
| 140 |
+
### Documentation
|
| 141 |
+
- Google: https://cloud.google.com/text-to-speech/docs
|
| 142 |
+
- Azure: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/
|
| 143 |
+
|
| 144 |
+
---
|
| 145 |
+
|
| 146 |
+
## 4. Google Cloud Storage (Video Storage)
|
| 147 |
+
|
| 148 |
+
### Purpose
|
| 149 |
+
Stores generated videos, audio files, and video library.
|
| 150 |
+
|
| 151 |
+
### How to Set Up
|
| 152 |
+
|
| 153 |
+
1. **Create GCS Bucket**
|
| 154 |
+
- Go to: https://console.cloud.google.com/storage
|
| 155 |
+
- Click "Create Bucket"
|
| 156 |
+
- Choose unique name (e.g., "somira-videos")
|
| 157 |
+
- Select region (same as your app for best performance)
|
| 158 |
+
- Choose "Standard" storage class
|
| 159 |
+
|
| 160 |
+
2. **Set Permissions**
|
| 161 |
+
- Make bucket public (if videos should be publicly accessible)
|
| 162 |
+
- Or configure IAM for service account access
|
| 163 |
+
|
| 164 |
+
3. **Add to Your Environment**
|
| 165 |
+
```bash
|
| 166 |
+
export GCS_BUCKET_NAME="somira-videos"
|
| 167 |
+
```
|
| 168 |
+
|
| 169 |
+
### Pricing
|
| 170 |
+
- $0.020 per GB/month (Standard storage)
|
| 171 |
+
- $0.12 per GB egress (after free tier)
|
| 172 |
+
- Free tier: 5GB storage
|
| 173 |
+
|
| 174 |
+
---
|
| 175 |
+
|
| 176 |
+
## Complete .env File Example
|
| 177 |
+
|
| 178 |
+
Create a `.env` file in your project root:
|
| 179 |
+
|
| 180 |
+
```bash
|
| 181 |
+
# Gemini API (Prompt Enhancement)
|
| 182 |
+
GEMINI_API_KEY=AIzaSyC_your_gemini_key_here
|
| 183 |
+
|
| 184 |
+
# RunwayML API (Video Generation)
|
| 185 |
+
RUNWAYML_API_KEY=key_1234567890abcdefghijklmnop
|
| 186 |
+
|
| 187 |
+
# Google Cloud TTS (Option A - Recommended)
|
| 188 |
+
GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
|
| 189 |
+
|
| 190 |
+
# OR Azure TTS (Option B)
|
| 191 |
+
# AZURE_SPEECH_KEY=your_azure_key_here
|
| 192 |
+
# AZURE_SPEECH_REGION=eastus
|
| 193 |
+
|
| 194 |
+
# Google Cloud Storage
|
| 195 |
+
GCS_BUCKET_NAME=somira-videos
|
| 196 |
+
|
| 197 |
+
# Configuration
|
| 198 |
+
AUDIO_LIBRARY_SIZE=27
|
| 199 |
+
VIDEO_LIBRARY_SIZE=47
|
| 200 |
+
DEFAULT_VOICE=en-US-Neural2-F
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
---
|
| 204 |
+
|
| 205 |
+
## Security Best Practices
|
| 206 |
+
|
| 207 |
+
### DO:
|
| 208 |
+
- Store API keys in environment variables or secret managers
|
| 209 |
+
- Never commit API keys to version control (add .env to .gitignore)
|
| 210 |
+
- Use descriptive names for API keys so you can revoke them later
|
| 211 |
+
- Rotate keys regularly
|
| 212 |
+
- Use separate keys for development and production
|
| 213 |
+
|
| 214 |
+
### DON'T:
|
| 215 |
+
- Never expose API keys on the client-side or in client-side code
|
| 216 |
+
- Never hard-code API keys directly in source code
|
| 217 |
+
- Don't share keys in public repositories
|
| 218 |
+
|
| 219 |
+
---
|
| 220 |
+
|
| 221 |
+
## Installation Steps
|
| 222 |
+
|
| 223 |
+
1. **Install Dependencies**
|
| 224 |
+
```bash
|
| 225 |
+
pip install -r requirements.txt
|
| 226 |
+
```
|
| 227 |
+
|
| 228 |
+
2. **Set Up Environment Variables**
|
| 229 |
+
```bash
|
| 230 |
+
cp .env.example .env
|
| 231 |
+
# Edit .env with your actual keys
|
| 232 |
+
```
|
| 233 |
+
|
| 234 |
+
3. **Load Environment Variables**
|
| 235 |
+
```python
|
| 236 |
+
from dotenv import load_dotenv
|
| 237 |
+
load_dotenv()
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
4. **Test API Connections**
|
| 241 |
+
```python
|
| 242 |
+
import os
from api_clients import APIClients
|
| 243 |
+
|
| 244 |
+
config = {
|
| 245 |
+
'gemini_api_key': os.getenv('GEMINI_API_KEY'),
|
| 246 |
+
'runwayml_api_key': os.getenv('RUNWAYML_API_KEY'),
|
| 247 |
+
'gcs_bucket_name': os.getenv('GCS_BUCKET_NAME'),
|
| 248 |
+
'video_library_size': 47,
|
| 249 |
+
'default_voice': 'en-US-Neural2-F'
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
+
clients = APIClients(config)
|
| 253 |
+
```
|
| 254 |
+
|
| 255 |
+
---
|
| 256 |
+
|
| 257 |
+
## Cost Estimates (Monthly)
|
| 258 |
+
|
| 259 |
+
For a moderate usage scenario (100 videos/month):
|
| 260 |
+
|
| 261 |
+
| Service | Usage | Cost |
|
| 262 |
+
|---------|-------|------|
|
| 263 |
+
| Gemini API | ~200K tokens | Free (within limits) |
|
| 264 |
+
| RunwayML | 100 videos × 10 sec | ~$50-100 |
|
| 265 |
+
| Google TTS | ~100K characters | Free (within limits) |
|
| 266 |
+
| Google Cloud Storage | 50GB storage + egress | ~$2-5 |
|
| 267 |
+
| **Total** | | **~$52-105/month** |
|
| 268 |
+
|
| 269 |
+
Most of the cost comes from RunwayML video generation. Consider:
|
| 270 |
+
- Using shorter video durations (5s instead of 10s)
|
| 271 |
+
- Caching generated videos
|
| 272 |
+
- Using Gen-4 Turbo for faster/cheaper results
|
| 273 |
+
|
| 274 |
+
---
|
| 275 |
+
|
| 276 |
+
## Troubleshooting
|
| 277 |
+
|
| 278 |
+
### Common Issues
|
| 279 |
+
|
| 280 |
+
1. **"API key not found" errors**
|
| 281 |
+
- Check environment variables are loaded
|
| 282 |
+
- Verify .env file location
|
| 283 |
+
- Restart your application after adding keys
|
| 284 |
+
|
| 285 |
+
2. **RunwayML "Insufficient credits"**
|
| 286 |
+
- Add credits in the billing tab of developer portal
|
| 287 |
+
- Minimum $10 required to start
|
| 288 |
+
|
| 289 |
+
3. **Google Cloud authentication errors**
|
| 290 |
+
- Verify service account JSON path is correct
|
| 291 |
+
- Check service account has necessary permissions
|
| 292 |
+
- Ensure APIs are enabled in Cloud Console
|
| 293 |
+
|
| 294 |
+
4. **Rate limiting**
|
| 295 |
+
- Implement exponential backoff
|
| 296 |
+
- Add delays between API calls
|
| 297 |
+
- Consider upgrading to paid tiers
|
| 298 |
+
|
| 299 |
+
---
|
| 300 |
+
|
| 301 |
+
## Support Resources
|
| 302 |
+
|
| 303 |
+
- **Gemini**: https://ai.google.dev/support
|
| 304 |
+
- **RunwayML**: https://help.runwayml.com/
|
| 305 |
+
- **Google Cloud**: https://cloud.google.com/support
|
| 306 |
+
- **Azure**: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-text-to-speech
|
| 307 |
+
|
| 308 |
+
---
|
| 309 |
+
|
| 310 |
+
## Next Steps
|
| 311 |
+
|
| 312 |
+
1. Obtain all API keys following the instructions above
|
| 313 |
+
2. Configure your .env file
|
| 314 |
+
3. Test each API endpoint individually
|
| 315 |
+
4. Run the full video generation pipeline
|
| 316 |
+
5. Monitor usage and costs in each platform's dashboard
|
QUICKSTART.md
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🚀 Quick Start Guide
|
| 2 |
+
|
| 3 |
+
Get your Somira Content Automation System up and running in 5 minutes!
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## Prerequisites
|
| 8 |
+
|
| 9 |
+
- Python 3.8 or higher
|
| 10 |
+
- pip (Python package manager)
|
| 11 |
+
- API keys (see [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md))
|
| 12 |
+
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
## Installation
|
| 16 |
+
|
| 17 |
+
### 1. Clone or Download the Project
|
| 18 |
+
|
| 19 |
+
```bash
|
| 20 |
+
cd somira-automation
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
### 2. Create Virtual Environment (Recommended)
|
| 24 |
+
|
| 25 |
+
```bash
|
| 26 |
+
# Create virtual environment
|
| 27 |
+
python -m venv venv
|
| 28 |
+
|
| 29 |
+
# Activate it
|
| 30 |
+
# On macOS/Linux:
|
| 31 |
+
source venv/bin/activate
|
| 32 |
+
# On Windows:
|
| 33 |
+
venv\Scripts\activate
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
### 3. Install Dependencies
|
| 37 |
+
|
| 38 |
+
```bash
|
| 39 |
+
pip install -r requirements.txt
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## Configuration
|
| 45 |
+
|
| 46 |
+
### 1. Set Up Environment Variables
|
| 47 |
+
|
| 48 |
+
```bash
|
| 49 |
+
# Copy example file
|
| 50 |
+
cp .env.example .env
|
| 51 |
+
|
| 52 |
+
# Edit with your API keys
|
| 53 |
+
nano .env # or use your favorite editor
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
**Required values in `.env`:**
|
| 57 |
+
- `GEMINI_API_KEY` - Get from https://aistudio.google.com/app/apikey
|
| 58 |
+
- `RUNWAYML_API_KEY` - Get from https://dev.runwayml.com/
|
| 59 |
+
- `GOOGLE_APPLICATION_CREDENTIALS` - Path to GCP service account JSON
|
| 60 |
+
- `GCS_BUCKET_NAME` - Your Google Cloud Storage bucket name
|
| 61 |
+
|
| 62 |
+
### 2. Verify Configuration
|
| 63 |
+
|
| 64 |
+
```bash
|
| 65 |
+
python main.py --health-check
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
You should see:
|
| 69 |
+
```
|
| 70 |
+
✓ Gemini API: Connected
|
| 71 |
+
✓ RunwayML API: Configured
|
| 72 |
+
✓ TTS API: Configured
|
| 73 |
+
✓ Google Cloud Storage: Connected
|
| 74 |
+
✅ Health check passed
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
---
|
| 78 |
+
|
| 79 |
+
## Usage
|
| 80 |
+
|
| 81 |
+
### Basic Usage (Default Content)
|
| 82 |
+
|
| 83 |
+
```bash
|
| 84 |
+
python main.py
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
This will:
|
| 88 |
+
1. Generate a hook video using AI
|
| 89 |
+
2. Select background music
|
| 90 |
+
3. Choose 3 relevant product videos
|
| 91 |
+
4. Generate text-to-speech audio
|
| 92 |
+
5. Render the final video with subtitles
|
| 93 |
+
6. Upload to Google Cloud Storage
|
| 94 |
+
|
| 95 |
+
### Custom Content
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
python main.py \
|
| 99 |
+
--strategy example_strategy.json \
|
| 100 |
+
--script example_script.txt \
|
| 101 |
+
--output ./output/my_video
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
### Run a Quick Test
|
| 105 |
+
|
| 106 |
+
```bash
|
| 107 |
+
python main.py --test
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
This runs a minimal test to verify everything works without using many credits.
|
| 111 |
+
|
| 112 |
+
---
|
| 113 |
+
|
| 114 |
+
## Command Line Options
|
| 115 |
+
|
| 116 |
+
```bash
|
| 117 |
+
python main.py [OPTIONS]
|
| 118 |
+
|
| 119 |
+
Options:
|
| 120 |
+
--strategy FILE Path to JSON file with content strategy
|
| 121 |
+
--script FILE Path to text file with TTS script
|
| 122 |
+
--output DIR Output directory for results
|
| 123 |
+
--health-check Run health check on all services
|
| 124 |
+
--test Run test pipeline with minimal resources
|
| 125 |
+
--verbose Enable verbose logging
|
| 126 |
+
--help Show help message
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
---
|
| 130 |
+
|
| 131 |
+
## Example Workflows
|
| 132 |
+
|
| 133 |
+
### Create Multiple Videos from Different Scripts
|
| 134 |
+
|
| 135 |
+
```bash
|
| 136 |
+
# Video 1
|
| 137 |
+
python main.py \
|
| 138 |
+
--script scripts/script1.txt \
|
| 139 |
+
--output output/video1
|
| 140 |
+
|
| 141 |
+
# Video 2
|
| 142 |
+
python main.py \
|
| 143 |
+
--script scripts/script2.txt \
|
| 144 |
+
--output output/video2
|
| 145 |
+
|
| 146 |
+
# Video 3
|
| 147 |
+
python main.py \
|
| 148 |
+
--script scripts/script3.txt \
|
| 149 |
+
--output output/video3
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
### Custom Strategy with Different Style
|
| 153 |
+
|
| 154 |
+
Create `my_strategy.json`:
|
| 155 |
+
```json
|
| 156 |
+
{
|
| 157 |
+
"brand": "Somira",
|
| 158 |
+
"gemini_prompt": "Your custom prompt here...",
|
| 159 |
+
"runway_prompt": "Your custom RunwayML prompt...",
|
| 160 |
+
"style": "minimal",
|
| 161 |
+
"aspect_ratio": "16:9",
|
| 162 |
+
"duration": 10
|
| 163 |
+
}
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
Then run:
|
| 167 |
+
```bash
|
| 168 |
+
python main.py --strategy my_strategy.json
|
| 169 |
+
```
|
| 170 |
+
|
| 171 |
+
---
|
| 172 |
+
|
| 173 |
+
## Understanding the Pipeline
|
| 174 |
+
|
| 175 |
+
The automation runs in 4 steps:
|
| 176 |
+
|
| 177 |
+
**Step 1: Asset Generation (Parallel)** ⚡
|
| 178 |
+
- Generate hook video with AI (RunwayML)
|
| 179 |
+
- Select background music (from library)
|
| 180 |
+
- Select 3 product videos (AI-powered)
|
| 181 |
+
- Generate voice-over (TTS)
|
| 182 |
+
|
| 183 |
+
**Step 2: Video Rendering** 🎬
|
| 184 |
+
- Merge all videos
|
| 185 |
+
- Add audio tracks
|
| 186 |
+
- Apply transitions and effects
|
| 187 |
+
|
| 188 |
+
**Step 3: Subtitle Addition** 📝
|
| 189 |
+
- Generate subtitles from TTS timing
|
| 190 |
+
- Overlay on video
|
| 191 |
+
|
| 192 |
+
**Step 4: Cloud Upload** ☁️
|
| 193 |
+
- Upload to Google Cloud Storage
|
| 194 |
+
- Generate public URL
|
| 195 |
+
|
| 196 |
+
---
|
| 197 |
+
|
| 198 |
+
## File Structure
|
| 199 |
+
|
| 200 |
+
```
|
| 201 |
+
somira-automation/
|
| 202 |
+
├── main.py # Main entry point
|
| 203 |
+
├── automation.py # Pipeline orchestrator
|
| 204 |
+
├── api_clients.py # API integrations
|
| 205 |
+
├── video_renderer.py # Video processing
|
| 206 |
+
├── utils.py # Utilities and logging
|
| 207 |
+
├── requirements.txt # Python dependencies
|
| 208 |
+
├── .env # Your API keys (DO NOT COMMIT)
|
| 209 |
+
├── .env.example # Template for .env
|
| 210 |
+
├── example_strategy.json # Sample content strategy
|
| 211 |
+
├── example_script.txt # Sample TTS script
|
| 212 |
+
├── API_SETUP_GUIDE.md # Detailed API setup
|
| 213 |
+
└── QUICKSTART.md # This file
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
---
|
| 217 |
+
|
| 218 |
+
## Troubleshooting
|
| 219 |
+
|
| 220 |
+
### "Module not found" errors
|
| 221 |
+
```bash
|
| 222 |
+
pip install -r requirements.txt
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
### "API key not found" errors
|
| 226 |
+
```bash
|
| 227 |
+
# Check your .env file exists and has the right keys
|
| 228 |
+
cat .env
|
| 229 |
+
|
| 230 |
+
# Make sure you've loaded it
|
| 231 |
+
python -c "from dotenv import load_dotenv; load_dotenv(); import os; print(os.getenv('GEMINI_API_KEY'))"
|
| 232 |
+
```
|
| 233 |
+
|
| 234 |
+
### RunwayML "Insufficient credits"
|
| 235 |
+
- Add credits at https://dev.runwayml.com/ (minimum $10)
|
| 236 |
+
|
| 237 |
+
### Google Cloud authentication errors
|
| 238 |
+
```bash
|
| 239 |
+
# Verify your service account JSON exists
|
| 240 |
+
ls -l /path/to/service-account-key.json
|
| 241 |
+
|
| 242 |
+
# Set it in your .env
|
| 243 |
+
GOOGLE_APPLICATION_CREDENTIALS=/full/path/to/service-account-key.json
|
| 244 |
+
```
|
| 245 |
+
|
| 246 |
+
### Videos taking too long
|
| 247 |
+
- RunwayML video generation takes 30-60 seconds typically
|
| 248 |
+
- The `--test` command uses minimal resources for quick testing
|
| 249 |
+
|
| 250 |
+
---
|
| 251 |
+
|
| 252 |
+
## Cost Estimates
|
| 253 |
+
|
| 254 |
+
For 100 videos per month:
|
| 255 |
+
|
| 256 |
+
| Service | Cost |
|
| 257 |
+
|---------|------|
|
| 258 |
+
| Gemini API | Free (within limits) |
|
| 259 |
+
| RunwayML | ~$50-100 |
|
| 260 |
+
| Google TTS | Free (within limits) |
|
| 261 |
+
| Google Storage | ~$2-5 |
|
| 262 |
+
| **Total** | **~$52-105/month** |
|
| 263 |
+
|
| 264 |
+
💡 **Tip:** Use the `--test` command frequently to avoid unnecessary API costs during development.
|
| 265 |
+
|
| 266 |
+
---
|
| 267 |
+
|
| 268 |
+
## Next Steps
|
| 269 |
+
|
| 270 |
+
1. ✅ Complete API setup (see [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md))
|
| 271 |
+
2. ✅ Run health check: `python main.py --health-check`
|
| 272 |
+
3. ✅ Run test: `python main.py --test`
|
| 273 |
+
4. ✅ Generate your first video: `python main.py`
|
| 274 |
+
5. 📚 Customize: Edit `example_strategy.json` and `example_script.txt`
|
| 275 |
+
6. 🚀 Scale: Create multiple strategies and automate batch processing
|
| 276 |
+
|
| 277 |
+
---
|
| 278 |
+
|
| 279 |
+
## Support
|
| 280 |
+
|
| 281 |
+
- **API Issues:** See [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md)
|
| 282 |
+
- **Bugs:** Check logs in console output
|
| 283 |
+
- **Questions:** Review code comments in `main.py` and `automation.py`
|
| 284 |
+
|
| 285 |
+
---
|
| 286 |
+
|
| 287 |
+
## Tips for Best Results
|
| 288 |
+
|
| 289 |
+
### Prompt Engineering
|
| 290 |
+
- Be specific about visual details
|
| 291 |
+
- Include camera movements
|
| 292 |
+
- Specify lighting and mood
|
| 293 |
+
- Mention aspect ratio for consistency
|
| 294 |
+
|
| 295 |
+
### TTS Scripts
|
| 296 |
+
- Keep sentences natural and conversational
|
| 297 |
+
- Use pauses (commas, periods) for pacing
|
| 298 |
+
- Test different voices in `DEFAULT_VOICE` setting
|
| 299 |
+
- Aim for 15-30 seconds of speech
|
| 300 |
+
|
| 301 |
+
### Video Selection
|
| 302 |
+
- The AI analyzes your script for context
|
| 303 |
+
- More descriptive scripts = better video selection
|
| 304 |
+
- Review selected videos in logs
|
| 305 |
+
|
| 306 |
+
### Performance
|
| 307 |
+
- Parallel execution makes Step 1 fast
|
| 308 |
+
- Most time is spent waiting for RunwayML
|
| 309 |
+
- Use `--test` to verify setup without long waits
|
| 310 |
+
|
| 311 |
+
---
|
| 312 |
+
|
| 313 |
+
Happy automating! 🎉
|
README.md
CHANGED
|
@@ -1,25 +1,359 @@
|
|
| 1 |
-
# Content Automation System
|
| 2 |
-
A Python-based automated video content creation system that generates videos using AI APIs, selects relevant footage from a library, adds text-to-speech audio, and produces finished videos with subtitles.
|
| 3 |
|
| 4 |
-
|
| 5 |
|
| 6 |
-
|
| 7 |
-
- Python 3.8+
|
| 8 |
-
- API keys for:
|
| 9 |
-
- Google Gemini
|
| 10 |
-
- RunwayML
|
| 11 |
-
- Text-to-Speech service (Azure/Google/Amazon)
|
| 12 |
-
- Google Cloud Storage
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
```bash
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
| 19 |
python -m venv venv
|
| 20 |
-
source venv/bin/activate
|
|
|
|
|
|
|
| 21 |
pip install -r requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
cp .env.example .env
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🎬 Somira Content Automation System
|
|
|
|
| 2 |
|
| 3 |
+
**Automated video generation pipeline for product advertisements using AI**
|
| 4 |
|
| 5 |
+
Transform text scripts into professional product videos with AI-generated content, voice-overs, and intelligent video selection - all automated end-to-end.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## ✨ Features
|
| 10 |
+
|
| 11 |
+
- **🤖 AI-Powered Video Generation** - Create unique hook videos using RunwayML Gen-4
|
| 12 |
+
- **🧠 Intelligent Prompt Enhancement** - Gemini AI optimizes prompts for better results
|
| 13 |
+
- **🎙️ Professional Text-to-Speech** - Natural voice-overs with Google Cloud TTS
|
| 14 |
+
- **📹 Smart Video Selection** - AI analyzes scripts to select relevant product footage
|
| 15 |
+
- **🎵 Automatic Music Integration** - Background music from curated library
|
| 16 |
+
- **📝 Subtitle Generation** - Automatic subtitle overlay with timing
|
| 17 |
+
- **⚡ Parallel Processing** - Concurrent API calls for maximum speed
|
| 18 |
+
- **☁️ Cloud Storage** - Automatic upload to Google Cloud Storage
|
| 19 |
+
- **🔄 Robust Error Handling** - Fallback mechanisms for reliability
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## 🎯 Use Cases
|
| 24 |
+
|
| 25 |
+
- Product advertisement videos for social media
|
| 26 |
+
- Instagram Reels and TikTok content
|
| 27 |
+
- Automated marketing video generation
|
| 28 |
+
- A/B testing different video hooks
|
| 29 |
+
- Scalable video production pipelines
|
| 30 |
+
- Content marketing automation
|
| 31 |
+
|
| 32 |
+
---
|
| 33 |
+
|
| 34 |
+
## 📋 Requirements
|
| 35 |
+
|
| 36 |
+
- **Python 3.8+**
|
| 37 |
+
- **API Keys:**
|
| 38 |
+
- Google Gemini API (free tier available)
|
| 39 |
+
- RunwayML API ($10 minimum)
|
| 40 |
+
- Google Cloud Platform account (TTS + Storage)
|
| 41 |
+
- **Storage:** ~1GB for video library
|
| 42 |
+
- **RAM:** 4GB minimum
|
| 43 |
+
|
| 44 |
+
---
|
| 45 |
+
|
| 46 |
+
## 🚀 Quick Start
|
| 47 |
+
|
| 48 |
+
### 1. Installation
|
| 49 |
|
| 50 |
```bash
|
| 51 |
+
# Clone repository
|
| 52 |
+
git clone <your-repo-url>
|
| 53 |
+
cd somira-automation
|
| 54 |
+
|
| 55 |
+
# Create virtual environment
|
| 56 |
python -m venv venv
|
| 57 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 58 |
+
|
| 59 |
+
# Install dependencies
|
| 60 |
pip install -r requirements.txt
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### 2. Configuration
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
# Copy environment template
|
| 67 |
cp .env.example .env
|
| 68 |
+
|
| 69 |
+
# Edit with your API keys
|
| 70 |
+
nano .env
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
**Required API Keys:**
|
| 74 |
+
- `GEMINI_API_KEY` - https://aistudio.google.com/app/apikey
|
| 75 |
+
- `RUNWAYML_API_KEY` - https://dev.runwayml.com/
|
| 76 |
+
- `GOOGLE_APPLICATION_CREDENTIALS` - GCP service account JSON
|
| 77 |
+
- `GCS_BUCKET_NAME` - Your GCS bucket name
|
| 78 |
+
|
| 79 |
+
### 3. Verify Setup
|
| 80 |
+
|
| 81 |
+
```bash
|
| 82 |
+
python main.py --health-check
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
### 4. Generate Your First Video
|
| 86 |
+
|
| 87 |
+
```bash
|
| 88 |
+
python main.py
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
**📚 For detailed setup instructions, see [QUICKSTART.md](QUICKSTART.md)**
|
| 92 |
+
|
| 93 |
+
---
|
| 94 |
+
|
| 95 |
+
## 📖 Documentation
|
| 96 |
+
|
| 97 |
+
| Document | Description |
|
| 98 |
+
|----------|-------------|
|
| 99 |
+
| [QUICKSTART.md](QUICKSTART.md) | Get started in 5 minutes |
|
| 100 |
+
| [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md) | Detailed API key setup |
|
| 101 |
+
| [example_strategy.json](example_strategy.json) | Sample content strategy |
|
| 102 |
+
| [example_script.txt](example_script.txt) | Sample TTS script |
|
| 103 |
+
|
| 104 |
+
---
|
| 105 |
+
|
| 106 |
+
## 🏗️ Architecture
|
| 107 |
+
|
| 108 |
+
```
|
| 109 |
+
┌─────────────────────────────────────────────────────┐
|
| 110 |
+
│ MAIN PIPELINE │
|
| 111 |
+
└─────────────────────────────────────────────────────┘
|
| 112 |
+
│
|
| 113 |
+
▼
|
| 114 |
+
┌─────────────────────────────────────────────────────┐
|
| 115 |
+
│ STEP 1: Asset Generation (Parallel) │
|
| 116 |
+
├─────────────────────────────────────────────────────┤
|
| 117 |
+
│ ┌──────────────┐ ┌──────────────┐ │
|
| 118 |
+
│ │ Gemini API │→ │ RunwayML API │ │
|
| 119 |
+
│ │ (Enhance) │ │ (Hook Video) │ │
|
| 120 |
+
│ └──────────────┘ └──────────────┘ │
|
| 121 |
+
│ │
|
| 122 |
+
│ ┌──────────────┐ ┌──────────────┐ │
|
| 123 |
+
│ │ Music │ │ Video │ │
|
| 124 |
+
│ │ Selection │ │ Selection AI │ │
|
| 125 |
+
│ └──────────────┘ └──────────────┘ │
|
| 126 |
+
│ │
|
| 127 |
+
│ ┌──────────────┐ │
|
| 128 |
+
│ │ Google TTS │ │
|
| 129 |
+
│ │ (Voice-over) │ │
|
| 130 |
+
│ └──────────────┘ │
|
| 131 |
+
└─────────────────────────────────────────────────────┘
|
| 132 |
+
│
|
| 133 |
+
▼
|
| 134 |
+
┌─────────────────────────────────────────────────────┐
|
| 135 |
+
│ STEP 2: Video Rendering & Merging │
|
| 136 |
+
├─────────────────────────────────────────────────────┤
|
| 137 |
+
│ • Merge hook + library videos │
|
| 138 |
+
│ • Add background music │
|
| 139 |
+
│ • Mix voice-over audio │
|
| 140 |
+
│ • Apply transitions │
|
| 141 |
+
└─────────────────────────────────────────────────────┘
|
| 142 |
+
│
|
| 143 |
+
▼
|
| 144 |
+
┌─────────────────────────────────────────────────────┐
|
| 145 |
+
│ STEP 3: Subtitle Generation │
|
| 146 |
+
├─────────────────────────────────────────────────────┤
|
| 147 |
+
│ • Extract timing from TTS │
|
| 148 |
+
│ • Generate subtitle file │
|
| 149 |
+
│ • Overlay on video │
|
| 150 |
+
└─────────────────────────────────────────────────────┘
|
| 151 |
+
│
|
| 152 |
+
▼
|
| 153 |
+
┌─────────────────────────────────────────────────────┐
|
| 154 |
+
│ STEP 4: Cloud Storage Upload │
|
| 155 |
+
├─────────────────────────────────────────────────────┤
|
| 156 |
+
│ • Upload to Google Cloud Storage │
|
| 157 |
+
│ • Generate public URL │
|
| 158 |
+
│ • Save metadata │
|
| 159 |
+
└─────────────────────────────────────────────────────┘
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
---
|
| 163 |
+
|
| 164 |
+
## 💻 Usage Examples
|
| 165 |
+
|
| 166 |
+
### Basic Usage
|
| 167 |
+
|
| 168 |
+
```bash
|
| 169 |
+
# Use default content
|
| 170 |
+
python main.py
|
| 171 |
+
|
| 172 |
+
# Output:
|
| 173 |
+
# ✅ Pipeline completed successfully
|
| 174 |
+
# 📹 Final Video: https://storage.googleapis.com/...
|
| 175 |
+
```
|
| 176 |
+
|
| 177 |
+
### Custom Content
|
| 178 |
+
|
| 179 |
+
```bash
|
| 180 |
+
# Use custom strategy and script
|
| 181 |
+
python main.py \
|
| 182 |
+
--strategy campaigns/holiday_2025.json \
|
| 183 |
+
--script scripts/holiday_promo.txt \
|
| 184 |
+
--output ./output/holiday_video
|
| 185 |
+
```
|
| 186 |
+
|
| 187 |
+
### Batch Processing
|
| 188 |
+
|
| 189 |
+
```python
|
| 190 |
+
import asyncio
|
| 191 |
+
from automation import ContentAutomation
|
| 192 |
+
|
| 193 |
+
async def generate_multiple_videos():
|
| 194 |
+
automation = ContentAutomation(config)
|
| 195 |
+
|
| 196 |
+
scripts = [
|
| 197 |
+
"scripts/script1.txt",
|
| 198 |
+
"scripts/script2.txt",
|
| 199 |
+
"scripts/script3.txt"
|
| 200 |
+
]
|
| 201 |
+
|
| 202 |
+
for script_file in scripts:
|
| 203 |
+
with open(script_file) as f:
|
| 204 |
+
script = f.read()
|
| 205 |
+
|
| 206 |
+
result = await automation.execute_pipeline(
|
| 207 |
+
content_strategy=strategy,
|
| 208 |
+
tts_script=script
|
| 209 |
+
)
|
| 210 |
+
print(f"Generated: {result['final_url']}")
|
| 211 |
+
|
| 212 |
+
asyncio.run(generate_multiple_videos())
|
| 213 |
+
```
|
| 214 |
+
|
| 215 |
+
### Health Check
|
| 216 |
+
|
| 217 |
+
```bash
|
| 218 |
+
python main.py --health-check
|
| 219 |
+
|
| 220 |
+
# Output:
|
| 221 |
+
# 🏥 Running health check...
|
| 222 |
+
# ✓ Gemini API: Connected
|
| 223 |
+
# ✓ RunwayML API: Configured
|
| 224 |
+
# ✓ TTS API: Configured
|
| 225 |
+
# ✓ Google Cloud Storage: Connected
|
| 226 |
+
# ✅ All systems operational!
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
---
|
| 230 |
+
|
| 231 |
+
## 🔧 Configuration
|
| 232 |
+
|
| 233 |
+
### Content Strategy Format
|
| 234 |
+
|
| 235 |
+
```json
|
| 236 |
+
{
|
| 237 |
+
"brand": "Somira",
|
| 238 |
+
"gemini_prompt": "Descriptive prompt for enhancement",
|
| 239 |
+
"runway_prompt": "Specific prompt for video generation",
|
| 240 |
+
"style": "commercial",
|
| 241 |
+
"aspect_ratio": "9:16",
|
| 242 |
+
"duration": 5,
|
| 243 |
+
"platform": "Instagram Reels / TikTok"
|
| 244 |
+
}
|
| 245 |
+
```
|
| 246 |
+
|
| 247 |
+
### Environment Variables
|
| 248 |
+
|
| 249 |
+
| Variable | Required | Description |
|
| 250 |
+
|----------|----------|-------------|
|
| 251 |
+
| `GEMINI_API_KEY` | Yes | Google Gemini API key |
|
| 252 |
+
| `RUNWAYML_API_KEY` | Yes | RunwayML API key |
|
| 253 |
+
| `GOOGLE_APPLICATION_CREDENTIALS` | Yes | Path to GCP service account JSON |
|
| 254 |
+
| `GCS_BUCKET_NAME` | Yes | Google Cloud Storage bucket |
|
| 255 |
+
| `AUDIO_LIBRARY_SIZE` | No | Number of music tracks (default: 27) |
|
| 256 |
+
| `VIDEO_LIBRARY_SIZE` | No | Number of video clips (default: 47) |
|
| 257 |
+
| `DEFAULT_VOICE` | No | TTS voice name (default: en-US-Neural2-F) |
|
| 258 |
+
|
| 259 |
+
---
|
| 260 |
+
|
| 261 |
+
## 📊 Performance
|
| 262 |
+
|
| 263 |
+
- **Step 1 (Parallel):** 30-60 seconds (depends on RunwayML)
|
| 264 |
+
- **Step 2 (Rendering):** 10-20 seconds
|
| 265 |
+
- **Step 3 (Subtitles):** 5-10 seconds
|
| 266 |
+
- **Step 4 (Upload):** 5-15 seconds
|
| 267 |
+
|
| 268 |
+
**Total:** ~50-105 seconds per video
|
| 269 |
+
|
| 270 |
+
---
|
| 271 |
+
|
| 272 |
+
## 💰 Cost Analysis
|
| 273 |
+
|
| 274 |
+
### Per Video Cost
|
| 275 |
+
|
| 276 |
+
| Service | Cost | Notes |
|
| 277 |
+
|---------|------|-------|
|
| 278 |
+
| Gemini API | ~$0.001 | Usually free tier |
|
| 279 |
+
| RunwayML Gen-4 | $0.50-1.00 | Varies by duration |
|
| 280 |
+
| Google TTS | ~$0.001 | Usually free tier |
|
| 281 |
+
| GCS Storage | ~$0.001 | Per video |
|
| 282 |
+
| **Total per video** | **~$0.50-1.00** | |
|
| 283 |
+
|
| 284 |
+
### Monthly Estimates (100 videos)
|
| 285 |
+
|
| 286 |
+
- Gemini: Free (within free tier)
|
| 287 |
+
- RunwayML: $50-100
|
| 288 |
+
- Google TTS: Free (within 1M chars/month)
|
| 289 |
+
- GCS: $2-5
|
| 290 |
+
- **Total: $52-105/month**
|
| 291 |
+
|
| 292 |
+
---
|
| 293 |
+
|
| 294 |
+
## 🛡️ Error Handling
|
| 295 |
+
|
| 296 |
+
The system includes comprehensive error handling:
|
| 297 |
+
|
| 298 |
+
- ✅ **Automatic retries** for transient API failures
|
| 299 |
+
- ✅ **Fallback mechanisms** for video/music selection
|
| 300 |
+
- ✅ **Graceful degradation** when optional features fail
|
| 301 |
+
- ✅ **Detailed logging** for debugging
|
| 302 |
+
- ✅ **Partial results** saved on pipeline failure
|
| 303 |
+
|
| 304 |
+
---
|
| 305 |
+
|
| 306 |
+
## 📁 Project Structure
|
| 307 |
+
|
| 308 |
+
```
|
| 309 |
+
somira-automation/
|
| 310 |
+
├── main.py # CLI entry point
|
| 311 |
+
├── automation.py # Pipeline orchestrator
|
| 312 |
+
├── api_clients.py # API integrations (Gemini, RunwayML, TTS, GCS)
|
| 313 |
+
├── video_renderer.py # Video processing and rendering
|
| 314 |
+
├── utils.py # Logging and utility functions
|
| 315 |
+
├── requirements.txt # Python dependencies
|
| 316 |
+
├── .env.example # Environment variables template
|
| 317 |
+
├── example_strategy.json # Sample content strategy
|
| 318 |
+
├── example_script.txt # Sample TTS script
|
| 319 |
+
├── README.md # This file
|
| 320 |
+
├── QUICKSTART.md # Quick start guide
|
| 321 |
+
└── API_SETUP_GUIDE.md # Detailed API setup instructions
|
| 322 |
+
```
|
| 323 |
+
|
| 324 |
+
---
|
| 325 |
+
|
| 326 |
+
## 🔐 Security Best Practices
|
| 327 |
+
|
| 328 |
+
1. **Never commit `.env` file** - Added to `.gitignore`
|
| 329 |
+
2. **Use environment variables** - No hardcoded keys
|
| 330 |
+
3. **Restrict API key permissions** - Minimum necessary access
|
| 331 |
+
4. **Rotate keys regularly** - Every 90 days recommended
|
| 332 |
+
5. **Monitor API usage** - Set up billing alerts
|
| 333 |
+
6. **Use service accounts** - For GCP resources
|
| 334 |
+
|
| 335 |
+
---
|
| 336 |
+
|
| 337 |
+
## 🐛 Troubleshooting
|
| 338 |
+
|
| 339 |
+
### Common Issues
|
| 340 |
+
|
| 341 |
+
**"Module not found"**
|
| 342 |
+
```bash
|
| 343 |
+
pip install -r requirements.txt
|
| 344 |
+
```
|
| 345 |
+
|
| 346 |
+
**"API key not valid"**
|
| 347 |
+
- Check your `.env` file
|
| 348 |
+
- Verify keys are correctly copied (no extra spaces)
|
| 349 |
+
- Ensure APIs are enabled in respective consoles
|
| 350 |
+
|
| 351 |
+
**"Insufficient credits" (RunwayML)**
|
| 352 |
+
- Add credits at https://dev.runwayml.com/
|
| 353 |
+
- Minimum $10 required
|
| 354 |
+
|
| 355 |
+
**"Permission denied" (GCS)**
|
| 356 |
+
- Check service account has Storage Admin role
|
| 357 |
+
- Verify `GOOGLE_APPLICATION_CREDENTIALS` path is correct
|
| 358 |
+
|
| 359 |
+
**Videos taking too long**
|
example_script.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
I heard a pop, and suddenly my neck was stuck. I looked like I was mid-sneeze all day.
|
| 2 |
+
|
| 3 |
+
After one minute with the Somira massager it was gone.
|
| 4 |
+
|
| 5 |
+
If you ever feel neck pain, you'll wish you bought one, because the moment I turned my head, I knew I needed relief fast.
|
| 6 |
+
|
| 7 |
+
Get yours today at somira dot com.
|
example_strategy.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"brand": "Somira",
|
| 3 |
+
"product": "Neck Massager",
|
| 4 |
+
"target_audience": "Adults 25-55 with neck pain",
|
| 5 |
+
"tone": "Relatable, humorous, authentic",
|
| 6 |
+
|
| 7 |
+
"gemini_prompt": "A photorealistic, comical yet painfully real depiction of an attractive blonde, blue-eyed female stuck in a neck spasm nightmare in a luxurious home setting. Her head is tilted at an awkward angle, expression frozen mid-surprise. Cinematic lighting with soft shadows, 4K quality, commercial aesthetic. Modern interior design with minimalist furniture. Shot on RED camera with shallow depth of field.",
|
| 8 |
+
|
| 9 |
+
"runway_prompt": "Slow push-in camera movement: a well-dressed blonde woman in her 30s suddenly tilts her head stiffly to the side at an unnatural angle and blinks in surprise, her face frozen in an uncomfortable mid-expression. Luxurious modern home interior with warm natural lighting from large windows. Commercial quality cinematography with cinematic color grading. 9:16 vertical format for social media.",
|
| 10 |
+
|
| 11 |
+
"hook_video": {
|
| 12 |
+
"duration": 5,
|
| 13 |
+
"style": "cinematic",
|
| 14 |
+
"camera_movement": "slow push-in",
|
| 15 |
+
"focal_point": "face and neck"
|
| 16 |
+
},
|
| 17 |
+
|
| 18 |
+
"style": "commercial",
|
| 19 |
+
"aspect_ratio": "9:16",
|
| 20 |
+
"platform": "Instagram Reels / TikTok",
|
| 21 |
+
|
| 22 |
+
"video_structure": {
|
| 23 |
+
"hook": "0-5s - Problem visualization",
|
| 24 |
+
"body": "5-15s - Product showcase with library videos",
|
| 25 |
+
"cta": "15-20s - Call to action"
|
| 26 |
+
},
|
| 27 |
+
|
| 28 |
+
"color_palette": {
|
| 29 |
+
"primary": "#FFFFFF",
|
| 30 |
+
"secondary": "#F5F5F5",
|
| 31 |
+
"accent": "#4A90E2",
|
| 32 |
+
"text": "#333333"
|
| 33 |
+
},
|
| 34 |
+
|
| 35 |
+
"music": {
|
| 36 |
+
"style": "upbeat, modern",
|
| 37 |
+
"volume": "40% (under voiceover)"
|
| 38 |
+
},
|
| 39 |
+
|
| 40 |
+
"metadata": {
|
| 41 |
+
"campaign_name": "Neck Pain Relief Q4 2025",
|
| 42 |
+
"created_date": "2025-09-29",
|
| 43 |
+
"version": "1.0"
|
| 44 |
+
}
|
| 45 |
+
}
|
requirements.txt
CHANGED
|
@@ -1,9 +1,17 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core async HTTP
|
| 2 |
+
aiohttp==3.9.5
|
| 3 |
+
aiofiles==23.2.1
|
| 4 |
+
|
| 5 |
+
# Google AI (Gemini)
|
| 6 |
+
google-generativeai==0.8.3
|
| 7 |
+
|
| 8 |
+
# Google Cloud Services
|
| 9 |
+
google-cloud-storage==2.18.2
|
| 10 |
+
google-cloud-texttospeech==2.17.2
|
| 11 |
+
|
| 12 |
+
# Environment variables
|
| 13 |
+
python-dotenv==1.0.1
|
| 14 |
+
|
| 15 |
+
# Utilities
|
| 16 |
+
asyncio==3.4.3
|
| 17 |
+
typing-extensions==4.12.2
|
setup.sh
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
#!/bin/bash
|
| 2 |
-
echo "Setting up Content Automation System..."
|
| 3 |
-
|
| 4 |
-
# Create directories
|
| 5 |
-
mkdir -p config src assets/video_library assets/audio_library outputs/videos outputs/logs
|
| 6 |
-
|
| 7 |
-
# Run all the creation commands from above (you'd paste all the cat commands here)
|
| 8 |
-
# [Paste all the file creation commands from above here]
|
| 9 |
-
|
| 10 |
-
echo "✅ Setup complete!"
|
| 11 |
-
echo "📝 Next steps:"
|
| 12 |
-
echo "1. Edit .env with your API keys"
|
| 13 |
-
echo "2. Run: pip install -r requirements.txt"
|
| 14 |
-
echo "3. Run: python src/main.py"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/api_clients.py
CHANGED
|
@@ -1,70 +1,374 @@
|
|
| 1 |
"""
|
| 2 |
-
API clients for external services
|
| 3 |
"""
|
| 4 |
import aiohttp
|
| 5 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from utils import logger
|
| 7 |
|
|
|
|
| 8 |
class APIClients:
|
| 9 |
def __init__(self, config):
|
| 10 |
self.config = config
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
async def
|
| 25 |
-
"""
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
async def
|
| 34 |
-
"""
|
| 35 |
-
|
| 36 |
-
logger.info(f"Selecting {count} videos for keywords: {keywords}")
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
-
async def store_in_gcs(self, file_path):
|
| 51 |
-
"""
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
-
def _extract_keywords(self, text):
|
| 57 |
"""Extract keywords from TTS script"""
|
| 58 |
text_lower = text.lower()
|
| 59 |
keywords = []
|
| 60 |
|
| 61 |
key_phrases = [
|
| 62 |
'somira massager', 'neck pain', 'product', 'massager',
|
| 63 |
-
'solution', 'comfort', 'using the product', 'relaxation'
|
|
|
|
| 64 |
]
|
| 65 |
|
| 66 |
for phrase in key_phrases:
|
| 67 |
if phrase in text_lower:
|
| 68 |
keywords.append(phrase)
|
| 69 |
-
|
| 70 |
-
return keywords if keywords else ['general']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
API clients for external services with full implementations
|
| 3 |
"""
|
| 4 |
import aiohttp
|
| 5 |
import json
|
| 6 |
+
import os
|
| 7 |
+
from typing import Dict, List, Optional
|
| 8 |
+
from google import genai
|
| 9 |
+
from google.cloud import storage, texttospeech
|
| 10 |
+
import asyncio
|
| 11 |
from utils import logger
|
| 12 |
|
| 13 |
+
|
| 14 |
class APIClients:
|
| 15 |
def __init__(self, config):
    """Create the clients for every external service used by the pipeline.

    Args:
        config: Mapping of settings. Keys read here are
            ``gemini_api_key``, ``gcs_bucket_name`` and
            ``runwayml_api_key``; each falls back to the matching
            environment variable (``GEMINI_API_KEY``, ``GCS_BUCKET_NAME``,
            ``RUNWAYML_API_KEY``) when the key is missing or empty.
    """
    self.config = config

    # Gemini client (prompt enhancement and video selection).
    self.gemini_client = genai.Client(
        api_key=config.get('gemini_api_key') or os.getenv('GEMINI_API_KEY')
    )

    # Google Cloud Storage client. The bucket name gets the same
    # config-then-environment lookup as the API keys so a .env-only setup
    # works for storage too.
    bucket_name = config.get('gcs_bucket_name') or os.getenv('GCS_BUCKET_NAME')
    if not bucket_name:
        logger.warning("No GCS bucket name configured; uploads will fail")
    self.gcs_client = storage.Client()
    self.gcs_bucket = self.gcs_client.bucket(bucket_name)

    # Google Cloud Text-to-Speech client (not Azure: this is the
    # google.cloud.texttospeech client imported by this module).
    self.tts_client = texttospeech.TextToSpeechClient()

    # RunwayML API configuration.
    self.runway_api_key = config.get('runwayml_api_key') or os.getenv('RUNWAYML_API_KEY')
    self.runway_base_url = "https://api.dev.runwayml.com/v1"
|
| 33 |
+
|
| 34 |
+
async def enhance_prompt(self, prompt: str) -> str:
    """Enhance a prompt with Gemini so it works better for video generation.

    This is best-effort: any failure (API error, empty answer) is logged
    and the original prompt is returned unchanged, so callers never need
    to handle an exception from this method.

    Args:
        prompt: Original user prompt.

    Returns:
        The enhanced prompt, or ``prompt`` itself when enhancement fails
        or Gemini returns no text.
    """
    try:
        logger.info(f"Enhancing prompt with Gemini: {prompt[:100]}...")

        enhancement_instruction = f"""
        You are a prompt enhancement specialist for video generation AI.
        Take this product advertisement prompt and enhance it to be more visually descriptive,
        cinematic, and optimized for AI video generation. Focus on:
        - Visual details and cinematography
        - Lighting and atmosphere
        - Camera movements and angles
        - Brand aesthetic consistency

        Original prompt: {prompt}

        Return only the enhanced prompt, nothing else.
        """

        # generate_content is a synchronous call; run it in the default
        # executor so the other coroutines of the pipeline keep running
        # while we wait for Gemini.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: self.gemini_client.models.generate_content(
                model="gemini-2.0-flash-exp",
                contents=enhancement_instruction,
            ),
        )

        # response.text can be missing/empty; treat that as "no enhancement".
        enhanced_prompt = (response.text or "").strip()
        if not enhanced_prompt:
            logger.warning("Gemini returned an empty enhancement; using original prompt")
            return prompt

        logger.info(f"Enhanced prompt: {enhanced_prompt[:100]}...")
        return enhanced_prompt

    except Exception as e:
        logger.error(f"Error enhancing prompt with Gemini: {e}")
        # Return original prompt if enhancement fails
        return prompt
|
| 74 |
|
| 75 |
+
async def generate_video(self, prompt: str, duration: int = 10, ratio: str = "16:9") -> Dict:
    """Generate a video with the RunwayML Gen-4 API and wait for the result.

    A generation task is created, then polled every 5 seconds for at most
    60 attempts (about 5 minutes).

    Args:
        prompt: Text prompt for video generation.
        duration: Video duration in seconds (5 or 10).
        ratio: Aspect ratio sent to the API. Defaults to ``"16:9"`` (the
            previously hard-coded value); pass e.g. ``"9:16"`` for
            vertical social-media formats.

    Returns:
        Dict with ``video_url``, ``task_id``, ``duration`` and ``prompt``.

    Raises:
        Exception: If the API rejects a request, the task fails or is
            cancelled, the task finishes without output, or the task does
            not finish within the polling window. Errors are logged and
            re-raised.
    """
    poll_interval = 5   # seconds between status checks
    max_attempts = 60   # 60 * 5s = 5 minutes max

    try:
        logger.info(f"Generating video with RunwayML: {prompt[:100]}...")

        headers = {
            "Authorization": f"Bearer {self.runway_api_key}",
            "Content-Type": "application/json"
        }

        payload = {
            "promptText": prompt,
            "model": "gen4",
            "duration": duration,
            "ratio": ratio,
            "watermark": False
        }

        async with aiohttp.ClientSession() as session:
            # Create generation task
            async with session.post(
                f"{self.runway_base_url}/generations",
                headers=headers,
                json=payload
            ) as response:
                # Accept any 2xx: task creation may answer 201/202, not only 200.
                if not 200 <= response.status < 300:
                    error_text = await response.text()
                    raise Exception(f"RunwayML API error: {error_text}")

                task_data = await response.json()
                task_id = task_data['id']
                logger.info(f"Video generation task created: {task_id}")

            # Poll for completion
            for _ in range(max_attempts):
                await asyncio.sleep(poll_interval)

                async with session.get(
                    f"{self.runway_base_url}/generations/{task_id}",
                    headers=headers
                ) as status_response:
                    # An error body has no 'status' key; surface the real
                    # HTTP failure instead of a confusing KeyError.
                    if not 200 <= status_response.status < 300:
                        error_text = await status_response.text()
                        raise Exception(f"RunwayML API error: {error_text}")
                    status_data = await status_response.json()

                status = status_data.get('status')

                if status == 'SUCCEEDED':
                    outputs = status_data.get('output') or []
                    if not outputs:
                        raise Exception("Video generation succeeded but returned no output")
                    video_url = outputs[0]
                    logger.info(f"Video generated successfully: {video_url}")
                    return {
                        'video_url': video_url,
                        'task_id': task_id,
                        'duration': duration,
                        'prompt': prompt
                    }

                # CANCELLED is terminal as well; polling it until the
                # timeout would only waste five minutes.
                if status in ('FAILED', 'CANCELLED'):
                    raise Exception(f"Video generation failed: {status_data.get('failure')}")

                logger.info(f"Video generation in progress... ({status})")

            raise Exception("Video generation timeout")

    except Exception as e:
        logger.error(f"Error generating video with RunwayML: {e}")
        raise
|
| 151 |
|
| 152 |
+
async def generate_tts(self, text: str, voice_name: Optional[str] = None) -> Dict:
    """Generate voice-over audio with Google Cloud Text-to-Speech.

    The text is synthesized to MP3, written to the system temp directory,
    and uploaded to GCS under the ``audio/`` prefix.

    Args:
        text: Text to convert to speech. Must not be empty.
        voice_name: Google Cloud TTS voice name such as
            ``en-US-Neural2-F``. Defaults to ``config['default_voice']``,
            falling back to ``en-US-Neural2-F``.

    Returns:
        Dict with ``audio_url``, ``duration`` (seconds, estimated from the
        MP3 size), ``lip_sync_data``, ``voice`` and ``text``.

    Raises:
        ValueError: If ``text`` is empty or whitespace only.
        Exception: Any synthesis or upload error is logged and re-raised.
    """
    import hashlib
    import tempfile

    # The MP3 encoding is documented as 32 kbps, i.e. 4000 bytes per second.
    # The previous code divided the byte count by 32000 (a bits-per-second
    # figure), which underestimated the duration eightfold.
    mp3_bytes_per_second = 32000 / 8

    try:
        logger.info(f"Generating TTS for text: {text[:100]}...")

        if not text or not text.strip():
            raise ValueError("TTS text must not be empty")

        if not voice_name:
            # 'en-US-AriaNeural' is an Azure voice; use a Google voice that
            # matches the documented default.
            voice_name = self.config.get('default_voice', 'en-US-Neural2-F')

        # Configure the speech synthesis request
        synthesis_input = texttospeech.SynthesisInput(text=text)

        # First two dash-separated parts of the voice name, e.g. 'en-US'
        language_code = '-'.join(voice_name.split('-')[:2])

        # No ssml_gender: the named voice already fixes the gender, and a
        # hard-coded FEMALE conflicts with male voices.
        voice = texttospeech.VoiceSelectionParams(
            language_code=language_code,
            name=voice_name
        )

        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
            speaking_rate=1.0,
            pitch=0.0
        )

        # synthesize_speech is synchronous; run it in the default executor
        # so the event loop is not blocked. The former enable_time_pointing
        # argument is dropped: it is not a parameter of the v1 client (and
        # plain-text input carries no SSML marks to time).
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: self.tts_client.synthesize_speech(
                input=synthesis_input,
                voice=voice,
                audio_config=audio_config,
            ),
        )

        # Stable file name: the built-in hash() is randomized per process
        # and may be negative, so use a content digest instead.
        digest = hashlib.sha256(text.encode('utf-8')).hexdigest()[:16]
        audio_filename = f"tts_{digest}.mp3"
        audio_path = os.path.join(tempfile.gettempdir(), audio_filename)

        with open(audio_path, "wb") as out:
            out.write(response.audio_content)

        # Upload to GCS
        audio_url = await self.store_in_gcs(audio_path, 'audio')

        # Extract timing information for lip sync.
        # NOTE(review): no timepoints are requested above, so this helper
        # presumably yields no timing entries; confirm against its code.
        lip_sync_data = self._extract_timing_data(response)

        logger.info(f"TTS generated successfully: {audio_url}")

        return {
            'audio_url': audio_url,
            'duration': len(response.audio_content) / mp3_bytes_per_second,  # Approximate
            'lip_sync_data': lip_sync_data,
            'voice': voice_name,
            'text': text
        }

    except Exception as e:
        logger.error(f"Error generating TTS: {e}")
        raise
|
| 221 |
+
|
| 222 |
+
async def select_videos(self, tts_script: str, count: int = 3) -> List[Dict]:
    """Pick library videos for a script, using Gemini to find key moments.

    Gemini is asked for a JSON array of visual moments. If the answer is
    not usable JSON (or holds no entry with a keyword), keywords from
    ``_extract_keywords`` are used instead. Each suggestion is mapped to a
    library clip id in ``1..config['video_library_size']`` through a
    stable hash of its keyword, so the same script yields the same clips
    on every run.

    Args:
        tts_script: The TTS script to analyze.
        count: Number of videos to select (max 3).

    Returns:
        List of dicts with ``id``, ``url``, ``keyword``, ``timing``,
        ``style`` and ``reason``. On any unexpected error the result of
        ``_fallback_video_selection`` is returned instead.
    """
    import zlib

    try:
        logger.info(f"Selecting {count} videos for script...")

        # Use Gemini to analyze script and suggest video keywords
        analysis_prompt = f"""
        Analyze this product advertisement script and identify {count} key visual moments
        that should be represented with video clips. For each moment, provide:
        1. A descriptive keyword/phrase
        2. The timing (start-end seconds if mentioned)
        3. Visual style preference (product closeup, lifestyle, abstract, etc.)

        Script: {tts_script}

        Return as JSON array with format:
        [{{"keyword": "...", "timing": "0-5", "style": "..."}}, ...]
        """

        # generate_content is synchronous; keep the event loop responsive.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: self.gemini_client.models.generate_content(
                model="gemini-2.0-flash-exp",
                contents=analysis_prompt,
            ),
        )

        raw = (response.text or "").strip()
        # Models often wrap JSON in a Markdown code fence (```json ... ```);
        # peel it off so a valid answer is not discarded.
        if raw.startswith("```"):
            raw = raw.strip("`").strip()
            if raw[:4].lower() == "json":
                raw = raw[4:]

        # Parse Gemini response; ValueError covers json.JSONDecodeError.
        try:
            parsed = json.loads(raw)
        except ValueError:
            parsed = None

        suggestions = []
        if isinstance(parsed, list):
            suggestions = [
                item for item in parsed
                if isinstance(item, dict) and item.get('keyword')
            ]

        if not suggestions:
            # Fallback to keyword extraction
            keywords = self._extract_keywords(tts_script)
            suggestions = [
                {"keyword": kw, "timing": f"{i*5}-{(i+1)*5}", "style": "general"}
                for i, kw in enumerate(keywords[:count])
            ]

        # Select videos from library based on suggestions
        library_size = self.config['video_library_size']
        bucket_name = self.config['gcs_bucket_name']
        selected_videos = []
        for i, suggestion in enumerate(suggestions[:count]):
            keyword = suggestion['keyword']
            # crc32 is stable across processes, unlike the built-in hash().
            keyword_hash = zlib.crc32(str(keyword).encode('utf-8'))
            video_id = (keyword_hash + i) % library_size + 1
            selected_videos.append({
                'id': video_id,
                'url': f"gs://{bucket_name}/library/video{video_id}.mp4",
                'keyword': keyword,
                'timing': suggestion.get('timing', f"{i*5}-{(i+1)*5}"),
                'style': suggestion.get('style', 'general'),
                'reason': f"Matches: {keyword}"
            })

        logger.info(f"Selected {len(selected_videos)} videos")
        return selected_videos

    except Exception as e:
        logger.error(f"Error selecting videos: {e}")
        # Fallback selection
        return self._fallback_video_selection(tts_script, count)
|
| 286 |
|
| 287 |
+
async def store_in_gcs(self, file_path: str, content_type: str = 'video') -> str:
    """
    Upload a local file to the configured GCS bucket and return its public URL.

    The object is stored under ``<content_type>/<basename of file_path>``.
    The MIME type is derived from the file extension (case-insensitively);
    unknown extensions are uploaded as ``application/octet-stream``.

    Args:
        file_path: Local file path
        content_type: Logical category used as the object-name prefix
            ('video', 'audio', etc.). This is not a MIME type.

    Returns:
        GCS public URL

    Raises:
        Exception: Any error raised while uploading or publishing the object
            is logged and re-raised unchanged.
    """
    try:
        logger.info(f"Storing file in GCS: {file_path}")

        filename = os.path.basename(file_path)
        blob_name = f"{content_type}/{filename}"
        blob = self.gcs_bucket.blob(blob_name)

        # Set content type based on file extension
        content_types = {
            '.mp4': 'video/mp4',
            '.mp3': 'audio/mpeg',
            '.wav': 'audio/wav',
            '.json': 'application/json'
        }

        # Lower-case the extension so "CLIP.MP4" is still served as video/mp4
        # instead of falling through to the generic binary type.
        file_ext = os.path.splitext(filename)[1].lower()
        blob.content_type = content_types.get(file_ext, 'application/octet-stream')

        # Upload file
        blob.upload_from_filename(file_path)

        # Make public (optional)
        # NOTE(review): presumably fails on buckets that disallow per-object
        # ACLs -- confirm against the bucket configuration.
        blob.make_public()

        gcs_url = blob.public_url
        logger.info(f"File uploaded to: {gcs_url}")

        return gcs_url

    except Exception as e:
        logger.error(f"Error storing file in GCS: {e}")
        raise
|
| 330 |
|
| 331 |
+
def _extract_keywords(self, text: str) -> List[str]:
    """
    Return the known product phrases that occur in the TTS script.

    Matching is a case-insensitive substring test, and the result keeps the
    order of the phrase table below. When nothing matches, a generic
    three-keyword list is returned so the result is never empty.
    """
    candidate_phrases = (
        'somira massager', 'neck pain', 'product', 'massager',
        'solution', 'comfort', 'using the product', 'relaxation',
        'relief', 'wellness', 'ergonomic', 'design',
    )

    haystack = text.lower()
    matches = [phrase for phrase in candidate_phrases if phrase in haystack]

    if matches:
        return matches
    return ['general', 'product', 'lifestyle']
|
| 347 |
+
|
| 348 |
+
def _extract_timing_data(self, tts_response) -> Dict:
    """
    Build the timing structure used for lip sync from a TTS response.

    Simplified placeholder: ``tts_response`` is not inspected and every
    list in the returned dict is empty.
    """
    # A full version would parse the timepoints from the Azure TTS response.
    timing_fields = ('timestamps', 'phonemes', 'words')
    return {field: [] for field in timing_fields}
|
| 357 |
+
|
| 358 |
+
def _fallback_video_selection(self, text: str, count: int) -> List[Dict]:
    """
    Fallback video selection if AI selection fails.

    Picks up to three consecutive library videos, starting at an offset
    derived from the script text, and labels them with the keywords found by
    ``_extract_keywords`` (cycled when there are fewer keywords than videos).

    Args:
        text: TTS script the videos should accompany
        count: Requested number of videos; at most 3 are returned

    Returns:
        List of video metadata dicts with the keys
        'id', 'url', 'keyword', 'timing', 'style' and 'reason'
    """
    # Local import keeps this block self-contained.
    import zlib

    keywords = self._extract_keywords(text) or ['general']
    library_size = self.config['video_library_size']
    bucket_name = self.config['gcs_bucket_name']

    # hash() on a str is salted per interpreter process, so the same script
    # would map to different videos on every run. CRC32 is stable.
    seed = zlib.crc32(text.encode('utf-8'))

    selected_videos = []
    for i in range(min(count, 3)):
        video_id = (seed + i) % library_size + 1  # library IDs are 1-based
        keyword = keywords[i % len(keywords)]
        selected_videos.append({
            'id': video_id,
            'url': f"gs://{bucket_name}/library/video{video_id}.mp4",
            'keyword': keyword,
            'timing': f"{i*5}-{(i+1)*5}",
            'style': 'general',
            'reason': f'Fallback selection for: {keyword}'
        })

    return selected_videos
|
src/automation.py
CHANGED
|
@@ -1,92 +1,407 @@
|
|
| 1 |
"""
|
| 2 |
-
Main automation orchestrator
|
| 3 |
"""
|
| 4 |
import asyncio
|
|
|
|
|
|
|
|
|
|
| 5 |
from api_clients import APIClients
|
| 6 |
from video_renderer import VideoRenderer
|
| 7 |
from utils import logger
|
| 8 |
|
|
|
|
| 9 |
class ContentAutomation:
|
| 10 |
-
def __init__(self, config):
|
| 11 |
self.config = config
|
| 12 |
self.api_clients = APIClients(config)
|
| 13 |
self.video_renderer = VideoRenderer(config)
|
| 14 |
self.current_audio_index = 0
|
|
|
|
| 15 |
|
| 16 |
-
async def execute_pipeline(
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
self.select_background_music(),
|
| 40 |
-
self.select_videos_from_library(tts_script),
|
| 41 |
-
self.generate_tts_audio(tts_script)
|
| 42 |
-
]
|
| 43 |
|
| 44 |
-
results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
'background_music': results[1],
|
| 49 |
-
'selected_videos': results[2],
|
| 50 |
-
'tts_audio': results[3]
|
| 51 |
-
}
|
| 52 |
|
| 53 |
-
async def generate_hook_video(self, strategy):
|
| 54 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
try:
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
# Generate video with RunwayML
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
except Exception as e:
|
| 64 |
-
logger.error(f"Hook video generation failed: {e}")
|
| 65 |
return None
|
| 66 |
|
| 67 |
-
async def select_background_music(self):
|
| 68 |
-
"""
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
async def select_videos_from_library(self, tts_script):
|
| 77 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
selected_videos = await self.api_clients.select_videos(tts_script, count=3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
return selected_videos
|
|
|
|
| 81 |
except Exception as e:
|
| 82 |
-
logger.error(f"Video selection failed: {e}")
|
| 83 |
-
return
|
| 84 |
|
| 85 |
-
async def generate_tts_audio(self, tts_script):
|
| 86 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
try:
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
return tts_result
|
|
|
|
| 90 |
except Exception as e:
|
| 91 |
-
logger.error(f"TTS generation failed: {e}")
|
| 92 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Main automation orchestrator with full implementation
|
| 3 |
"""
|
| 4 |
import asyncio
|
| 5 |
+
import os
|
| 6 |
+
import time
|
| 7 |
+
from typing import Dict, List, Optional, Any
|
| 8 |
from api_clients import APIClients
|
| 9 |
from video_renderer import VideoRenderer
|
| 10 |
from utils import logger
|
| 11 |
|
| 12 |
+
|
| 13 |
class ContentAutomation:
    """
    Orchestrates the content pipeline: asset generation, video rendering,
    subtitle burn-in and upload to Google Cloud Storage.
    """

    # Bucket used whenever 'gcs_bucket_name' is absent from the config.
    _DEFAULT_BUCKET = 'somira-videos'

    def __init__(self, config: Dict[str, Any]):
        """
        Args:
            config: Settings dict; this class reads 'audio_library_size',
                'video_library_size', 'gcs_bucket_name' and 'default_voice'.
        """
        self.config = config
        self.api_clients = APIClients(config)
        self.video_renderer = VideoRenderer(config)
        # Index of the next background track (linear rotation).
        self.current_audio_index = 0
        # Wall-clock start of the current pipeline run; None before the first run.
        self.pipeline_start_time = None

    async def execute_pipeline(
        self,
        content_strategy: Dict[str, str],
        tts_script: str,
        video_config: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """
        Execute the complete automation pipeline

        Args:
            content_strategy: Dict with prompts and style preferences
            tts_script: Text script for voice-over
            video_config: Optional video rendering configuration

        Returns:
            On success: dict with 'success' True, 'final_url', 'local_path',
            'assets', 'duration' and 'metadata'.
            On failure: dict with 'success' False, 'error', 'duration' and
            'partial_assets'. Exceptions are reported this way, not raised.
        """
        self.pipeline_start_time = time.time()
        logger.info("=" * 60)
        logger.info("🚀 Starting Content Automation Pipeline")
        logger.info("=" * 60)

        # Bound before the try block so the failure path can report whatever
        # step 1 managed to produce without inspecting locals().
        assets: Dict[str, Any] = {}

        try:
            # Step 1: Generate all assets simultaneously
            logger.info("\n📦 STEP 1: Generating Assets (Parallel Execution)")
            assets = await self.execute_step_1(content_strategy, tts_script)
            self._log_step_completion(1, assets)

            # Validate critical assets
            if not self._validate_assets(assets):
                raise RuntimeError("Critical assets failed to generate")

            # Step 2: Merge videos and audio
            logger.info("\n🎬 STEP 2: Rendering Video")
            rendered_video = await self.video_renderer.render_video(
                assets,
                video_config or {}
            )
            self._log_step_completion(2, {'rendered_video': rendered_video})

            # Step 3: Add subtitles
            logger.info("\n📝 STEP 3: Adding Subtitles")
            subtitled_video = await self.video_renderer.add_subtitles(
                rendered_video,
                tts_script,
                assets.get('tts_audio', {})
            )
            self._log_step_completion(3, {'subtitled_video': subtitled_video})

            # Step 4: Store final video in GCS
            logger.info("\n☁️ STEP 4: Uploading to Cloud Storage")
            final_url = await self.api_clients.store_in_gcs(
                subtitled_video,
                content_type='video'
            )
            self._log_step_completion(4, {'final_url': final_url})

            # Pipeline completion summary
            elapsed_time = time.time() - self.pipeline_start_time
            logger.info("\n" + "=" * 60)
            logger.info(f"✅ Pipeline Completed Successfully in {elapsed_time:.2f}s")
            logger.info(f"📹 Final Video: {final_url}")
            logger.info("=" * 60)

            return {
                'success': True,
                'final_url': final_url,
                'local_path': subtitled_video,
                'assets': assets,
                'duration': elapsed_time,
                'metadata': {
                    'content_strategy': content_strategy,
                    'tts_script': tts_script,
                    'timestamp': time.time()
                }
            }

        except Exception as e:
            elapsed_time = time.time() - self.pipeline_start_time
            logger.error(f"\n❌ Pipeline Failed after {elapsed_time:.2f}s: {e}")

            return {
                'success': False,
                'error': str(e),
                'duration': elapsed_time,
                'partial_assets': assets
            }

    async def execute_step_1(
        self,
        content_strategy: Dict[str, str],
        tts_script: str
    ) -> Dict[str, Any]:
        """
        Execute all step 1 processes concurrently

        Args:
            content_strategy: Content generation strategy
            tts_script: Text for TTS generation

        Returns:
            Dict with the keys 'hook_video', 'background_music',
            'selected_videos' and 'tts_audio'; a task that failed maps to None.
        """
        logger.info("⚡ Launching parallel tasks...")

        # Create all tasks
        tasks = {
            'hook_video': self.generate_hook_video(content_strategy),
            'background_music': self.select_background_music(),
            'selected_videos': self.select_videos_from_library(tts_script),
            'tts_audio': self.generate_tts_audio(tts_script)
        }

        # Execute all tasks concurrently
        start_time = time.time()
        results = await asyncio.gather(
            *tasks.values(),
            return_exceptions=True
        )
        execution_time = time.time() - start_time

        # Map results back to task names (dicts preserve insertion order, so
        # results line up with the keys).
        assets = {}
        for task_name, result in zip(tasks, results):
            # BaseException, not Exception: a cancelled task is reported as
            # CancelledError, which does not derive from Exception.
            if isinstance(result, BaseException):
                logger.error(f"❌ {task_name} failed: {result}")
                assets[task_name] = None
            else:
                logger.info(f"✓ {task_name} completed")
                assets[task_name] = result

        logger.info(f"\n⚡ Parallel execution completed in {execution_time:.2f}s")
        return assets

    async def generate_hook_video(self, strategy: Dict[str, str]) -> Optional[Dict]:
        """
        Generate hook video using AI APIs with prompt enhancement

        Args:
            strategy: Content strategy; 'runway_prompt' is preferred over
                'gemini_prompt', and 'duration' defaults to 5 seconds.

        Returns:
            Whatever the video generation client returns, or None if any
            step failed (the error is logged, not raised).
        """
        try:
            logger.info("🎥 Generating hook video...")

            # Choose the right prompt
            base_prompt = strategy.get('runway_prompt') or strategy.get('gemini_prompt')
            if not base_prompt:
                raise ValueError("No prompt found in strategy")

            # Enhance prompt with Gemini for better video quality
            logger.info(" → Enhancing prompt with Gemini AI...")
            enhanced_prompt = await self.api_clients.enhance_prompt(base_prompt)

            # Generate video with RunwayML
            logger.info(" → Generating video with RunwayML Gen-4...")
            video_data = await self.api_clients.generate_video(
                enhanced_prompt,
                duration=strategy.get('duration', 5)  # Default 5s for hook
            )

            logger.info(f" ✓ Hook video generated: {video_data.get('task_id', 'N/A')}")
            return video_data

        except Exception as e:
            logger.error(f" ✗ Hook video generation failed: {e}")
            return None

    async def select_background_music(self) -> str:
        """
        Select background music from library using linear rotation

        Returns:
            gs:// URL of the chosen track, or of ``default.mp3`` in the same
            bucket if selection fails.
        """
        # Resolved once so the fallback URL uses the same bucket (including
        # the default) as the normal path.
        bucket_name = self.config.get('gcs_bucket_name', self._DEFAULT_BUCKET)

        try:
            logger.info("🎵 Selecting background music...")

            # Linear selection with rotation
            audio_index = self.current_audio_index
            self.current_audio_index = (audio_index + 1) % self.config['audio_library_size']

            # Construct GCS URL (track files are numbered from 1)
            audio_url = f"gs://{bucket_name}/audio-library/audio{audio_index + 1}.mp3"

            logger.info(f" ✓ Selected audio #{audio_index + 1}: {audio_url}")
            return audio_url

        except Exception as e:
            logger.error(f" ✗ Music selection failed: {e}")
            # Return default/fallback audio
            return f"gs://{bucket_name}/audio-library/default.mp3"

    async def select_videos_from_library(self, tts_script: str) -> List[Dict]:
        """
        Select 3 library videos based on TTS script content

        Args:
            tts_script: The voice-over script to analyze

        Returns:
            List of selected video metadata dicts; the static fallback list
            when the client returns nothing or raises.
        """
        try:
            logger.info("🎬 Selecting videos from library...")
            logger.info(f" → Analyzing script: {tts_script[:80]}...")

            # Use AI to select contextually relevant videos
            selected_videos = await self.api_clients.select_videos(tts_script, count=3)

            if not selected_videos:
                logger.warning(" ⚠ No videos selected, using fallback")
                return self._get_fallback_videos()

            logger.info(f" ✓ Selected {len(selected_videos)} videos:")
            for i, video in enumerate(selected_videos, 1):
                logger.info(f" {i}. {video.get('keyword', 'N/A')} - {video.get('reason', 'N/A')}")

            return selected_videos

        except Exception as e:
            logger.error(f" ✗ Video selection failed: {e}")
            return self._get_fallback_videos()

    async def generate_tts_audio(self, tts_script: str) -> Optional[Dict]:
        """
        Generate TTS audio for the script

        Args:
            tts_script: Text to convert to speech

        Returns:
            The TTS client's result (logged fields: 'duration', 'audio_url'),
            or None if generation raised.
        """
        try:
            logger.info("🎙️ Generating TTS audio...")
            logger.info(f" → Script length: {len(tts_script)} characters")

            # Get voice from config
            voice_name = self.config.get('default_voice', 'en-US-AriaNeural')

            # Generate TTS with timing data
            tts_result = await self.api_clients.generate_tts(
                tts_script,
                voice_name=voice_name
            )

            if tts_result:
                # "or 0" also covers an explicit None duration, which would
                # otherwise break the format below and discard a valid result.
                duration = tts_result.get('duration') or 0
                logger.info(f" ✓ TTS generated: {duration:.2f}s duration")
                logger.info(f" ✓ Audio URL: {tts_result.get('audio_url', 'N/A')}")

            return tts_result

        except Exception as e:
            logger.error(f" ✗ TTS generation failed: {e}")
            return None

    def _validate_assets(self, assets: Dict[str, Any]) -> bool:
        """
        Validate that critical assets were generated successfully

        Args:
            assets: Dict of generated assets

        Returns:
            False as soon as 'tts_audio' or 'selected_videos' is missing or
            falsy; True otherwise. Missing optional assets only log a warning.
        """
        critical_assets = ['tts_audio', 'selected_videos']
        optional_assets = ['hook_video', 'background_music']

        # Check critical assets
        for asset_name in critical_assets:
            if not assets.get(asset_name):
                logger.error(f"❌ Critical asset missing: {asset_name}")
                return False

        # Warn about optional assets
        for asset_name in optional_assets:
            if not assets.get(asset_name):
                logger.warning(f"⚠️ Optional asset missing: {asset_name}")

        logger.info("✓ Asset validation passed")
        return True

    def _get_fallback_videos(self) -> List[Dict]:
        """
        Get fallback videos if AI selection fails

        Returns:
            Three default video selections. The IDs (1, 15, 30) are wrapped
            into 1..video_library_size when that setting is present, so they
            never point past the end of a smaller library.
        """
        bucket_name = self.config.get('gcs_bucket_name', self._DEFAULT_BUCKET)
        library_size = self.config.get('video_library_size')

        defaults = (
            (1, 'product', '0-5'),
            (15, 'lifestyle', '5-10'),
            (30, 'usage', '10-15'),
        )

        videos = []
        for video_id, keyword, timing in defaults:
            if library_size and library_size > 0:
                video_id = (video_id - 1) % library_size + 1
            videos.append({
                'id': video_id,
                'url': f"gs://{bucket_name}/library/video{video_id}.mp4",
                'keyword': keyword,
                'timing': timing,
                'style': 'general',
                'reason': 'Fallback selection'
            })
        return videos

    def _log_step_completion(self, step: int, data: Dict[str, Any]):
        """
        Log step completion with the elapsed pipeline time.

        Args:
            step: Step number (1-4)
            data: Step output; accepted for call-site symmetry, not logged
        """
        step_names = {
            1: "Asset Generation",
            2: "Video Rendering",
            3: "Subtitle Addition",
            4: "Cloud Upload"
        }

        elapsed = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
        logger.info(f"✓ Step {step} ({step_names.get(step, 'Unknown')}) completed [{elapsed:.2f}s total]")

    async def health_check(self) -> Dict[str, bool]:
        """
        Check health of all API connections

        Gemini is probed with a real prompt-enhancement call and GCS with a
        bucket existence check. RunwayML and TTS are only checked for being
        configured, to avoid spending credits.

        Returns:
            Dict with the keys 'gemini', 'runwayml', 'tts' and 'gcs'
        """
        logger.info("🏥 Running health check...")

        health = {
            'gemini': False,
            'runwayml': False,
            'tts': False,
            'gcs': False
        }

        try:
            # Test Gemini
            test_prompt = "Hello"
            await self.api_clients.enhance_prompt(test_prompt)
            health['gemini'] = True
            logger.info(" ✓ Gemini API: Connected")
        except Exception as e:
            logger.error(f" ✗ Gemini API: {e}")

        try:
            # Test GCS. exists() reports a missing bucket by returning False
            # rather than raising, so its result must be checked.
            if self.api_clients.gcs_bucket.exists():
                health['gcs'] = True
                logger.info(" ✓ Google Cloud Storage: Connected")
            else:
                logger.error(" ✗ Google Cloud Storage: Bucket not found")
        except Exception as e:
            logger.error(f" ✗ Google Cloud Storage: {e}")

        # RunwayML and TTS are harder to test without using credits
        # So we just check if API keys are configured
        if getattr(self.api_clients, 'runway_api_key', None):
            health['runwayml'] = True
            logger.info(" ✓ RunwayML API: Configured")
        else:
            logger.error(" ✗ RunwayML API: Not configured")

        if getattr(self.api_clients, 'tts_client', None):
            health['tts'] = True
            logger.info(" ✓ TTS API: Configured")
        else:
            logger.error(" ✗ TTS API: Not configured")

        all_healthy = all(health.values())
        logger.info(f"\n{'✅' if all_healthy else '⚠️'} Health check {'passed' if all_healthy else 'failed'}")

        return health
|
src/main.py
CHANGED
|
@@ -1,54 +1,336 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
Main entry point for Content Automation System
|
|
|
|
| 4 |
"""
|
| 5 |
import asyncio
|
| 6 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
from automation import ContentAutomation
|
|
|
|
| 9 |
|
| 10 |
-
# Load environment variables
|
| 11 |
-
load_dotenv()
|
| 12 |
|
| 13 |
-
|
| 14 |
-
"""
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
# Configuration
|
| 18 |
config = {
|
| 19 |
'gemini_api_key': os.getenv('GEMINI_API_KEY'),
|
| 20 |
'runwayml_api_key': os.getenv('RUNWAYML_API_KEY'),
|
| 21 |
-
'
|
| 22 |
-
'gcs_bucket': os.getenv('GCS_BUCKET_NAME'),
|
| 23 |
'audio_library_size': int(os.getenv('AUDIO_LIBRARY_SIZE', 27)),
|
| 24 |
-
'video_library_size': int(os.getenv('VIDEO_LIBRARY_SIZE', 47))
|
|
|
|
| 25 |
}
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
#
|
| 31 |
-
|
| 32 |
-
'gemini_prompt':
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
'style': 'commercial',
|
| 35 |
-
'aspect_ratio': '9:16'
|
|
|
|
|
|
|
| 36 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
#
|
| 39 |
-
|
| 40 |
I heard a pop, and suddenly my neck was stuck. I looked like I was mid-sneeze all day.
|
| 41 |
After one minute with the Somira massager it was gone. If you ever feel neck pain,
|
| 42 |
-
you'll wish you bought one, because the moment I turned my head.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
try:
|
| 46 |
-
#
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
except Exception as e:
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
if __name__ == "__main__":
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
Main entry point for Content Automation System
|
| 4 |
+
Production-ready implementation with error handling and logging
|
| 5 |
"""
|
| 6 |
import asyncio
|
| 7 |
import os
|
| 8 |
+
import sys
|
| 9 |
+
import argparse
|
| 10 |
+
import json
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Dict, Optional
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
from automation import ContentAutomation
|
| 15 |
+
from utils import logger
|
| 16 |
|
|
|
|
|
|
|
| 17 |
|
| 18 |
+
def load_configuration() -> Dict:
    """
    Load configuration from environment variables with validation

    Returns:
        Configuration dictionary with the keys 'gemini_api_key',
        'runwayml_api_key', 'gcs_bucket_name', 'audio_library_size',
        'video_library_size' and 'default_voice'

    Raises:
        ValueError: If required configuration is missing, or a library size
            is not a positive integer
    """
    # Load environment variables from .env file
    load_dotenv()

    config = {
        'gemini_api_key': os.getenv('GEMINI_API_KEY'),
        'runwayml_api_key': os.getenv('RUNWAYML_API_KEY'),
        'gcs_bucket_name': os.getenv('GCS_BUCKET_NAME'),
        'audio_library_size': _read_positive_int('AUDIO_LIBRARY_SIZE', 27),
        'video_library_size': _read_positive_int('VIDEO_LIBRARY_SIZE', 47),
        'default_voice': os.getenv('DEFAULT_VOICE', 'en-US-AriaNeural')
    }

    # Validate required keys (an empty string counts as missing)
    required_keys = ['gemini_api_key', 'runwayml_api_key', 'gcs_bucket_name']
    missing_keys = [key for key in required_keys if not config.get(key)]

    if missing_keys:
        raise ValueError(
            f"Missing required configuration: {', '.join(missing_keys)}. "
            f"Please check your .env file."
        )

    return config


def _read_positive_int(env_name: str, default: int) -> int:
    """Read *env_name* as a positive int; unset or blank yields *default*."""
    raw = os.getenv(env_name)
    if raw is None or not raw.strip():
        return default

    try:
        value = int(raw)
    except ValueError:
        raise ValueError(
            f"{env_name} must be a positive integer, got {raw!r}. "
            f"Please check your .env file."
        ) from None

    # The sizes are used as modulo divisors, so zero or negative values
    # must be rejected here rather than failing later.
    if value <= 0:
        raise ValueError(
            f"{env_name} must be a positive integer, got {raw!r}. "
            f"Please check your .env file."
        )
    return value
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def load_content_strategy(strategy_file: Optional[str] = None) -> Dict:
    """
    Load content strategy from file or use default

    Args:
        strategy_file: Path to JSON file with strategy, or None for default

    Returns:
        Content strategy dictionary. The built-in default is returned when
        no file is given or the given file does not exist; the latter case
        is logged as a warning so a mistyped path does not go unnoticed.
    """
    if strategy_file:
        strategy_path = Path(strategy_file)
        if strategy_path.exists():
            logger.info(f"Loading content strategy from: {strategy_file}")
            # Explicit encoding: JSON is UTF-8 regardless of platform locale.
            with open(strategy_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        logger.warning(
            f"Content strategy file not found: {strategy_file}. Using default strategy."
        )

    # Default strategy for Somira massager ad
    return {
        'gemini_prompt': (
            'A photorealistic, comical yet painfully real depiction of an attractive '
            'blonde, blue-eyed female stuck in a neck spasm nightmare in a luxurious '
            'home setting. Cinematic lighting, 4K quality, commercial aesthetic.'
        ),
        'runway_prompt': (
            'Slow push-in camera: a blonde woman in her 30s suddenly tilts her head '
            'stiffly to the side and blinks in surprise, face frozen mid-expression. '
            'Luxurious modern home interior, soft natural lighting, commercial quality.'
        ),
        'style': 'commercial',
        'aspect_ratio': '9:16',
        'duration': 5,  # seconds for hook video
        'brand': 'Somira'
    }
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def load_tts_script(script_file: Optional[str] = None) -> str:
    """
    Load TTS script from file or use default

    Args:
        script_file: Path to text file with script, or None for default

    Returns:
        TTS script string without leading or trailing whitespace. The
        built-in default is returned when no file is given or the given file
        does not exist; the latter case is logged as a warning.
    """
    if script_file:
        script_path = Path(script_file)
        if script_path.exists():
            logger.info(f"Loading TTS script from: {script_file}")
            # Explicit encoding so scripts with non-ASCII text load the same
            # way on every platform.
            with open(script_path, 'r', encoding='utf-8') as f:
                return f.read().strip()
        logger.warning(
            f"TTS script file not found: {script_file}. Using default script."
        )

    # Default script for Somira massager ad. Built without surrounding blank
    # lines so it matches the stripped form returned for file-based scripts.
    return (
        "I heard a pop, and suddenly my neck was stuck. I looked like I was mid-sneeze all day.\n"
        "After one minute with the Somira massager it was gone. If you ever feel neck pain,\n"
        "you'll wish you bought one, because the moment I turned my head, I knew I needed relief fast."
    )
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
async def run_pipeline(
    automation: ContentAutomation,
    content_strategy: Dict,
    tts_script: str,
    output_dir: Optional[str] = None
) -> Dict:
    """
    Log a run banner and configuration summary, execute the pipeline, and
    optionally persist the result.

    Args:
        automation: ContentAutomation instance
        content_strategy: Content generation strategy
        tts_script: TTS script text
        output_dir: Directory for 'pipeline_result.json'; the file is written
            only when this is set and the pipeline reports success

    Returns:
        Pipeline execution results
    """
    banner_rule = "=" * 70
    logger.info("\n" + banner_rule)
    logger.info("🎬 SOMIRA CONTENT AUTOMATION SYSTEM")
    logger.info(banner_rule)

    # Display configuration
    logger.info("\n📋 Pipeline Configuration:")
    summary_lines = (
        f" • Brand: {content_strategy.get('brand', 'N/A')}",
        f" • Style: {content_strategy.get('style', 'N/A')}",
        f" • Aspect Ratio: {content_strategy.get('aspect_ratio', 'N/A')}",
        f" • Hook Duration: {content_strategy.get('duration', 5)}s",
        f" • Script Length: {len(tts_script)} characters",
    )
    for summary_line in summary_lines:
        logger.info(summary_line)

    # Execute pipeline
    result = await automation.execute_pipeline(
        content_strategy=content_strategy,
        tts_script=tts_script
    )

    # Nothing to persist without a target directory or after a failed run.
    if not (output_dir and result.get('success')):
        return result

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Save metadata; values json cannot encode natively are stringified.
    metadata_file = target_dir / 'pipeline_result.json'
    with open(metadata_file, 'w') as f:
        json.dump(result, f, indent=2, default=str)
    logger.info(f"\n💾 Results saved to: {metadata_file}")

    return result
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
async def health_check_command(automation: ContentAutomation):
    """
    Await ``automation.health_check()`` and turn the result into an exit code.

    Returns:
        0 when every value in the returned mapping is truthy, otherwise 1
    """
    statuses = await automation.health_check()
    everything_up = all(statuses.values())

    if not everything_up:
        logger.error("\n❌ Some systems are not operational")
        return 1

    logger.info("\n✅ All systems operational!")
    return 0
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
async def test_command(automation: ContentAutomation):
    """
    Execute the pipeline once with a small built-in strategy and script.

    Returns:
        0 when the pipeline result reports success, otherwise 1
    """
    logger.info("\n🧪 Running test pipeline...")

    # Fixed, minimal inputs used only for this smoke run
    test_strategy = dict(
        gemini_prompt='A simple product shot of a modern massager device',
        runway_prompt='Static product shot of a sleek white massager on a clean background',
        style='minimal',
        aspect_ratio='9:16',
        duration=5,
        brand='Test',
    )
    test_script = "This is a test of the text-to-speech system. It should be brief."

    outcome = await automation.execute_pipeline(test_strategy, test_script)

    if not outcome.get('success'):
        logger.error(f"\n❌ Test failed: {outcome.get('error', 'Unknown error')}")
        return 1

    logger.info("\n✅ Test completed successfully!")
    return 0
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def parse_arguments():
    """
    Build the command line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace with ``strategy``, ``script``, ``output``
        (strings or None) and ``health_check``, ``test``, ``verbose`` (bools)
    """
    usage_examples = """
Examples:
# Run with default content
python main.py

# Run with custom strategy and script
python main.py --strategy my_strategy.json --script my_script.txt

# Run health check
python main.py --health-check

# Run test pipeline
python main.py --test

# Save output to specific directory
python main.py --output ./outputs/video_001
"""

    parser = argparse.ArgumentParser(
        description='Somira Content Automation System',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples
    )

    # Options that take a string value
    value_options = (
        ('--strategy', 'Path to JSON file with content strategy'),
        ('--script', 'Path to text file with TTS script'),
        ('--output', 'Output directory for results'),
    )
    for flag, help_text in value_options:
        parser.add_argument(flag, type=str, help=help_text)

    # Boolean switches
    switch_options = (
        ('--health-check', 'Run health check on all services'),
        ('--test', 'Run test pipeline with minimal resources'),
        ('--verbose', 'Enable verbose logging'),
    )
    for flag, help_text in switch_options:
        parser.add_argument(flag, action='store_true', help=help_text)

    return parser.parse_args()
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
async def main():
    """
    Main execution function.

    Parses the command line, loads configuration, builds the automation
    system, and dispatches to the health check, the test run, or the full
    pipeline.

    Returns:
        Process exit code: 0 on success, 1 on any failure
    """
    args = parse_arguments()

    # --verbose is documented as "Enable verbose logging"; make it actually
    # lower the threshold on the logger and on every handler attached to it
    # (handlers carry their own level and would otherwise still filter DEBUG).
    if args.verbose:
        import logging
        logger.setLevel(logging.DEBUG)
        for handler in logger.handlers:
            handler.setLevel(logging.DEBUG)

    try:
        # Load configuration
        logger.info("🔧 Loading configuration...")
        config = load_configuration()
        logger.info("✓ Configuration loaded successfully")

        # Initialize automation system
        logger.info("🚀 Initializing automation system...")
        automation = ContentAutomation(config)
        logger.info("✓ Automation system initialized")

        # Handle different commands; --health-check wins over --test
        if args.health_check:
            return await health_check_command(automation)

        if args.test:
            return await test_command(automation)

        # Load content strategy and script
        content_strategy = load_content_strategy(args.strategy)
        tts_script = load_tts_script(args.script)

        # Run the pipeline
        result = await run_pipeline(
            automation=automation,
            content_strategy=content_strategy,
            tts_script=tts_script,
            output_dir=args.output
        )

        # Print final summary
        if result.get('success'):
            print("\n" + "=" * 70)
            print("✅ PIPELINE COMPLETED SUCCESSFULLY")
            print("=" * 70)
            print(f"\n📹 Final Video URL: {result['final_url']}")
            print(f"⏱️ Total Duration: {result['duration']:.2f}s")
            print(f"💾 Local Path: {result.get('local_path', 'N/A')}")
            print("\n" + "=" * 70)
            return 0
        else:
            print("\n" + "=" * 70)
            print("❌ PIPELINE FAILED")
            print("=" * 70)
            print(f"\n🔥 Error: {result.get('error', 'Unknown error')}")
            print(f"⏱️ Failed after: {result.get('duration', 0):.2f}s")
            print("\n" + "=" * 70)
            return 1

    except ValueError as e:
        # Every ValueError raised inside the try block is reported as a
        # configuration problem
        logger.error(f"\n❌ Configuration Error: {e}")
        logger.info("\n💡 Tip: Make sure your .env file is properly configured.")
        logger.info(" See API_SETUP_GUIDE.md for detailed instructions.")
        return 1

    except Exception as e:
        logger.error(f"\n❌ Unexpected Error: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1
|
| 325 |
+
|
| 326 |
|
| 327 |
if __name__ == "__main__":
    # Script entry point: run the async main() and exit with its return code.
    try:
        sys.exit(asyncio.run(main()))
    except KeyboardInterrupt:
        # Exit status 130 is used when the user interrupts the run
        logger.info("\n\n⚠️ Pipeline interrupted by user")
        sys.exit(130)
    except Exception as fatal:
        logger.error(f"\n❌ Fatal error: {fatal}")
        sys.exit(1)
|
src/utils.py
CHANGED
|
@@ -1,34 +1,208 @@
|
|
| 1 |
"""
|
| 2 |
-
Utility functions and logging
|
| 3 |
"""
|
| 4 |
import logging
|
| 5 |
import sys
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
"""
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
setup_logging()
|
| 24 |
-
logger = logging.getLogger(__name__)
|
| 25 |
|
| 26 |
-
def
|
| 27 |
-
"""
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Utility functions and logging configuration
|
| 3 |
"""
|
| 4 |
import logging
|
| 5 |
import sys
|
| 6 |
+
from datetime import datetime
|
| 7 |
from pathlib import Path
|
| 8 |
|
| 9 |
+
|
| 10 |
+
class ColoredFormatter(logging.Formatter):
    """Custom formatter that wraps the level name in ANSI colors for terminal output"""

    # ANSI color codes
    COLORS = {
        'DEBUG': '\033[36m',     # Cyan
        'INFO': '\033[32m',      # Green
        'WARNING': '\033[33m',   # Yellow
        'ERROR': '\033[31m',     # Red
        'CRITICAL': '\033[35m',  # Magenta
        'RESET': '\033[0m'       # Reset
    }

    def format(self, record):
        """
        Format ``record`` with a colored level name.

        The same LogRecord object is handed to every handler, so the level
        name is restored afterwards; otherwise the escape codes would leak
        into handlers that run later (for example a plain-text file handler).

        Args:
            record: The logging.LogRecord to format

        Returns:
            The formatted log line
        """
        plain_levelname = record.levelname
        color = self.COLORS.get(plain_levelname)
        if color is None:
            # Unknown level names are formatted without any coloring
            return super().format(record)

        record.levelname = f"{color}{plain_levelname}{self.COLORS['RESET']}"
        try:
            return super().format(record)
        finally:
            # Undo the temporary mutation even if formatting raises
            record.levelname = plain_levelname
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def setup_logger(name='ContentAutomation', level=logging.INFO, log_file=None):
    """
    Set up logger with console and optional file output

    Args:
        name: Logger name
        level: Logging level applied to the logger and to the handlers
            created here
        log_file: Optional path to log file; parent directories are created

    Returns:
        Configured logger instance. If the named logger already has handlers
        it is returned after only its level is updated, so no handlers are
        added twice (and ``log_file`` is ignored in that case).
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Avoid adding handlers multiple times
    if logger.handlers:
        return logger

    # Console handler with colors
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)
    console_formatter = ColoredFormatter(
        fmt='%(asctime)s | %(levelname)s | %(message)s',
        datefmt='%H:%M:%S'
    )
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    # File handler if specified
    if log_file:
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)

        # Log messages in this project contain emoji and other non-ASCII
        # characters; an explicit UTF-8 encoding keeps file logging from
        # failing on platforms whose default encoding cannot represent them.
        file_handler = logging.FileHandler(log_path, encoding='utf-8')
        file_handler.setLevel(level)
        file_formatter = logging.Formatter(
            fmt='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)

    return logger
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# Create global logger instance using setup_logger's defaults
# (name 'ContentAutomation', INFO level, console output only, no log file)
logger = setup_logger()
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def format_duration(seconds: float) -> str:
    """
    Format duration in seconds to human-readable string

    Values are rounded before being split into units so that a rounded
    component never reaches 60 (e.g. 119.6 is "2m 0s", not "1m 60s").

    Args:
        seconds: Duration in seconds

    Returns:
        Formatted string (e.g., "1m 23s" or "45.0s"); durations of an hour
        or more are shown as hours and minutes (e.g., "1h 2m")
    """
    # Compare the value as it will be displayed, so 59.96 is not shown as "60.0s"
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}s"

    # Round once to whole seconds, then carry into minutes/hours with divmod
    total_seconds = int(round(seconds))
    minutes, remaining_seconds = divmod(total_seconds, 60)

    if minutes < 60:
        return f"{minutes}m {remaining_seconds}s"

    hours, remaining_minutes = divmod(minutes, 60)
    return f"{hours}h {remaining_minutes}m"
|
| 104 |
|
|
|
|
|
|
|
| 105 |
|
| 106 |
+
def format_file_size(size_bytes: int) -> str:
    """
    Render a byte count with the largest unit that keeps the value below 1024

    Args:
        size_bytes: Size in bytes

    Returns:
        Value with one decimal and a unit from B to PB (e.g., "1.5 MB")
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    value = size_bytes
    index = 0

    # Step up one unit at a time until the value fits below 1024
    while index < len(units):
        if value < 1024.0:
            return f"{value:.1f} {units[index]}"
        value /= 1024.0
        index += 1

    # Anything that did not fit in TB is reported in PB
    return f"{value:.1f} PB"
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def validate_video_config(config: dict) -> bool:
    """
    Check the optional 'aspect_ratio', 'style' and 'duration' entries

    Keys that are absent are not checked. The first invalid entry is logged
    as a warning and ends validation.

    Args:
        config: Video configuration dictionary

    Returns:
        True if every present entry is valid, False otherwise
    """
    allowed_ratios = ['16:9', '9:16', '1:1', '4:5']
    allowed_styles = ['commercial', 'minimal', 'cinematic', 'social']

    if 'aspect_ratio' in config and config['aspect_ratio'] not in allowed_ratios:
        logger.warning(f"Invalid aspect ratio: {config['aspect_ratio']}")
        return False

    if 'style' in config and config['style'] not in allowed_styles:
        logger.warning(f"Invalid style: {config['style']}")
        return False

    # Duration must lie within 1..60 seconds inclusive
    if 'duration' in config and not (1 <= config['duration'] <= 60):
        logger.warning(f"Invalid duration: {config['duration']}s (must be 1-60)")
        return False

    return True
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def sanitize_filename(filename: str) -> str:
    """
    Replace characters that are invalid in filenames and trim the ends

    Args:
        filename: Original filename

    Returns:
        Filename with each of < > : " / \\ | ? * replaced by an underscore
        and leading/trailing spaces and dots removed
    """
    import re

    invalid_chars = re.compile(r'[<>:"/\\|?*]')
    replaced = invalid_chars.sub('_', filename)
    # Trim after substitution so the result never starts or ends with '.' or ' '
    return replaced.strip('. ')
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def generate_video_id() -> str:
    """
    Build a video ID from the current local time

    The timestamp has one-second resolution, so two calls within the same
    second return the same ID.

    Returns:
        ID string of the form "video_YYYYMMDD_HHMMSS"
    """
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    return "video_" + stamp
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
class ProgressTracker:
    """Track progress of multi-step operations and log each step"""

    def __init__(self, total_steps: int, description: str = "Processing"):
        """
        Args:
            total_steps: Number of steps the operation is expected to take
            description: Label used in the completion message
        """
        self.total_steps = total_steps
        self.current_step = 0
        self.description = description
        self.start_time = datetime.now()

    def _progress_percent(self) -> float:
        """Return completed steps as a percentage; 100.0 when there are no steps."""
        # Guard against ZeroDivisionError for an operation declared with 0 steps
        if not self.total_steps:
            return 100.0
        return (self.current_step / self.total_steps) * 100

    def update(self, step_name: str):
        """Advance to the next step and log progress with the elapsed time"""
        self.current_step += 1
        progress = self._progress_percent()
        elapsed = (datetime.now() - self.start_time).total_seconds()

        logger.info(
            f"[{progress:.0f}%] Step {self.current_step}/{self.total_steps}: "
            f"{step_name} (Elapsed: {format_duration(elapsed)})"
        )

    def complete(self):
        """Log that the operation finished, with the total elapsed time"""
        elapsed = (datetime.now() - self.start_time).total_seconds()
        logger.info(
            f"✓ {self.description} completed in {format_duration(elapsed)}"
        )
|