Merge pull request #2 from ElvoroLtd/feat/video-editor
Browse files- .env.example +6 -59
- API_SETUP_GUIDE.md +0 -316
- QUICKSTART.md +0 -313
- README.md +200 -261
- config/api_keys.yaml +12 -5
- requirements.txt +52 -13
- src/api_clients.py +110 -40
- src/asset_selector.py +233 -0
- src/automation.py +330 -329
- src/main.py +28 -25
- src/video_renderer.py +382 -55
.env.example
CHANGED
|
@@ -1,75 +1,22 @@
|
|
| 1 |
-
#
|
| 2 |
-
# SOMIRA CONTENT AUTOMATION - CONFIGURATION
|
| 3 |
-
# ============================================
|
| 4 |
-
|
| 5 |
-
# -------------------- API KEYS --------------------
|
| 6 |
-
|
| 7 |
-
# Gemini API (Google AI) - For prompt enhancement and video selection
|
| 8 |
-
# Get yours at: https://aistudio.google.com/app/apikey
|
| 9 |
GEMINI_API_KEY=your_gemini_api_key_here
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
# Get yours at: https://dev.runwayml.com/
|
| 13 |
-
RUNWAYML_API_KEY=key_your_runwayml_api_key_here
|
| 14 |
-
|
| 15 |
-
# Google Cloud - Service Account for TTS and Storage
|
| 16 |
-
# Path to your service account JSON key file
|
| 17 |
GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
# AZURE_SPEECH_KEY=your_azure_speech_key_here
|
| 21 |
-
# AZURE_SPEECH_REGION=eastus
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
# -------------------- CLOUD STORAGE --------------------
|
| 25 |
-
|
| 26 |
-
# Google Cloud Storage bucket name for video storage
|
| 27 |
-
# Create bucket at: https://console.cloud.google.com/storage
|
| 28 |
GCS_BUCKET_NAME=your_bucket_name_here
|
| 29 |
|
| 30 |
-
|
| 31 |
-
# -------------------- CONFIGURATION --------------------
|
| 32 |
-
|
| 33 |
-
# Audio library size (number of background music tracks available)
|
| 34 |
AUDIO_LIBRARY_SIZE=27
|
| 35 |
-
|
| 36 |
-
# Video library size (number of product video clips available)
|
| 37 |
VIDEO_LIBRARY_SIZE=47
|
| 38 |
-
|
| 39 |
-
# Default TTS voice (Google Cloud TTS voices)
|
| 40 |
-
# Options: en-US-Neural2-F, en-US-Neural2-D, en-US-Wavenet-D, etc.
|
| 41 |
-
# Full list: https://cloud.google.com/text-to-speech/docs/voices
|
| 42 |
DEFAULT_VOICE=en-US-Neural2-F
|
| 43 |
-
|
| 44 |
-
# Video rendering quality (low, medium, high, ultra)
|
| 45 |
VIDEO_QUALITY=high
|
| 46 |
-
|
| 47 |
-
# Enable debug logging (true/false)
|
| 48 |
DEBUG_MODE=false
|
| 49 |
|
| 50 |
-
|
| 51 |
-
# -------------------- OPTIONAL SETTINGS --------------------
|
| 52 |
-
|
| 53 |
-
# Maximum video generation timeout (seconds)
|
| 54 |
VIDEO_GENERATION_TIMEOUT=300
|
| 55 |
-
|
| 56 |
-
# Maximum concurrent API requests
|
| 57 |
MAX_CONCURRENT_REQUESTS=4
|
| 58 |
-
|
| 59 |
-
# Retry attempts for failed API calls
|
| 60 |
MAX_RETRY_ATTEMPTS=3
|
| 61 |
-
|
| 62 |
-
# Output directory for generated videos
|
| 63 |
OUTPUT_DIRECTORY=./output
|
| 64 |
-
|
| 65 |
-
# Temp directory for intermediate files
|
| 66 |
TEMP_DIRECTORY=/tmp/somira
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
# -------------------- NOTES --------------------
|
| 70 |
-
#
|
| 71 |
-
# 1. Never commit this file with actual API keys to version control
|
| 72 |
-
# 2. Copy this file to .env and fill in your actual values
|
| 73 |
-
# 3. Make sure .env is listed in your .gitignore file
|
| 74 |
-
# 4. See API_SETUP_GUIDE.md for detailed setup instructions
|
| 75 |
-
#
|
|
|
|
| 1 |
+
# API Keys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
GEMINI_API_KEY=your_gemini_api_key_here
|
| 3 |
+
RUNWAYML_API_KEY=your_runwayml_api_key_here
|
| 4 |
+
DEEPSEEK_API_KEY=your_deepseek_api_key_here
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json
|
| 6 |
|
| 7 |
+
# Cloud Storage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
GCS_BUCKET_NAME=your_bucket_name_here
|
| 9 |
|
| 10 |
+
# Configuration
|
|
|
|
|
|
|
|
|
|
| 11 |
AUDIO_LIBRARY_SIZE=27
|
|
|
|
|
|
|
| 12 |
VIDEO_LIBRARY_SIZE=47
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
DEFAULT_VOICE=en-US-Neural2-F
|
|
|
|
|
|
|
| 14 |
VIDEO_QUALITY=high
|
|
|
|
|
|
|
| 15 |
DEBUG_MODE=false
|
| 16 |
|
| 17 |
+
# Optional Settings
|
|
|
|
|
|
|
|
|
|
| 18 |
VIDEO_GENERATION_TIMEOUT=300
|
|
|
|
|
|
|
| 19 |
MAX_CONCURRENT_REQUESTS=4
|
|
|
|
|
|
|
| 20 |
MAX_RETRY_ATTEMPTS=3
|
|
|
|
|
|
|
| 21 |
OUTPUT_DIRECTORY=./output
|
|
|
|
|
|
|
| 22 |
TEMP_DIRECTORY=/tmp/somira
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
API_SETUP_GUIDE.md
DELETED
|
@@ -1,316 +0,0 @@
|
|
| 1 |
-
# API Setup Guide - Complete Instructions
|
| 2 |
-
|
| 3 |
-
This guide will walk you through obtaining all necessary API keys for your Somira video generation system.
|
| 4 |
-
|
| 5 |
-
---
|
| 6 |
-
|
| 7 |
-
## 1. Google Gemini API (Prompt Enhancement)
|
| 8 |
-
|
| 9 |
-
### Purpose
|
| 10 |
-
Enhances user prompts and analyzes scripts for intelligent video selection.
|
| 11 |
-
|
| 12 |
-
### How to Get Your API Key
|
| 13 |
-
|
| 14 |
-
1. **Go to Google AI Studio**
|
| 15 |
-
- Visit: https://aistudio.google.com/app/apikey
|
| 16 |
-
- Sign in with your Google account
|
| 17 |
-
|
| 18 |
-
2. **Create API Key**
|
| 19 |
-
- Click "Get API key" button (top left)
|
| 20 |
-
- Click "Create API key"
|
| 21 |
-
- Choose "Create API key in new project" (or select existing project)
|
| 22 |
-
- Copy the API key immediately (shown only once!)
|
| 23 |
-
|
| 24 |
-
3. **Add to Your Environment**
|
| 25 |
-
```bash
|
| 26 |
-
export GEMINI_API_KEY="your_api_key_here"
|
| 27 |
-
```
|
| 28 |
-
|
| 29 |
-
### Pricing
|
| 30 |
-
- Free tier available with rate limits
|
| 31 |
-
- Model used: `gemini-2.0-flash-exp` (optimized for speed and cost)
|
| 32 |
-
|
| 33 |
-
### Documentation
|
| 34 |
-
- https://ai.google.dev/gemini-api/docs
|
| 35 |
-
|
| 36 |
-
---
|
| 37 |
-
|
| 38 |
-
## 2. RunwayML API (Video Generation)
|
| 39 |
-
|
| 40 |
-
### Purpose
|
| 41 |
-
Generates AI videos from text prompts using Gen-4 model.
|
| 42 |
-
|
| 43 |
-
### How to Get Your API Key
|
| 44 |
-
|
| 45 |
-
1. **Create Developer Account**
|
| 46 |
-
- Visit: https://dev.runwayml.com/
|
| 47 |
-
- Sign up for a new account
|
| 48 |
-
- Create a new organization (corresponds to your integration)
|
| 49 |
-
|
| 50 |
-
2. **Create API Key**
|
| 51 |
-
- Navigate to "API Keys" tab
|
| 52 |
-
- Click "Create new key"
|
| 53 |
-
- Give it a descriptive name (e.g., "Somira Production")
|
| 54 |
-
- Copy the key immediately and store securely (never shown again)
|
| 55 |
-
|
| 56 |
-
3. **Add Credits**
|
| 57 |
-
- Go to "Billing" tab
|
| 58 |
-
- Add credits to your organization
|
| 59 |
-
- Minimum payment: $10 (at $0.01 per credit)
|
| 60 |
-
|
| 61 |
-
4. **Add to Your Environment**
|
| 62 |
-
```bash
|
| 63 |
-
export RUNWAYML_API_KEY="key_your_api_key_here"
|
| 64 |
-
```
|
| 65 |
-
|
| 66 |
-
### Pricing
|
| 67 |
-
- Pay-per-use model with credits
|
| 68 |
-
- Gen-4 Turbo: ~5-10 credits per second (roughly 50-100 credits, i.e. $0.50-$1.00, per 10-second video)
|
| 69 |
-
- Minimum: $10 to start
|
| 70 |
-
|
| 71 |
-
### Documentation
|
| 72 |
-
- https://docs.dev.runwayml.com/
|
| 73 |
-
|
| 74 |
-
---
|
| 75 |
-
|
| 76 |
-
## 3. Text-to-Speech (Google Cloud TTS, or Azure as an Alternative)
|
| 77 |
-
|
| 78 |
-
### Purpose
|
| 79 |
-
Converts text scripts to natural-sounding speech with timing data for lip-sync.
|
| 80 |
-
|
| 81 |
-
### Option A: Google Cloud TTS (Recommended)
|
| 82 |
-
|
| 83 |
-
#### How to Get Your API Key
|
| 84 |
-
|
| 85 |
-
1. **Create Google Cloud Project**
|
| 86 |
-
- Visit: https://console.cloud.google.com/
|
| 87 |
-
- Create new project or select existing
|
| 88 |
-
|
| 89 |
-
2. **Enable Text-to-Speech API**
|
| 90 |
-
- Go to "APIs & Services" > "Library"
|
| 91 |
-
- Search "Text-to-Speech API"
|
| 92 |
-
- Click "Enable"
|
| 93 |
-
|
| 94 |
-
3. **Create Service Account**
|
| 95 |
-
- Go to "APIs & Services" > "Credentials"
|
| 96 |
-
- Click "Create Credentials" > "Service Account"
|
| 97 |
-
- Download JSON key file
|
| 98 |
-
|
| 99 |
-
4. **Add to Your Environment**
|
| 100 |
-
```bash
|
| 101 |
-
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account-key.json"
|
| 102 |
-
```
|
| 103 |
-
|
| 104 |
-
#### Pricing
|
| 105 |
-
- Free tier: 1 million characters/month (Standard voices)
|
| 106 |
-
- $4 per million characters after (Standard)
|
| 107 |
-
- $16 per million characters (Neural2/Studio voices)
|
| 108 |
-
|
| 109 |
-
### Option B: Azure Cognitive Services TTS
|
| 110 |
-
|
| 111 |
-
#### How to Get Your API Key
|
| 112 |
-
|
| 113 |
-
1. **Create Azure Account**
|
| 114 |
-
- Visit: https://portal.azure.com/
|
| 115 |
-
- Sign up (free tier available)
|
| 116 |
-
|
| 117 |
-
2. **Create Speech Service Resource**
|
| 118 |
-
- Search "Speech Services" in Azure Portal
|
| 119 |
-
- Click "Create"
|
| 120 |
-
- Select subscription, resource group, region
|
| 121 |
-
- Choose pricing tier (F0 for free)
|
| 122 |
-
|
| 123 |
-
3. **Get Keys**
|
| 124 |
-
- Go to your Speech Service resource
|
| 125 |
-
- Navigate to "Keys and Endpoint"
|
| 126 |
-
- Copy Key 1 or Key 2
|
| 127 |
-
- Copy the Region (e.g., eastus)
|
| 128 |
-
|
| 129 |
-
4. **Add to Your Environment**
|
| 130 |
-
```bash
|
| 131 |
-
export AZURE_SPEECH_KEY="your_key_here"
|
| 132 |
-
export AZURE_SPEECH_REGION="eastus"
|
| 133 |
-
```
|
| 134 |
-
|
| 135 |
-
#### Pricing
|
| 136 |
-
- Free tier: 5 audio hours/month
|
| 137 |
-
- Standard: $1 per audio hour
|
| 138 |
-
- Neural: $16 per million characters
|
| 139 |
-
|
| 140 |
-
### Documentation
|
| 141 |
-
- Google: https://cloud.google.com/text-to-speech/docs
|
| 142 |
-
- Azure: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/
|
| 143 |
-
|
| 144 |
-
---
|
| 145 |
-
|
| 146 |
-
## 4. Google Cloud Storage (Video Storage)
|
| 147 |
-
|
| 148 |
-
### Purpose
|
| 149 |
-
Stores generated videos, audio files, and video library.
|
| 150 |
-
|
| 151 |
-
### How to Set Up
|
| 152 |
-
|
| 153 |
-
1. **Create GCS Bucket**
|
| 154 |
-
- Go to: https://console.cloud.google.com/storage
|
| 155 |
-
- Click "Create Bucket"
|
| 156 |
-
- Choose unique name (e.g., "somira-videos")
|
| 157 |
-
- Select region (same as your app for best performance)
|
| 158 |
-
- Choose "Standard" storage class
|
| 159 |
-
|
| 160 |
-
2. **Set Permissions**
|
| 161 |
-
- Make bucket public (if videos should be publicly accessible)
|
| 162 |
-
- Or configure IAM for service account access
|
| 163 |
-
|
| 164 |
-
3. **Add to Your Environment**
|
| 165 |
-
```bash
|
| 166 |
-
export GCS_BUCKET_NAME="somira-videos"
|
| 167 |
-
```
|
| 168 |
-
|
| 169 |
-
### Pricing
|
| 170 |
-
- $0.020 per GB/month (Standard storage)
|
| 171 |
-
- $0.12 per GB egress (after free tier)
|
| 172 |
-
- Free tier: 5GB storage
|
| 173 |
-
|
| 174 |
-
---
|
| 175 |
-
|
| 176 |
-
## Complete .env File Example
|
| 177 |
-
|
| 178 |
-
Create a `.env` file in your project root:
|
| 179 |
-
|
| 180 |
-
```bash
|
| 181 |
-
# Gemini API (Prompt Enhancement)
|
| 182 |
-
GEMINI_API_KEY=AIzaSyC_your_gemini_key_here
|
| 183 |
-
|
| 184 |
-
# RunwayML API (Video Generation)
|
| 185 |
-
RUNWAYML_API_KEY=key_1234567890abcdefghijklmnop
|
| 186 |
-
|
| 187 |
-
# Google Cloud TTS (Option A - Recommended)
|
| 188 |
-
GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
|
| 189 |
-
|
| 190 |
-
# OR Azure TTS (Option B)
|
| 191 |
-
# AZURE_SPEECH_KEY=your_azure_key_here
|
| 192 |
-
# AZURE_SPEECH_REGION=eastus
|
| 193 |
-
|
| 194 |
-
# Google Cloud Storage
|
| 195 |
-
GCS_BUCKET_NAME=somira-videos
|
| 196 |
-
|
| 197 |
-
# Configuration
|
| 198 |
-
AUDIO_LIBRARY_SIZE=27
|
| 199 |
-
VIDEO_LIBRARY_SIZE=47
|
| 200 |
-
DEFAULT_VOICE=en-US-Neural2-F
|
| 201 |
-
```
|
| 202 |
-
|
| 203 |
-
---
|
| 204 |
-
|
| 205 |
-
## Security Best Practices
|
| 206 |
-
|
| 207 |
-
### DO:
|
| 208 |
-
- Store API keys in environment variables or secret managers
|
| 209 |
-
- Never commit API keys to version control (add .env to .gitignore)
|
| 210 |
-
- Use descriptive names for API keys so you can revoke them later
|
| 211 |
-
- Rotate keys regularly
|
| 212 |
-
- Use separate keys for development and production
|
| 213 |
-
|
| 214 |
-
### DON'T:
|
| 215 |
-
- Never expose API keys on the client-side or in client-side code
|
| 216 |
-
- Never hard-code API keys directly in source code
|
| 217 |
-
- Don't share keys in public repositories
|
| 218 |
-
|
| 219 |
-
---
|
| 220 |
-
|
| 221 |
-
## Installation Steps
|
| 222 |
-
|
| 223 |
-
1. **Install Dependencies**
|
| 224 |
-
```bash
|
| 225 |
-
pip install -r requirements.txt
|
| 226 |
-
```
|
| 227 |
-
|
| 228 |
-
2. **Set Up Environment Variables**
|
| 229 |
-
```bash
|
| 230 |
-
cp .env.example .env
|
| 231 |
-
# Edit .env with your actual keys
|
| 232 |
-
```
|
| 233 |
-
|
| 234 |
-
3. **Load Environment Variables**
|
| 235 |
-
```python
|
| 236 |
-
from dotenv import load_dotenv
|
| 237 |
-
load_dotenv()
|
| 238 |
-
```
|
| 239 |
-
|
| 240 |
-
4. **Test API Connections**
|
| 241 |
-
```python
|
| 242 |
-
import os
from api_clients import APIClients
|
| 243 |
-
|
| 244 |
-
config = {
|
| 245 |
-
'gemini_api_key': os.getenv('GEMINI_API_KEY'),
|
| 246 |
-
'runwayml_api_key': os.getenv('RUNWAYML_API_KEY'),
|
| 247 |
-
'gcs_bucket_name': os.getenv('GCS_BUCKET_NAME'),
|
| 248 |
-
'video_library_size': 47,
|
| 249 |
-
'default_voice': 'en-US-Neural2-F'
|
| 250 |
-
}
|
| 251 |
-
|
| 252 |
-
clients = APIClients(config)
|
| 253 |
-
```
|
| 254 |
-
|
| 255 |
-
---
|
| 256 |
-
|
| 257 |
-
## Cost Estimates (Monthly)
|
| 258 |
-
|
| 259 |
-
For a moderate usage scenario (100 videos/month):
|
| 260 |
-
|
| 261 |
-
| Service | Usage | Cost |
|
| 262 |
-
|---------|-------|------|
|
| 263 |
-
| Gemini API | ~200K tokens | Free (within limits) |
|
| 264 |
-
| RunwayML | 100 videos × 10 sec | ~$50-100 |
|
| 265 |
-
| Google TTS | ~100K characters | Free (within limits) |
|
| 266 |
-
| Google Cloud Storage | 50GB storage + egress | ~$2-5 |
|
| 267 |
-
| **Total** | | **~$52-105/month** |
|
| 268 |
-
|
| 269 |
-
Most of the cost comes from RunwayML video generation. Consider:
|
| 270 |
-
- Using shorter video durations (5s instead of 10s)
|
| 271 |
-
- Caching generated videos
|
| 272 |
-
- Using Gen-4 Turbo for faster/cheaper results
|
| 273 |
-
|
| 274 |
-
---
|
| 275 |
-
|
| 276 |
-
## Troubleshooting
|
| 277 |
-
|
| 278 |
-
### Common Issues
|
| 279 |
-
|
| 280 |
-
1. **"API key not found" errors**
|
| 281 |
-
- Check environment variables are loaded
|
| 282 |
-
- Verify .env file location
|
| 283 |
-
- Restart your application after adding keys
|
| 284 |
-
|
| 285 |
-
2. **RunwayML "Insufficient credits"**
|
| 286 |
-
- Add credits in the billing tab of developer portal
|
| 287 |
-
- Minimum $10 required to start
|
| 288 |
-
|
| 289 |
-
3. **Google Cloud authentication errors**
|
| 290 |
-
- Verify service account JSON path is correct
|
| 291 |
-
- Check service account has necessary permissions
|
| 292 |
-
- Ensure APIs are enabled in Cloud Console
|
| 293 |
-
|
| 294 |
-
4. **Rate limiting**
|
| 295 |
-
- Implement exponential backoff
|
| 296 |
-
- Add delays between API calls
|
| 297 |
-
- Consider upgrading to paid tiers
|
| 298 |
-
|
| 299 |
-
---
|
| 300 |
-
|
| 301 |
-
## Support Resources
|
| 302 |
-
|
| 303 |
-
- **Gemini**: https://ai.google.dev/support
|
| 304 |
-
- **RunwayML**: https://help.runwayml.com/
|
| 305 |
-
- **Google Cloud**: https://cloud.google.com/support
|
| 306 |
-
- **Azure**: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-text-to-speech
|
| 307 |
-
|
| 308 |
-
---
|
| 309 |
-
|
| 310 |
-
## Next Steps
|
| 311 |
-
|
| 312 |
-
1. Obtain all API keys following the instructions above
|
| 313 |
-
2. Configure your .env file
|
| 314 |
-
3. Test each API endpoint individually
|
| 315 |
-
4. Run the full video generation pipeline
|
| 316 |
-
5. Monitor usage and costs in each platform's dashboard
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
QUICKSTART.md
DELETED
|
@@ -1,313 +0,0 @@
|
|
| 1 |
-
# 🚀 Quick Start Guide
|
| 2 |
-
|
| 3 |
-
Get your Somira Content Automation System up and running in 5 minutes!
|
| 4 |
-
|
| 5 |
-
---
|
| 6 |
-
|
| 7 |
-
## Prerequisites
|
| 8 |
-
|
| 9 |
-
- Python 3.8 or higher
|
| 10 |
-
- pip (Python package manager)
|
| 11 |
-
- API keys (see [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md))
|
| 12 |
-
|
| 13 |
-
---
|
| 14 |
-
|
| 15 |
-
## Installation
|
| 16 |
-
|
| 17 |
-
### 1. Clone or Download the Project
|
| 18 |
-
|
| 19 |
-
```bash
|
| 20 |
-
cd somira-automation
|
| 21 |
-
```
|
| 22 |
-
|
| 23 |
-
### 2. Create Virtual Environment (Recommended)
|
| 24 |
-
|
| 25 |
-
```bash
|
| 26 |
-
# Create virtual environment
|
| 27 |
-
python -m venv venv
|
| 28 |
-
|
| 29 |
-
# Activate it
|
| 30 |
-
# On macOS/Linux:
|
| 31 |
-
source venv/bin/activate
|
| 32 |
-
# On Windows:
|
| 33 |
-
venv\Scripts\activate
|
| 34 |
-
```
|
| 35 |
-
|
| 36 |
-
### 3. Install Dependencies
|
| 37 |
-
|
| 38 |
-
```bash
|
| 39 |
-
pip install -r requirements.txt
|
| 40 |
-
```
|
| 41 |
-
|
| 42 |
-
---
|
| 43 |
-
|
| 44 |
-
## Configuration
|
| 45 |
-
|
| 46 |
-
### 1. Set Up Environment Variables
|
| 47 |
-
|
| 48 |
-
```bash
|
| 49 |
-
# Copy example file
|
| 50 |
-
cp .env.example .env
|
| 51 |
-
|
| 52 |
-
# Edit with your API keys
|
| 53 |
-
nano .env # or use your favorite editor
|
| 54 |
-
```
|
| 55 |
-
|
| 56 |
-
**Required values in `.env`:**
|
| 57 |
-
- `GEMINI_API_KEY` - Get from https://aistudio.google.com/app/apikey
|
| 58 |
-
- `RUNWAYML_API_KEY` - Get from https://dev.runwayml.com/
|
| 59 |
-
- `GOOGLE_APPLICATION_CREDENTIALS` - Path to GCP service account JSON
|
| 60 |
-
- `GCS_BUCKET_NAME` - Your Google Cloud Storage bucket name
|
| 61 |
-
|
| 62 |
-
### 2. Verify Configuration
|
| 63 |
-
|
| 64 |
-
```bash
|
| 65 |
-
python main.py --health-check
|
| 66 |
-
```
|
| 67 |
-
|
| 68 |
-
You should see:
|
| 69 |
-
```
|
| 70 |
-
✓ Gemini API: Connected
|
| 71 |
-
✓ RunwayML API: Configured
|
| 72 |
-
✓ TTS API: Configured
|
| 73 |
-
✓ Google Cloud Storage: Connected
|
| 74 |
-
✅ Health check passed
|
| 75 |
-
```
|
| 76 |
-
|
| 77 |
-
---
|
| 78 |
-
|
| 79 |
-
## Usage
|
| 80 |
-
|
| 81 |
-
### Basic Usage (Default Content)
|
| 82 |
-
|
| 83 |
-
```bash
|
| 84 |
-
python main.py
|
| 85 |
-
```
|
| 86 |
-
|
| 87 |
-
This will:
|
| 88 |
-
1. Generate a hook video using AI
|
| 89 |
-
2. Select background music
|
| 90 |
-
3. Choose 3 relevant product videos
|
| 91 |
-
4. Generate text-to-speech audio
|
| 92 |
-
5. Render the final video with subtitles
|
| 93 |
-
6. Upload to Google Cloud Storage
|
| 94 |
-
|
| 95 |
-
### Custom Content
|
| 96 |
-
|
| 97 |
-
```bash
|
| 98 |
-
python main.py \
|
| 99 |
-
--strategy example_strategy.json \
|
| 100 |
-
--script example_script.txt \
|
| 101 |
-
--output ./output/my_video
|
| 102 |
-
```
|
| 103 |
-
|
| 104 |
-
### Run a Quick Test
|
| 105 |
-
|
| 106 |
-
```bash
|
| 107 |
-
python main.py --test
|
| 108 |
-
```
|
| 109 |
-
|
| 110 |
-
This runs a minimal test to verify everything works without using many credits.
|
| 111 |
-
|
| 112 |
-
---
|
| 113 |
-
|
| 114 |
-
## Command Line Options
|
| 115 |
-
|
| 116 |
-
```bash
|
| 117 |
-
python main.py [OPTIONS]
|
| 118 |
-
|
| 119 |
-
Options:
|
| 120 |
-
--strategy FILE Path to JSON file with content strategy
|
| 121 |
-
--script FILE Path to text file with TTS script
|
| 122 |
-
--output DIR Output directory for results
|
| 123 |
-
--health-check Run health check on all services
|
| 124 |
-
--test Run test pipeline with minimal resources
|
| 125 |
-
--verbose Enable verbose logging
|
| 126 |
-
--help Show help message
|
| 127 |
-
```
|
| 128 |
-
|
| 129 |
-
---
|
| 130 |
-
|
| 131 |
-
## Example Workflows
|
| 132 |
-
|
| 133 |
-
### Create Multiple Videos from Different Scripts
|
| 134 |
-
|
| 135 |
-
```bash
|
| 136 |
-
# Video 1
|
| 137 |
-
python main.py \
|
| 138 |
-
--script scripts/script1.txt \
|
| 139 |
-
--output output/video1
|
| 140 |
-
|
| 141 |
-
# Video 2
|
| 142 |
-
python main.py \
|
| 143 |
-
--script scripts/script2.txt \
|
| 144 |
-
--output output/video2
|
| 145 |
-
|
| 146 |
-
# Video 3
|
| 147 |
-
python main.py \
|
| 148 |
-
--script scripts/script3.txt \
|
| 149 |
-
--output output/video3
|
| 150 |
-
```
|
| 151 |
-
|
| 152 |
-
### Custom Strategy with Different Style
|
| 153 |
-
|
| 154 |
-
Create `my_strategy.json`:
|
| 155 |
-
```json
|
| 156 |
-
{
|
| 157 |
-
"brand": "Somira",
|
| 158 |
-
"gemini_prompt": "Your custom prompt here...",
|
| 159 |
-
"runway_prompt": "Your custom RunwayML prompt...",
|
| 160 |
-
"style": "minimal",
|
| 161 |
-
"aspect_ratio": "16:9",
|
| 162 |
-
"duration": 10
|
| 163 |
-
}
|
| 164 |
-
```
|
| 165 |
-
|
| 166 |
-
Then run:
|
| 167 |
-
```bash
|
| 168 |
-
python main.py --strategy my_strategy.json
|
| 169 |
-
```
|
| 170 |
-
|
| 171 |
-
---
|
| 172 |
-
|
| 173 |
-
## Understanding the Pipeline
|
| 174 |
-
|
| 175 |
-
The automation runs in 4 steps:
|
| 176 |
-
|
| 177 |
-
**Step 1: Asset Generation (Parallel)** ⚡
|
| 178 |
-
- Generate hook video with AI (RunwayML)
|
| 179 |
-
- Select background music (from library)
|
| 180 |
-
- Select 3 product videos (AI-powered)
|
| 181 |
-
- Generate voice-over (TTS)
|
| 182 |
-
|
| 183 |
-
**Step 2: Video Rendering** 🎬
|
| 184 |
-
- Merge all videos
|
| 185 |
-
- Add audio tracks
|
| 186 |
-
- Apply transitions and effects
|
| 187 |
-
|
| 188 |
-
**Step 3: Subtitle Addition** 📝
|
| 189 |
-
- Generate subtitles from TTS timing
|
| 190 |
-
- Overlay on video
|
| 191 |
-
|
| 192 |
-
**Step 4: Cloud Upload** ☁️
|
| 193 |
-
- Upload to Google Cloud Storage
|
| 194 |
-
- Generate public URL
|
| 195 |
-
|
| 196 |
-
---
|
| 197 |
-
|
| 198 |
-
## File Structure
|
| 199 |
-
|
| 200 |
-
```
|
| 201 |
-
somira-automation/
|
| 202 |
-
├── main.py # Main entry point
|
| 203 |
-
├── automation.py # Pipeline orchestrator
|
| 204 |
-
├── api_clients.py # API integrations
|
| 205 |
-
├── video_renderer.py # Video processing
|
| 206 |
-
├── utils.py # Utilities and logging
|
| 207 |
-
├── requirements.txt # Python dependencies
|
| 208 |
-
├── .env # Your API keys (DO NOT COMMIT)
|
| 209 |
-
├── .env.example # Template for .env
|
| 210 |
-
├── example_strategy.json # Sample content strategy
|
| 211 |
-
├── example_script.txt # Sample TTS script
|
| 212 |
-
├── API_SETUP_GUIDE.md # Detailed API setup
|
| 213 |
-
└── QUICKSTART.md # This file
|
| 214 |
-
```
|
| 215 |
-
|
| 216 |
-
---
|
| 217 |
-
|
| 218 |
-
## Troubleshooting
|
| 219 |
-
|
| 220 |
-
### "Module not found" errors
|
| 221 |
-
```bash
|
| 222 |
-
pip install -r requirements.txt
|
| 223 |
-
```
|
| 224 |
-
|
| 225 |
-
### "API key not found" errors
|
| 226 |
-
```bash
|
| 227 |
-
# Check your .env file exists and has the right keys
|
| 228 |
-
cat .env
|
| 229 |
-
|
| 230 |
-
# Make sure you've loaded it
|
| 231 |
-
python -c "from dotenv import load_dotenv; load_dotenv(); import os; print(os.getenv('GEMINI_API_KEY'))"
|
| 232 |
-
```
|
| 233 |
-
|
| 234 |
-
### RunwayML "Insufficient credits"
|
| 235 |
-
- Add credits at https://dev.runwayml.com/ (minimum $10)
|
| 236 |
-
|
| 237 |
-
### Google Cloud authentication errors
|
| 238 |
-
```bash
|
| 239 |
-
# Verify your service account JSON exists
|
| 240 |
-
ls -l /path/to/service-account-key.json
|
| 241 |
-
|
| 242 |
-
# Set it in your .env
|
| 243 |
-
GOOGLE_APPLICATION_CREDENTIALS=/full/path/to/service-account-key.json
|
| 244 |
-
```
|
| 245 |
-
|
| 246 |
-
### Videos taking too long
|
| 247 |
-
- RunwayML video generation takes 30-60 seconds typically
|
| 248 |
-
- The `--test` command uses minimal resources for quick testing
|
| 249 |
-
|
| 250 |
-
---
|
| 251 |
-
|
| 252 |
-
## Cost Estimates
|
| 253 |
-
|
| 254 |
-
For 100 videos per month:
|
| 255 |
-
|
| 256 |
-
| Service | Cost |
|
| 257 |
-
|---------|------|
|
| 258 |
-
| Gemini API | Free (within limits) |
|
| 259 |
-
| RunwayML | ~$50-100 |
|
| 260 |
-
| Google TTS | Free (within limits) |
|
| 261 |
-
| Google Storage | ~$2-5 |
|
| 262 |
-
| **Total** | **~$52-105/month** |
|
| 263 |
-
|
| 264 |
-
💡 **Tip:** Use the `--test` command frequently to avoid unnecessary API costs during development.
|
| 265 |
-
|
| 266 |
-
---
|
| 267 |
-
|
| 268 |
-
## Next Steps
|
| 269 |
-
|
| 270 |
-
1. ✅ Complete API setup (see [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md))
|
| 271 |
-
2. ✅ Run health check: `python main.py --health-check`
|
| 272 |
-
3. ✅ Run test: `python main.py --test`
|
| 273 |
-
4. ✅ Generate your first video: `python main.py`
|
| 274 |
-
5. 📚 Customize: Edit `example_strategy.json` and `example_script.txt`
|
| 275 |
-
6. 🚀 Scale: Create multiple strategies and automate batch processing
|
| 276 |
-
|
| 277 |
-
---
|
| 278 |
-
|
| 279 |
-
## Support
|
| 280 |
-
|
| 281 |
-
- **API Issues:** See [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md)
|
| 282 |
-
- **Bugs:** Check logs in console output
|
| 283 |
-
- **Questions:** Review code comments in `main.py` and `automation.py`
|
| 284 |
-
|
| 285 |
-
---
|
| 286 |
-
|
| 287 |
-
## Tips for Best Results
|
| 288 |
-
|
| 289 |
-
### Prompt Engineering
|
| 290 |
-
- Be specific about visual details
|
| 291 |
-
- Include camera movements
|
| 292 |
-
- Specify lighting and mood
|
| 293 |
-
- Mention aspect ratio for consistency
|
| 294 |
-
|
| 295 |
-
### TTS Scripts
|
| 296 |
-
- Keep sentences natural and conversational
|
| 297 |
-
- Use pauses (commas, periods) for pacing
|
| 298 |
-
- Test different voices in `DEFAULT_VOICE` setting
|
| 299 |
-
- Aim for 15-30 seconds of speech
|
| 300 |
-
|
| 301 |
-
### Video Selection
|
| 302 |
-
- The AI analyzes your script for context
|
| 303 |
-
- More descriptive scripts = better video selection
|
| 304 |
-
- Review selected videos in logs
|
| 305 |
-
|
| 306 |
-
### Performance
|
| 307 |
-
- Parallel execution makes Step 1 fast
|
| 308 |
-
- Most time is spent waiting for RunwayML
|
| 309 |
-
- Use `--test` to verify setup without long waits
|
| 310 |
-
|
| 311 |
-
---
|
| 312 |
-
|
| 313 |
-
Happy automating! 🎉
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,359 +1,298 @@
|
|
| 1 |
-
# 🎬 Somira Content Automation
|
| 2 |
|
| 3 |
-
**
|
| 4 |
-
|
| 5 |
-
Transform text scripts into professional product videos with AI-generated content, voice-overs, and intelligent video selection - all automated end-to-end.
|
| 6 |
-
|
| 7 |
-
---
|
| 8 |
-
|
| 9 |
-
## ✨ Features
|
| 10 |
-
|
| 11 |
-
- **🤖 AI-Powered Video Generation** - Create unique hook videos using RunwayML Gen-4
|
| 12 |
-
- **🧠 Intelligent Prompt Enhancement** - Gemini AI optimizes prompts for better results
|
| 13 |
-
- **🎙️ Professional Text-to-Speech** - Natural voice-overs with Google Cloud TTS
|
| 14 |
-
- **📹 Smart Video Selection** - AI analyzes scripts to select relevant product footage
|
| 15 |
-
- **🎵 Automatic Music Integration** - Background music from curated library
|
| 16 |
-
- **📝 Subtitle Generation** - Automatic subtitle overlay with timing
|
| 17 |
-
- **⚡ Parallel Processing** - Concurrent API calls for maximum speed
|
| 18 |
-
- **☁️ Cloud Storage** - Automatic upload to Google Cloud Storage
|
| 19 |
-
- **🔄 Robust Error Handling** - Fallback mechanisms for reliability
|
| 20 |
-
|
| 21 |
-
---
|
| 22 |
-
|
| 23 |
-
## 🎯 Use Cases
|
| 24 |
-
|
| 25 |
-
- Product advertisement videos for social media
|
| 26 |
-
- Instagram Reels and TikTok content
|
| 27 |
-
- Automated marketing video generation
|
| 28 |
-
- A/B testing different video hooks
|
| 29 |
-
- Scalable video production pipelines
|
| 30 |
-
- Content marketing automation
|
| 31 |
-
|
| 32 |
-
---
|
| 33 |
-
|
| 34 |
-
## 📋 Requirements
|
| 35 |
-
|
| 36 |
-
- **Python 3.8+**
|
| 37 |
-
- **API Keys:**
|
| 38 |
-
- Google Gemini API (free tier available)
|
| 39 |
-
- RunwayML API ($10 minimum)
|
| 40 |
-
- Google Cloud Platform account (TTS + Storage)
|
| 41 |
-
- **Storage:** ~1GB for video library
|
| 42 |
-
- **RAM:** 4GB minimum
|
| 43 |
|
| 44 |
---
|
| 45 |
|
| 46 |
## 🚀 Quick Start
|
| 47 |
|
| 48 |
### 1. Installation
|
| 49 |
-
|
| 50 |
```bash
|
| 51 |
-
# Clone
|
| 52 |
-
git clone <your-repo
|
| 53 |
cd somira-automation
|
| 54 |
|
| 55 |
-
# Create virtual environment
|
| 56 |
python -m venv venv
|
| 57 |
-
source venv/bin/activate #
|
| 58 |
|
| 59 |
# Install dependencies
|
| 60 |
pip install -r requirements.txt
|
| 61 |
```
|
| 62 |
|
| 63 |
-
### 2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
```bash
|
| 66 |
-
# Copy environment
|
| 67 |
cp .env.example .env
|
| 68 |
-
|
| 69 |
-
# Edit with your API keys
|
| 70 |
-
nano .env
|
| 71 |
```
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
|
|
|
| 81 |
```bash
|
| 82 |
python main.py --health-check
|
| 83 |
```
|
|
|
|
| 84 |
|
| 85 |
-
###
|
| 86 |
-
|
| 87 |
```bash
|
| 88 |
python main.py
|
| 89 |
```
|
| 90 |
|
| 91 |
-
**📚 For detailed setup instructions, see [QUICKSTART.md](QUICKSTART.md)**
|
| 92 |
-
|
| 93 |
---
|
| 94 |
|
| 95 |
-
##
|
| 96 |
-
|
| 97 |
-
| Document | Description |
|
| 98 |
-
|----------|-------------|
|
| 99 |
-
| [QUICKSTART.md](QUICKSTART.md) | Get started in 5 minutes |
|
| 100 |
-
| [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md) | Detailed API key setup |
|
| 101 |
-
| [example_strategy.json](example_strategy.json) | Sample content strategy |
|
| 102 |
-
| [example_script.txt](example_script.txt) | Sample TTS script |
|
| 103 |
|
| 104 |
-
|
| 105 |
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
|
| 109 |
-
┌─────────────────────────────────────────────────────┐
|
| 110 |
-
│ MAIN PIPELINE │
|
| 111 |
-
└─────────────────────────────────────────────────────┘
|
| 112 |
-
│
|
| 113 |
-
▼
|
| 114 |
-
┌─────────────────────────────────────────────────────┐
|
| 115 |
-
│ STEP 1: Asset Generation (Parallel) │
|
| 116 |
-
├─────────────────────────────────────────────────────┤
|
| 117 |
-
│ ┌──────────────┐ ┌──────────────┐ │
|
| 118 |
-
│ │ Gemini API │→ │ RunwayML API │ │
|
| 119 |
-
│ │ (Enhance) │ │ (Hook Video) │ │
|
| 120 |
-
│ └──────────────┘ └──────────────┘ │
|
| 121 |
-
│ │
|
| 122 |
-
│ ┌──────────────┐ ┌──────────────┐ │
|
| 123 |
-
│ │ Music │ │ Video │ │
|
| 124 |
-
│ │ Selection │ │ Selection AI │ │
|
| 125 |
-
│ └──────────────┘ └──────────────┘ │
|
| 126 |
-
│ │
|
| 127 |
-
│ ┌──────────────┐ │
|
| 128 |
-
│ │ Google TTS │ │
|
| 129 |
-
│ │ (Voice-over) │ │
|
| 130 |
-
│ └──────────────┘ │
|
| 131 |
-
└─────────────────────────────────────────────────────┘
|
| 132 |
-
│
|
| 133 |
-
▼
|
| 134 |
-
┌─────────────────────────────────────────────────────┐
|
| 135 |
-
│ STEP 2: Video Rendering & Merging │
|
| 136 |
-
├─────────────────────────────────────────────────────┤
|
| 137 |
-
│ • Merge hook + library videos │
|
| 138 |
-
│ • Add background music │
|
| 139 |
-
│ • Mix voice-over audio │
|
| 140 |
-
│ • Apply transitions │
|
| 141 |
-
└─────────────────────────────────────────────────────┘
|
| 142 |
-
│
|
| 143 |
-
▼
|
| 144 |
-
┌─────────────────────────────────────────────────────┐
|
| 145 |
-
│ STEP 3: Subtitle Generation │
|
| 146 |
-
├─────────────────────────────────────────────────────┤
|
| 147 |
-
│ • Extract timing from TTS │
|
| 148 |
-
│ • Generate subtitle file │
|
| 149 |
-
│ • Overlay on video │
|
| 150 |
-
└─────────────────────────────────────────────────────┘
|
| 151 |
-
│
|
| 152 |
-
▼
|
| 153 |
-
┌─────────────────────────────────────────────────────┐
|
| 154 |
-
│ STEP 4: Cloud Storage Upload │
|
| 155 |
-
├─────────────────────────────────────────────────────┤
|
| 156 |
-
│ • Upload to Google Cloud Storage │
|
| 157 |
-
│ • Generate public URL │
|
| 158 |
-
│ • Save metadata │
|
| 159 |
-
└─────────────────────────────────────────────────────┘
|
| 160 |
-
```
|
| 161 |
|
| 162 |
---
|
| 163 |
|
| 164 |
-
## 💻 Usage
|
| 165 |
-
|
| 166 |
-
### Basic Usage
|
| 167 |
|
|
|
|
| 168 |
```bash
|
| 169 |
-
#
|
| 170 |
python main.py
|
| 171 |
|
| 172 |
-
#
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
```
|
| 176 |
|
| 177 |
### Custom Content
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
```bash
|
| 180 |
-
|
| 181 |
-
python main.py \
|
| 182 |
-
--strategy campaigns/holiday_2025.json \
|
| 183 |
-
--script scripts/holiday_promo.txt \
|
| 184 |
-
--output ./output/holiday_video
|
| 185 |
```
|
| 186 |
|
| 187 |
### Batch Processing
|
| 188 |
-
|
| 189 |
```python
|
| 190 |
import asyncio
|
| 191 |
from automation import ContentAutomation
|
| 192 |
|
| 193 |
-
async def
|
| 194 |
automation = ContentAutomation(config)
|
| 195 |
|
| 196 |
-
scripts = [
|
| 197 |
-
"scripts/script1.txt",
|
| 198 |
-
"scripts/script2.txt",
|
| 199 |
-
"scripts/script3.txt"
|
| 200 |
-
]
|
| 201 |
-
|
| 202 |
for script_file in scripts:
|
| 203 |
with open(script_file) as f:
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
result = await automation.execute_pipeline(
|
| 207 |
-
content_strategy=strategy,
|
| 208 |
-
tts_script=script
|
| 209 |
-
)
|
| 210 |
-
print(f"Generated: {result['final_url']}")
|
| 211 |
-
|
| 212 |
-
asyncio.run(generate_multiple_videos())
|
| 213 |
-
```
|
| 214 |
-
|
| 215 |
-
### Health Check
|
| 216 |
-
|
| 217 |
-
```bash
|
| 218 |
-
python main.py --health-check
|
| 219 |
|
| 220 |
-
|
| 221 |
-
# 🏥 Running health check...
|
| 222 |
-
# ✓ Gemini API: Connected
|
| 223 |
-
# ✓ RunwayML API: Configured
|
| 224 |
-
# ✓ TTS API: Configured
|
| 225 |
-
# ✓ Google Cloud Storage: Connected
|
| 226 |
-
# ✅ All systems operational!
|
| 227 |
```
|
| 228 |
|
| 229 |
---
|
| 230 |
|
| 231 |
-
##
|
| 232 |
|
| 233 |
-
###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
"runway_prompt": "Specific prompt for video generation",
|
| 240 |
-
"style": "commercial",
|
| 241 |
-
"aspect_ratio": "9:16",
|
| 242 |
-
"duration": 5,
|
| 243 |
-
"platform": "Instagram Reels / TikTok"
|
| 244 |
-
}
|
| 245 |
-
```
|
| 246 |
|
| 247 |
-
|
| 248 |
|
| 249 |
-
|
| 250 |
-
|----------|----------|-------------|
|
| 251 |
-
| `GEMINI_API_KEY` | Yes | Google Gemini API key |
|
| 252 |
-
| `RUNWAYML_API_KEY` | Yes | RunwayML API key |
|
| 253 |
-
| `GOOGLE_APPLICATION_CREDENTIALS` | Yes | Path to GCP service account JSON |
|
| 254 |
-
| `GCS_BUCKET_NAME` | Yes | Google Cloud Storage bucket |
|
| 255 |
-
| `AUDIO_LIBRARY_SIZE` | No | Number of music tracks (default: 27) |
|
| 256 |
-
| `VIDEO_LIBRARY_SIZE` | No | Number of video clips (default: 47) |
|
| 257 |
-
| `DEFAULT_VOICE` | No | TTS voice name (default: en-US-Neural2-F) |
|
| 258 |
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
-
|
| 264 |
-
-
|
| 265 |
-
-
|
| 266 |
-
- **Step 4 (Upload):** 5-15 seconds
|
| 267 |
|
| 268 |
-
**
|
|
|
|
|
|
|
| 269 |
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
-
|
| 273 |
|
| 274 |
-
|
| 275 |
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
-
###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
-
|
| 287 |
-
-
|
| 288 |
-
-
|
| 289 |
-
-
|
| 290 |
-
- **
|
| 291 |
|
| 292 |
---
|
| 293 |
|
| 294 |
-
##
|
| 295 |
|
| 296 |
-
|
| 297 |
|
| 298 |
-
|
| 299 |
-
-
|
| 300 |
-
-
|
| 301 |
-
- ✅ **Detailed logging** for debugging
|
| 302 |
-
- ✅ **Partial results** saved on pipeline failure
|
| 303 |
|
| 304 |
-
|
|
|
|
|
|
|
| 305 |
|
| 306 |
-
|
|
|
|
|
|
|
| 307 |
|
|
|
|
|
|
|
|
|
|
| 308 |
```
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
├── README.md # This file
|
| 320 |
-
├── QUICKSTART.md # Quick start guide
|
| 321 |
-
└── API_SETUP_GUIDE.md # Detailed API setup instructions
|
| 322 |
-
```
|
| 323 |
|
| 324 |
---
|
| 325 |
|
| 326 |
-
##
|
| 327 |
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
5. **Monitor API usage** - Set up billing alerts
|
| 333 |
-
6. **Use service accounts** - For GCP resources
|
| 334 |
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 338 |
|
| 339 |
-
|
| 340 |
|
| 341 |
-
|
| 342 |
-
```bash
|
| 343 |
-
pip install -r requirements.txt
|
| 344 |
-
```
|
| 345 |
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
|
|
|
|
|
|
| 350 |
|
| 351 |
-
**
|
| 352 |
-
- Add credits at https://dev.runwayml.com/
|
| 353 |
-
- Minimum $10 required
|
| 354 |
|
| 355 |
-
|
| 356 |
-
- Check service account has Storage Admin role
|
| 357 |
-
- Verify `GOOGLE_APPLICATION_CREDENTIALS` path is correct
|
| 358 |
|
| 359 |
-
|
|
|
|
| 1 |
+
# 🎬 Somira Content Automation
|
| 2 |
|
| 3 |
+
**AI-powered video generation pipeline that transforms text scripts into professional product advertisements.**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
---
|
| 6 |
|
| 7 |
## 🚀 Quick Start
|
| 8 |
|
| 9 |
### 1. Installation
|
|
|
|
| 10 |
```bash
|
| 11 |
+
# Clone and setup
|
| 12 |
+
git clone <your-repo>
|
| 13 |
cd somira-automation
|
| 14 |
|
| 15 |
+
# Create virtual environment (recommended)
|
| 16 |
python -m venv venv
|
| 17 |
+
source venv/bin/activate # Windows: venv\Scripts\activate
|
| 18 |
|
| 19 |
# Install dependencies
|
| 20 |
pip install -r requirements.txt
|
| 21 |
```
|
| 22 |
|
| 23 |
+
### 2. API Setup
|
| 24 |
+
|
| 25 |
+
**You need these API keys:**
|
| 26 |
+
|
| 27 |
+
#### Gemini API (Free)
|
| 28 |
+
1. Go to https://aistudio.google.com/app/apikey
|
| 29 |
+
2. Click "Create API Key"
|
| 30 |
+
3. Copy the key
|
| 31 |
+
|
| 32 |
+
#### RunwayML API ($10 minimum)
|
| 33 |
+
1. Go to https://dev.runwayml.com/
|
| 34 |
+
2. Sign up and create organization
|
| 35 |
+
3. Go to "API Keys" → "Create new key"
|
| 36 |
+
4. Add $10+ credits in "Billing" tab
|
| 37 |
|
| 38 |
+
#### Google Cloud (Free tier available)
|
| 39 |
+
1. Go to https://console.cloud.google.com/
|
| 40 |
+
2. Create project → Enable "Text-to-Speech API"
|
| 41 |
+
3. Create service account → Download JSON key
|
| 42 |
+
4. Create storage bucket
|
| 43 |
+
|
| 44 |
+
### 3. Configuration
|
| 45 |
```bash
|
| 46 |
+
# Copy and edit environment file
|
| 47 |
cp .env.example .env
|
|
|
|
|
|
|
|
|
|
| 48 |
```
|
| 49 |
|
| 50 |
+
Edit `.env` with your keys:
|
| 51 |
+
```bash
|
| 52 |
+
# Required API Keys
|
| 53 |
+
GEMINI_API_KEY=AIzaSyC_your_key_here
|
| 54 |
+
RUNWAYML_API_KEY=key_your_runwayml_key_here
|
| 55 |
+
GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
|
| 56 |
+
GCS_BUCKET_NAME=your-bucket-name
|
| 57 |
+
|
| 58 |
+
# Optional Settings
|
| 59 |
+
DEFAULT_VOICE=en-US-Neural2-F
|
| 60 |
+
AUDIO_LIBRARY_SIZE=27
|
| 61 |
+
VIDEO_LIBRARY_SIZE=47
|
| 62 |
+
```
|
| 63 |
|
| 64 |
+
### 4. Verify Setup
|
| 65 |
```bash
|
| 66 |
python main.py --health-check
|
| 67 |
```
|
| 68 |
+
You should see: `✅ All systems operational!`
|
| 69 |
|
| 70 |
+
### 5. Generate Your First Video
|
|
|
|
| 71 |
```bash
|
| 72 |
python main.py
|
| 73 |
```
|
| 74 |
|
|
|
|
|
|
|
| 75 |
---
|
| 76 |
|
| 77 |
+
## 🎯 What It Does
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
+
This system automatically creates 15-second vertical videos (perfect for TikTok/Reels) by:
|
| 80 |
|
| 81 |
+
1. **AI Video Generation** - Creates unique hook videos using RunwayML Gen-4
|
| 82 |
+
2. **Smart Content Selection** - Gemini AI analyzes your script to pick relevant product footage
|
| 83 |
+
3. **Professional Voice-overs** - Converts text to natural speech using Google TTS
|
| 84 |
+
4. **Auto Editing** - Merges videos, adds background music, subtitles, and effects
|
| 85 |
+
5. **Cloud Storage** - Uploads final videos to Google Cloud Storage
|
| 86 |
|
| 87 |
+
**Pipeline Time**: ~1-2 minutes per video
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
---
|
| 90 |
|
| 91 |
+
## 💻 Usage
|
|
|
|
|
|
|
| 92 |
|
| 93 |
+
### Basic Commands
|
| 94 |
```bash
|
| 95 |
+
# Generate video with default content
|
| 96 |
python main.py
|
| 97 |
|
| 98 |
+
# Test system (uses minimal credits)
|
| 99 |
+
python main.py --test
|
| 100 |
+
|
| 101 |
+
# Health check
|
| 102 |
+
python main.py --health-check
|
| 103 |
+
|
| 104 |
+
# Custom content
|
| 105 |
+
python main.py --strategy strategy.json --script script.txt
|
| 106 |
```
|
| 107 |
|
| 108 |
### Custom Content
|
| 109 |
+
Create `my_script.txt`:
|
| 110 |
+
```
|
| 111 |
+
I heard a pop and my neck was stuck. After one minute with Somira massager, the pain was gone. This product actually works!
|
| 112 |
+
```
|
| 113 |
|
| 114 |
+
Create `my_strategy.json`:
|
| 115 |
+
```json
|
| 116 |
+
{
|
| 117 |
+
"brand": "Somira",
|
| 118 |
+
"gemini_prompt": "A dramatic scene showing neck pain relief",
|
| 119 |
+
"runway_prompt": "Person experiencing neck pain then relief",
|
| 120 |
+
"style": "commercial",
|
| 121 |
+
"aspect_ratio": "9:16",
|
| 122 |
+
"duration": 5
|
| 123 |
+
}
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
Run:
|
| 127 |
```bash
|
| 128 |
+
python main.py --strategy my_strategy.json --script my_script.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
```
|
| 130 |
|
| 131 |
### Batch Processing
|
|
|
|
| 132 |
```python
|
| 133 |
import asyncio
|
| 134 |
from automation import ContentAutomation
|
| 135 |
|
| 136 |
+
async def create_videos():
|
| 137 |
automation = ContentAutomation(config)
|
| 138 |
|
| 139 |
+
scripts = ["script1.txt", "script2.txt", "script3.txt"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
for script_file in scripts:
|
| 141 |
with open(script_file) as f:
|
| 142 |
+
result = await automation.execute_pipeline(strategy, f.read())
|
| 143 |
+
print(f"Created: {result['final_url']}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
+
asyncio.run(create_videos())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
```
|
| 147 |
|
| 148 |
---
|
| 149 |
|
| 150 |
+
## 💰 Pricing
|
| 151 |
|
| 152 |
+
### Cost Per Video
|
| 153 |
+
| Service | Cost |
|
| 154 |
+
|---------|------|
|
| 155 |
+
| RunwayML (5s video) | ~$0.50 |
|
| 156 |
+
| Gemini API | ~$0.001 |
|
| 157 |
+
| Google TTS | ~$0.001 |
|
| 158 |
+
| Cloud Storage | ~$0.001 |
|
| 159 |
+
| **Total** | **~$0.50** |
|
| 160 |
|
| 161 |
+
### Monthly Estimate (100 videos)
|
| 162 |
+
- **RunwayML**: $50
|
| 163 |
+
- **Other services**: $2-5
|
| 164 |
+
- **Total**: ~$55/month
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
+
---
|
| 167 |
|
| 168 |
+
## 🏗️ How It Works
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
+
### Pipeline Steps
|
| 171 |
+
1. **Asset Generation** (30-60s)
|
| 172 |
+
- AI creates hook video from prompt
|
| 173 |
+
- Selects 3 relevant product videos
|
| 174 |
+
- Generates voice-over from script
|
| 175 |
+
- Picks background music
|
| 176 |
|
| 177 |
+
2. **Video Composition** (10-20s)
|
| 178 |
+
- Merges all video clips
|
| 179 |
+
- Adds audio tracks and music
|
| 180 |
+
- Applies transitions
|
| 181 |
|
| 182 |
+
3. **Subtitles** (5-10s)
|
| 183 |
+
- Generates animated subtitles
|
| 184 |
+
- Times them to voice-over
|
|
|
|
| 185 |
|
| 186 |
+
4. **Cloud Upload** (5-15s)
|
| 187 |
+
- Uploads to Google Cloud Storage
|
| 188 |
+
- Returns public URL
|
| 189 |
|
| 190 |
+
### Output Specifications
|
| 191 |
+
- **Format**: MP4, H.264
|
| 192 |
+
- **Aspect Ratio**: 9:16 (vertical)
|
| 193 |
+
- **Duration**: 15 seconds max
|
| 194 |
+
- **Resolution**: 1080x1920
|
| 195 |
+
- **Audio**: 44.1kHz, stereo
|
| 196 |
|
| 197 |
+
---
|
| 198 |
|
| 199 |
+
## 🔧 Technical Details
|
| 200 |
|
| 201 |
+
### Project Structure
|
| 202 |
+
```
|
| 203 |
+
somira-automation/
|
| 204 |
+
├── main.py # CLI entry point
|
| 205 |
+
├── automation.py # Pipeline orchestrator
|
| 206 |
+
├── api_clients.py # Gemini, RunwayML, TTS, GCS
|
| 207 |
+
├── video_renderer.py # Video processing engine
|
| 208 |
+
├── asset_selector.py # AI video selection
|
| 209 |
+
├── utils.py # Logging & utilities
|
| 210 |
+
├── requirements.txt # Python dependencies
|
| 211 |
+
└── config/
|
| 212 |
+
├── api_keys.yaml # API configurations
|
| 213 |
+
└── content_strategies.yaml
|
| 214 |
+
```
|
| 215 |
|
| 216 |
+
### Key Dependencies
|
| 217 |
+
- `moviepy` - Video editing and composition
|
| 218 |
+
- `google-generativeai` - Gemini API client
|
| 219 |
+
- `google-cloud-texttospeech` - TTS service
|
| 220 |
+
- `google-cloud-storage` - Cloud storage
|
| 221 |
+
- `aiohttp` - Async HTTP requests
|
| 222 |
+
- `pandas` - Data processing
|
| 223 |
|
| 224 |
+
### API Requirements
|
| 225 |
+
- **Gemini**: Free tier available
|
| 226 |
+
- **RunwayML**: $10 minimum deposit
|
| 227 |
+
- **Google Cloud**: $300 free credits for new accounts
|
| 228 |
+
- **Storage**: 5GB free tier
|
| 229 |
|
| 230 |
---
|
| 231 |
|
| 232 |
+
## 🐛 Troubleshooting
|
| 233 |
|
| 234 |
+
### Common Issues
|
| 235 |
|
| 236 |
+
**"API key not found"**
|
| 237 |
+
- Check `.env` file exists and has correct keys
|
| 238 |
+
- Restart your terminal after adding keys to `.env`
|
|
|
|
|
|
|
| 239 |
|
| 240 |
+
**"Insufficient RunwayML credits"**
|
| 241 |
+
- Add credits at https://dev.runwayml.com/
|
| 242 |
+
- Minimum $10 required
|
| 243 |
|
| 244 |
+
**"Google Cloud permission denied"**
|
| 245 |
+
- Verify service account JSON path in `.env`
|
| 246 |
+
- Check service account has "Storage Admin" role
|
| 247 |
|
| 248 |
+
**"Module not found"**
|
| 249 |
+
```bash
|
| 250 |
+
pip install -r requirements.txt
|
| 251 |
```
|
| 252 |
+
|
| 253 |
+
**Videos taking too long**
|
| 254 |
+
- RunwayML generation takes 30-60 seconds
|
| 255 |
+
- Use `--test` for quick verification
|
| 256 |
+
|
| 257 |
+
### Performance Tips
|
| 258 |
+
- Keep scripts under 200 characters for optimal TTS
|
| 259 |
+
- Use specific, visual prompts for better AI videos
|
| 260 |
+
- Test with `--test` flag before full runs
|
| 261 |
+
- Monitor API usage in respective dashboards
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
---
|
| 264 |
|
| 265 |
+
## 📞 Support
|
| 266 |
|
| 267 |
+
### Debugging
|
| 268 |
+
- Run with `--verbose` for detailed logs
|
| 269 |
+
- Check console output for specific error messages
|
| 270 |
+
- Verify all APIs are enabled in their consoles
|
|
|
|
|
|
|
| 271 |
|
| 272 |
+
### Cost Control
|
| 273 |
+
- Use `--test` frequently during development
|
| 274 |
+
- Set billing alerts in Google Cloud & RunwayML
|
| 275 |
+
- Monitor usage in API dashboards
|
| 276 |
|
| 277 |
+
### Security
|
| 278 |
+
- ✅ Never commit the `.env` file (it is listed in `.gitignore`)
|
| 279 |
+
- ✅ Use environment variables for all keys
|
| 280 |
+
- ✅ Rotate API keys every 90 days
|
| 281 |
+
- ❌ Never hardcode keys in source files
|
| 282 |
|
| 283 |
+
---
|
| 284 |
|
| 285 |
+
## 🎉 Next Steps
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
+
1. ✅ Complete API setup
|
| 288 |
+
2. ✅ Run `python main.py --health-check`
|
| 289 |
+
3. ✅ Test with `python main.py --test`
|
| 290 |
+
4. ✅ Generate first video with `python main.py`
|
| 291 |
+
5. 🚀 Customize scripts and strategies for your products
|
| 292 |
+
6. 📈 Scale with batch processing for multiple videos
|
| 293 |
|
| 294 |
+
**Need help?** Check the error messages in the console — they are designed to be specific about what went wrong.
|
|
|
|
|
|
|
| 295 |
|
| 296 |
+
---
|
|
|
|
|
|
|
| 297 |
|
| 298 |
+
*Happy video generating! 🎬*
|
config/api_keys.yaml
CHANGED
|
@@ -1,17 +1,24 @@
|
|
| 1 |
-
# API Configuration
|
| 2 |
gemini:
|
| 3 |
base_url: "https://generativelanguage.googleapis.com/v1beta"
|
| 4 |
-
model: "gemini-
|
| 5 |
|
| 6 |
runwayml:
|
| 7 |
base_url: "https://api.runwayml.com/v1"
|
| 8 |
timeout: 300
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
tts:
|
| 11 |
-
provider: "
|
| 12 |
-
voice: "en-US-
|
| 13 |
-
rate: "medium"
|
| 14 |
|
| 15 |
gcs:
|
| 16 |
bucket: "somira-videos"
|
| 17 |
video_prefix: "automated-content/"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
gemini:
|
| 2 |
base_url: "https://generativelanguage.googleapis.com/v1beta"
|
| 3 |
+
model: "gemini-2.0-flash-exp"
|
| 4 |
|
| 5 |
runwayml:
|
| 6 |
base_url: "https://api.runwayml.com/v1"
|
| 7 |
timeout: 300
|
| 8 |
|
| 9 |
+
deepseek:
|
| 10 |
+
base_url: "https://api.deepseek.com/v1"
|
| 11 |
+
model: "deepseek-chat"
|
| 12 |
+
|
| 13 |
tts:
|
| 14 |
+
provider: "google"
|
| 15 |
+
voice: "en-US-Neural2-F"
|
|
|
|
| 16 |
|
| 17 |
gcs:
|
| 18 |
bucket: "somira-videos"
|
| 19 |
video_prefix: "automated-content/"
|
| 20 |
+
|
| 21 |
+
video:
|
| 22 |
+
max_duration: 15
|
| 23 |
+
aspect_ratio: "9:16"
|
| 24 |
+
target_resolution: "1080x1920"
|
requirements.txt
CHANGED
|
@@ -1,17 +1,56 @@
|
|
| 1 |
-
# Core async HTTP
|
| 2 |
-
aiohttp==3.9.5
|
| 3 |
aiofiles==23.2.1
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
google-cloud-storage==2.18.2
|
| 10 |
google-cloud-texttospeech==2.17.2
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
python-dotenv==1.0.1
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
aiofiles==23.2.1
|
| 2 |
+
aiohttp==3.9.5
|
| 3 |
+
aiosignal==1.4.0
|
| 4 |
+
annotated-types==0.7.0
|
| 5 |
+
attrs==25.3.0
|
| 6 |
+
cachetools==5.5.2
|
| 7 |
+
certifi==2025.8.3
|
| 8 |
+
charset-normalizer==3.4.3
|
| 9 |
+
decorator==4.4.2
|
| 10 |
+
frozenlist==1.7.0
|
| 11 |
+
google-ai-generativelanguage==0.6.10
|
| 12 |
+
google-api-core==2.25.1
|
| 13 |
+
google-api-python-client==2.184.0
|
| 14 |
+
google-auth==2.40.3
|
| 15 |
+
google-auth-httplib2==0.2.0
|
| 16 |
+
google-cloud-core==2.4.3
|
| 17 |
google-cloud-storage==2.18.2
|
| 18 |
google-cloud-texttospeech==2.17.2
|
| 19 |
+
google-crc32c==1.7.1
|
| 20 |
+
google-generativeai==0.8.3
|
| 21 |
+
google-resumable-media==2.7.2
|
| 22 |
+
googleapis-common-protos==1.70.0
|
| 23 |
+
grpcio==1.75.1
|
| 24 |
+
grpcio-status==1.71.2
|
| 25 |
+
httplib2==0.31.0
|
| 26 |
+
idna==3.10
|
| 27 |
+
imageio==2.37.0
|
| 28 |
+
imageio-ffmpeg==0.6.0
|
| 29 |
+
moviepy==1.0.3
|
| 30 |
+
multidict==6.6.4
|
| 31 |
+
numpy==1.26.4
|
| 32 |
+
pandas==2.3.3
|
| 33 |
+
pillow==11.3.0
|
| 34 |
+
proglog==0.1.12
|
| 35 |
+
propcache==0.4.0
|
| 36 |
+
proto-plus==1.26.1
|
| 37 |
+
protobuf==5.29.5
|
| 38 |
+
pyasn1==0.6.1
|
| 39 |
+
pyasn1_modules==0.4.2
|
| 40 |
+
pydantic==2.11.10
|
| 41 |
+
pydantic_core==2.33.2
|
| 42 |
+
pyparsing==3.2.5
|
| 43 |
+
python-dateutil==2.9.0.post0
|
| 44 |
python-dotenv==1.0.1
|
| 45 |
+
pytz==2025.2
|
| 46 |
+
PyYAML==6.0.3
|
| 47 |
+
requests==2.32.5
|
| 48 |
+
rsa==4.9.1
|
| 49 |
+
six==1.17.0
|
| 50 |
+
tqdm==4.67.1
|
| 51 |
+
typing-inspection==0.4.2
|
| 52 |
+
typing_extensions==4.15.0
|
| 53 |
+
tzdata==2025.2
|
| 54 |
+
uritemplate==4.2.0
|
| 55 |
+
urllib3==2.5.0
|
| 56 |
+
yarl==1.21.0
|
src/api_clients.py
CHANGED
|
@@ -5,7 +5,7 @@ import aiohttp
|
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
from typing import Dict, List, Optional
|
| 8 |
-
|
| 9 |
from google.cloud import storage, texttospeech
|
| 10 |
import asyncio
|
| 11 |
from utils import logger
|
|
@@ -16,9 +16,8 @@ class APIClients:
|
|
| 16 |
self.config = config
|
| 17 |
|
| 18 |
# Initialize Gemini client
|
| 19 |
-
self.gemini_client = genai
|
| 20 |
-
|
| 21 |
-
)
|
| 22 |
|
| 23 |
# Initialize GCS client
|
| 24 |
self.gcs_client = storage.Client()
|
|
@@ -57,11 +56,9 @@ class APIClients:
|
|
| 57 |
|
| 58 |
Return only the enhanced prompt, nothing else.
|
| 59 |
"""
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
contents=enhancement_instruction
|
| 64 |
-
)
|
| 65 |
|
| 66 |
enhanced_prompt = response.text.strip()
|
| 67 |
logger.info(f"Enhanced prompt: {enhanced_prompt[:100]}...")
|
|
@@ -75,20 +72,14 @@ class APIClients:
|
|
| 75 |
async def generate_video(self, prompt: str, duration: int = 10) -> Dict:
|
| 76 |
"""
|
| 77 |
Generate video using RunwayML Gen-4 API
|
| 78 |
-
|
| 79 |
-
Args:
|
| 80 |
-
prompt: Text prompt for video generation
|
| 81 |
-
duration: Video duration in seconds (5 or 10)
|
| 82 |
-
|
| 83 |
-
Returns:
|
| 84 |
-
Dict with video URL and metadata
|
| 85 |
"""
|
| 86 |
try:
|
| 87 |
logger.info(f"Generating video with RunwayML: {prompt[:100]}...")
|
| 88 |
|
| 89 |
headers = {
|
| 90 |
"Authorization": f"Bearer {self.runway_api_key}",
|
| 91 |
-
"Content-Type": "application/json"
|
|
|
|
| 92 |
}
|
| 93 |
|
| 94 |
payload = {
|
|
@@ -151,20 +142,13 @@ class APIClients:
|
|
| 151 |
|
| 152 |
async def generate_tts(self, text: str, voice_name: Optional[str] = None) -> Dict:
|
| 153 |
"""
|
| 154 |
-
Generate TTS audio using
|
| 155 |
-
|
| 156 |
-
Args:
|
| 157 |
-
text: Text to convert to speech
|
| 158 |
-
voice_name: Azure voice name (default from config)
|
| 159 |
-
|
| 160 |
-
Returns:
|
| 161 |
-
Dict with audio URL, duration, and lip sync data
|
| 162 |
"""
|
| 163 |
try:
|
| 164 |
logger.info(f"Generating TTS for text: {text[:100]}...")
|
| 165 |
|
| 166 |
if not voice_name:
|
| 167 |
-
voice_name = self.config.get('default_voice', 'en-US-
|
| 168 |
|
| 169 |
# Configure the speech synthesis request
|
| 170 |
synthesis_input = texttospeech.SynthesisInput(text=text)
|
|
@@ -184,15 +168,16 @@ class APIClients:
|
|
| 184 |
pitch=0.0
|
| 185 |
)
|
| 186 |
|
| 187 |
-
#
|
| 188 |
response = self.tts_client.synthesize_speech(
|
| 189 |
input=synthesis_input,
|
| 190 |
voice=voice,
|
| 191 |
-
audio_config=audio_config
|
| 192 |
-
enable_time_pointing=[texttospeech.TimePointingType.SSML_MARK]
|
| 193 |
)
|
| 194 |
|
| 195 |
# Save audio to temporary file
|
|
|
|
| 196 |
audio_filename = f"tts_{hash(text)}.mp3"
|
| 197 |
audio_path = f"/tmp/{audio_filename}"
|
| 198 |
|
|
@@ -202,23 +187,111 @@ class APIClients:
|
|
| 202 |
# Upload to GCS
|
| 203 |
audio_url = await self.store_in_gcs(audio_path, 'audio')
|
| 204 |
|
| 205 |
-
#
|
| 206 |
-
lip_sync_data = self._extract_timing_data(response)
|
| 207 |
-
|
| 208 |
logger.info(f"TTS generated successfully: {audio_url}")
|
| 209 |
|
| 210 |
return {
|
| 211 |
'audio_url': audio_url,
|
| 212 |
'duration': len(response.audio_content) / 32000, # Approximate
|
| 213 |
-
'lip_sync_data': lip_sync_data,
|
| 214 |
'voice': voice_name,
|
| 215 |
-
'text': text
|
|
|
|
| 216 |
}
|
| 217 |
|
| 218 |
except Exception as e:
|
| 219 |
logger.error(f"Error generating TTS: {e}")
|
| 220 |
raise
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
async def select_videos(self, tts_script: str, count: int = 3) -> List[Dict]:
|
| 223 |
"""
|
| 224 |
AI agent selects videos based on script using Gemini
|
|
@@ -246,11 +319,8 @@ class APIClients:
|
|
| 246 |
Return as JSON array with format:
|
| 247 |
[{{"keyword": "...", "timing": "0-5", "style": "..."}}, ...]
|
| 248 |
"""
|
| 249 |
-
|
| 250 |
-
response =
|
| 251 |
-
model="gemini-2.0-flash-exp",
|
| 252 |
-
contents=analysis_prompt
|
| 253 |
-
)
|
| 254 |
|
| 255 |
# Parse Gemini response
|
| 256 |
try:
|
|
|
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
from typing import Dict, List, Optional
|
| 8 |
+
import google.generativeai as genai
|
| 9 |
from google.cloud import storage, texttospeech
|
| 10 |
import asyncio
|
| 11 |
from utils import logger
|
|
|
|
| 16 |
self.config = config
|
| 17 |
|
| 18 |
# Initialize Gemini client
|
| 19 |
+
self.gemini_client = genai
|
| 20 |
+
genai.configure(api_key=config.get('gemini_api_key') or os.getenv('GEMINI_API_KEY'))
|
|
|
|
| 21 |
|
| 22 |
# Initialize GCS client
|
| 23 |
self.gcs_client = storage.Client()
|
|
|
|
| 56 |
|
| 57 |
Return only the enhanced prompt, nothing else.
|
| 58 |
"""
|
| 59 |
+
|
| 60 |
+
model = genai.GenerativeModel('gemini-2.0-flash-exp')
|
| 61 |
+
response = model.generate_content(enhancement_instruction)
|
|
|
|
|
|
|
| 62 |
|
| 63 |
enhanced_prompt = response.text.strip()
|
| 64 |
logger.info(f"Enhanced prompt: {enhanced_prompt[:100]}...")
|
|
|
|
| 72 |
async def generate_video(self, prompt: str, duration: int = 10) -> Dict:
|
| 73 |
"""
|
| 74 |
Generate video using RunwayML Gen-4 API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
"""
|
| 76 |
try:
|
| 77 |
logger.info(f"Generating video with RunwayML: {prompt[:100]}...")
|
| 78 |
|
| 79 |
headers = {
|
| 80 |
"Authorization": f"Bearer {self.runway_api_key}",
|
| 81 |
+
"Content-Type": "application/json",
|
| 82 |
+
"X-Runway-Version": "1.0.0" # Add this required header
|
| 83 |
}
|
| 84 |
|
| 85 |
payload = {
|
|
|
|
| 142 |
|
| 143 |
async def generate_tts(self, text: str, voice_name: Optional[str] = None) -> Dict:
|
| 144 |
"""
|
| 145 |
+
Generate TTS audio using Google Cloud TTS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
"""
|
| 147 |
try:
|
| 148 |
logger.info(f"Generating TTS for text: {text[:100]}...")
|
| 149 |
|
| 150 |
if not voice_name:
|
| 151 |
+
voice_name = self.config.get('default_voice', 'en-US-Neural2-F')
|
| 152 |
|
| 153 |
# Configure the speech synthesis request
|
| 154 |
synthesis_input = texttospeech.SynthesisInput(text=text)
|
|
|
|
| 168 |
pitch=0.0
|
| 169 |
)
|
| 170 |
|
| 171 |
+
# Remove TimePointingType as it's not available in this version
|
| 172 |
response = self.tts_client.synthesize_speech(
|
| 173 |
input=synthesis_input,
|
| 174 |
voice=voice,
|
| 175 |
+
audio_config=audio_config
|
| 176 |
+
# Remove: enable_time_pointing=[texttospeech.TimePointingType.SSML_MARK]
|
| 177 |
)
|
| 178 |
|
| 179 |
# Save audio to temporary file
|
| 180 |
+
import tempfile
|
| 181 |
audio_filename = f"tts_{hash(text)}.mp3"
|
| 182 |
audio_path = f"/tmp/{audio_filename}"
|
| 183 |
|
|
|
|
| 187 |
# Upload to GCS
|
| 188 |
audio_url = await self.store_in_gcs(audio_path, 'audio')
|
| 189 |
|
| 190 |
+
# Remove lip sync data extraction
|
|
|
|
|
|
|
| 191 |
logger.info(f"TTS generated successfully: {audio_url}")
|
| 192 |
|
| 193 |
return {
|
| 194 |
'audio_url': audio_url,
|
| 195 |
'duration': len(response.audio_content) / 32000, # Approximate
|
|
|
|
| 196 |
'voice': voice_name,
|
| 197 |
+
'text': text,
|
| 198 |
+
'local_path': audio_path # Add local path directly
|
| 199 |
}
|
| 200 |
|
| 201 |
except Exception as e:
|
| 202 |
logger.error(f"Error generating TTS: {e}")
|
| 203 |
raise
|
| 204 |
+
|
| 205 |
+
async def download_file(self, url: str, filename: str) -> str:
    """Download ``url`` into the system temp directory and return its local path.

    The response body is streamed to a sibling ``<filename>.part`` file and only
    renamed to its final name once the download finished, so a failed or
    interrupted transfer never leaves a truncated file at the returned path.

    Args:
        url: URL to fetch with an HTTP GET.
        filename: File name to use inside ``tempfile.gettempdir()``.

    Returns:
        The path of the downloaded file, as a string.

    Raises:
        Exception: If the server answers with a status other than 200, or if
            any network / file-system error occurs. The error is logged and
            re-raised.
    """
    import aiohttp
    import tempfile
    from pathlib import Path

    local_path = Path(tempfile.gettempdir()) / filename
    # Write to a temporary sibling first; rename on success only.
    partial_path = local_path.with_name(local_path.name + ".part")

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    raise Exception(f"Download failed: {response.status}")

                # Stream in chunks instead of buffering the whole body in memory
                # (video files can be large).
                with open(partial_path, 'wb') as f:
                    async for chunk in response.content.iter_chunked(64 * 1024):
                        f.write(chunk)

        partial_path.replace(local_path)
        logger.info(f"✓ Downloaded {filename} from {url}")
        return str(local_path)
    except Exception as e:
        # Best-effort cleanup of the incomplete download.
        try:
            partial_path.unlink()
        except OSError:
            pass
        logger.error(f"Failed to download {url}: {e}")
        raise
|
| 226 |
+
|
| 227 |
+
async def health_check(self) -> Dict[str, bool]:
    """Check the health of the external services this client depends on.

    Gemini is probed with a real (tiny) prompt-enhancement call and Google
    Cloud Storage with a bucket existence lookup. RunwayML and TTS are only
    checked for configuration; no request is sent to them. DeepSeek
    configuration is logged for information but is not part of the result.

    Returns:
        Dict with the keys ``'gemini'``, ``'runwayml'``, ``'tts'`` and
        ``'gcs'``, each mapped to ``True`` when the check passed.
    """
    logger.info("🏥 Running health check...")

    health = {
        'gemini': False,
        'runwayml': False,
        'tts': False,
        'gcs': False
    }

    # Gemini: run a simple prompt through the real enhancement path.
    try:
        enhanced = await self.enhance_prompt("Hello")
        if enhanced:
            health['gemini'] = True
            logger.info(" ✅ Gemini API: Connected")
        else:
            logger.error(" ❌ Gemini API: No response")
    except Exception as e:
        logger.error(f" ❌ Gemini API: {e}")

    # GCS: Bucket.exists() reports a missing bucket by returning False rather
    # than raising, so its result must be inspected.
    try:
        if self.gcs_bucket.exists():
            health['gcs'] = True
            logger.info(" ✅ Google Cloud Storage: Connected")
        else:
            logger.error(" ❌ Google Cloud Storage: Bucket not found")
    except Exception as e:
        logger.error(f" ❌ Google Cloud Storage: {e}")

    # Check if API keys are configured (without making actual API calls)
    if self.runway_api_key and len(self.runway_api_key) > 10:
        health['runwayml'] = True
        logger.info(" ✅ RunwayML API: Configured")
    else:
        logger.error(" ❌ RunwayML API: Not configured or invalid key")

    if self.tts_client:
        health['tts'] = True
        logger.info(" ✅ TTS API: Configured")
    else:
        logger.error(" ❌ TTS API: Not configured")

    # DeepSeek is optional: report it, but do not let it affect the result.
    deepseek_key = self.config.get('deepseek_api_key')
    if deepseek_key and len(deepseek_key) > 10:
        logger.info(" ✅ DeepSeek API: Configured")
    else:
        logger.warning(" ⚠️ DeepSeek API: Not configured")

    all_healthy = all(health.values())
    status = "✅ All systems operational!" if all_healthy else "⚠️ Some services have issues"
    logger.info(f"\n{status}")

    return health
|
| 294 |
+
|
| 295 |
async def select_videos(self, tts_script: str, count: int = 3) -> List[Dict]:
|
| 296 |
"""
|
| 297 |
AI agent selects videos based on script using Gemini
|
|
|
|
| 319 |
Return as JSON array with format:
|
| 320 |
[{{"keyword": "...", "timing": "0-5", "style": "..."}}, ...]
|
| 321 |
"""
|
| 322 |
+
model = genai.GenerativeModel('gemini-2.0-flash-exp')
|
| 323 |
+
response = model.generate_content(analysis_prompt)
|
|
|
|
|
|
|
|
|
|
| 324 |
|
| 325 |
# Parse Gemini response
|
| 326 |
try:
|
src/asset_selector.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AI-powered asset selection using DeepSeek for contextual video matching
|
| 3 |
+
"""
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import aiohttp
|
| 6 |
+
import json
|
| 7 |
+
from typing import List, Dict, Optional
|
| 8 |
+
from utils import logger
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class AssetSelector:
    """Choose library video clips and background music for a TTS script.

    Video selection first asks the DeepSeek chat API to pick clips from the
    in-memory video library; if that yields nothing (no key, API error,
    unparsable reply, no valid picks) a fixed fallback list is used instead.
    """

    def __init__(self, config: Dict):
        """Store ``config`` and load the video and audio libraries."""
        self.config = config
        self.video_library = self._load_video_library()
        self.audio_library = self._load_audio_library()

    def _load_video_library(self) -> pd.DataFrame:
        """Build the video library as a DataFrame.

        Columns: ``url``, ``duration`` (seconds), ``alignment`` (comma-separated
        topics the clip fits), ``energy`` and ``description``. Returns an empty
        DataFrame if construction fails.
        """
        try:
            video_data = [
                {
                    'url': 'https://storage.googleapis.com/somira/Somira%20Massager.mp4',
                    'duration': 2,
                    'alignment': 'product mention, solution, features',
                    'energy': 5,
                    'description': 'Product showcase'
                },
                {
                    'url': 'https://storage.googleapis.com/somira/FemaleWomenPuttingOnNeckMassagerr.mp4',
                    'duration': 2,
                    'alignment': 'using the product, turning on, operation',
                    'energy': 35,
                    'description': 'Product usage demonstration'
                },
                {
                    'url': 'https://storage.googleapis.com/somira/PersonEnjoyingTheNeckMassager.mp4',
                    'duration': 1.5,
                    'alignment': 'comfort, relaxation, satisfaction',
                    'energy': 40,
                    'description': 'User satisfaction'
                },
                # Add more videos as needed for testing
            ]

            return pd.DataFrame(video_data)

        except Exception as e:
            logger.error(f"Failed to load video library: {e}")
            return pd.DataFrame()

    def _load_audio_library(self) -> List[str]:
        """Return the background-music URLs ``1.mp3`` .. ``26.mp3``."""
        # NOTE(review): this yields 26 tracks; confirm against the configured
        # audio library size, which may expect one more.
        return [f"https://storage.googleapis.com/somira/{i}.mp3" for i in range(1, 27)]

    async def select_videos(self, tts_script: str, max_duration: int = 10) -> List[Dict]:
        """Select videos for ``tts_script``, preferring the AI-based choice.

        Args:
            tts_script: The script to analyze.
            max_duration: Maximum total duration (seconds) for the selection.

        Returns:
            List of video metadata dicts with the keys ``url``, ``duration``,
            ``reason``, ``alignment`` and ``energy``. Falls back to
            ``_fallback_selection`` when the AI path returns nothing or raises.
        """
        try:
            logger.info(f"🤖 AI video selection for script: {tts_script[:100]}...")

            # Use DeepSeek for intelligent selection
            selected_videos = await self._analyze_with_deepseek(tts_script, max_duration)

            if not selected_videos:
                logger.warning("⚠️ AI selection failed, using fallback")
                selected_videos = self._fallback_selection(tts_script, max_duration)

            total_duration = sum(v['duration'] for v in selected_videos)
            logger.info(f"✓ Selected {len(selected_videos)} videos, total: {total_duration}s")

            return selected_videos

        except Exception as e:
            logger.error(f"❌ Video selection failed: {e}")
            return self._fallback_selection(tts_script, max_duration)

    @staticmethod
    def _extract_json(text: str) -> Dict:
        """Parse the JSON object embedded in a model reply.

        Chat models frequently wrap JSON in Markdown code fences or add a short
        preamble, so only the span from the first ``{`` to the last ``}`` is
        parsed.

        Raises:
            ValueError: If no JSON object can be found or parsed
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        start = text.find('{')
        end = text.rfind('}')
        if start == -1 or end < start:
            raise ValueError("No JSON object found in model response")
        return json.loads(text[start:end + 1])

    @staticmethod
    def _row_to_video(row, reason: str) -> Dict:
        """Convert a library row to a metadata dict with plain Python numbers."""
        return {
            'url': row['url'],
            # Rows taken from the DataFrame carry numpy scalars; convert them
            # so the result can be JSON-serialised by callers.
            'duration': float(row['duration']),
            'reason': reason,
            'alignment': row['alignment'],
            'energy': int(row['energy'])
        }

    def _map_selection(self, selection: Dict, max_duration: int) -> List[Dict]:
        """Translate the model's picks into library metadata.

        Entries are skipped when their ``index`` is not an integer inside the
        library (negative values would otherwise silently select from the end)
        or when adding the clip would push the total past ``max_duration``.
        """
        chosen = []
        total = 0.0
        library_size = len(self.video_library)

        for item in selection.get('selected_videos', []):
            if not isinstance(item, dict):
                continue
            index = item.get('index')
            if isinstance(index, bool) or not isinstance(index, int):
                continue
            if not 0 <= index < library_size:
                continue

            row = self.video_library.iloc[index]
            duration = float(row['duration'])
            if total + duration > max_duration:
                continue

            chosen.append(self._row_to_video(row, item.get('reason', '')))
            total += duration

        return chosen

    async def _analyze_with_deepseek(self, tts_script: str, max_duration: int) -> List[Dict]:
        """Ask the DeepSeek chat API which library videos fit the script.

        Returns:
            The selected video metadata, or an empty list when no API key is
            configured, the API answers with a non-200 status, or any error
            occurs (errors are logged, never raised).
        """
        try:
            api_key = self.config.get('deepseek_api_key')
            if not api_key:
                # Avoid sending a request authenticated as "Bearer None".
                logger.warning("DeepSeek API key not configured, skipping AI selection")
                return []

            # Prepare video library context; the numbers shown to the model are
            # positional so they can be fed straight back into ``iloc``.
            video_context = "\n".join(
                f"{pos}. {row['description']} - {row['duration']}s - Alignment: {row['alignment']}"
                for pos, (_, row) in enumerate(self.video_library.iterrows())
            )

            prompt = f"""
            TTS Script: "{tts_script}"

            Available Videos:
            {video_context}

            Select 3-4 videos that best match the script content. Consider:
            - Video alignment descriptions
            - Logical flow (problem -> solution -> result)
            - Total duration under {max_duration} seconds
            - Energy level appropriateness

            Return JSON format:
            {{
                "selected_videos": [
                    {{
                        "index": 0,
                        "reason": "Matches product mention in script",
                        "start_time": 0
                    }}
                ],
                "total_duration": 8,
                "rationale": "Overall selection strategy"
            }}
            """

            # DeepSeek API call
            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": "deepseek-chat",
                "messages": [
                    {"role": "system", "content": "You are a video editor AI that selects the most relevant videos for advertising content."},
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.3,
                "max_tokens": 2000
            }

            async with aiohttp.ClientSession() as session:
                async with session.post(
                    "https://api.deepseek.com/v1/chat/completions",
                    headers=headers,
                    json=payload
                ) as response:
                    if response.status != 200:
                        logger.error(f"DeepSeek API error: {response.status}")
                        return []

                    result = await response.json()

            selection = self._extract_json(result['choices'][0]['message']['content'])

            # Map to actual video data
            return self._map_selection(selection, max_duration)

        except Exception as e:
            logger.error(f"DeepSeek analysis failed: {e}")
            return []

    def _fallback_selection(self, tts_script: str, max_duration: int) -> List[Dict]:
        """Return the fixed fallback clips that fit within ``max_duration``.

        ``tts_script`` is accepted for interface symmetry with the AI path but
        is not used: the fallback always walks the same three clips in order
        and keeps each one that still fits the remaining duration budget.
        """
        fallback_videos = [
            {
                'url': 'https://storage.googleapis.com/somira/Somira%20Massager.mp4',
                'duration': 2,
                'reason': 'Product showcase',
                'alignment': 'product',
                'energy': 5
            },
            {
                'url': 'https://storage.googleapis.com/somira/FemaleWomenPuttingOnNeckMassagerr.mp4',
                'duration': 2,
                'reason': 'Usage demonstration',
                'alignment': 'usage',
                'energy': 35
            },
            {
                'url': 'https://storage.googleapis.com/somira/PersonEnjoyingTheNeckMassager.mp4',
                'duration': 1.5,
                'reason': 'User satisfaction',
                'alignment': 'satisfaction',
                'energy': 40
            }
        ]

        selected = []
        total_duration = 0

        for video in fallback_videos:
            if total_duration + video['duration'] <= max_duration:
                selected.append(video)
                total_duration += video['duration']

        return selected[:3]  # Max 3 videos

    def _find_video_for_category(self, category: str) -> Optional[Dict]:
        """Return the first library video whose alignment mentions ``category``.

        The match is a substring test against the lower-cased alignment text.
        Returns ``None`` when no video matches.
        """
        for _, row in self.video_library.iterrows():
            if category in str(row['alignment']).lower():
                return self._row_to_video(row, f"Matches {category} category")
        return None

    def select_background_music(self) -> str:
        """Pick a background music URL at random from the audio library."""
        import random
        selected = random.choice(self.audio_library)
        logger.info(f"🎵 Selected background music: {selected}")
        return selected
|
src/automation.py
CHANGED
|
@@ -1,12 +1,15 @@
|
|
| 1 |
"""
|
| 2 |
-
Main automation orchestrator with
|
| 3 |
"""
|
| 4 |
import asyncio
|
| 5 |
import os
|
| 6 |
import time
|
| 7 |
from typing import Dict, List, Optional, Any
|
|
|
|
|
|
|
| 8 |
from api_clients import APIClients
|
| 9 |
from video_renderer import VideoRenderer
|
|
|
|
| 10 |
from utils import logger
|
| 11 |
|
| 12 |
|
|
@@ -15,393 +18,391 @@ class ContentAutomation:
|
|
| 15 |
self.config = config
|
| 16 |
self.api_clients = APIClients(config)
|
| 17 |
self.video_renderer = VideoRenderer(config)
|
| 18 |
-
self.
|
| 19 |
self.pipeline_start_time = None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
self.pipeline_start_time = time.time()
|
| 39 |
-
logger.info("
|
| 40 |
-
logger.info("🚀 Starting Content Automation Pipeline")
|
| 41 |
-
logger.info("=" * 60)
|
| 42 |
|
| 43 |
try:
|
| 44 |
-
# Step 1: Generate all assets
|
| 45 |
-
logger.info("\n📦 STEP 1:
|
| 46 |
-
assets = await self.
|
| 47 |
-
self._log_step_completion(1, assets)
|
| 48 |
-
|
| 49 |
-
# Validate critical assets
|
| 50 |
-
if not self._validate_assets(assets):
|
| 51 |
-
raise Exception("Critical assets failed to generate")
|
| 52 |
-
|
| 53 |
-
# Step 2: Merge videos and audio
|
| 54 |
-
logger.info("\n🎬 STEP 2: Rendering Video")
|
| 55 |
-
rendered_video = await self.video_renderer.render_video(
|
| 56 |
-
assets,
|
| 57 |
-
video_config or {}
|
| 58 |
-
)
|
| 59 |
-
self._log_step_completion(2, {'rendered_video': rendered_video})
|
| 60 |
-
|
| 61 |
-
# Step 3: Add subtitles
|
| 62 |
-
logger.info("\n📝 STEP 3: Adding Subtitles")
|
| 63 |
-
subtitled_video = await self.video_renderer.add_subtitles(
|
| 64 |
-
rendered_video,
|
| 65 |
-
tts_script,
|
| 66 |
-
assets.get('tts_audio', {})
|
| 67 |
-
)
|
| 68 |
-
self._log_step_completion(3, {'subtitled_video': subtitled_video})
|
| 69 |
|
| 70 |
-
#
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
)
|
| 76 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
elapsed_time = time.time() - self.pipeline_start_time
|
| 80 |
-
logger.info("\n
|
| 81 |
-
logger.info(f"✅ Pipeline Completed Successfully in {elapsed_time:.2f}s")
|
| 82 |
-
logger.info(f"📹 Final Video: {final_url}")
|
| 83 |
-
logger.info("=" * 60)
|
| 84 |
|
| 85 |
return {
|
| 86 |
'success': True,
|
| 87 |
'final_url': final_url,
|
| 88 |
-
'local_path':
|
| 89 |
-
'assets': assets,
|
| 90 |
'duration': elapsed_time,
|
| 91 |
-
'
|
| 92 |
-
'
|
| 93 |
-
'
|
| 94 |
-
'
|
| 95 |
}
|
| 96 |
}
|
| 97 |
|
| 98 |
except Exception as e:
|
| 99 |
elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
|
| 100 |
-
logger.error(f"\n❌ Pipeline
|
| 101 |
|
| 102 |
return {
|
| 103 |
'success': False,
|
| 104 |
'error': str(e),
|
| 105 |
-
'duration': elapsed_time
|
| 106 |
-
'partial_assets': locals().get('assets', {})
|
| 107 |
}
|
| 108 |
-
|
| 109 |
-
async def
|
| 110 |
-
|
| 111 |
-
content_strategy: Dict[str, str],
|
| 112 |
-
tts_script: str
|
| 113 |
-
) -> Dict[str, Any]:
|
| 114 |
-
"""
|
| 115 |
-
Execute all step 1 processes simultaneously for maximum efficiency
|
| 116 |
-
|
| 117 |
-
Args:
|
| 118 |
-
content_strategy: Content generation strategy
|
| 119 |
-
tts_script: Text for TTS generation
|
| 120 |
-
|
| 121 |
-
Returns:
|
| 122 |
-
Dict containing all generated assets
|
| 123 |
-
"""
|
| 124 |
-
logger.info("⚡ Launching parallel tasks...")
|
| 125 |
-
|
| 126 |
-
# Create all tasks
|
| 127 |
tasks = {
|
| 128 |
-
'hook_video': self.
|
| 129 |
-
'
|
| 130 |
-
'
|
| 131 |
-
'tts_audio': self.generate_tts_audio(tts_script)
|
| 132 |
}
|
| 133 |
|
| 134 |
-
# Execute all tasks concurrently
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
)
|
| 140 |
-
execution_time = time.time() - start_time
|
| 141 |
-
|
| 142 |
-
# Map results back to task names
|
| 143 |
-
assets = {}
|
| 144 |
-
for (task_name, _), result in zip(tasks.items(), results):
|
| 145 |
-
if isinstance(result, Exception):
|
| 146 |
-
logger.error(f"❌ {task_name} failed: {result}")
|
| 147 |
-
assets[task_name] = None
|
| 148 |
-
else:
|
| 149 |
logger.info(f"✓ {task_name} completed")
|
| 150 |
-
|
|
|
|
|
|
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
async def generate_hook_video(self, strategy: Dict[str, str]) -> Optional[Dict]:
|
| 156 |
-
"""
|
| 157 |
-
Generate hook video using AI APIs with prompt enhancement
|
| 158 |
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
Dict with video URL and metadata, or None if failed
|
| 164 |
-
"""
|
| 165 |
try:
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
-
#
|
| 169 |
-
|
| 170 |
-
if not base_prompt:
|
| 171 |
-
raise ValueError("No prompt found in strategy")
|
| 172 |
|
| 173 |
-
#
|
| 174 |
-
logger.info(" → Enhancing prompt with Gemini AI...")
|
| 175 |
-
enhanced_prompt = await self.api_clients.enhance_prompt(base_prompt)
|
| 176 |
-
|
| 177 |
-
# Generate video with RunwayML
|
| 178 |
-
logger.info(" → Generating video with RunwayML Gen-4...")
|
| 179 |
video_data = await self.api_clients.generate_video(
|
| 180 |
enhanced_prompt,
|
| 181 |
-
duration=
|
| 182 |
)
|
| 183 |
|
| 184 |
-
logger.info(f" ✓ Hook video generated: {video_data.get('task_id', 'N/A')}")
|
| 185 |
return video_data
|
| 186 |
|
| 187 |
except Exception as e:
|
| 188 |
-
logger.error(f"
|
| 189 |
return None
|
| 190 |
-
|
| 191 |
-
async def
|
| 192 |
-
"""
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
Returns:
|
| 196 |
-
URL to background music file
|
| 197 |
-
"""
|
| 198 |
-
try:
|
| 199 |
-
logger.info("🎵 Selecting background music...")
|
| 200 |
-
|
| 201 |
-
# Linear selection with rotation
|
| 202 |
-
audio_index = self.current_audio_index
|
| 203 |
-
self.current_audio_index = (self.current_audio_index + 1) % self.config['audio_library_size']
|
| 204 |
-
|
| 205 |
-
# Construct GCS URL
|
| 206 |
-
bucket_name = self.config.get('gcs_bucket_name', 'somira-videos')
|
| 207 |
-
audio_url = f"gs://{bucket_name}/audio-library/audio{audio_index + 1}.mp3"
|
| 208 |
-
|
| 209 |
-
logger.info(f" ✓ Selected audio #{audio_index + 1}: {audio_url}")
|
| 210 |
-
return audio_url
|
| 211 |
-
|
| 212 |
-
except Exception as e:
|
| 213 |
-
logger.error(f" ✗ Music selection failed: {e}")
|
| 214 |
-
# Return default/fallback audio
|
| 215 |
-
return f"gs://{self.config.get('gcs_bucket_name')}/audio-library/default.mp3"
|
| 216 |
-
|
| 217 |
-
async def select_videos_from_library(self, tts_script: str) -> List[Dict]:
|
| 218 |
-
"""
|
| 219 |
-
AI agent selects 3 videos based on TTS script content
|
| 220 |
-
|
| 221 |
-
Args:
|
| 222 |
-
tts_script: The voice-over script to analyze
|
| 223 |
-
|
| 224 |
-
Returns:
|
| 225 |
-
List of selected video metadata dicts
|
| 226 |
-
"""
|
| 227 |
-
try:
|
| 228 |
-
logger.info("🎬 Selecting videos from library...")
|
| 229 |
-
logger.info(f" → Analyzing script: {tts_script[:80]}...")
|
| 230 |
-
|
| 231 |
-
# Use AI to select contextually relevant videos
|
| 232 |
-
selected_videos = await self.api_clients.select_videos(tts_script, count=3)
|
| 233 |
-
|
| 234 |
-
if not selected_videos:
|
| 235 |
-
logger.warning(" ⚠ No videos selected, using fallback")
|
| 236 |
-
return self._get_fallback_videos()
|
| 237 |
-
|
| 238 |
-
logger.info(f" ✓ Selected {len(selected_videos)} videos:")
|
| 239 |
-
for i, video in enumerate(selected_videos, 1):
|
| 240 |
-
logger.info(f" {i}. {video.get('keyword', 'N/A')} - {video.get('reason', 'N/A')}")
|
| 241 |
-
|
| 242 |
-
return selected_videos
|
| 243 |
-
|
| 244 |
-
except Exception as e:
|
| 245 |
-
logger.error(f" ✗ Video selection failed: {e}")
|
| 246 |
-
return self._get_fallback_videos()
|
| 247 |
-
|
| 248 |
-
async def generate_tts_audio(self, tts_script: str) -> Optional[Dict]:
|
| 249 |
-
"""
|
| 250 |
-
Generate TTS audio with timing data for lip-sync and subtitles
|
| 251 |
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
logger.info(f" → Script length: {len(tts_script)} characters")
|
| 261 |
-
|
| 262 |
-
# Get voice from config
|
| 263 |
-
voice_name = self.config.get('default_voice', 'en-US-AriaNeural')
|
| 264 |
-
|
| 265 |
-
# Generate TTS with timing data
|
| 266 |
-
tts_result = await self.api_clients.generate_tts(
|
| 267 |
-
tts_script,
|
| 268 |
-
voice_name=voice_name
|
| 269 |
)
|
| 270 |
-
|
| 271 |
-
if tts_result:
|
| 272 |
-
duration = tts_result.get('duration', 0)
|
| 273 |
-
logger.info(f" ✓ TTS generated: {duration:.2f}s duration")
|
| 274 |
-
logger.info(f" ✓ Audio URL: {tts_result.get('audio_url', 'N/A')}")
|
| 275 |
-
|
| 276 |
-
return tts_result
|
| 277 |
-
|
| 278 |
-
except Exception as e:
|
| 279 |
-
logger.error(f" ✗ TTS generation failed: {e}")
|
| 280 |
-
return None
|
| 281 |
-
|
| 282 |
-
def _validate_assets(self, assets: Dict[str, Any]) -> bool:
|
| 283 |
-
"""
|
| 284 |
-
Validate that critical assets were generated successfully
|
| 285 |
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
for asset_name in critical_assets:
|
| 297 |
-
if not assets.get(asset_name):
|
| 298 |
-
logger.error(f"❌ Critical asset missing: {asset_name}")
|
| 299 |
-
return False
|
| 300 |
|
| 301 |
-
#
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
Get fallback videos if AI selection fails
|
| 312 |
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
'style': 'general',
|
| 324 |
-
'reason': 'Fallback selection'
|
| 325 |
-
},
|
| 326 |
-
{
|
| 327 |
-
'id': 15,
|
| 328 |
-
'url': f"gs://{bucket_name}/library/video15.mp4",
|
| 329 |
-
'keyword': 'lifestyle',
|
| 330 |
-
'timing': '5-10',
|
| 331 |
-
'style': 'general',
|
| 332 |
-
'reason': 'Fallback selection'
|
| 333 |
-
},
|
| 334 |
-
{
|
| 335 |
-
'id': 30,
|
| 336 |
-
'url': f"gs://{bucket_name}/library/video30.mp4",
|
| 337 |
-
'keyword': 'usage',
|
| 338 |
-
'timing': '10-15',
|
| 339 |
-
'style': 'general',
|
| 340 |
-
'reason': 'Fallback selection'
|
| 341 |
-
}
|
| 342 |
-
]
|
| 343 |
-
|
| 344 |
-
def _log_step_completion(self, step: int, data: Dict[str, Any]):
|
| 345 |
-
"""Log step completion with summary"""
|
| 346 |
-
step_names = {
|
| 347 |
-
1: "Asset Generation",
|
| 348 |
-
2: "Video Rendering",
|
| 349 |
-
3: "Subtitle Addition",
|
| 350 |
-
4: "Cloud Upload"
|
| 351 |
-
}
|
| 352 |
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
async def health_check(self) -> Dict[str, bool]:
|
| 357 |
-
"""
|
| 358 |
-
|
| 359 |
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
"""
|
| 363 |
-
logger.info("🏥 Running health check...")
|
| 364 |
-
|
| 365 |
-
health = {
|
| 366 |
-
'gemini': False,
|
| 367 |
-
'runwayml': False,
|
| 368 |
-
'tts': False,
|
| 369 |
-
'gcs': False
|
| 370 |
-
}
|
| 371 |
|
|
|
|
| 372 |
try:
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
health['gemini'] = True
|
| 377 |
-
logger.info(" ✓ Gemini API: Connected")
|
| 378 |
except Exception as e:
|
| 379 |
-
|
|
|
|
| 380 |
|
|
|
|
| 381 |
try:
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
health['gcs'] = True
|
| 386 |
-
logger.info(" ✓ Google Cloud Storage: Connected")
|
| 387 |
except Exception as e:
|
| 388 |
-
|
|
|
|
| 389 |
|
| 390 |
-
#
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
logger.error(" ✗ RunwayML API: Not configured")
|
| 397 |
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
else:
|
| 402 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
|
| 404 |
-
|
| 405 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
|
| 407 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Main automation orchestrator with production-ready video pipeline
|
| 3 |
"""
|
| 4 |
import asyncio
|
| 5 |
import os
|
| 6 |
import time
|
| 7 |
from typing import Dict, List, Optional, Any
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
from api_clients import APIClients
|
| 11 |
from video_renderer import VideoRenderer
|
| 12 |
+
from asset_selector import AssetSelector
|
| 13 |
from utils import logger
|
| 14 |
|
| 15 |
|
|
|
|
| 18 |
self.config = config
|
| 19 |
self.api_clients = APIClients(config)
|
| 20 |
self.video_renderer = VideoRenderer(config)
|
| 21 |
+
self.asset_selector = AssetSelector(config)
|
| 22 |
self.pipeline_start_time = None
|
| 23 |
+
|
| 24 |
+
async def simple_demo(self):
    """Render a tiny self-contained demo video without calling external APIs.

    Two solid-colour clips and two sine-wave WAV files are generated in the
    system temp directory and passed to ``self.video_renderer.render_video``
    as the selected videos, TTS audio and background music.

    Returns:
        ``True`` when rendering succeeded, ``False`` otherwise (the error and
        traceback are logged, never raised).
    """
    logger.info("🎬 Starting Simple Demo with Audio Fix...")

    try:
        import tempfile
        import wave

        import numpy as np
        from moviepy.editor import ColorClip

        tmp_dir = tempfile.gettempdir()

        def write_color_clip(path, color, duration=2):
            """Write a 640x480, 24 fps solid-colour clip to ``path``."""
            clip = ColorClip(size=(640, 480), color=color, duration=duration).set_fps(24)
            try:
                clip.write_videofile(path, verbose=False, logger=None)
            finally:
                # Release the clip even when encoding fails.
                clip.close()

        def create_sine_wave(filename, duration=4, freq=440, sample_rate=44100):
            """Write a mono 16-bit PCM sine tone to ``filename``."""
            t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
            audio_data = 0.3 * np.sin(2 * np.pi * freq * t)

            # Convert to 16-bit PCM
            audio_data = (audio_data * 32767).astype(np.int16)

            with wave.open(filename, 'w') as wav_file:
                wav_file.setnchannels(1)  # Mono
                wav_file.setsampwidth(2)  # 16-bit
                wav_file.setframerate(sample_rate)
                wav_file.writeframes(audio_data.tobytes())

        # Create videos
        logger.info("1. Creating video clips...")
        clip1_path = os.path.join(tmp_dir, 'simple_red.mp4')
        clip2_path = os.path.join(tmp_dir, 'simple_green.mp4')
        write_color_clip(clip1_path, (255, 0, 0))
        write_color_clip(clip2_path, (0, 255, 0))
        logger.info(" ✅ Videos created")

        # Create audio files as plain WAV so no encoder is involved
        logger.info("2. Creating proper audio files...")
        tts_audio_path = os.path.join(tmp_dir, 'tts_audio.wav')
        bg_audio_path = os.path.join(tmp_dir, 'bg_audio.wav')
        create_sine_wave(tts_audio_path, duration=4, freq=440)  # A tone
        create_sine_wave(bg_audio_path, duration=4, freq=220)  # Lower tone
        logger.info(" ✅ Audio files created")

        # Test video rendering
        logger.info("3. Testing video rendering...")
        simple_assets = {
            'selected_videos': [
                {
                    'local_path': clip1_path,
                    'duration': 2,
                    'reason': 'Red clip'
                },
                {
                    'local_path': clip2_path,
                    'duration': 2,
                    'reason': 'Green clip'
                }
            ],
            'tts_audio': {
                'local_path': tts_audio_path,
                'duration': 4
            },
            'tts_script': 'Simple demo with proper audio.',
            'background_music_local': bg_audio_path
        }

        output_path = await self.video_renderer.render_video(simple_assets)

        logger.info("\n🎉 DEMO SUCCESSFUL!")
        logger.info(f"📹 Video created: {output_path}")

        return True

    except Exception as e:
        logger.error(f"❌ Demo failed: {e}")
        import traceback
        logger.error(f"📋 Debug: {traceback.format_exc()}")
        return False
|
| 116 |
+
|
| 117 |
+
async def local_test(self):
    """Exercise clip creation and rendering with locally generated media only.

    Writes a one-second red test video and a one-second 440 Hz tone under
    /tmp, then passes both to ``self.video_renderer.render_video``.

    Returns:
        True if every stage completes, False if any stage raises (the
        exception is logged, not propagated).
    """
    logger.info("🧪 Running local functionality test...")

    video_file = '/tmp/test_color.mp4'
    audio_file = '/tmp/test_audio.mp3'

    try:
        # Stage 1: synthesize and encode a solid-colour clip
        logger.info("1. Testing video clip creation...")
        from moviepy.editor import ColorClip
        red_square = ColorClip(size=(100, 100), color=(255, 0, 0), duration=1)
        red_square = red_square.set_fps(24)  # give the clip a frame rate before encoding
        red_square.write_videofile(video_file, verbose=False, logger=None)
        red_square.close()
        logger.info(" ✅ Video clip creation: OK")

        # Stage 2: synthesize and encode a sine tone
        logger.info("2. Testing audio clip creation...")
        from moviepy.editor import AudioClip
        import numpy as np

        def tone_at(t):
            # 440 Hz sine at 10% amplitude
            return 0.1 * np.sin(440 * 2 * np.pi * t)

        tone = AudioClip(tone_at, duration=1)
        tone.write_audiofile(audio_file, verbose=False, logger=None)
        tone.close()
        logger.info(" ✅ Audio clip creation: OK")

        # Stage 3: run the renderer on the files produced above; the same
        # tone doubles as narration and background music
        logger.info("3. Testing video rendering pipeline...")
        render_inputs = {
            'selected_videos': [
                {
                    'local_path': video_file,
                    'duration': 1,
                    'reason': 'Test video',
                },
            ],
            'tts_audio': {
                'local_path': audio_file,
                'duration': 1,
            },
            'tts_script': 'Test script.',
            'background_music_local': audio_file,
        }

        output_path = await self.video_renderer.render_video(render_inputs)
        logger.info(f" ✅ Video rendering: OK - {output_path}")

        logger.info("\n🎉 Local functionality test passed!")
        return True

    except Exception as exc:
        logger.error(f"❌ Local test failed: {exc}")
        return False
|
| 171 |
+
|
| 172 |
+
async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
    """
    Execute complete production video pipeline with better error handling

    Runs four steps in order: asset generation, download of remote assets,
    rendering, and upload of the rendered file.

    Args:
        content_strategy: Prompt/strategy dictionary forwarded to asset generation
        tts_script: Narration text forwarded to asset generation

    Returns:
        On success: ``success=True`` plus ``final_url``, ``local_path``,
        ``duration`` (elapsed seconds) and ``assets_metadata``.
        On failure: ``success=False`` plus ``error`` and ``duration``.
        Exceptions are logged and reported in the result, never raised.
    """
    self.pipeline_start_time = time.time()
    logger.info("🚀 Starting Production Video Pipeline")

    try:
        # Step 1: Generate all assets in parallel
        logger.info("\n📦 STEP 1: Parallel Asset Generation")
        assets = await self._generate_assets_parallel(content_strategy, tts_script)

        # Check if we have minimum required assets
        if not assets.get('selected_videos') or not assets.get('tts_audio'):
            raise ValueError("Missing critical assets: videos or TTS audio")

        # Step 2: Download all remote assets
        logger.info("\n⬇️ STEP 2: Downloading Remote Assets")
        await self._download_assets(assets)

        # Step 3: Render final video
        logger.info("\n🎬 STEP 3: Video Composition & Rendering")
        final_video_path = await self.video_renderer.render_video(assets)

        # Step 4: Upload to cloud storage
        logger.info("\n☁️ STEP 4: Cloud Storage Upload")
        final_url = await self.api_clients.store_in_gcs(final_video_path, 'video')

        # Pipeline completion
        elapsed_time = time.time() - self.pipeline_start_time
        logger.info(f"\n✅ Pipeline completed in {elapsed_time:.2f}s")

        # The hook video is optional and is stored as None when its
        # generation failed, so `.get(key, {})` is not enough: the key
        # exists with a None value. Fall back with `or` instead.
        hook_video = assets.get('hook_video') or {}
        selected_videos = assets.get('selected_videos') or []

        return {
            'success': True,
            'final_url': final_url,
            'local_path': final_video_path,
            'duration': elapsed_time,
            'assets_metadata': {
                'hook_video': hook_video.get('task_id'),
                'selected_videos_count': len(selected_videos),
                'total_duration': sum(v.get('duration', 0) for v in selected_videos)
            }
        }

    except Exception as e:
        elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
        logger.error(f"\n❌ Pipeline failed after {elapsed_time:.2f}s: {e}")

        return {
            'success': False,
            'error': str(e),
            'duration': elapsed_time
        }
|
| 225 |
+
|
| 226 |
+
async def _generate_assets_parallel(self, content_strategy: Dict, tts_script: str) -> Dict:
|
| 227 |
+
"""Generate all assets in parallel for maximum efficiency"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
tasks = {
|
| 229 |
+
'hook_video': self._generate_hook_video(content_strategy),
|
| 230 |
+
'selected_videos': self.asset_selector.select_videos(tts_script),
|
| 231 |
+
'tts_audio': self.api_clients.generate_tts(tts_script),
|
|
|
|
| 232 |
}
|
| 233 |
|
| 234 |
+
# Execute all async tasks concurrently
|
| 235 |
+
results = {}
|
| 236 |
+
for task_name, task in tasks.items():
|
| 237 |
+
try:
|
| 238 |
+
results[task_name] = await task
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
logger.info(f"✓ {task_name} completed")
|
| 240 |
+
except Exception as e:
|
| 241 |
+
logger.error(f"❌ {task_name} failed: {e}")
|
| 242 |
+
results[task_name] = None
|
| 243 |
|
| 244 |
+
# Add synchronous operations
|
| 245 |
+
results['background_music_url'] = self.asset_selector.select_background_music()
|
| 246 |
+
results['tts_script'] = tts_script
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
+
return results
|
| 249 |
+
|
| 250 |
+
async def _generate_hook_video(self, strategy: Dict) -> Optional[Dict]:
|
| 251 |
+
"""Generate hook video using RunwayML"""
|
|
|
|
|
|
|
| 252 |
try:
|
| 253 |
+
prompt = strategy.get('runway_prompt') or strategy.get('gemini_prompt')
|
| 254 |
+
if not prompt:
|
| 255 |
+
logger.warning("No prompt available for hook video")
|
| 256 |
+
return None
|
| 257 |
|
| 258 |
+
# Enhance prompt with Gemini
|
| 259 |
+
enhanced_prompt = await self.api_clients.enhance_prompt(prompt)
|
|
|
|
|
|
|
| 260 |
|
| 261 |
+
# Generate video
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
video_data = await self.api_clients.generate_video(
|
| 263 |
enhanced_prompt,
|
| 264 |
+
duration=5 # 5-second hook video
|
| 265 |
)
|
| 266 |
|
|
|
|
| 267 |
return video_data
|
| 268 |
|
| 269 |
except Exception as e:
|
| 270 |
+
logger.error(f"Hook video generation failed: {e}")
|
| 271 |
return None
|
| 272 |
+
|
| 273 |
+
async def _download_assets(self, assets: Dict):
|
| 274 |
+
"""Download all remote assets to local files"""
|
| 275 |
+
download_tasks = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
+
# Download hook video
|
| 278 |
+
if assets.get('hook_video') and assets['hook_video'].get('video_url'):
|
| 279 |
+
download_tasks.append(
|
| 280 |
+
self._download_to_local(
|
| 281 |
+
assets['hook_video']['video_url'],
|
| 282 |
+
'hook_video.mp4',
|
| 283 |
+
assets['hook_video']
|
| 284 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
+
# Download library videos
|
| 288 |
+
for i, video in enumerate(assets.get('selected_videos', [])):
|
| 289 |
+
if video.get('url'):
|
| 290 |
+
download_tasks.append(
|
| 291 |
+
self._download_to_local(
|
| 292 |
+
video['url'],
|
| 293 |
+
f'library_video_{i}.mp4',
|
| 294 |
+
video
|
| 295 |
+
)
|
| 296 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
+
# Download background music
|
| 299 |
+
if assets.get('background_music_url'):
|
| 300 |
+
download_tasks.append(
|
| 301 |
+
self._download_to_local(
|
| 302 |
+
assets['background_music_url'],
|
| 303 |
+
'background_music.mp3',
|
| 304 |
+
assets,
|
| 305 |
+
'background_music_local'
|
| 306 |
+
)
|
| 307 |
+
)
|
|
|
|
| 308 |
|
| 309 |
+
# Download TTS audio
|
| 310 |
+
if assets.get('tts_audio') and assets['tts_audio'].get('audio_url'):
|
| 311 |
+
download_tasks.append(
|
| 312 |
+
self._download_to_local(
|
| 313 |
+
assets['tts_audio']['audio_url'],
|
| 314 |
+
'tts_audio.mp3',
|
| 315 |
+
assets['tts_audio'],
|
| 316 |
+
'local_path'
|
| 317 |
+
)
|
| 318 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
+
# Execute all downloads concurrently
|
| 321 |
+
if download_tasks:
|
| 322 |
+
await asyncio.gather(*download_tasks, return_exceptions=True)
|
| 323 |
+
|
| 324 |
+
async def _download_to_local(self, url: str, filename: str, target_dict: Dict, key: str = 'local_path'):
|
| 325 |
+
"""Download file from URL and store local path in target dictionary"""
|
| 326 |
+
try:
|
| 327 |
+
local_path = await self.api_clients.download_file(url, filename)
|
| 328 |
+
target_dict[key] = local_path
|
| 329 |
+
logger.info(f"✓ Downloaded {filename} from {url}")
|
| 330 |
+
except Exception as e:
|
| 331 |
+
logger.error(f"❌ Failed to download {filename}: {e}")
|
| 332 |
+
|
| 333 |
async def health_check(self) -> Dict[str, bool]:
    """Comprehensive health check of all components

    Merges the API client health report with two local checks: the asset
    selector's video library is non-empty, and the renderer's temp
    directory exists. A summary is printed to stdout.

    Returns:
        Mapping of component name to health flag.
    """
    logger.info("🏥 Running comprehensive health check...")

    # API clients report their own per-service status
    api_health = await self.api_clients.health_check()

    # Asset selector: healthy when the video library has at least one entry
    try:
        library_ok = len(self.asset_selector.video_library) > 0
        if not library_ok:
            logger.warning(" ⚠️ Asset Selector: Video library is empty")
    except Exception as exc:
        library_ok = False
        logger.error(f" ❌ Asset Selector: {exc}")

    # Video renderer: healthy when its temp directory exists
    try:
        renderer_ok = self.video_renderer.temp_dir.exists()
        if not renderer_ok:
            logger.warning(" ⚠️ Video Renderer: Temp directory issue")
    except Exception as exc:
        renderer_ok = False
        logger.error(f" ❌ Video Renderer: {exc}")

    # Combine all health statuses
    health_status = {
        **api_health,
        'asset_selector': library_ok,
        'video_renderer': renderer_ok
    }

    # Print summary
    up_count = sum(health_status.values())
    service_count = len(health_status)

    print(f"\n📊 Health Summary: {up_count}/{service_count} services operational")

    if up_count == service_count:
        verdict = "🎉 System is fully operational and ready for production!"
    elif up_count >= service_count - 2:
        verdict = "⚠️ System is mostly operational, but some features may be limited"
    else:
        verdict = "❌ System has significant issues that need attention"
    print(verdict)

    return health_status
|
| 379 |
+
|
| 380 |
+
async def basic_test(self):
    """Basic test without external APIs

    Renders a fixed asset set that points at files under /tmp. Those files
    are not created here and must already exist for the render to succeed.

    Returns:
        True if rendering completes, False if it raises (the error is logged).
    """
    logger.info("🧪 Running basic pipeline test...")

    # Use local test assets
    library_entry = {
        'url': 'https://example.com/video1.mp4',
        'duration': 2,
        'reason': 'Test video 1',
        'local_path': '/tmp/test_video1.mp4',  # You'd need to create this
    }
    narration = {
        'local_path': '/tmp/test_audio.mp3',  # You'd need to create this
        'duration': 10,
    }
    test_assets = {
        'selected_videos': [library_entry],
        'tts_audio': narration,
        'background_music_local': '/tmp/test_music.mp3',
        'tts_script': 'Test script for video generation.',
    }

    try:
        rendered_path = await self.video_renderer.render_video(test_assets)
        logger.info(f"✅ Basic test passed: {rendered_path}")
        return True
    except Exception as exc:
        logger.error(f"❌ Basic test failed: {exc}")
        return False
|
src/main.py
CHANGED
|
@@ -159,41 +159,44 @@ async def run_pipeline(
|
|
| 159 |
|
| 160 |
async def health_check_command(automation: ContentAutomation):
|
| 161 |
"""Run health check on all services"""
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
return 1
|
| 170 |
|
| 171 |
|
| 172 |
async def test_command(automation: ContentAutomation):
|
| 173 |
-
"""Run
|
| 174 |
-
logger.info("\n🧪 Running
|
| 175 |
-
|
| 176 |
-
test_strategy = {
|
| 177 |
-
'gemini_prompt': 'A simple product shot of a modern massager device',
|
| 178 |
-
'runway_prompt': 'Static product shot of a sleek white massager on a clean background',
|
| 179 |
-
'style': 'minimal',
|
| 180 |
-
'aspect_ratio': '9:16',
|
| 181 |
-
'duration': 5,
|
| 182 |
-
'brand': 'Test'
|
| 183 |
-
}
|
| 184 |
|
| 185 |
-
|
| 186 |
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
logger.info("\n✅ Test completed successfully!")
|
| 191 |
return 0
|
| 192 |
else:
|
| 193 |
-
logger.error(f"\n❌
|
| 194 |
return 1
|
| 195 |
|
| 196 |
-
|
| 197 |
def parse_arguments():
|
| 198 |
"""Parse command line arguments"""
|
| 199 |
parser = argparse.ArgumentParser(
|
|
|
|
| 159 |
|
| 160 |
async def health_check_command(automation: ContentAutomation):
    """Run health check on all services

    Prints one status line per service reported by
    ``automation.health_check()``.

    Returns:
        0 when every service is healthy; 1 when any service is unhealthy
        or the health check itself raises.
    """
    try:
        health_status = await automation.health_check()

        banner = "=" * 50
        print("\n" + banner)
        print("🏥 SYSTEM HEALTH CHECK RESULTS")
        print(banner)

        for service, status in health_status.items():
            if status:
                icon, label = "✅", 'OPERATIONAL'
            else:
                icon, label = "❌", 'ISSUE DETECTED'
            print(f"{icon} {service.upper():<15} {label}")

        if not all(health_status.values()):
            print("\n⚠️ Some services need attention before running the pipeline.")
            print(" Check the logs above for details.")
            return 1

        print("\n🎉 All systems are ready for production!")
        return 0

    except Exception as exc:
        logger.error(f"Health check failed: {exc}")
        return 1
|
| 184 |
|
| 185 |
|
| 186 |
async def test_command(automation: ContentAutomation):
|
| 187 |
+
"""Run simple demo test"""
|
| 188 |
+
logger.info("\n🧪 Running Simple Demo Test...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
+
success = await automation.simple_demo()
|
| 191 |
|
| 192 |
+
if success:
|
| 193 |
+
logger.info("\n✅ Demo test completed successfully!")
|
| 194 |
+
logger.info("🎉 Your video automation system is working!")
|
|
|
|
| 195 |
return 0
|
| 196 |
else:
|
| 197 |
+
logger.error(f"\n❌ Demo test failed")
|
| 198 |
return 1
|
| 199 |
|
|
|
|
| 200 |
def parse_arguments():
|
| 201 |
"""Parse command line arguments"""
|
| 202 |
parser = argparse.ArgumentParser(
|
src/video_renderer.py
CHANGED
|
@@ -1,62 +1,389 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import os
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
class VideoRenderer:
|
| 8 |
-
def __init__(self, config):
|
| 9 |
self.config = config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Production video rendering engine with proper error handling and resource management
|
| 3 |
"""
|
| 4 |
+
# FIX FOR PIL ANTIALIAS ISSUE - ADD THIS AT THE VERY TOP
|
| 5 |
+
import PIL.Image
|
| 6 |
+
if not hasattr(PIL.Image, 'ANTIALIAS'):
|
| 7 |
+
PIL.Image.ANTIALIAS = PIL.Image.LANCZOS
|
| 8 |
+
|
| 9 |
import os
|
| 10 |
+
import tempfile
|
| 11 |
+
from typing import List, Dict, Optional
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
# Rest of your imports...
|
| 15 |
+
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, concatenate_videoclips, TextClip, CompositeAudioClip
|
| 16 |
+
import numpy as np
|
| 17 |
+
import textwrap
|
| 18 |
+
|
| 19 |
+
from utils import logger, format_duration
|
| 20 |
+
|
| 21 |
|
| 22 |
class VideoRenderer:
|
| 23 |
+
def __init__(self, config: Dict):
    """Store the configuration and create a fresh temporary working directory.

    Args:
        config: Renderer configuration, kept as ``self.config``
    """
    self.config = config
    # mkdtemp creates the directory on disk; it is kept as a Path in temp_dir
    scratch_dir = tempfile.mkdtemp()
    self.temp_dir = Path(scratch_dir)
    logger.info(f"Initialized VideoRenderer with temp dir: {self.temp_dir}")
|
| 27 |
+
|
| 28 |
+
async def render_video(self, assets: Dict, video_config: Optional[Dict] = None) -> str:
    """
    Render final video composition with all assets

    Clip handles opened along the way (video clips, audio clips and the
    composed video) are passed to ``_cleanup_temp_files`` whether rendering
    succeeds or fails.

    Args:
        assets: Dictionary containing all video/audio assets
        video_config: Video configuration (aspect ratio, style, etc.)

    Returns:
        Path to rendered video file

    Raises:
        ValueError: If the assets fail validation
        Exception: Any error from a rendering stage is logged and re-raised
    """
    video_clips = []
    audio_clips = []
    final_video = None

    try:
        logger.info("🎬 Starting video rendering pipeline")

        # Validate inputs
        if not self._validate_assets(assets):
            raise ValueError("Invalid assets provided for video rendering")

        # Load and prepare all assets
        video_clips = await self._prepare_video_clips(assets)
        audio_clips = await self._prepare_audio_clips(assets)

        # Create video sequence
        final_video = await self._create_video_sequence(video_clips, video_config)

        # Add audio
        final_video = await self._add_audio_track(final_video, audio_clips)

        # Add subtitles if script provided
        if assets.get('tts_script'):
            final_video = await self._add_subtitles(final_video, assets['tts_script'])

        # Render final video
        output_path = await self._render_final_video(final_video)

        logger.info(f"✅ Video rendering completed: {output_path}")
        return output_path

    except Exception as e:
        logger.error(f"❌ Video rendering failed: {e}")
        raise

    finally:
        # Release every handle opened so far, on success and on failure.
        # Audio clips are included: they were previously never closed.
        opened = [*video_clips, *audio_clips]
        if final_video is not None:
            opened.append(final_video)
        self._cleanup_temp_files(opened)
|
| 72 |
+
|
| 73 |
+
async def _prepare_video_clips(self, assets: Dict) -> List[VideoFileClip]:
|
| 74 |
+
"""Load and prepare all video clips"""
|
| 75 |
+
clips = []
|
| 76 |
+
|
| 77 |
+
try:
|
| 78 |
+
# Load RunwayML hook video
|
| 79 |
+
if assets.get('hook_video'):
|
| 80 |
+
hook_clip = VideoFileClip(assets['hook_video']['local_path'])
|
| 81 |
+
hook_clip = hook_clip.without_audio()
|
| 82 |
+
clips.append(('hook', hook_clip))
|
| 83 |
+
logger.info(f"✓ Loaded hook video: {hook_clip.duration:.2f}s")
|
| 84 |
+
|
| 85 |
+
# Load library videos
|
| 86 |
+
for i, lib_video in enumerate(assets.get('selected_videos', [])):
|
| 87 |
+
if lib_video.get('local_path'):
|
| 88 |
+
lib_clip = VideoFileClip(lib_video['local_path'])
|
| 89 |
+
lib_clip = lib_clip.without_audio()
|
| 90 |
+
clips.append((f'library_{i}', lib_clip))
|
| 91 |
+
logger.info(f"✓ Loaded library video {i}: {lib_clip.duration:.2f}s")
|
| 92 |
+
|
| 93 |
+
return [clip for _, clip in clips]
|
| 94 |
+
|
| 95 |
+
except Exception as e:
|
| 96 |
+
logger.error(f"❌ Failed to prepare video clips: {e}")
|
| 97 |
+
# Cleanup on error
|
| 98 |
+
for name, clip in clips:
|
| 99 |
+
clip.close()
|
| 100 |
+
raise
|
| 101 |
+
|
| 102 |
+
async def _prepare_audio_clips(self, assets: Dict) -> List[AudioFileClip]:
|
| 103 |
+
"""Load and prepare all audio clips with proper error handling"""
|
| 104 |
+
clips = []
|
| 105 |
+
|
| 106 |
+
try:
|
| 107 |
+
# Load TTS audio
|
| 108 |
+
if assets.get('tts_audio') and assets['tts_audio'].get('local_path'):
|
| 109 |
+
try:
|
| 110 |
+
tts_clip = AudioFileClip(assets['tts_audio']['local_path'])
|
| 111 |
+
# Ensure the clip has proper duration
|
| 112 |
+
if tts_clip.duration > 0:
|
| 113 |
+
clips.append(('tts', tts_clip))
|
| 114 |
+
logger.info(f"✓ Loaded TTS audio: {tts_clip.duration:.2f}s")
|
| 115 |
+
else:
|
| 116 |
+
logger.warning("⚠️ TTS audio has zero duration")
|
| 117 |
+
tts_clip.close()
|
| 118 |
+
except Exception as e:
|
| 119 |
+
logger.error(f"❌ Failed to load TTS audio: {e}")
|
| 120 |
+
|
| 121 |
+
# Load background music
|
| 122 |
+
if assets.get('background_music_local'):
|
| 123 |
+
try:
|
| 124 |
+
bg_clip = AudioFileClip(assets['background_music_local'])
|
| 125 |
+
# Ensure the clip has proper duration
|
| 126 |
+
if bg_clip.duration > 0:
|
| 127 |
+
# Reduce volume using volumex instead of custom function
|
| 128 |
+
bg_clip = bg_clip.volumex(0.3)
|
| 129 |
+
clips.append(('background', bg_clip))
|
| 130 |
+
logger.info(f"✓ Loaded background music: {bg_clip.duration:.2f}s")
|
| 131 |
+
else:
|
| 132 |
+
logger.warning("⚠️ Background music has zero duration")
|
| 133 |
+
bg_clip.close()
|
| 134 |
+
except Exception as e:
|
| 135 |
+
logger.error(f"❌ Failed to load background music: {e}")
|
| 136 |
+
|
| 137 |
+
return [clip for _, clip in clips]
|
| 138 |
+
|
| 139 |
+
except Exception as e:
|
| 140 |
+
logger.error(f"❌ Failed to prepare audio clips: {e}")
|
| 141 |
+
# Cleanup on error
|
| 142 |
+
for name, clip in clips:
|
| 143 |
+
try:
|
| 144 |
+
clip.close()
|
| 145 |
+
except:
|
| 146 |
+
pass
|
| 147 |
+
raise
|
| 148 |
+
|
| 149 |
+
async def _create_video_sequence(self, video_clips: List[VideoFileClip],
|
| 150 |
+
video_config: Optional[Dict]) -> VideoFileClip:
|
| 151 |
+
"""Create the final video sequence with proper timing"""
|
| 152 |
+
try:
|
| 153 |
+
if not video_clips:
|
| 154 |
+
raise ValueError("No video clips available for sequence")
|
| 155 |
+
|
| 156 |
+
# Calculate total available duration (max 15 seconds)
|
| 157 |
+
max_duration = 15.0
|
| 158 |
+
current_duration = sum(clip.duration for clip in video_clips)
|
| 159 |
+
|
| 160 |
+
if current_duration > max_duration:
|
| 161 |
+
logger.warning(f"⚠️ Video sequence too long ({current_duration:.1f}s), will trim to {max_duration}s")
|
| 162 |
+
video_clips = self._trim_clips_to_fit(video_clips, max_duration)
|
| 163 |
+
|
| 164 |
+
# Resize all clips to target aspect ratio (9:16 vertical)
|
| 165 |
+
target_size = (1080, 1920) # 9:16 vertical
|
| 166 |
+
resized_clips = [self._resize_for_vertical(clip, target_size) for clip in video_clips]
|
| 167 |
+
|
| 168 |
+
# Create sequence
|
| 169 |
+
final_sequence = concatenate_videoclips(resized_clips)
|
| 170 |
+
logger.info(f"✓ Created video sequence: {final_sequence.duration:.2f}s")
|
| 171 |
+
|
| 172 |
+
return final_sequence
|
| 173 |
+
|
| 174 |
+
except Exception as e:
|
| 175 |
+
logger.error(f"❌ Failed to create video sequence: {e}")
|
| 176 |
+
for clip in video_clips:
|
| 177 |
+
clip.close()
|
| 178 |
+
raise
|
| 179 |
+
|
| 180 |
+
def _resize_for_vertical(self, clip: VideoFileClip, target_size: tuple) -> VideoFileClip:
|
| 181 |
+
"""Resize clip to fit vertical 9:16 aspect ratio"""
|
| 182 |
+
target_w, target_h = target_size
|
| 183 |
+
clip_aspect = clip.w / clip.h
|
| 184 |
+
target_aspect = target_w / target_h
|
| 185 |
+
|
| 186 |
+
if clip_aspect > target_aspect:
|
| 187 |
+
# Clip is wider, fit to height and crop width
|
| 188 |
+
new_clip = clip.resize(height=target_h)
|
| 189 |
+
else:
|
| 190 |
+
# Clip is taller, fit to width and crop height
|
| 191 |
+
new_clip = clip.resize(width=target_w)
|
| 192 |
+
|
| 193 |
+
# Center crop to exact size using a more compatible method
|
| 194 |
+
try:
|
| 195 |
+
# Try the new method first
|
| 196 |
+
new_clip = new_clip.crop(
|
| 197 |
+
x_center=new_clip.w / 2,
|
| 198 |
+
y_center=new_clip.h / 2,
|
| 199 |
+
width=target_w,
|
| 200 |
+
height=target_h
|
| 201 |
+
)
|
| 202 |
+
except Exception:
|
| 203 |
+
# Fallback method for cropping
|
| 204 |
+
x1 = (new_clip.w - target_w) // 2
|
| 205 |
+
y1 = (new_clip.h - target_h) // 2
|
| 206 |
+
new_clip = new_clip.crop(x1=x1, y1=y1, x2=x1+target_w, y2=y1+target_h)
|
| 207 |
|
| 208 |
+
return new_clip
|
| 209 |
+
|
| 210 |
+
def _trim_clips_to_fit(self, clips: List[VideoFileClip], max_duration: float) -> List[VideoFileClip]:
|
| 211 |
+
"""Trim video clips to fit within max duration"""
|
| 212 |
+
trimmed_clips = []
|
| 213 |
+
remaining_duration = max_duration
|
| 214 |
+
|
| 215 |
+
for clip in clips:
|
| 216 |
+
if remaining_duration <= 0:
|
| 217 |
+
break
|
| 218 |
+
|
| 219 |
+
use_duration = min(clip.duration, remaining_duration)
|
| 220 |
+
if use_duration < clip.duration:
|
| 221 |
+
trimmed_clip = clip.subclip(0, use_duration)
|
| 222 |
+
trimmed_clips.append(trimmed_clip)
|
| 223 |
+
logger.info(f"Trimmed clip from {clip.duration:.1f}s to {use_duration:.1f}s")
|
| 224 |
+
else:
|
| 225 |
+
trimmed_clips.append(clip)
|
| 226 |
+
|
| 227 |
+
remaining_duration -= use_duration
|
| 228 |
+
|
| 229 |
+
return trimmed_clips
|
| 230 |
+
|
| 231 |
+
async def _add_audio_track(self, video_clip: VideoFileClip, audio_clips: List[AudioFileClip]) -> VideoFileClip:
|
| 232 |
+
"""Add audio track to video with proper timing"""
|
| 233 |
+
if not audio_clips:
|
| 234 |
+
return video_clip
|
| 235 |
+
|
| 236 |
+
try:
|
| 237 |
+
# Filter out invalid audio clips
|
| 238 |
+
valid_audio_clips = []
|
| 239 |
+
for clip in audio_clips:
|
| 240 |
+
if clip.duration > 0:
|
| 241 |
+
valid_audio_clips.append(clip)
|
| 242 |
+
else:
|
| 243 |
+
logger.warning(f"⚠️ Skipping audio clip with zero duration")
|
| 244 |
+
clip.close()
|
| 245 |
+
|
| 246 |
+
if not valid_audio_clips:
|
| 247 |
+
return video_clip
|
| 248 |
+
|
| 249 |
+
# Mix all valid audio clips
|
| 250 |
+
mixed_audio = CompositeAudioClip(valid_audio_clips)
|
| 251 |
+
|
| 252 |
+
# Ensure audio doesn't exceed video duration
|
| 253 |
+
video_duration = video_clip.duration
|
| 254 |
+
if mixed_audio.duration > video_duration:
|
| 255 |
+
logger.info(f"Trimming audio from {mixed_audio.duration:.2f}s to {video_duration:.2f}s")
|
| 256 |
+
mixed_audio = mixed_audio.subclip(0, video_duration)
|
| 257 |
+
|
| 258 |
+
# Add audio to video
|
| 259 |
+
video_with_audio = video_clip.set_audio(mixed_audio)
|
| 260 |
+
logger.info(f"✓ Added audio track: {mixed_audio.duration:.2f}s")
|
| 261 |
+
|
| 262 |
+
return video_with_audio
|
| 263 |
+
|
| 264 |
+
except Exception as e:
|
| 265 |
+
logger.error(f"❌ Failed to add audio track: {e}")
|
| 266 |
+
# Cleanup audio clips
|
| 267 |
+
for clip in audio_clips:
|
| 268 |
+
try:
|
| 269 |
+
clip.close()
|
| 270 |
+
except:
|
| 271 |
+
pass
|
| 272 |
+
return video_clip
|
| 273 |
+
|
| 274 |
+
async def _add_subtitles(self, video_clip: VideoFileClip, script: str) -> CompositeVideoClip:
|
| 275 |
+
"""Add animated subtitles to video"""
|
| 276 |
+
try:
|
| 277 |
+
phrases = self._split_script_into_phrases(script)
|
| 278 |
+
text_clips = []
|
| 279 |
+
|
| 280 |
+
total_duration = video_clip.duration
|
| 281 |
+
duration_per_phrase = total_duration / len(phrases)
|
| 282 |
+
fade_duration = 0.3
|
| 283 |
+
|
| 284 |
+
target_width, target_height = video_clip.size
|
| 285 |
+
|
| 286 |
+
for i, phrase in enumerate(phrases):
|
| 287 |
+
start_time = i * duration_per_phrase
|
| 288 |
+
|
| 289 |
+
# Word wrapping for vertical format
|
| 290 |
+
max_chars_per_line = 25
|
| 291 |
+
wrapped_text = '\n'.join(textwrap.wrap(phrase, width=max_chars_per_line))
|
| 292 |
+
|
| 293 |
+
# Create text clip
|
| 294 |
+
text_clip = TextClip(
|
| 295 |
+
txt=wrapped_text,
|
| 296 |
+
fontsize=65,
|
| 297 |
+
color='yellow' if i % 2 == 1 else 'white',
|
| 298 |
+
font='Helvetica',
|
| 299 |
+
stroke_color='black',
|
| 300 |
+
stroke_width=4,
|
| 301 |
+
method='caption',
|
| 302 |
+
size=(int(target_width * 0.85), None)
|
| 303 |
+
)
|
| 304 |
+
|
| 305 |
+
# Position in center-upper area (safe zone for vertical video)
|
| 306 |
+
vertical_position = int(target_height * 0.40)
|
| 307 |
+
text_clip = text_clip.set_position(('center', vertical_position))
|
| 308 |
+
text_clip = text_clip.set_start(start_time)
|
| 309 |
+
text_clip = text_clip.set_duration(duration_per_phrase)
|
| 310 |
+
|
| 311 |
+
# Add fade effects manually
|
| 312 |
+
text_clip = text_clip.crossfadein(fade_duration).crossfadeout(fade_duration)
|
| 313 |
+
|
| 314 |
+
text_clips.append(text_clip)
|
| 315 |
+
|
| 316 |
+
# Combine video with subtitles
|
| 317 |
+
final_video = CompositeVideoClip([video_clip] + text_clips)
|
| 318 |
+
logger.info(f"✓ Added {len(text_clips)} subtitle segments")
|
| 319 |
+
|
| 320 |
+
return final_video
|
| 321 |
+
|
| 322 |
+
except Exception as e:
|
| 323 |
+
logger.error(f"❌ Failed to add subtitles: {e}")
|
| 324 |
+
return video_clip
|
| 325 |
+
|
| 326 |
+
def _split_script_into_phrases(self, script: str) -> List[str]:
|
| 327 |
+
"""Split script into subtitle phrases"""
|
| 328 |
+
# Simple sentence splitting - can be enhanced with NLP
|
| 329 |
+
sentences = [s.strip() + '.' for s in script.split('.') if s.strip()]
|
| 330 |
+
return sentences[:6] # Limit to 6 phrases max
|
| 331 |
+
|
| 332 |
+
async def _render_final_video(self, video_clip: VideoFileClip) -> str:
    """Encode ``video_clip`` to ``final_video.mp4`` inside ``self.temp_dir``.

    Args:
        video_clip: The fully composed clip to write out. It is closed when
            this method finishes, whether or not rendering succeeded.

    Returns:
        The path of the rendered file, as a string.

    Raises:
        Exception: Any error raised while writing the file is logged and
            re-raised unchanged.
    """
    destination = self.temp_dir / "final_video.mp4"

    try:
        logger.info("📹 Rendering final video file...")

        encode_options = dict(
            codec='libx264',
            audio_codec='aac',
            temp_audiofile=str(self.temp_dir / 'temp_audio.m4a'),
            remove_temp=True,
            fps=24,
            verbose=False,
            logger=None,  # Suppress moviepy progress bars
        )
        video_clip.write_videofile(str(destination), **encode_options)

        logger.info(f"✓ Final video rendered: {destination}")
        return str(destination)

    except Exception as err:
        logger.error(f"❌ Final video rendering failed: {err}")
        raise
    finally:
        # Always release the clip, on success and on failure alike.
        video_clip.close()
|
| 358 |
+
|
| 359 |
+
def _validate_assets(self, assets: Dict) -> bool:
|
| 360 |
+
"""Validate that required assets are present"""
|
| 361 |
+
required = ['selected_videos', 'tts_audio']
|
| 362 |
+
|
| 363 |
+
for req in required:
|
| 364 |
+
if not assets.get(req):
|
| 365 |
+
logger.error(f"Missing required asset: {req}")
|
| 366 |
+
return False
|
| 367 |
+
|
| 368 |
+
if not assets.get('selected_videos'):
|
| 369 |
+
logger.error("No selected videos provided")
|
| 370 |
+
return False
|
| 371 |
+
|
| 372 |
+
return True
|
| 373 |
+
|
| 374 |
+
def _cleanup_temp_files(self, clips: List):
|
| 375 |
+
"""Clean up temporary video/audio clips"""
|
| 376 |
+
for clip in clips:
|
| 377 |
+
try:
|
| 378 |
+
clip.close()
|
| 379 |
+
except:
|
| 380 |
+
pass
|
| 381 |
+
|
| 382 |
+
def __del__(self):
|
| 383 |
+
"""Cleanup on destruction"""
|
| 384 |
+
try:
|
| 385 |
+
import shutil
|
| 386 |
+
if self.temp_dir.exists():
|
| 387 |
+
shutil.rmtree(self.temp_dir)
|
| 388 |
+
except:
|
| 389 |
+
pass
|