Zumrat Kochshegulov committed on
Commit
e4d57c9
·
unverified ·
2 Parent(s): 0b94fac cb9baf6

Merge pull request #2 from ElvoroLtd/feat/video-editor

Browse files
Files changed (11) hide show
  1. .env.example +6 -59
  2. API_SETUP_GUIDE.md +0 -316
  3. QUICKSTART.md +0 -313
  4. README.md +200 -261
  5. config/api_keys.yaml +12 -5
  6. requirements.txt +52 -13
  7. src/api_clients.py +110 -40
  8. src/asset_selector.py +233 -0
  9. src/automation.py +330 -329
  10. src/main.py +28 -25
  11. src/video_renderer.py +382 -55
.env.example CHANGED
@@ -1,75 +1,22 @@
1
- # ============================================
2
- # SOMIRA CONTENT AUTOMATION - CONFIGURATION
3
- # ============================================
4
-
5
- # -------------------- API KEYS --------------------
6
-
7
- # Gemini API (Google AI) - For prompt enhancement and video selection
8
- # Get yours at: https://aistudio.google.com/app/apikey
9
  GEMINI_API_KEY=your_gemini_api_key_here
10
-
11
- # RunwayML API - For AI video generation
12
- # Get yours at: https://dev.runwayml.com/
13
- RUNWAYML_API_KEY=key_your_runwayml_api_key_here
14
-
15
- # Google Cloud - Service Account for TTS and Storage
16
- # Path to your service account JSON key file
17
  GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json
18
 
19
- # OR use Azure TTS (Alternative to Google TTS)
20
- # AZURE_SPEECH_KEY=your_azure_speech_key_here
21
- # AZURE_SPEECH_REGION=eastus
22
-
23
-
24
- # -------------------- CLOUD STORAGE --------------------
25
-
26
- # Google Cloud Storage bucket name for video storage
27
- # Create bucket at: https://console.cloud.google.com/storage
28
  GCS_BUCKET_NAME=your_bucket_name_here
29
 
30
-
31
- # -------------------- CONFIGURATION --------------------
32
-
33
- # Audio library size (number of background music tracks available)
34
  AUDIO_LIBRARY_SIZE=27
35
-
36
- # Video library size (number of product video clips available)
37
  VIDEO_LIBRARY_SIZE=47
38
-
39
- # Default TTS voice (Google Cloud TTS voices)
40
- # Options: en-US-Neural2-F, en-US-Neural2-C, en-US-Neural2-D, etc.
41
- # Full list: https://cloud.google.com/text-to-speech/docs/voices
42
  DEFAULT_VOICE=en-US-Neural2-F
43
-
44
- # Video rendering quality (low, medium, high, ultra)
45
  VIDEO_QUALITY=high
46
-
47
- # Enable debug logging (true/false)
48
  DEBUG_MODE=false
49
 
50
-
51
- # -------------------- OPTIONAL SETTINGS --------------------
52
-
53
- # Maximum video generation timeout (seconds)
54
  VIDEO_GENERATION_TIMEOUT=300
55
-
56
- # Maximum concurrent API requests
57
  MAX_CONCURRENT_REQUESTS=4
58
-
59
- # Retry attempts for failed API calls
60
  MAX_RETRY_ATTEMPTS=3
61
-
62
- # Output directory for generated videos
63
  OUTPUT_DIRECTORY=./output
64
-
65
- # Temp directory for intermediate files
66
  TEMP_DIRECTORY=/tmp/somira
67
-
68
-
69
- # -------------------- NOTES --------------------
70
- #
71
- # 1. Never commit this file with actual API keys to version control
72
- # 2. Copy this file to .env and fill in your actual values
73
- # 3. Make sure .env is listed in your .gitignore file
74
- # 4. See API_SETUP_GUIDE.md for detailed setup instructions
75
- #
 
1
+ # API Keys
 
 
 
 
 
 
 
2
  GEMINI_API_KEY=your_gemini_api_key_here
3
+ RUNWAYML_API_KEY=your_runwayml_api_key_here
4
+ DEEPSEEK_API_KEY=your_deepseek_api_key_here
 
 
 
 
 
5
  GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/service-account-key.json
6
 
7
+ # Cloud Storage
 
 
 
 
 
 
 
 
8
  GCS_BUCKET_NAME=your_bucket_name_here
9
 
10
+ # Configuration
 
 
 
11
  AUDIO_LIBRARY_SIZE=27
 
 
12
  VIDEO_LIBRARY_SIZE=47
 
 
 
 
13
  DEFAULT_VOICE=en-US-Neural2-F
 
 
14
  VIDEO_QUALITY=high
 
 
15
  DEBUG_MODE=false
16
 
17
+ # Optional Settings
 
 
 
18
  VIDEO_GENERATION_TIMEOUT=300
 
 
19
  MAX_CONCURRENT_REQUESTS=4
 
 
20
  MAX_RETRY_ATTEMPTS=3
 
 
21
  OUTPUT_DIRECTORY=./output
 
 
22
  TEMP_DIRECTORY=/tmp/somira
 
 
 
 
 
 
 
 
 
API_SETUP_GUIDE.md DELETED
@@ -1,316 +0,0 @@
1
- # API Setup Guide - Complete Instructions
2
-
3
- This guide will walk you through obtaining all necessary API keys for your Somira video generation system.
4
-
5
- ---
6
-
7
- ## 1. Google Gemini API (Prompt Enhancement)
8
-
9
- ### Purpose
10
- Enhances user prompts and analyzes scripts for intelligent video selection.
11
-
12
- ### How to Get Your API Key
13
-
14
- 1. **Go to Google AI Studio**
15
- - Visit: https://aistudio.google.com/app/apikey
16
- - Sign in with your Google account
17
-
18
- 2. **Create API Key**
19
- - Click "Get API key" button (top left)
20
- - Click "Create API key"
21
- - Choose "Create API key in new project" (or select existing project)
22
- - Copy the API key immediately (shown only once!)
23
-
24
- 3. **Add to Your Environment**
25
- ```bash
26
- export GEMINI_API_KEY="your_api_key_here"
27
- ```
28
-
29
- ### Pricing
30
- - Free tier available with rate limits
31
- - Model used: `gemini-2.0-flash-exp` (optimized for speed and cost)
32
-
33
- ### Documentation
34
- - https://ai.google.dev/gemini-api/docs
35
-
36
- ---
37
-
38
- ## 2. RunwayML API (Video Generation)
39
-
40
- ### Purpose
41
- Generates AI videos from text prompts using Gen-4 model.
42
-
43
- ### How to Get Your API Key
44
-
45
- 1. **Create Developer Account**
46
- - Visit: https://dev.runwayml.com/
47
- - Sign up for a new account
48
- - Create a new organization (corresponds to your integration)
49
-
50
- 2. **Create API Key**
51
- - Navigate to "API Keys" tab
52
- - Click "Create new key"
53
- - Give it a descriptive name (e.g., "Somira Production")
54
- - Copy the key immediately and store securely (never shown again)
55
-
56
- 3. **Add Credits**
57
- - Go to "Billing" tab
58
- - Add credits to your organization
59
- - Minimum payment: $10 (at $0.01 per credit)
60
-
61
- 4. **Add to Your Environment**
62
- ```bash
63
- export RUNWAYML_API_KEY="key_your_api_key_here"
64
- ```
65
-
66
- ### Pricing
67
- - Pay-per-use model with credits
68
- - Gen-4 Turbo: ~5-10 credits per second of video (~50-100 credits for a 10-second video)
69
- - Minimum: $10 to start
70
-
71
- ### Documentation
72
- - https://docs.dev.runwayml.com/
73
-
74
- ---
75
-
76
- ## 3. Text-to-Speech (Google Cloud or Azure)
77
-
78
- ### Purpose
79
- Converts text scripts to natural-sounding speech with timing data for lip-sync.
80
-
81
- ### Option A: Google Cloud TTS (Recommended)
82
-
83
- #### How to Get Your API Key
84
-
85
- 1. **Create Google Cloud Project**
86
- - Visit: https://console.cloud.google.com/
87
- - Create new project or select existing
88
-
89
- 2. **Enable Text-to-Speech API**
90
- - Go to "APIs & Services" > "Library"
91
- - Search "Text-to-Speech API"
92
- - Click "Enable"
93
-
94
- 3. **Create Service Account**
95
- - Go to "APIs & Services" > "Credentials"
96
- - Click "Create Credentials" > "Service Account"
97
- - Download JSON key file
98
-
99
- 4. **Add to Your Environment**
100
- ```bash
101
- export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account-key.json"
102
- ```
103
-
104
- #### Pricing
105
- - Free tier: 1 million characters/month (Standard voices)
106
- - $4 per million characters after (Standard)
107
- - $16 per million characters (Neural2/Studio voices)
108
-
109
- ### Option B: Azure Cognitive Services TTS
110
-
111
- #### How to Get Your API Key
112
-
113
- 1. **Create Azure Account**
114
- - Visit: https://portal.azure.com/
115
- - Sign up (free tier available)
116
-
117
- 2. **Create Speech Service Resource**
118
- - Search "Speech Services" in Azure Portal
119
- - Click "Create"
120
- - Select subscription, resource group, region
121
- - Choose pricing tier (F0 for free)
122
-
123
- 3. **Get Keys**
124
- - Go to your Speech Service resource
125
- - Navigate to "Keys and Endpoint"
126
- - Copy Key 1 or Key 2
127
- - Copy the Region (e.g., eastus)
128
-
129
- 4. **Add to Your Environment**
130
- ```bash
131
- export AZURE_SPEECH_KEY="your_key_here"
132
- export AZURE_SPEECH_REGION="eastus"
133
- ```
134
-
135
- #### Pricing
136
- - Free tier: 5 audio hours/month
137
- - Standard: $1 per audio hour
138
- - Neural: $16 per million characters
139
-
140
- ### Documentation
141
- - Google: https://cloud.google.com/text-to-speech/docs
142
- - Azure: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/
143
-
144
- ---
145
-
146
- ## 4. Google Cloud Storage (Video Storage)
147
-
148
- ### Purpose
149
- Stores generated videos, audio files, and video library.
150
-
151
- ### How to Set Up
152
-
153
- 1. **Create GCS Bucket**
154
- - Go to: https://console.cloud.google.com/storage
155
- - Click "Create Bucket"
156
- - Choose unique name (e.g., "somira-videos")
157
- - Select region (same as your app for best performance)
158
- - Choose "Standard" storage class
159
-
160
- 2. **Set Permissions**
161
- - Make bucket public (if videos should be publicly accessible)
162
- - Or configure IAM for service account access
163
-
164
- 3. **Add to Your Environment**
165
- ```bash
166
- export GCS_BUCKET_NAME="somira-videos"
167
- ```
168
-
169
- ### Pricing
170
- - $0.020 per GB/month (Standard storage)
171
- - $0.12 per GB egress (after free tier)
172
- - Free tier: 5GB storage
173
-
174
- ---
175
-
176
- ## Complete .env File Example
177
-
178
- Create a `.env` file in your project root:
179
-
180
- ```bash
181
- # Gemini API (Prompt Enhancement)
182
- GEMINI_API_KEY=AIzaSyC_your_gemini_key_here
183
-
184
- # RunwayML API (Video Generation)
185
- RUNWAYML_API_KEY=key_1234567890abcdefghijklmnop
186
-
187
- # Google Cloud TTS (Option A - Recommended)
188
- GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
189
-
190
- # OR Azure TTS (Option B)
191
- # AZURE_SPEECH_KEY=your_azure_key_here
192
- # AZURE_SPEECH_REGION=eastus
193
-
194
- # Google Cloud Storage
195
- GCS_BUCKET_NAME=somira-videos
196
-
197
- # Configuration
198
- AUDIO_LIBRARY_SIZE=27
199
- VIDEO_LIBRARY_SIZE=47
200
- DEFAULT_VOICE=en-US-Neural2-F
201
- ```
202
-
203
- ---
204
-
205
- ## Security Best Practices
206
-
207
- ### DO:
208
- - Store API keys in environment variables or secret managers
209
- - Never commit API keys to version control (add .env to .gitignore)
210
- - Use descriptive names for API keys so you can revoke them later
211
- - Rotate keys regularly
212
- - Use separate keys for development and production
213
-
214
- ### DON'T:
215
- - Expose API keys on the client side or in client-side code
215
- - Hard-code API keys directly in source code
216
- - Share keys in public repositories
218
-
219
- ---
220
-
221
- ## Installation Steps
222
-
223
- 1. **Install Dependencies**
224
- ```bash
225
- pip install -r requirements.txt
226
- ```
227
-
228
- 2. **Set Up Environment Variables**
229
- ```bash
230
- cp .env.example .env
231
- # Edit .env with your actual keys
232
- ```
233
-
234
- 3. **Load Environment Variables**
235
- ```python
236
- from dotenv import load_dotenv
237
- load_dotenv()
238
- ```
239
-
240
- 4. **Test API Connections**
241
- ```python
242
- from api_clients import APIClients
243
-
244
- config = {
245
- 'gemini_api_key': os.getenv('GEMINI_API_KEY'),
246
- 'runwayml_api_key': os.getenv('RUNWAYML_API_KEY'),
247
- 'gcs_bucket_name': os.getenv('GCS_BUCKET_NAME'),
248
- 'video_library_size': 47,
249
- 'default_voice': 'en-US-AriaNeural'
250
- }
251
-
252
- clients = APIClients(config)
253
- ```
254
-
255
- ---
256
-
257
- ## Cost Estimates (Monthly)
258
-
259
- For a moderate usage scenario (100 videos/month):
260
-
261
- | Service | Usage | Cost |
262
- |---------|-------|------|
263
- | Gemini API | ~200K tokens | Free (within limits) |
264
- | RunwayML | 100 videos × 10 sec | ~$50-100 |
265
- | Google TTS | ~100K characters | Free (within limits) |
266
- | Google Cloud Storage | 50GB storage + egress | ~$2-5 |
267
- | **Total** | | **~$52-105/month** |
268
-
269
- Most of the cost comes from RunwayML video generation. Consider:
270
- - Using shorter video durations (5s instead of 10s)
271
- - Caching generated videos
272
- - Using Gen-4 Turbo for faster/cheaper results
273
-
274
- ---
275
-
276
- ## Troubleshooting
277
-
278
- ### Common Issues
279
-
280
- 1. **"API key not found" errors**
281
- - Check environment variables are loaded
282
- - Verify .env file location
283
- - Restart your application after adding keys
284
-
285
- 2. **RunwayML "Insufficient credits"**
286
- - Add credits in the billing tab of developer portal
287
- - Minimum $10 required to start
288
-
289
- 3. **Google Cloud authentication errors**
290
- - Verify service account JSON path is correct
291
- - Check service account has necessary permissions
292
- - Ensure APIs are enabled in Cloud Console
293
-
294
- 4. **Rate limiting**
295
- - Implement exponential backoff
296
- - Add delays between API calls
297
- - Consider upgrading to paid tiers
298
-
299
- ---
300
-
301
- ## Support Resources
302
-
303
- - **Gemini**: https://ai.google.dev/support
304
- - **RunwayML**: https://help.runwayml.com/
305
- - **Google Cloud**: https://cloud.google.com/support
306
- - **Azure**: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-text-to-speech
307
-
308
- ---
309
-
310
- ## Next Steps
311
-
312
- 1. Obtain all API keys following the instructions above
313
- 2. Configure your .env file
314
- 3. Test each API endpoint individually
315
- 4. Run the full video generation pipeline
316
- 5. Monitor usage and costs in each platform's dashboard
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
QUICKSTART.md DELETED
@@ -1,313 +0,0 @@
1
- # 🚀 Quick Start Guide
2
-
3
- Get your Somira Content Automation System up and running in 5 minutes!
4
-
5
- ---
6
-
7
- ## Prerequisites
8
-
9
- - Python 3.8 or higher
10
- - pip (Python package manager)
11
- - API keys (see [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md))
12
-
13
- ---
14
-
15
- ## Installation
16
-
17
- ### 1. Clone or Download the Project
18
-
19
- ```bash
20
- cd somira-automation
21
- ```
22
-
23
- ### 2. Create Virtual Environment (Recommended)
24
-
25
- ```bash
26
- # Create virtual environment
27
- python -m venv venv
28
-
29
- # Activate it
30
- # On macOS/Linux:
31
- source venv/bin/activate
32
- # On Windows:
33
- venv\Scripts\activate
34
- ```
35
-
36
- ### 3. Install Dependencies
37
-
38
- ```bash
39
- pip install -r requirements.txt
40
- ```
41
-
42
- ---
43
-
44
- ## Configuration
45
-
46
- ### 1. Set Up Environment Variables
47
-
48
- ```bash
49
- # Copy example file
50
- cp .env.example .env
51
-
52
- # Edit with your API keys
53
- nano .env # or use your favorite editor
54
- ```
55
-
56
- **Required values in `.env`:**
57
- - `GEMINI_API_KEY` - Get from https://aistudio.google.com/app/apikey
58
- - `RUNWAYML_API_KEY` - Get from https://dev.runwayml.com/
59
- - `GOOGLE_APPLICATION_CREDENTIALS` - Path to GCP service account JSON
60
- - `GCS_BUCKET_NAME` - Your Google Cloud Storage bucket name
61
-
62
- ### 2. Verify Configuration
63
-
64
- ```bash
65
- python main.py --health-check
66
- ```
67
-
68
- You should see:
69
- ```
70
- ✓ Gemini API: Connected
71
- ✓ RunwayML API: Configured
72
- ✓ TTS API: Configured
73
- ✓ Google Cloud Storage: Connected
74
- ✅ Health check passed
75
- ```
76
-
77
- ---
78
-
79
- ## Usage
80
-
81
- ### Basic Usage (Default Content)
82
-
83
- ```bash
84
- python main.py
85
- ```
86
-
87
- This will:
88
- 1. Generate a hook video using AI
89
- 2. Select background music
90
- 3. Choose 3 relevant product videos
91
- 4. Generate text-to-speech audio
92
- 5. Render the final video with subtitles
93
- 6. Upload to Google Cloud Storage
94
-
95
- ### Custom Content
96
-
97
- ```bash
98
- python main.py \
99
- --strategy example_strategy.json \
100
- --script example_script.txt \
101
- --output ./output/my_video
102
- ```
103
-
104
- ### Run a Quick Test
105
-
106
- ```bash
107
- python main.py --test
108
- ```
109
-
110
- This runs a minimal test to verify everything works without using many credits.
111
-
112
- ---
113
-
114
- ## Command Line Options
115
-
116
- ```bash
117
- python main.py [OPTIONS]
118
-
119
- Options:
120
- --strategy FILE Path to JSON file with content strategy
121
- --script FILE Path to text file with TTS script
122
- --output DIR Output directory for results
123
- --health-check Run health check on all services
124
- --test Run test pipeline with minimal resources
125
- --verbose Enable verbose logging
126
- --help Show help message
127
- ```
128
-
129
- ---
130
-
131
- ## Example Workflows
132
-
133
- ### Create Multiple Videos from Different Scripts
134
-
135
- ```bash
136
- # Video 1
137
- python main.py \
138
- --script scripts/script1.txt \
139
- --output output/video1
140
-
141
- # Video 2
142
- python main.py \
143
- --script scripts/script2.txt \
144
- --output output/video2
145
-
146
- # Video 3
147
- python main.py \
148
- --script scripts/script3.txt \
149
- --output output/video3
150
- ```
151
-
152
- ### Custom Strategy with Different Style
153
-
154
- Create `my_strategy.json`:
155
- ```json
156
- {
157
- "brand": "Somira",
158
- "gemini_prompt": "Your custom prompt here...",
159
- "runway_prompt": "Your custom RunwayML prompt...",
160
- "style": "minimal",
161
- "aspect_ratio": "16:9",
162
- "duration": 10
163
- }
164
- ```
165
-
166
- Then run:
167
- ```bash
168
- python main.py --strategy my_strategy.json
169
- ```
170
-
171
- ---
172
-
173
- ## Understanding the Pipeline
174
-
175
- The automation runs in 4 steps:
176
-
177
- **Step 1: Asset Generation (Parallel)** ⚡
178
- - Generate hook video with AI (RunwayML)
179
- - Select background music (from library)
180
- - Select 3 product videos (AI-powered)
181
- - Generate voice-over (TTS)
182
-
183
- **Step 2: Video Rendering** 🎬
184
- - Merge all videos
185
- - Add audio tracks
186
- - Apply transitions and effects
187
-
188
- **Step 3: Subtitle Addition** 📝
189
- - Generate subtitles from TTS timing
190
- - Overlay on video
191
-
192
- **Step 4: Cloud Upload** ☁️
193
- - Upload to Google Cloud Storage
194
- - Generate public URL
195
-
196
- ---
197
-
198
- ## File Structure
199
-
200
- ```
201
- somira-automation/
202
- ├── main.py # Main entry point
203
- ├── automation.py # Pipeline orchestrator
204
- ├── api_clients.py # API integrations
205
- ├── video_renderer.py # Video processing
206
- ├── utils.py # Utilities and logging
207
- ├── requirements.txt # Python dependencies
208
- ├── .env # Your API keys (DO NOT COMMIT)
209
- ├── .env.example # Template for .env
210
- ├── example_strategy.json # Sample content strategy
211
- ├── example_script.txt # Sample TTS script
212
- ├── API_SETUP_GUIDE.md # Detailed API setup
213
- └── QUICKSTART.md # This file
214
- ```
215
-
216
- ---
217
-
218
- ## Troubleshooting
219
-
220
- ### "Module not found" errors
221
- ```bash
222
- pip install -r requirements.txt
223
- ```
224
-
225
- ### "API key not found" errors
226
- ```bash
227
- # Check your .env file exists and has the right keys
228
- cat .env
229
-
230
- # Make sure you've loaded it
231
- python -c "from dotenv import load_dotenv; load_dotenv(); import os; print(os.getenv('GEMINI_API_KEY'))"
232
- ```
233
-
234
- ### RunwayML "Insufficient credits"
235
- - Add credits at https://dev.runwayml.com/ (minimum $10)
236
-
237
- ### Google Cloud authentication errors
238
- ```bash
239
- # Verify your service account JSON exists
240
- ls -l /path/to/service-account-key.json
241
-
242
- # Set it in your .env
243
- GOOGLE_APPLICATION_CREDENTIALS=/full/path/to/service-account-key.json
244
- ```
245
-
246
- ### Videos taking too long
247
- - RunwayML video generation takes 30-60 seconds typically
248
- - The `--test` command uses minimal resources for quick testing
249
-
250
- ---
251
-
252
- ## Cost Estimates
253
-
254
- For 100 videos per month:
255
-
256
- | Service | Cost |
257
- |---------|------|
258
- | Gemini API | Free (within limits) |
259
- | RunwayML | ~$50-100 |
260
- | Google TTS | Free (within limits) |
261
- | Google Storage | ~$2-5 |
262
- | **Total** | **~$52-105/month** |
263
-
264
- 💡 **Tip:** Use the `--test` command frequently to avoid unnecessary API costs during development.
265
-
266
- ---
267
-
268
- ## Next Steps
269
-
270
- 1. ✅ Complete API setup (see [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md))
271
- 2. ✅ Run health check: `python main.py --health-check`
272
- 3. ✅ Run test: `python main.py --test`
273
- 4. ✅ Generate your first video: `python main.py`
274
- 5. 📚 Customize: Edit `example_strategy.json` and `example_script.txt`
275
- 6. 🚀 Scale: Create multiple strategies and automate batch processing
276
-
277
- ---
278
-
279
- ## Support
280
-
281
- - **API Issues:** See [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md)
282
- - **Bugs:** Check logs in console output
283
- - **Questions:** Review code comments in `main.py` and `automation.py`
284
-
285
- ---
286
-
287
- ## Tips for Best Results
288
-
289
- ### Prompt Engineering
290
- - Be specific about visual details
291
- - Include camera movements
292
- - Specify lighting and mood
293
- - Mention aspect ratio for consistency
294
-
295
- ### TTS Scripts
296
- - Keep sentences natural and conversational
297
- - Use pauses (commas, periods) for pacing
298
- - Test different voices in `DEFAULT_VOICE` setting
299
- - Aim for 15-30 seconds of speech
300
-
301
- ### Video Selection
302
- - The AI analyzes your script for context
303
- - More descriptive scripts = better video selection
304
- - Review selected videos in logs
305
-
306
- ### Performance
307
- - Parallel execution makes Step 1 fast
308
- - Most time is spent waiting for RunwayML
309
- - Use `--test` to verify setup without long waits
310
-
311
- ---
312
-
313
- Happy automating! 🎉
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,359 +1,298 @@
1
- # 🎬 Somira Content Automation System
2
 
3
- **Automated video generation pipeline for product advertisements using AI**
4
-
5
- Transform text scripts into professional product videos with AI-generated content, voice-overs, and intelligent video selection - all automated end-to-end.
6
-
7
- ---
8
-
9
- ## ✨ Features
10
-
11
- - **🤖 AI-Powered Video Generation** - Create unique hook videos using RunwayML Gen-4
12
- - **🧠 Intelligent Prompt Enhancement** - Gemini AI optimizes prompts for better results
13
- - **🎙️ Professional Text-to-Speech** - Natural voice-overs with Google Cloud TTS
14
- - **📹 Smart Video Selection** - AI analyzes scripts to select relevant product footage
15
- - **🎵 Automatic Music Integration** - Background music from curated library
16
- - **📝 Subtitle Generation** - Automatic subtitle overlay with timing
17
- - **⚡ Parallel Processing** - Concurrent API calls for maximum speed
18
- - **☁️ Cloud Storage** - Automatic upload to Google Cloud Storage
19
- - **🔄 Robust Error Handling** - Fallback mechanisms for reliability
20
-
21
- ---
22
-
23
- ## 🎯 Use Cases
24
-
25
- - Product advertisement videos for social media
26
- - Instagram Reels and TikTok content
27
- - Automated marketing video generation
28
- - A/B testing different video hooks
29
- - Scalable video production pipelines
30
- - Content marketing automation
31
-
32
- ---
33
-
34
- ## 📋 Requirements
35
-
36
- - **Python 3.8+**
37
- - **API Keys:**
38
- - Google Gemini API (free tier available)
39
- - RunwayML API ($10 minimum)
40
- - Google Cloud Platform account (TTS + Storage)
41
- - **Storage:** ~1GB for video library
42
- - **RAM:** 4GB minimum
43
 
44
  ---
45
 
46
  ## 🚀 Quick Start
47
 
48
  ### 1. Installation
49
-
50
  ```bash
51
- # Clone repository
52
- git clone <your-repo-url>
53
  cd somira-automation
54
 
55
- # Create virtual environment
56
  python -m venv venv
57
- source venv/bin/activate # On Windows: venv\Scripts\activate
58
 
59
  # Install dependencies
60
  pip install -r requirements.txt
61
  ```
62
 
63
- ### 2. Configuration
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
 
 
 
 
 
 
 
65
  ```bash
66
- # Copy environment template
67
  cp .env.example .env
68
-
69
- # Edit with your API keys
70
- nano .env
71
  ```
72
 
73
- **Required API Keys:**
74
- - `GEMINI_API_KEY` - https://aistudio.google.com/app/apikey
75
- - `RUNWAYML_API_KEY` - https://dev.runwayml.com/
76
- - `GOOGLE_APPLICATION_CREDENTIALS` - GCP service account JSON
77
- - `GCS_BUCKET_NAME` - Your GCS bucket name
78
-
79
- ### 3. Verify Setup
 
 
 
 
 
 
80
 
 
81
  ```bash
82
  python main.py --health-check
83
  ```
 
84
 
85
- ### 4. Generate Your First Video
86
-
87
  ```bash
88
  python main.py
89
  ```
90
 
91
- **📚 For detailed setup instructions, see [QUICKSTART.md](QUICKSTART.md)**
92
-
93
  ---
94
 
95
- ## 📖 Documentation
96
-
97
- | Document | Description |
98
- |----------|-------------|
99
- | [QUICKSTART.md](QUICKSTART.md) | Get started in 5 minutes |
100
- | [API_SETUP_GUIDE.md](API_SETUP_GUIDE.md) | Detailed API key setup |
101
- | [example_strategy.json](example_strategy.json) | Sample content strategy |
102
- | [example_script.txt](example_script.txt) | Sample TTS script |
103
 
104
- ---
105
 
106
- ## 🏗️ Architecture
 
 
 
 
107
 
108
- ```
109
- ┌─────────────────────────────────────────────────────┐
110
- │ MAIN PIPELINE │
111
- └─────────────────────────────────────────────────────┘
112
-
113
-
114
- ┌─────────────────────────────────────────────────────┐
115
- │ STEP 1: Asset Generation (Parallel) │
116
- ├─────────────────────────────────────────────────────┤
117
- │ ┌──────────────┐ ┌──────────────┐ │
118
- │ │ Gemini API │→ │ RunwayML API │ │
119
- │ │ (Enhance) │ │ (Hook Video) │ │
120
- │ └──────────────┘ └──────────────┘ │
121
- │ │
122
- │ ┌──────────────┐ ┌──────────────┐ │
123
- │ │ Music │ │ Video │ │
124
- │ │ Selection │ │ Selection AI │ │
125
- │ └──────────────┘ └──────────────┘ │
126
- │ │
127
- │ ┌──────────────┐ │
128
- │ │ Google TTS │ │
129
- │ │ (Voice-over) │ │
130
- │ └──────────────┘ │
131
- └─────────────────────────────────────────────────────┘
132
-
133
-
134
- ┌─────────────────────────────────────────────────────┐
135
- │ STEP 2: Video Rendering & Merging │
136
- ├─────────────────────────────────────────────────────┤
137
- │ • Merge hook + library videos │
138
- │ • Add background music │
139
- │ • Mix voice-over audio │
140
- │ • Apply transitions │
141
- └─────────────────────────────────────────────────────┘
142
-
143
-
144
- ┌─────────────────────────────────────────────────────┐
145
- │ STEP 3: Subtitle Generation │
146
- ├─────────────────────────────────────────────────────┤
147
- │ • Extract timing from TTS │
148
- │ • Generate subtitle file │
149
- │ • Overlay on video │
150
- └─────────────────────────────────────────────────────┘
151
-
152
-
153
- ┌─────────────────────────────────────────────────────┐
154
- │ STEP 4: Cloud Storage Upload │
155
- ├─────────────────────────────────────────────────────┤
156
- │ • Upload to Google Cloud Storage │
157
- │ • Generate public URL │
158
- │ • Save metadata │
159
- └─────────────────────────────────────────────────────┘
160
- ```
161
 
162
  ---
163
 
164
- ## 💻 Usage Examples
165
-
166
- ### Basic Usage
167
 
 
168
  ```bash
169
- # Use default content
170
  python main.py
171
 
172
- # Output:
173
- # Pipeline completed successfully
174
- # 📹 Final Video: https://storage.googleapis.com/...
 
 
 
 
 
175
  ```
176
 
177
  ### Custom Content
 
 
 
 
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  ```bash
180
- # Use custom strategy and script
181
- python main.py \
182
- --strategy campaigns/holiday_2025.json \
183
- --script scripts/holiday_promo.txt \
184
- --output ./output/holiday_video
185
  ```
186
 
187
  ### Batch Processing
188
-
189
  ```python
190
  import asyncio
191
  from automation import ContentAutomation
192
 
193
- async def generate_multiple_videos():
194
  automation = ContentAutomation(config)
195
 
196
- scripts = [
197
- "scripts/script1.txt",
198
- "scripts/script2.txt",
199
- "scripts/script3.txt"
200
- ]
201
-
202
  for script_file in scripts:
203
  with open(script_file) as f:
204
- script = f.read()
205
-
206
- result = await automation.execute_pipeline(
207
- content_strategy=strategy,
208
- tts_script=script
209
- )
210
- print(f"Generated: {result['final_url']}")
211
-
212
- asyncio.run(generate_multiple_videos())
213
- ```
214
-
215
- ### Health Check
216
-
217
- ```bash
218
- python main.py --health-check
219
 
220
- # Output:
221
- # 🏥 Running health check...
222
- # ✓ Gemini API: Connected
223
- # ✓ RunwayML API: Configured
224
- # ✓ TTS API: Configured
225
- # ✓ Google Cloud Storage: Connected
226
- # ✅ All systems operational!
227
  ```
228
 
229
  ---
230
 
231
- ## 🔧 Configuration
232
 
233
- ### Content Strategy Format
 
 
 
 
 
 
 
234
 
235
- ```json
236
- {
237
- "brand": "Somira",
238
- "gemini_prompt": "Descriptive prompt for enhancement",
239
- "runway_prompt": "Specific prompt for video generation",
240
- "style": "commercial",
241
- "aspect_ratio": "9:16",
242
- "duration": 5,
243
- "platform": "Instagram Reels / TikTok"
244
- }
245
- ```
246
 
247
- ### Environment Variables
248
 
249
- | Variable | Required | Description |
250
- |----------|----------|-------------|
251
- | `GEMINI_API_KEY` | Yes | Google Gemini API key |
252
- | `RUNWAYML_API_KEY` | Yes | RunwayML API key |
253
- | `GOOGLE_APPLICATION_CREDENTIALS` | Yes | Path to GCP service account JSON |
254
- | `GCS_BUCKET_NAME` | Yes | Google Cloud Storage bucket |
255
- | `AUDIO_LIBRARY_SIZE` | No | Number of music tracks (default: 27) |
256
- | `VIDEO_LIBRARY_SIZE` | No | Number of video clips (default: 47) |
257
- | `DEFAULT_VOICE` | No | TTS voice name (default: en-US-Neural2-F) |
258
 
259
- ---
 
 
 
 
 
260
 
261
- ## 📊 Performance
 
 
 
262
 
263
- - **Step 1 (Parallel):** 30-60 seconds (depends on RunwayML)
264
- - **Step 2 (Rendering):** 10-20 seconds
265
- - **Step 3 (Subtitles):** 5-10 seconds
266
- - **Step 4 (Upload):** 5-15 seconds
267
 
268
- **Total:** ~50-105 seconds per video
 
 
269
 
270
- ---
 
 
 
 
 
271
 
272
- ## 💰 Cost Analysis
273
 
274
- ### Per Video Cost
275
 
276
- | Service | Cost | Notes |
277
- |---------|------|-------|
278
- | Gemini API | ~$0.001 | Usually free tier |
279
- | RunwayML Gen-4 | $0.50-1.00 | Varies by duration |
280
- | Google TTS | ~$0.001 | Usually free tier |
281
- | GCS Storage | ~$0.001 | Per video |
282
- | **Total per video** | **~$0.50-1.00** | |
 
 
 
 
 
 
 
283
 
284
- ### Monthly Estimates (100 videos)
 
 
 
 
 
 
285
 
286
- - Gemini: Free (within free tier)
287
- - RunwayML: $50-100
288
- - Google TTS: Free (within 1M chars/month)
289
- - GCS: $2-5
290
- - **Total: $52-105/month**
291
 
292
  ---
293
 
294
- ## 🛡️ Error Handling
295
 
296
- The system includes comprehensive error handling:
297
 
298
- - **Automatic retries** for transient API failures
299
- - **Fallback mechanisms** for video/music selection
300
- - **Graceful degradation** when optional features fail
301
- - ✅ **Detailed logging** for debugging
302
- - ✅ **Partial results** saved on pipeline failure
303
 
304
- ---
 
 
305
 
306
- ## 📁 Project Structure
 
 
307
 
 
 
 
308
  ```
309
- somira-automation/
310
- ├── main.py # CLI entry point
311
- ├── automation.py # Pipeline orchestrator
312
- ├── api_clients.py # API integrations (Gemini, RunwayML, TTS, GCS)
313
- ├── video_renderer.py # Video processing and rendering
314
- ├── utils.py # Logging and utility functions
315
- ├── requirements.txt # Python dependencies
316
- ├── .env.example # Environment variables template
317
- ├── example_strategy.json # Sample content strategy
318
- ├── example_script.txt # Sample TTS script
319
- ├── README.md # This file
320
- ├── QUICKSTART.md # Quick start guide
321
- └── API_SETUP_GUIDE.md # Detailed API setup instructions
322
- ```
323
 
324
  ---
325
 
326
- ## 🔐 Security Best Practices
327
 
328
- 1. **Never commit `.env` file** - Added to `.gitignore`
329
- 2. **Use environment variables** - No hardcoded keys
330
- 3. **Restrict API key permissions** - Minimum necessary access
331
- 4. **Rotate keys regularly** - Every 90 days recommended
332
- 5. **Monitor API usage** - Set up billing alerts
333
- 6. **Use service accounts** - For GCP resources
334
 
335
- ---
 
 
 
336
 
337
- ## 🐛 Troubleshooting
 
 
 
 
338
 
339
- ### Common Issues
340
 
341
- **"Module not found"**
342
- ```bash
343
- pip install -r requirements.txt
344
- ```
345
 
346
- **"API key not valid"**
347
- - Check your `.env` file
348
- - Verify keys are correctly copied (no extra spaces)
349
- - Ensure APIs are enabled in respective consoles
 
 
350
 
351
- **"Insufficient credits" (RunwayML)**
352
- - Add credits at https://dev.runwayml.com/
353
- - Minimum $10 required
354
 
355
- **"Permission denied" (GCS)**
356
- - Check service account has Storage Admin role
357
- - Verify `GOOGLE_APPLICATION_CREDENTIALS` path is correct
358
 
359
- **Videos taking too long**
 
1
+ # 🎬 Somira Content Automation
2
 
3
+ **AI-powered video generation pipeline that transforms text scripts into professional product advertisements.**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  ---
6
 
7
  ## 🚀 Quick Start
8
 
9
  ### 1. Installation
 
10
  ```bash
11
+ # Clone and setup
12
+ git clone <your-repo>
13
  cd somira-automation
14
 
15
+ # Create virtual environment (recommended)
16
  python -m venv venv
17
+ source venv/bin/activate # Windows: venv\Scripts\activate
18
 
19
  # Install dependencies
20
  pip install -r requirements.txt
21
  ```
22
 
23
+ ### 2. API Setup
24
+
25
+ **You need these API keys:**
26
+
27
+ #### Gemini API (Free)
28
+ 1. Go to https://aistudio.google.com/app/apikey
29
+ 2. Click "Create API Key"
30
+ 3. Copy the key
31
+
32
+ #### RunwayML API ($10 minimum)
33
+ 1. Go to https://dev.runwayml.com/
34
+ 2. Sign up and create organization
35
+ 3. Go to "API Keys" → "Create new key"
36
+ 4. Add $10+ credits in "Billing" tab
37
 
38
+ #### Google Cloud (Free tier available)
39
+ 1. Go to https://console.cloud.google.com/
40
+ 2. Create project → Enable "Text-to-Speech API"
41
+ 3. Create service account → Download JSON key
42
+ 4. Create storage bucket
43
+
44
+ ### 3. Configuration
45
  ```bash
46
+ # Copy and edit environment file
47
  cp .env.example .env
 
 
 
48
  ```
49
 
50
+ Edit `.env` with your keys:
51
+ ```bash
52
+ # Required API Keys
53
+ GEMINI_API_KEY=AIzaSyC_your_key_here
54
+ RUNWAYML_API_KEY=key_your_runwayml_key_here
55
+ GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
56
+ GCS_BUCKET_NAME=your-bucket-name
57
+
58
+ # Optional Settings
59
+ DEFAULT_VOICE=en-US-Neural2-F
60
+ AUDIO_LIBRARY_SIZE=27
61
+ VIDEO_LIBRARY_SIZE=47
62
+ ```
63
 
64
+ ### 4. Verify Setup
65
  ```bash
66
  python main.py --health-check
67
  ```
68
+ You should see: `✅ All systems operational!`
69
 
70
+ ### 5. Generate Your First Video
 
71
  ```bash
72
  python main.py
73
  ```
74
 
 
 
75
  ---
76
 
77
+ ## 🎯 What It Does
 
 
 
 
 
 
 
78
 
79
+ This system automatically creates 15-second vertical videos (perfect for TikTok/Reels) by:
80
 
81
+ 1. **AI Video Generation** - Creates unique hook videos using RunwayML Gen-4
82
+ 2. **Smart Content Selection** - Gemini AI analyzes your script to pick relevant product footage
83
+ 3. **Professional Voice-overs** - Converts text to natural speech using Google TTS
84
+ 4. **Auto Editing** - Merges videos, adds background music, subtitles, and effects
85
+ 5. **Cloud Storage** - Uploads final videos to Google Cloud Storage
86
 
87
+ **Pipeline Time**: ~1-2 minutes per video
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  ---
90
 
91
+ ## 💻 Usage
 
 
92
 
93
+ ### Basic Commands
94
  ```bash
95
+ # Generate video with default content
96
  python main.py
97
 
98
+ # Test system (uses minimal credits)
99
+ python main.py --test
100
+
101
+ # Health check
102
+ python main.py --health-check
103
+
104
+ # Custom content
105
+ python main.py --strategy strategy.json --script script.txt
106
  ```
107
 
108
  ### Custom Content
109
+ Create `my_script.txt`:
110
+ ```
111
+ I heard a pop and my neck was stuck. After one minute with Somira massager, the pain was gone. This product actually works!
112
+ ```
113
 
114
+ Create `my_strategy.json`:
115
+ ```json
116
+ {
117
+ "brand": "Somira",
118
+ "gemini_prompt": "A dramatic scene showing neck pain relief",
119
+ "runway_prompt": "Person experiencing neck pain then relief",
120
+ "style": "commercial",
121
+ "aspect_ratio": "9:16",
122
+ "duration": 5
123
+ }
124
+ ```
125
+
126
+ Run:
127
  ```bash
128
+ python main.py --strategy my_strategy.json --script my_script.txt
 
 
 
 
129
  ```
130
 
131
  ### Batch Processing
 
132
  ```python
133
  import asyncio
134
  from automation import ContentAutomation
135
 
136
+ async def create_videos():
137
  automation = ContentAutomation(config)
138
 
139
+ scripts = ["script1.txt", "script2.txt", "script3.txt"]
 
 
 
 
 
140
  for script_file in scripts:
141
  with open(script_file) as f:
142
+ result = await automation.execute_pipeline(strategy, f.read())
143
+ print(f"Created: {result['final_url']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
+ asyncio.run(create_videos())
 
 
 
 
 
 
146
  ```
147
 
148
  ---
149
 
150
+ ## 💰 Pricing
151
 
152
+ ### Cost Per Video
153
+ | Service | Cost |
154
+ |---------|------|
155
+ | RunwayML (5s video) | ~$0.50 |
156
+ | Gemini API | ~$0.001 |
157
+ | Google TTS | ~$0.001 |
158
+ | Cloud Storage | ~$0.001 |
159
+ | **Total** | **~$0.50** |
160
 
161
+ ### Monthly Estimate (100 videos)
162
+ - **RunwayML**: $50
163
+ - **Other services**: $2-5
164
+ - **Total**: ~$52-55/month
 
 
 
 
 
 
 
165
 
166
+ ---
167
 
168
+ ## 🏗️ How It Works
 
 
 
 
 
 
 
 
169
 
170
+ ### Pipeline Steps
171
+ 1. **Asset Generation** (30-60s)
172
+ - AI creates hook video from prompt
173
+ - Selects 3 relevant product videos
174
+ - Generates voice-over from script
175
+ - Picks background music
176
 
177
+ 2. **Video Composition** (10-20s)
178
+ - Merges all video clips
179
+ - Adds audio tracks and music
180
+ - Applies transitions
181
 
182
+ 3. **Subtitles** (5-10s)
183
+ - Generates animated subtitles
184
+ - Times them to voice-over
 
185
 
186
+ 4. **Cloud Upload** (5-15s)
187
+ - Uploads to Google Cloud Storage
188
+ - Returns public URL
189
 
190
+ ### Output Specifications
191
+ - **Format**: MP4, H.264
192
+ - **Aspect Ratio**: 9:16 (vertical)
193
+ - **Duration**: 15 seconds max
194
+ - **Resolution**: 1080x1920
195
+ - **Audio**: 44.1kHz, stereo
196
 
197
+ ---
198
 
199
+ ## 🔧 Technical Details
200
 
201
+ ### Project Structure
202
+ ```
203
+ somira-automation/
204
+ ├── main.py # CLI entry point
205
+ ├── automation.py # Pipeline orchestrator
206
+ ├── api_clients.py # Gemini, RunwayML, TTS, GCS
207
+ ├── video_renderer.py # Video processing engine
208
+ ├── asset_selector.py # AI video selection
209
+ ├── utils.py # Logging & utilities
210
+ ├── requirements.txt # Python dependencies
211
+ └── config/
212
+ ├── api_keys.yaml # API configurations
213
+ └── content_strategies.yaml
214
+ ```
215
 
216
+ ### Key Dependencies
217
+ - `moviepy` - Video editing and composition
218
+ - `google-generativeai` - Gemini API client
219
+ - `google-cloud-texttospeech` - TTS service
220
+ - `google-cloud-storage` - Cloud storage
221
+ - `aiohttp` - Async HTTP requests
222
+ - `pandas` - Data processing
223
 
224
+ ### API Requirements
225
+ - **Gemini**: Free tier available
226
+ - **RunwayML**: $10 minimum deposit
227
+ - **Google Cloud**: $300 free credits for new accounts
228
+ - **Storage**: 5GB free tier
229
 
230
  ---
231
 
232
+ ## 🐛 Troubleshooting
233
 
234
+ ### Common Issues
235
 
236
+ **"API key not found"**
237
+ - Check `.env` file exists and has correct keys
238
+ - Restart terminal after adding keys to `.env`
 
 
239
 
240
+ **"Insufficient RunwayML credits"**
241
+ - Add credits at https://dev.runwayml.com/
242
+ - Minimum $10 required
243
 
244
+ **"Google Cloud permission denied"**
245
+ - Verify service account JSON path in `.env`
246
+ - Check service account has "Storage Admin" role
247
 
248
+ **"Module not found"**
249
+ ```bash
250
+ pip install -r requirements.txt
251
  ```
252
+
253
+ **Videos taking too long**
254
+ - RunwayML generation takes 30-60 seconds
255
+ - Use `--test` for quick verification
256
+
257
+ ### Performance Tips
258
+ - Keep scripts under 200 characters for optimal TTS
259
+ - Use specific, visual prompts for better AI videos
260
+ - Test with `--test` flag before full runs
261
+ - Monitor API usage in respective dashboards
 
 
 
 
262
 
263
  ---
264
 
265
+ ## 📞 Support
266
 
267
+ ### Debugging
268
+ - Run with `--verbose` for detailed logs
269
+ - Check console output for specific error messages
270
+ - Verify all APIs are enabled in their consoles
 
 
271
 
272
+ ### Cost Control
273
+ - Use `--test` frequently during development
274
+ - Set billing alerts in Google Cloud & RunwayML
275
+ - Monitor usage in API dashboards
276
 
277
+ ### Security
278
+ - ✅ Never commit `.env` file (included in `.gitignore`)
279
+ - ✅ Use environment variables for all keys
280
+ - ✅ Rotate API keys every 90 days
281
+ - ❌ Never hardcode keys in source files
282
 
283
+ ---
284
 
285
+ ## 🎉 Next Steps
 
 
 
286
 
287
+ 1. Complete API setup
288
+ 2. Run `python main.py --health-check`
289
+ 3. Test with `python main.py --test`
290
+ 4. Generate first video with `python main.py`
291
+ 5. 🚀 Customize scripts and strategies for your products
292
+ 6. 📈 Scale with batch processing for multiple videos
293
 
294
+ **Need help?** Check the error messages in console - they're designed to be helpful and specific about what went wrong.
 
 
295
 
296
+ ---
 
 
297
 
298
+ *Happy video generating! 🎬*
config/api_keys.yaml CHANGED
@@ -1,17 +1,24 @@
1
- # API Configuration
2
  gemini:
3
  base_url: "https://generativelanguage.googleapis.com/v1beta"
4
- model: "gemini-pro"
5
 
6
  runwayml:
7
  base_url: "https://api.runwayml.com/v1"
8
  timeout: 300
9
 
 
 
 
 
10
  tts:
11
- provider: "azure" # or "google", "amazon"
12
- voice: "en-US-AriaNeural"
13
- rate: "medium"
14
 
15
  gcs:
16
  bucket: "somira-videos"
17
  video_prefix: "automated-content/"
 
 
 
 
 
 
 
1
  gemini:
2
  base_url: "https://generativelanguage.googleapis.com/v1beta"
3
+ model: "gemini-2.0-flash-exp"
4
 
5
  runwayml:
6
  base_url: "https://api.runwayml.com/v1"
7
  timeout: 300
8
 
9
+ deepseek:
10
+ base_url: "https://api.deepseek.com/v1"
11
+ model: "deepseek-chat"
12
+
13
  tts:
14
+ provider: "google"
15
+ voice: "en-US-Neural2-F"
 
16
 
17
  gcs:
18
  bucket: "somira-videos"
19
  video_prefix: "automated-content/"
20
+
21
+ video:
22
+ max_duration: 15
23
+ aspect_ratio: "9:16"
24
+ target_resolution: "1080x1920"
requirements.txt CHANGED
@@ -1,17 +1,56 @@
1
- # Core async HTTP
2
- aiohttp==3.9.5
3
  aiofiles==23.2.1
4
-
5
- # Google AI (Gemini)
6
- google-generativeai==0.8.3
7
-
8
- # Google Cloud Services
 
 
 
 
 
 
 
 
 
 
9
  google-cloud-storage==2.18.2
10
  google-cloud-texttospeech==2.17.2
11
-
12
- # Environment variables
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  python-dotenv==1.0.1
14
-
15
- # Utilities
16
- asyncio==3.4.3
17
- typing-extensions==4.12.2
 
 
 
 
 
 
 
 
 
 
 
1
  aiofiles==23.2.1
2
+ aiohttp==3.9.5
3
+ aiosignal==1.4.0
4
+ annotated-types==0.7.0
5
+ attrs==25.3.0
6
+ cachetools==5.5.2
7
+ certifi==2025.8.3
8
+ charset-normalizer==3.4.3
9
+ decorator==4.4.2
10
+ frozenlist==1.7.0
11
+ google-ai-generativelanguage==0.6.10
12
+ google-api-core==2.25.1
13
+ google-api-python-client==2.184.0
14
+ google-auth==2.40.3
15
+ google-auth-httplib2==0.2.0
16
+ google-cloud-core==2.4.3
17
  google-cloud-storage==2.18.2
18
  google-cloud-texttospeech==2.17.2
19
+ google-crc32c==1.7.1
20
+ google-generativeai==0.8.3
21
+ google-resumable-media==2.7.2
22
+ googleapis-common-protos==1.70.0
23
+ grpcio==1.75.1
24
+ grpcio-status==1.71.2
25
+ httplib2==0.31.0
26
+ idna==3.10
27
+ imageio==2.37.0
28
+ imageio-ffmpeg==0.6.0
29
+ moviepy==1.0.3
30
+ multidict==6.6.4
31
+ numpy==1.26.4
32
+ pandas==2.3.3
33
+ pillow==11.3.0
34
+ proglog==0.1.12
35
+ propcache==0.4.0
36
+ proto-plus==1.26.1
37
+ protobuf==5.29.5
38
+ pyasn1==0.6.1
39
+ pyasn1_modules==0.4.2
40
+ pydantic==2.11.10
41
+ pydantic_core==2.33.2
42
+ pyparsing==3.2.5
43
+ python-dateutil==2.9.0.post0
44
  python-dotenv==1.0.1
45
+ pytz==2025.2
46
+ PyYAML==6.0.3
47
+ requests==2.32.5
48
+ rsa==4.9.1
49
+ six==1.17.0
50
+ tqdm==4.67.1
51
+ typing-inspection==0.4.2
52
+ typing_extensions==4.15.0
53
+ tzdata==2025.2
54
+ uritemplate==4.2.0
55
+ urllib3==2.5.0
56
+ yarl==1.21.0
src/api_clients.py CHANGED
@@ -5,7 +5,7 @@ import aiohttp
5
  import json
6
  import os
7
  from typing import Dict, List, Optional
8
- from google import genai
9
  from google.cloud import storage, texttospeech
10
  import asyncio
11
  from utils import logger
@@ -16,9 +16,8 @@ class APIClients:
16
  self.config = config
17
 
18
  # Initialize Gemini client
19
- self.gemini_client = genai.Client(
20
- api_key=config.get('gemini_api_key') or os.getenv('GEMINI_API_KEY')
21
- )
22
 
23
  # Initialize GCS client
24
  self.gcs_client = storage.Client()
@@ -57,11 +56,9 @@ class APIClients:
57
 
58
  Return only the enhanced prompt, nothing else.
59
  """
60
-
61
- response = self.gemini_client.models.generate_content(
62
- model="gemini-2.0-flash-exp",
63
- contents=enhancement_instruction
64
- )
65
 
66
  enhanced_prompt = response.text.strip()
67
  logger.info(f"Enhanced prompt: {enhanced_prompt[:100]}...")
@@ -75,20 +72,14 @@ class APIClients:
75
  async def generate_video(self, prompt: str, duration: int = 10) -> Dict:
76
  """
77
  Generate video using RunwayML Gen-4 API
78
-
79
- Args:
80
- prompt: Text prompt for video generation
81
- duration: Video duration in seconds (5 or 10)
82
-
83
- Returns:
84
- Dict with video URL and metadata
85
  """
86
  try:
87
  logger.info(f"Generating video with RunwayML: {prompt[:100]}...")
88
 
89
  headers = {
90
  "Authorization": f"Bearer {self.runway_api_key}",
91
- "Content-Type": "application/json"
 
92
  }
93
 
94
  payload = {
@@ -151,20 +142,13 @@ class APIClients:
151
 
152
  async def generate_tts(self, text: str, voice_name: Optional[str] = None) -> Dict:
153
  """
154
- Generate TTS audio using Azure Cognitive Services
155
-
156
- Args:
157
- text: Text to convert to speech
158
- voice_name: Azure voice name (default from config)
159
-
160
- Returns:
161
- Dict with audio URL, duration, and lip sync data
162
  """
163
  try:
164
  logger.info(f"Generating TTS for text: {text[:100]}...")
165
 
166
  if not voice_name:
167
- voice_name = self.config.get('default_voice', 'en-US-AriaNeural')
168
 
169
  # Configure the speech synthesis request
170
  synthesis_input = texttospeech.SynthesisInput(text=text)
@@ -184,15 +168,16 @@ class APIClients:
184
  pitch=0.0
185
  )
186
 
187
- # Perform the text-to-speech request
188
  response = self.tts_client.synthesize_speech(
189
  input=synthesis_input,
190
  voice=voice,
191
- audio_config=audio_config,
192
- enable_time_pointing=[texttospeech.TimePointingType.SSML_MARK]
193
  )
194
 
195
  # Save audio to temporary file
 
196
  audio_filename = f"tts_{hash(text)}.mp3"
197
  audio_path = f"/tmp/{audio_filename}"
198
 
@@ -202,23 +187,111 @@ class APIClients:
202
  # Upload to GCS
203
  audio_url = await self.store_in_gcs(audio_path, 'audio')
204
 
205
- # Extract timing information for lip sync
206
- lip_sync_data = self._extract_timing_data(response)
207
-
208
  logger.info(f"TTS generated successfully: {audio_url}")
209
 
210
  return {
211
  'audio_url': audio_url,
212
  'duration': len(response.audio_content) / 32000, # Approximate
213
- 'lip_sync_data': lip_sync_data,
214
  'voice': voice_name,
215
- 'text': text
 
216
  }
217
 
218
  except Exception as e:
219
  logger.error(f"Error generating TTS: {e}")
220
  raise
221
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  async def select_videos(self, tts_script: str, count: int = 3) -> List[Dict]:
223
  """
224
  AI agent selects videos based on script using Gemini
@@ -246,11 +319,8 @@ class APIClients:
246
  Return as JSON array with format:
247
  [{{"keyword": "...", "timing": "0-5", "style": "..."}}, ...]
248
  """
249
-
250
- response = self.gemini_client.models.generate_content(
251
- model="gemini-2.0-flash-exp",
252
- contents=analysis_prompt
253
- )
254
 
255
  # Parse Gemini response
256
  try:
 
5
  import json
6
  import os
7
  from typing import Dict, List, Optional
8
+ import google.generativeai as genai
9
  from google.cloud import storage, texttospeech
10
  import asyncio
11
  from utils import logger
 
16
  self.config = config
17
 
18
  # Initialize Gemini client
19
+ self.gemini_client = genai
20
+ genai.configure(api_key=config.get('gemini_api_key') or os.getenv('GEMINI_API_KEY'))
 
21
 
22
  # Initialize GCS client
23
  self.gcs_client = storage.Client()
 
56
 
57
  Return only the enhanced prompt, nothing else.
58
  """
59
+
60
+ model = genai.GenerativeModel('gemini-2.0-flash-exp')
61
+ response = model.generate_content(enhancement_instruction)
 
 
62
 
63
  enhanced_prompt = response.text.strip()
64
  logger.info(f"Enhanced prompt: {enhanced_prompt[:100]}...")
 
72
  async def generate_video(self, prompt: str, duration: int = 10) -> Dict:
73
  """
74
  Generate video using RunwayML Gen-4 API
 
 
 
 
 
 
 
75
  """
76
  try:
77
  logger.info(f"Generating video with RunwayML: {prompt[:100]}...")
78
 
79
  headers = {
80
  "Authorization": f"Bearer {self.runway_api_key}",
81
+ "Content-Type": "application/json",
82
+ "X-Runway-Version": "1.0.0" # Add this required header
83
  }
84
 
85
  payload = {
 
142
 
143
  async def generate_tts(self, text: str, voice_name: Optional[str] = None) -> Dict:
144
  """
145
+ Generate TTS audio using Google Cloud TTS
 
 
 
 
 
 
 
146
  """
147
  try:
148
  logger.info(f"Generating TTS for text: {text[:100]}...")
149
 
150
  if not voice_name:
151
+ voice_name = self.config.get('default_voice', 'en-US-Neural2-F')
152
 
153
  # Configure the speech synthesis request
154
  synthesis_input = texttospeech.SynthesisInput(text=text)
 
168
  pitch=0.0
169
  )
170
 
171
+ # Remove TimePointingType as it's not available in this version
172
  response = self.tts_client.synthesize_speech(
173
  input=synthesis_input,
174
  voice=voice,
175
+ audio_config=audio_config
176
+ # Remove: enable_time_pointing=[texttospeech.TimePointingType.SSML_MARK]
177
  )
178
 
179
  # Save audio to temporary file
180
+ import tempfile
181
  audio_filename = f"tts_{hash(text)}.mp3"
182
  audio_path = f"/tmp/{audio_filename}"
183
 
 
187
  # Upload to GCS
188
  audio_url = await self.store_in_gcs(audio_path, 'audio')
189
 
190
+ # Remove lip sync data extraction
 
 
191
  logger.info(f"TTS generated successfully: {audio_url}")
192
 
193
  return {
194
  'audio_url': audio_url,
195
  'duration': len(response.audio_content) / 32000, # Approximate
 
196
  'voice': voice_name,
197
+ 'text': text,
198
+ 'local_path': audio_path # Add local path directly
199
  }
200
 
201
  except Exception as e:
202
  logger.error(f"Error generating TTS: {e}")
203
  raise
204
+
205
async def download_file(self, url: str, filename: str) -> str:
    """Download ``url`` into the system temp directory and return the local path.

    The response body is streamed to disk in chunks rather than buffered
    whole, and it is written to a ``.part`` sibling first so that a failed
    or interrupted transfer never leaves a truncated file under the final
    name.

    Args:
        url: Location to fetch with an HTTP GET.
        filename: Name of the file to create inside ``tempfile.gettempdir()``.

    Returns:
        The path of the downloaded file, as a string.

    Raises:
        Exception: If the server answers with a status other than 200, or
            if the request or the write fails (the error is logged and
            re-raised unchanged).
    """
    import aiohttp
    import tempfile
    from pathlib import Path

    local_path = Path(tempfile.gettempdir()) / filename
    # Written first, renamed on success: readers never see a partial download.
    part_path = local_path.with_name(local_path.name + ".part")

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    raise Exception(f"Download failed: {response.status}")
                with open(part_path, 'wb') as f:
                    # Stream in 64 KiB chunks so the whole body is never held in memory.
                    async for chunk in response.content.iter_chunked(64 * 1024):
                        f.write(chunk)
        part_path.replace(local_path)
        logger.info(f"✓ Downloaded {filename} from {url}")
        return str(local_path)
    except Exception as e:
        logger.error(f"Failed to download {url}: {e}")
        # Drop whatever was written so far; the final path is left untouched.
        part_path.unlink(missing_ok=True)
        raise
226
+
227
async def health_check(self) -> Dict[str, bool]:
    """Report which external services look usable.

    Gemini is probed with a real ``enhance_prompt`` call and Google Cloud
    Storage with a bucket-existence request. RunwayML and TTS are only
    checked for configuration (a plausible API key / an initialised
    client); no request is sent to them. DeepSeek is optional: its status
    is logged but not included in the result.

    Returns:
        Dict with the keys ``gemini``, ``runwayml``, ``tts`` and ``gcs``,
        each mapped to ``True`` when the corresponding check passed.
    """
    logger.info("🏥 Running health check...")

    health = {
        'gemini': False,
        'runwayml': False,
        'tts': False,
        'gcs': False
    }

    try:
        # Test Gemini with a simple prompt
        test_prompt = "Hello"
        enhanced = await self.enhance_prompt(test_prompt)
        if enhanced:
            health['gemini'] = True
            logger.info(" ✅ Gemini API: Connected")
        else:
            logger.error(" ❌ Gemini API: No response")
    except Exception as e:
        logger.error(f" ❌ Gemini API: {e}")

    try:
        # Bucket.exists() reports a missing bucket by returning False rather
        # than raising, so the returned flag has to be checked.
        if self.gcs_bucket.exists():
            health['gcs'] = True
            logger.info(" ✅ Google Cloud Storage: Connected")
        else:
            logger.error(" ❌ Google Cloud Storage: Bucket not found")
    except Exception as e:
        logger.error(f" ❌ Google Cloud Storage: {e}")

    # Check if API keys are configured (without making actual API calls)
    if self.runway_api_key and len(self.runway_api_key) > 10:
        health['runwayml'] = True
        logger.info(" ✅ RunwayML API: Configured")
    else:
        logger.error(" ❌ RunwayML API: Not configured or invalid key")

    if self.tts_client:
        health['tts'] = True
        logger.info(" ✅ TTS API: Configured")
    else:
        logger.error(" ❌ TTS API: Not configured")

    # DeepSeek is optional, so a missing key is a warning and does not
    # affect the overall status.
    deepseek_key = self.config.get('deepseek_api_key')
    if deepseek_key and len(deepseek_key) > 10:
        logger.info(" ✅ DeepSeek API: Configured")
    else:
        logger.warning(" ⚠️ DeepSeek API: Not configured")

    all_healthy = all(health.values())
    status = "✅ All systems operational!" if all_healthy else "⚠️ Some services have issues"
    logger.info(f"\n{status}")

    return health
294
+
295
  async def select_videos(self, tts_script: str, count: int = 3) -> List[Dict]:
296
  """
297
  AI agent selects videos based on script using Gemini
 
319
  Return as JSON array with format:
320
  [{{"keyword": "...", "timing": "0-5", "style": "..."}}, ...]
321
  """
322
+ model = genai.GenerativeModel('gemini-2.0-flash-exp')
323
+ response = model.generate_content(analysis_prompt)
 
 
 
324
 
325
  # Parse Gemini response
326
  try:
src/asset_selector.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI-powered asset selection using DeepSeek for contextual video matching
3
+ """
4
+ import pandas as pd
5
+ import aiohttp
6
+ import json
7
+ from typing import List, Dict, Optional
8
+ from utils import logger
9
+
10
+
11
class AssetSelector:
    """Pick product clips and a background track for one generated advert.

    Clip selection first asks the DeepSeek chat API to match the voice-over
    script against the clip catalogue, and falls back to a fixed clip order
    when that call is unavailable, fails, or returns nothing usable.
    """

    # Chat-completions endpoint used for AI-assisted clip selection.
    _DEEPSEEK_URL = "https://api.deepseek.com/v1/chat/completions"

    # Clips used when AI selection is unavailable, in playback order.
    _FALLBACK_VIDEOS = (
        {
            'url': 'https://storage.googleapis.com/somira/Somira%20Massager.mp4',
            'duration': 2,
            'reason': 'Product showcase',
            'alignment': 'product',
            'energy': 5
        },
        {
            'url': 'https://storage.googleapis.com/somira/FemaleWomenPuttingOnNeckMassagerr.mp4',
            'duration': 2,
            'reason': 'Usage demonstration',
            'alignment': 'usage',
            'energy': 35
        },
        {
            'url': 'https://storage.googleapis.com/somira/PersonEnjoyingTheNeckMassager.mp4',
            'duration': 1.5,
            'reason': 'User satisfaction',
            'alignment': 'satisfaction',
            'energy': 40
        },
    )

    def __init__(self, config: Dict):
        """Store ``config`` and build the in-memory clip and music catalogues."""
        self.config = config
        self.video_library = self._load_video_library()
        self.audio_library = self._load_audio_library()

    def _load_video_library(self) -> pd.DataFrame:
        """Return the clip catalogue as a DataFrame.

        The catalogue is currently hard-coded here (it is not read from a
        file). Columns: ``url``, ``duration`` (seconds), ``alignment``
        (comma-separated themes), ``energy`` and ``description``.
        """
        try:
            video_data = [
                {
                    'url': 'https://storage.googleapis.com/somira/Somira%20Massager.mp4',
                    'duration': 2,
                    'alignment': 'product mention, solution, features',
                    'energy': 5,
                    'description': 'Product showcase'
                },
                {
                    'url': 'https://storage.googleapis.com/somira/FemaleWomenPuttingOnNeckMassagerr.mp4',
                    'duration': 2,
                    'alignment': 'using the product, turning on, operation',
                    'energy': 35,
                    'description': 'Product usage demonstration'
                },
                {
                    'url': 'https://storage.googleapis.com/somira/PersonEnjoyingTheNeckMassager.mp4',
                    'duration': 1.5,
                    'alignment': 'comfort, relaxation, satisfaction',
                    'energy': 40,
                    'description': 'User satisfaction'
                },
                # Add more videos as needed for testing
            ]

            return pd.DataFrame(video_data)

        except Exception as e:
            logger.error(f"Failed to load video library: {e}")
            return pd.DataFrame()

    def _load_audio_library(self) -> List[str]:
        """Return the background-music URLs (``1.mp3`` .. ``26.mp3``)."""
        # NOTE(review): this yields 26 tracks, while .env.example sets
        # AUDIO_LIBRARY_SIZE=27 - confirm whether 27.mp3 exists in the bucket.
        return [f"https://storage.googleapis.com/somira/{i}.mp3" for i in range(1, 27)]

    async def select_videos(self, tts_script: str, max_duration: int = 10) -> List[Dict]:
        """Select clips for ``tts_script``, preferring the AI-based choice.

        Args:
            tts_script: The script to analyze.
            max_duration: Maximum total duration (seconds) of the selected clips.

        Returns:
            List of clip dicts with the keys ``url``, ``duration``,
            ``reason``, ``alignment`` and ``energy``. The fixed fallback
            list is returned when the AI selection is empty or raises.
        """
        try:
            logger.info(f"🤖 AI video selection for script: {tts_script[:100]}...")

            # Use DeepSeek for intelligent selection
            selected_videos = await self._analyze_with_deepseek(tts_script, max_duration)

            if not selected_videos:
                logger.warning("⚠️ AI selection failed, using fallback")
                selected_videos = self._fallback_selection(tts_script, max_duration)

            total_duration = sum(v['duration'] for v in selected_videos)
            logger.info(f"✓ Selected {len(selected_videos)} videos, total: {total_duration}s")

            return selected_videos

        except Exception as e:
            logger.error(f"❌ Video selection failed: {e}")
            return self._fallback_selection(tts_script, max_duration)

    async def _analyze_with_deepseek(self, tts_script: str, max_duration: int) -> List[Dict]:
        """Ask DeepSeek which catalogue clips fit ``tts_script``.

        Returns an empty list (so the caller falls back) when no API key is
        configured, the API answers with a non-200 status, or the reply
        cannot be parsed.
        """
        api_key = self.config.get('deepseek_api_key')
        if not api_key:
            # Without a key the request could only fail with an auth error.
            logger.warning("DeepSeek API key not configured; skipping AI selection")
            return []

        try:
            # Describe every clip by its position so the model can answer with indices.
            video_context = "\n".join(
                f"{i}. {row['description']} - {row['duration']}s - Alignment: {row['alignment']}"
                for i, row in enumerate(self.video_library.to_dict('records'))
            )

            prompt = f"""
            TTS Script: "{tts_script}"

            Available Videos:
            {video_context}

            Select 3-4 videos that best match the script content. Consider:
            - Video alignment descriptions
            - Logical flow (problem -> solution -> result)
            - Total duration under {max_duration} seconds
            - Energy level appropriateness

            Return JSON format:
            {{
                "selected_videos": [
                    {{
                        "index": 0,
                        "reason": "Matches product mention in script",
                        "start_time": 0
                    }}
                ],
                "total_duration": 8,
                "rationale": "Overall selection strategy"
            }}
            """

            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": "deepseek-chat",
                "messages": [
                    {"role": "system", "content": "You are a video editor AI that selects the most relevant videos for advertising content."},
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.3,
                "max_tokens": 2000
            }

            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self._DEEPSEEK_URL,
                    headers=headers,
                    json=payload
                ) as response:
                    if response.status != 200:
                        logger.error(f"DeepSeek API error: {response.status}")
                        return []
                    result = await response.json()

            content = result['choices'][0]['message']['content']
            return self._map_selection(self._parse_json_reply(content), max_duration)

        except Exception as e:
            logger.error(f"DeepSeek analysis failed: {e}")
            return []

    @staticmethod
    def _parse_json_reply(content: str) -> Dict:
        """Decode the JSON object contained in a model reply.

        Only the text between the first ``{`` and the last ``}`` is parsed,
        so a reply wrapped in markdown code fences or surrounded by prose is
        still accepted.

        Raises:
            ValueError: If the reply contains no JSON object or the object
                is malformed (``json.JSONDecodeError`` is a ``ValueError``).
        """
        start = content.find('{')
        end = content.rfind('}')
        if start == -1 or end < start:
            raise ValueError("No JSON object found in model reply")
        return json.loads(content[start:end + 1])

    def _map_selection(self, selection: Dict, max_duration: int) -> List[Dict]:
        """Translate the model's index-based selection into clip dicts.

        Entries are skipped when they are malformed, when their index is
        not an integer position inside the catalogue (negative indices
        included), or when adding the clip would push the running total
        past ``max_duration``.
        """
        catalogue = self.video_library.to_dict('records')
        chosen = []
        running_total = 0

        for item in selection.get('selected_videos', []):
            if not isinstance(item, dict):
                continue
            index = item.get('index')
            # bool is a subclass of int; reject it explicitly.
            if isinstance(index, bool) or not isinstance(index, int):
                continue
            if not 0 <= index < len(catalogue):
                continue

            video = catalogue[index]
            if running_total + video['duration'] > max_duration:
                continue

            running_total += video['duration']
            chosen.append({
                'url': video['url'],
                'duration': video['duration'],
                'reason': item.get('reason', ''),
                'alignment': video['alignment'],
                'energy': video['energy']
            })

        return chosen

    def _fallback_selection(self, tts_script: str, max_duration: int) -> List[Dict]:
        """Return the fixed fallback clips that fit within ``max_duration``.

        The clips are taken in their fixed order; a clip that would exceed
        the budget is skipped and later, shorter clips may still be added.
        ``tts_script`` is accepted for interface compatibility but is not
        inspected - no keyword matching is performed.
        """
        selected = []
        total_duration = 0

        for video in self._FALLBACK_VIDEOS:
            if total_duration + video['duration'] <= max_duration:
                # Hand out copies so callers cannot mutate the shared template.
                selected.append(dict(video))
                total_duration += video['duration']

        return selected[:3]  # Max 3 videos

    def _find_video_for_category(self, category: str) -> Optional[Dict]:
        """Return the first catalogue clip whose alignment text contains ``category``.

        The match is a plain substring test against the lower-cased
        alignment text; ``None`` is returned when no clip matches.
        """
        for row in self.video_library.to_dict('records'):
            if category in str(row['alignment']).lower():
                return {
                    'url': row['url'],
                    'duration': row['duration'],
                    'reason': f"Matches {category} category",
                    'alignment': row['alignment'],
                    'energy': row['energy']
                }
        return None

    def select_background_music(self) -> str:
        """Return the URL of a randomly chosen background track."""
        import random
        # Uniform random pick; consecutive calls may return the same track.
        selected = random.choice(self.audio_library)
        logger.info(f"🎵 Selected background music: {selected}")
        return selected
src/automation.py CHANGED
@@ -1,12 +1,15 @@
1
  """
2
- Main automation orchestrator with full implementation
3
  """
4
  import asyncio
5
  import os
6
  import time
7
  from typing import Dict, List, Optional, Any
 
 
8
  from api_clients import APIClients
9
  from video_renderer import VideoRenderer
 
10
  from utils import logger
11
 
12
 
@@ -15,393 +18,391 @@ class ContentAutomation:
15
  self.config = config
16
  self.api_clients = APIClients(config)
17
  self.video_renderer = VideoRenderer(config)
18
- self.current_audio_index = 0
19
  self.pipeline_start_time = None
 
 
 
 
20
 
21
- async def execute_pipeline(
22
- self,
23
- content_strategy: Dict[str, str],
24
- tts_script: str,
25
- video_config: Optional[Dict] = None
26
- ) -> Dict[str, Any]:
27
- """
28
- Execute the complete automation pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- Args:
31
- content_strategy: Dict with prompts and style preferences
32
- tts_script: Text script for voice-over
33
- video_config: Optional video rendering configuration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- Returns:
36
- Dict with final video URL and metadata
 
 
 
 
 
 
 
 
 
 
 
37
  """
38
  self.pipeline_start_time = time.time()
39
- logger.info("=" * 60)
40
- logger.info("🚀 Starting Content Automation Pipeline")
41
- logger.info("=" * 60)
42
 
43
  try:
44
- # Step 1: Generate all assets simultaneously
45
- logger.info("\n📦 STEP 1: Generating Assets (Parallel Execution)")
46
- assets = await self.execute_step_1(content_strategy, tts_script)
47
- self._log_step_completion(1, assets)
48
-
49
- # Validate critical assets
50
- if not self._validate_assets(assets):
51
- raise Exception("Critical assets failed to generate")
52
-
53
- # Step 2: Merge videos and audio
54
- logger.info("\n🎬 STEP 2: Rendering Video")
55
- rendered_video = await self.video_renderer.render_video(
56
- assets,
57
- video_config or {}
58
- )
59
- self._log_step_completion(2, {'rendered_video': rendered_video})
60
-
61
- # Step 3: Add subtitles
62
- logger.info("\n📝 STEP 3: Adding Subtitles")
63
- subtitled_video = await self.video_renderer.add_subtitles(
64
- rendered_video,
65
- tts_script,
66
- assets.get('tts_audio', {})
67
- )
68
- self._log_step_completion(3, {'subtitled_video': subtitled_video})
69
 
70
- # Step 4: Store final video in GCS
71
- logger.info("\n☁️ STEP 4: Uploading to Cloud Storage")
72
- final_url = await self.api_clients.store_in_gcs(
73
- subtitled_video,
74
- content_type='video'
75
- )
76
- self._log_step_completion(4, {'final_url': final_url})
 
 
 
 
77
 
78
- # Pipeline completion summary
 
 
 
 
79
  elapsed_time = time.time() - self.pipeline_start_time
80
- logger.info("\n" + "=" * 60)
81
- logger.info(f"✅ Pipeline Completed Successfully in {elapsed_time:.2f}s")
82
- logger.info(f"📹 Final Video: {final_url}")
83
- logger.info("=" * 60)
84
 
85
  return {
86
  'success': True,
87
  'final_url': final_url,
88
- 'local_path': subtitled_video,
89
- 'assets': assets,
90
  'duration': elapsed_time,
91
- 'metadata': {
92
- 'content_strategy': content_strategy,
93
- 'tts_script': tts_script,
94
- 'timestamp': time.time()
95
  }
96
  }
97
 
98
  except Exception as e:
99
  elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
100
- logger.error(f"\n❌ Pipeline Failed after {elapsed_time:.2f}s: {e}")
101
 
102
  return {
103
  'success': False,
104
  'error': str(e),
105
- 'duration': elapsed_time,
106
- 'partial_assets': locals().get('assets', {})
107
  }
108
-
109
- async def execute_step_1(
110
- self,
111
- content_strategy: Dict[str, str],
112
- tts_script: str
113
- ) -> Dict[str, Any]:
114
- """
115
- Execute all step 1 processes simultaneously for maximum efficiency
116
-
117
- Args:
118
- content_strategy: Content generation strategy
119
- tts_script: Text for TTS generation
120
-
121
- Returns:
122
- Dict containing all generated assets
123
- """
124
- logger.info("⚡ Launching parallel tasks...")
125
-
126
- # Create all tasks
127
  tasks = {
128
- 'hook_video': self.generate_hook_video(content_strategy),
129
- 'background_music': self.select_background_music(),
130
- 'selected_videos': self.select_videos_from_library(tts_script),
131
- 'tts_audio': self.generate_tts_audio(tts_script)
132
  }
133
 
134
- # Execute all tasks concurrently
135
- start_time = time.time()
136
- results = await asyncio.gather(
137
- *tasks.values(),
138
- return_exceptions=True
139
- )
140
- execution_time = time.time() - start_time
141
-
142
- # Map results back to task names
143
- assets = {}
144
- for (task_name, _), result in zip(tasks.items(), results):
145
- if isinstance(result, Exception):
146
- logger.error(f"❌ {task_name} failed: {result}")
147
- assets[task_name] = None
148
- else:
149
  logger.info(f"✓ {task_name} completed")
150
- assets[task_name] = result
 
 
151
 
152
- logger.info(f"\n⚡ Parallel execution completed in {execution_time:.2f}s")
153
- return assets
154
-
155
- async def generate_hook_video(self, strategy: Dict[str, str]) -> Optional[Dict]:
156
- """
157
- Generate hook video using AI APIs with prompt enhancement
158
 
159
- Args:
160
- strategy: Content strategy with prompts
161
-
162
- Returns:
163
- Dict with video URL and metadata, or None if failed
164
- """
165
  try:
166
- logger.info("🎥 Generating hook video...")
 
 
 
167
 
168
- # Choose the right prompt
169
- base_prompt = strategy.get('runway_prompt') or strategy.get('gemini_prompt')
170
- if not base_prompt:
171
- raise ValueError("No prompt found in strategy")
172
 
173
- # Enhance prompt with Gemini for better video quality
174
- logger.info(" → Enhancing prompt with Gemini AI...")
175
- enhanced_prompt = await self.api_clients.enhance_prompt(base_prompt)
176
-
177
- # Generate video with RunwayML
178
- logger.info(" → Generating video with RunwayML Gen-4...")
179
  video_data = await self.api_clients.generate_video(
180
  enhanced_prompt,
181
- duration=strategy.get('duration', 5) # Default 5s for hook
182
  )
183
 
184
- logger.info(f" ✓ Hook video generated: {video_data.get('task_id', 'N/A')}")
185
  return video_data
186
 
187
  except Exception as e:
188
- logger.error(f"Hook video generation failed: {e}")
189
  return None
190
-
191
- async def select_background_music(self) -> str:
192
- """
193
- Select background music from library using linear rotation
194
-
195
- Returns:
196
- URL to background music file
197
- """
198
- try:
199
- logger.info("🎵 Selecting background music...")
200
-
201
- # Linear selection with rotation
202
- audio_index = self.current_audio_index
203
- self.current_audio_index = (self.current_audio_index + 1) % self.config['audio_library_size']
204
-
205
- # Construct GCS URL
206
- bucket_name = self.config.get('gcs_bucket_name', 'somira-videos')
207
- audio_url = f"gs://{bucket_name}/audio-library/audio{audio_index + 1}.mp3"
208
-
209
- logger.info(f" ✓ Selected audio #{audio_index + 1}: {audio_url}")
210
- return audio_url
211
-
212
- except Exception as e:
213
- logger.error(f" ✗ Music selection failed: {e}")
214
- # Return default/fallback audio
215
- return f"gs://{self.config.get('gcs_bucket_name')}/audio-library/default.mp3"
216
-
217
- async def select_videos_from_library(self, tts_script: str) -> List[Dict]:
218
- """
219
- AI agent selects 3 videos based on TTS script content
220
-
221
- Args:
222
- tts_script: The voice-over script to analyze
223
-
224
- Returns:
225
- List of selected video metadata dicts
226
- """
227
- try:
228
- logger.info("🎬 Selecting videos from library...")
229
- logger.info(f" → Analyzing script: {tts_script[:80]}...")
230
-
231
- # Use AI to select contextually relevant videos
232
- selected_videos = await self.api_clients.select_videos(tts_script, count=3)
233
-
234
- if not selected_videos:
235
- logger.warning(" ⚠ No videos selected, using fallback")
236
- return self._get_fallback_videos()
237
-
238
- logger.info(f" ✓ Selected {len(selected_videos)} videos:")
239
- for i, video in enumerate(selected_videos, 1):
240
- logger.info(f" {i}. {video.get('keyword', 'N/A')} - {video.get('reason', 'N/A')}")
241
-
242
- return selected_videos
243
-
244
- except Exception as e:
245
- logger.error(f" ✗ Video selection failed: {e}")
246
- return self._get_fallback_videos()
247
-
248
- async def generate_tts_audio(self, tts_script: str) -> Optional[Dict]:
249
- """
250
- Generate TTS audio with timing data for lip-sync and subtitles
251
 
252
- Args:
253
- tts_script: Text to convert to speech
254
-
255
- Returns:
256
- Dict with audio URL, duration, and timing data
257
- """
258
- try:
259
- logger.info("🎙️ Generating TTS audio...")
260
- logger.info(f" → Script length: {len(tts_script)} characters")
261
-
262
- # Get voice from config
263
- voice_name = self.config.get('default_voice', 'en-US-AriaNeural')
264
-
265
- # Generate TTS with timing data
266
- tts_result = await self.api_clients.generate_tts(
267
- tts_script,
268
- voice_name=voice_name
269
  )
270
-
271
- if tts_result:
272
- duration = tts_result.get('duration', 0)
273
- logger.info(f" ✓ TTS generated: {duration:.2f}s duration")
274
- logger.info(f" ✓ Audio URL: {tts_result.get('audio_url', 'N/A')}")
275
-
276
- return tts_result
277
-
278
- except Exception as e:
279
- logger.error(f" ✗ TTS generation failed: {e}")
280
- return None
281
-
282
- def _validate_assets(self, assets: Dict[str, Any]) -> bool:
283
- """
284
- Validate that critical assets were generated successfully
285
 
286
- Args:
287
- assets: Dict of generated assets
288
-
289
- Returns:
290
- True if valid, False otherwise
291
- """
292
- critical_assets = ['tts_audio', 'selected_videos']
293
- optional_assets = ['hook_video', 'background_music']
294
-
295
- # Check critical assets
296
- for asset_name in critical_assets:
297
- if not assets.get(asset_name):
298
- logger.error(f"❌ Critical asset missing: {asset_name}")
299
- return False
300
 
301
- # Warn about optional assets
302
- for asset_name in optional_assets:
303
- if not assets.get(asset_name):
304
- logger.warning(f"⚠️ Optional asset missing: {asset_name}")
305
-
306
- logger.info("✓ Asset validation passed")
307
- return True
308
-
309
- def _get_fallback_videos(self) -> List[Dict]:
310
- """
311
- Get fallback videos if AI selection fails
312
 
313
- Returns:
314
- List of default video selections
315
- """
316
- bucket_name = self.config.get('gcs_bucket_name', 'somira-videos')
317
- return [
318
- {
319
- 'id': 1,
320
- 'url': f"gs://{bucket_name}/library/video1.mp4",
321
- 'keyword': 'product',
322
- 'timing': '0-5',
323
- 'style': 'general',
324
- 'reason': 'Fallback selection'
325
- },
326
- {
327
- 'id': 15,
328
- 'url': f"gs://{bucket_name}/library/video15.mp4",
329
- 'keyword': 'lifestyle',
330
- 'timing': '5-10',
331
- 'style': 'general',
332
- 'reason': 'Fallback selection'
333
- },
334
- {
335
- 'id': 30,
336
- 'url': f"gs://{bucket_name}/library/video30.mp4",
337
- 'keyword': 'usage',
338
- 'timing': '10-15',
339
- 'style': 'general',
340
- 'reason': 'Fallback selection'
341
- }
342
- ]
343
-
344
- def _log_step_completion(self, step: int, data: Dict[str, Any]):
345
- """Log step completion with summary"""
346
- step_names = {
347
- 1: "Asset Generation",
348
- 2: "Video Rendering",
349
- 3: "Subtitle Addition",
350
- 4: "Cloud Upload"
351
- }
352
 
353
- elapsed = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
354
- logger.info(f"✓ Step {step} ({step_names.get(step, 'Unknown')}) completed [{elapsed:.2f}s total]")
355
-
 
 
 
 
 
 
 
 
 
 
356
  async def health_check(self) -> Dict[str, bool]:
357
- """
358
- Check health of all API connections
359
 
360
- Returns:
361
- Dict with service health status
362
- """
363
- logger.info("🏥 Running health check...")
364
-
365
- health = {
366
- 'gemini': False,
367
- 'runwayml': False,
368
- 'tts': False,
369
- 'gcs': False
370
- }
371
 
 
372
  try:
373
- # Test Gemini
374
- test_prompt = "Hello"
375
- await self.api_clients.enhance_prompt(test_prompt)
376
- health['gemini'] = True
377
- logger.info(" ✓ Gemini API: Connected")
378
  except Exception as e:
379
- logger.error(f" ✗ Gemini API: {e}")
 
380
 
 
381
  try:
382
- # Test GCS (just check bucket exists)
383
- bucket = self.api_clients.gcs_bucket
384
- bucket.exists()
385
- health['gcs'] = True
386
- logger.info(" ✓ Google Cloud Storage: Connected")
387
  except Exception as e:
388
- logger.error(f" ✗ Google Cloud Storage: {e}")
 
389
 
390
- # RunwayML and TTS are harder to test without using credits
391
- # So we just check if API keys are configured
392
- if self.api_clients.runway_api_key:
393
- health['runwayml'] = True
394
- logger.info(" ✓ RunwayML API: Configured")
395
- else:
396
- logger.error(" ✗ RunwayML API: Not configured")
397
 
398
- if self.api_clients.tts_client:
399
- health['tts'] = True
400
- logger.info(" ✓ TTS API: Configured")
 
 
 
 
 
 
 
401
  else:
402
- logger.error(" TTS API: Not configured")
 
 
 
 
 
 
403
 
404
- all_healthy = all(health.values())
405
- logger.info(f"\n{'✅' if all_healthy else '⚠️'} Health check {'passed' if all_healthy else 'failed'}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
 
407
- return health
 
 
 
 
 
 
 
1
  """
2
+ Main automation orchestrator with production-ready video pipeline
3
  """
4
  import asyncio
5
  import os
6
  import time
7
  from typing import Dict, List, Optional, Any
8
+ from pathlib import Path
9
+
10
  from api_clients import APIClients
11
  from video_renderer import VideoRenderer
12
+ from asset_selector import AssetSelector
13
  from utils import logger
14
 
15
 
 
18
  self.config = config
19
  self.api_clients = APIClients(config)
20
  self.video_renderer = VideoRenderer(config)
21
+ self.asset_selector = AssetSelector(config)
22
  self.pipeline_start_time = None
23
+
24
async def simple_demo(self):
    """Render a tiny self-contained demo video to smoke-test the renderer.

    Writes two 2-second solid-colour clips and two 4-second sine-tone WAV
    files under ``/tmp``, then hands them to
    ``self.video_renderer.render_video``. No external API is called.

    Returns:
        ``True`` if rendering completed, ``False`` if any step raised
        (the error and its traceback are logged).
    """
    logger.info("🎬 Starting Simple Demo with Audio Fix...")

    try:
        # --- 1. Video clips -------------------------------------------------
        logger.info("1. Creating video clips...")
        # Imported lazily inside the try so a missing moviepy install is
        # reported as a failed demo rather than an import-time crash.
        from moviepy.editor import ColorClip

        def write_color_clip(path, color):
            """Encode a 2-second 640x480 solid-colour clip at 24 fps to ``path``."""
            clip = ColorClip(size=(640, 480), color=color, duration=2).set_fps(24)
            try:
                clip.write_videofile(path, verbose=False, logger=None)
            finally:
                # Release the clip even when encoding fails.
                clip.close()

        clip1_path = '/tmp/simple_red.mp4'
        clip2_path = '/tmp/simple_green.mp4'
        write_color_clip(clip1_path, (255, 0, 0))
        write_color_clip(clip2_path, (0, 255, 0))

        logger.info(" ✅ Videos created")

        # --- 2. Audio files -------------------------------------------------
        logger.info("2. Creating proper audio files...")
        import wave
        import numpy as np

        def create_sine_wave(filename, duration=4, freq=440, sample_rate=44100):
            """Write a mono 16-bit PCM sine tone of ``freq`` Hz to ``filename``."""
            t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
            samples = 0.3 * np.sin(2 * np.pi * freq * t)
            # Scale the [-0.3, 0.3] float signal to signed 16-bit integers.
            pcm = (samples * 32767).astype(np.int16)

            with wave.open(filename, 'w') as wav_file:
                wav_file.setnchannels(1)  # mono
                wav_file.setsampwidth(2)  # 2 bytes = 16-bit samples
                wav_file.setframerate(sample_rate)
                wav_file.writeframes(pcm.tobytes())

        tts_audio_path = '/tmp/tts_audio.wav'
        bg_audio_path = '/tmp/bg_audio.wav'
        create_sine_wave(tts_audio_path, duration=4, freq=440)  # A tone
        create_sine_wave(bg_audio_path, duration=4, freq=220)   # lower tone

        logger.info(" ✅ Audio files created")

        # --- 3. Rendering ---------------------------------------------------
        logger.info("3. Testing video rendering...")
        simple_assets = {
            'selected_videos': [
                {'local_path': clip1_path, 'duration': 2, 'reason': 'Red clip'},
                {'local_path': clip2_path, 'duration': 2, 'reason': 'Green clip'},
            ],
            'tts_audio': {
                'local_path': tts_audio_path,
                'duration': 4,
            },
            'tts_script': 'Simple demo with proper audio.',
            'background_music_local': bg_audio_path,
        }

        output_path = await self.video_renderer.render_video(simple_assets)

        logger.info("\n🎉 DEMO SUCCESSFUL!")
        logger.info(f"📹 Video created: {output_path}")

        return True

    except Exception as e:
        logger.error(f"❌ Demo failed: {e}")
        import traceback
        logger.error(f"📋 Debug: {traceback.format_exc()}")
        return False
116
+
117
async def local_test(self):
    """Run a local smoke test of clip creation and rendering without external APIs.

    Three steps are exercised in order: writing a 1-second colour clip to
    ``/tmp/test_color.mp4``, writing a 1-second 440 Hz tone to
    ``/tmp/test_audio.mp3``, and passing both to
    ``self.video_renderer.render_video``.

    Returns:
        ``True`` if every step completed, ``False`` if any step raised
        (the error and its traceback are logged).
    """
    logger.info("🧪 Running local functionality test...")

    try:
        # Test 1: basic video clip creation
        logger.info("1. Testing video clip creation...")
        from moviepy.editor import ColorClip
        test_clip = ColorClip(size=(100, 100), color=(255, 0, 0), duration=1)
        test_clip = test_clip.set_fps(24)  # a frame rate is required to encode
        try:
            test_clip.write_videofile('/tmp/test_color.mp4', verbose=False, logger=None)
        finally:
            # Release the clip even when encoding fails.
            test_clip.close()
        logger.info(" ✅ Video clip creation: OK")

        # Test 2: basic audio clip creation
        logger.info("2. Testing audio clip creation...")
        from moviepy.editor import AudioClip
        import numpy as np

        def tone(t):
            """Sample a 440 Hz sine wave of amplitude 0.1 at time(s) ``t``."""
            return 0.1 * np.sin(440 * 2 * np.pi * t)

        test_audio = AudioClip(tone, duration=1)
        try:
            test_audio.write_audiofile('/tmp/test_audio.mp3', verbose=False, logger=None)
        finally:
            test_audio.close()
        logger.info(" ✅ Audio clip creation: OK")

        # Test 3: rendering pipeline with the assets created above
        logger.info("3. Testing video rendering pipeline...")
        test_assets = {
            'selected_videos': [
                {
                    'local_path': '/tmp/test_color.mp4',
                    'duration': 1,
                    'reason': 'Test video',
                },
            ],
            'tts_audio': {
                'local_path': '/tmp/test_audio.mp3',
                'duration': 1,
            },
            'tts_script': 'Test script.',
            # The same tone doubles as background music.
            'background_music_local': '/tmp/test_audio.mp3',
        }

        output_path = await self.video_renderer.render_video(test_assets)
        logger.info(f" ✅ Video rendering: OK - {output_path}")

        logger.info("\n🎉 Local functionality test passed!")
        return True

    except Exception as e:
        logger.error(f"❌ Local test failed: {e}")
        # Include the traceback so a failing step can be located.
        import traceback
        logger.error(f"📋 Debug: {traceback.format_exc()}")
        return False
171
+
172
async def execute_pipeline(self, content_strategy: Dict[str, str], tts_script: str) -> Dict[str, Any]:
    """Run the full video pipeline: generate assets, download, render, upload.

    Args:
        content_strategy: Prompt/strategy dict forwarded to asset generation.
        tts_script: Voice-over text forwarded to asset generation.

    Returns:
        On success, a dict with ``success=True``, ``final_url``,
        ``local_path``, ``duration`` (elapsed seconds) and ``assets_metadata``.
        On any exception, a dict with ``success=False``, ``error`` (the
        exception text) and ``duration``; the exception is logged, not raised.
    """
    self.pipeline_start_time = time.time()
    logger.info("🚀 Starting Production Video Pipeline")

    try:
        # Step 1: Generate all assets
        logger.info("\n📦 STEP 1: Parallel Asset Generation")
        assets = await self._generate_assets_parallel(content_strategy, tts_script)

        # Rendering cannot proceed without library videos and voice-over.
        if not assets.get('selected_videos') or not assets.get('tts_audio'):
            raise ValueError("Missing critical assets: videos or TTS audio")

        # Step 2: Download all remote assets
        logger.info("\n⬇️ STEP 2: Downloading Remote Assets")
        await self._download_assets(assets)

        # Step 3: Render final video
        logger.info("\n🎬 STEP 3: Video Composition & Rendering")
        final_video_path = await self.video_renderer.render_video(assets)

        # Step 4: Upload to cloud storage
        logger.info("\n☁️ STEP 4: Cloud Storage Upload")
        final_url = await self.api_clients.store_in_gcs(final_video_path, 'video')

        # Pipeline completion
        elapsed_time = time.time() - self.pipeline_start_time
        logger.info(f"\n Pipeline completed in {elapsed_time:.2f}s")

        # The hook video is optional and is stored as None when generation
        # fails. ``dict.get(key, {})`` only applies its default when the key
        # is absent, so a stored None must be replaced explicitly; otherwise
        # ``.get('task_id')`` raises and a finished upload is reported as failed.
        hook_video = assets.get('hook_video') or {}
        selected_videos = assets.get('selected_videos') or []

        return {
            'success': True,
            'final_url': final_url,
            'local_path': final_video_path,
            'duration': elapsed_time,
            'assets_metadata': {
                'hook_video': hook_video.get('task_id'),
                'selected_videos_count': len(selected_videos),
                'total_duration': sum(v.get('duration', 0) for v in selected_videos),
            },
        }

    except Exception as e:
        elapsed_time = time.time() - self.pipeline_start_time if self.pipeline_start_time else 0
        logger.error(f"\n❌ Pipeline failed after {elapsed_time:.2f}s: {e}")

        return {
            'success': False,
            'error': str(e),
            'duration': elapsed_time,
        }
225
+
226
async def _generate_assets_parallel(self, content_strategy: Dict, tts_script: str) -> Dict:
    """Generate the hook video, library selection and TTS audio concurrently.

    Each task is isolated: if one raises, the error is logged and its entry
    in the result is ``None`` while the other tasks still complete.

    Args:
        content_strategy: Strategy dict passed to ``_generate_hook_video``.
        tts_script: Script passed to video selection and TTS generation.

    Returns:
        A dict with keys ``hook_video``, ``selected_videos``, ``tts_audio``
        (each a task result or ``None``), plus ``background_music_url`` and
        ``tts_script``.
    """
    tasks = {
        'hook_video': self._generate_hook_video(content_strategy),
        'selected_videos': self.asset_selector.select_videos(tts_script),
        'tts_audio': self.api_clients.generate_tts(tts_script),
    }

    async def _guarded(task_name, awaitable):
        """Await one task, converting any failure into a logged ``None``."""
        try:
            # Awaiting inside the guard also catches the TypeError raised
            # when a task turns out not to be awaitable.
            value = await awaitable
        except Exception as e:
            logger.error(f"❌ {task_name} failed: {e}")
            return None
        logger.info(f"✓ {task_name} completed")
        return value

    # gather() schedules every guarded task at once, so the slow calls
    # overlap instead of being awaited one after another.
    values = await asyncio.gather(
        *(_guarded(name, awaitable) for name, awaitable in tasks.items())
    )
    results = dict(zip(tasks, values))

    # Synchronous additions
    results['background_music_url'] = self.asset_selector.select_background_music()
    results['tts_script'] = tts_script

    return results
249
+
250
async def _generate_hook_video(self, strategy: Dict) -> Optional[Dict]:
    """Create the 5-second hook video from the strategy's prompt.

    The ``runway_prompt`` entry is preferred, falling back to
    ``gemini_prompt``. The chosen prompt is passed through
    ``api_clients.enhance_prompt`` and then ``api_clients.generate_video``.

    Returns:
        Whatever ``generate_video`` returns, or ``None`` when no prompt is
        available or any step raises (the error is logged).
    """
    try:
        # First truthy prompt wins; the second key is only read if needed.
        base_prompt = None
        for prompt_key in ('runway_prompt', 'gemini_prompt'):
            base_prompt = strategy.get(prompt_key)
            if base_prompt:
                break

        if not base_prompt:
            logger.warning("No prompt available for hook video")
            return None

        improved_prompt = await self.api_clients.enhance_prompt(base_prompt)
        return await self.api_clients.generate_video(improved_prompt, duration=5)

    except Exception as exc:
        logger.error(f"Hook video generation failed: {exc}")
        return None
272
+
273
+ async def _download_assets(self, assets: Dict):
274
+ """Download all remote assets to local files"""
275
+ download_tasks = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
+ # Download hook video
278
+ if assets.get('hook_video') and assets['hook_video'].get('video_url'):
279
+ download_tasks.append(
280
+ self._download_to_local(
281
+ assets['hook_video']['video_url'],
282
+ 'hook_video.mp4',
283
+ assets['hook_video']
284
+ )
 
 
 
 
 
 
 
 
 
285
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
+ # Download library videos
288
+ for i, video in enumerate(assets.get('selected_videos', [])):
289
+ if video.get('url'):
290
+ download_tasks.append(
291
+ self._download_to_local(
292
+ video['url'],
293
+ f'library_video_{i}.mp4',
294
+ video
295
+ )
296
+ )
 
 
 
 
297
 
298
+ # Download background music
299
+ if assets.get('background_music_url'):
300
+ download_tasks.append(
301
+ self._download_to_local(
302
+ assets['background_music_url'],
303
+ 'background_music.mp3',
304
+ assets,
305
+ 'background_music_local'
306
+ )
307
+ )
 
308
 
309
+ # Download TTS audio
310
+ if assets.get('tts_audio') and assets['tts_audio'].get('audio_url'):
311
+ download_tasks.append(
312
+ self._download_to_local(
313
+ assets['tts_audio']['audio_url'],
314
+ 'tts_audio.mp3',
315
+ assets['tts_audio'],
316
+ 'local_path'
317
+ )
318
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
 
320
+ # Execute all downloads concurrently
321
+ if download_tasks:
322
+ await asyncio.gather(*download_tasks, return_exceptions=True)
323
+
324
async def _download_to_local(self, url: str, filename: str, target_dict: Dict, key: str = 'local_path'):
    """Download ``url`` as ``filename`` and record the local path.

    On success the path returned by ``api_clients.download_file`` is stored
    in ``target_dict[key]``. On failure the error is logged and
    ``target_dict`` is left untouched; nothing is raised.
    """
    try:
        target_dict[key] = await self.api_clients.download_file(url, filename)
        logger.info(f"✓ Downloaded {filename} from {url}")
    except Exception as exc:
        logger.error(f"❌ Failed to download {filename}: {exc}")
332
+
333
  async def health_check(self) -> Dict[str, bool]:
334
+ """Comprehensive health check of all components"""
335
+ logger.info("🏥 Running comprehensive health check...")
336
 
337
+ # Check API clients
338
+ api_health = await self.api_clients.health_check()
 
 
 
 
 
 
 
 
 
339
 
340
+ # Check asset selector
341
  try:
342
+ asset_selector_healthy = len(self.asset_selector.video_library) > 0
343
+ if not asset_selector_healthy:
344
+ logger.warning(" ⚠️ Asset Selector: Video library is empty")
 
 
345
  except Exception as e:
346
+ asset_selector_healthy = False
347
+ logger.error(f" ❌ Asset Selector: {e}")
348
 
349
+ # Check video renderer
350
  try:
351
+ video_renderer_healthy = self.video_renderer.temp_dir.exists()
352
+ if not video_renderer_healthy:
353
+ logger.warning(" ⚠️ Video Renderer: Temp directory issue")
 
 
354
  except Exception as e:
355
+ video_renderer_healthy = False
356
+ logger.error(f" ❌ Video Renderer: {e}")
357
 
358
+ # Combine all health statuses
359
+ health_status = {
360
+ **api_health,
361
+ 'asset_selector': asset_selector_healthy,
362
+ 'video_renderer': video_renderer_healthy
363
+ }
 
364
 
365
+ # Print summary
366
+ operational_services = sum(health_status.values())
367
+ total_services = len(health_status)
368
+
369
+ print(f"\n📊 Health Summary: {operational_services}/{total_services} services operational")
370
+
371
+ if operational_services == total_services:
372
+ print("🎉 System is fully operational and ready for production!")
373
+ elif operational_services >= total_services - 2:
374
+ print("⚠️ System is mostly operational, but some features may be limited")
375
  else:
376
+ print(" System has significant issues that need attention")
377
+
378
+ return health_status
379
+
380
async def basic_test(self):
    """Render a fixed set of local placeholder assets as a basic check.

    The asset paths under ``/tmp`` are not created here; they must already
    exist for the render to succeed.

    Returns:
        ``True`` if ``render_video`` completed, ``False`` if it raised
        (the error is logged).
    """
    logger.info("🧪 Running basic pipeline test...")

    # Placeholder inputs; the referenced files have to be provided beforehand.
    clip_entry = {
        'url': 'https://example.com/video1.mp4',
        'duration': 2,
        'reason': 'Test video 1',
        'local_path': '/tmp/test_video1.mp4',
    }
    voice_entry = {
        'local_path': '/tmp/test_audio.mp3',
        'duration': 10,
    }
    test_assets = {
        'selected_videos': [clip_entry],
        'tts_audio': voice_entry,
        'background_music_local': '/tmp/test_music.mp3',
        'tts_script': 'Test script for video generation.',
    }

    try:
        final_video_path = await self.video_renderer.render_video(test_assets)
    except Exception as exc:
        logger.error(f"❌ Basic test failed: {exc}")
        return False

    logger.info(f"✅ Basic test passed: {final_video_path}")
    return True
src/main.py CHANGED
@@ -159,41 +159,44 @@ async def run_pipeline(
159
 
160
  async def health_check_command(automation: ContentAutomation):
161
  """Run health check on all services"""
162
- health_status = await automation.health_check()
163
-
164
- if all(health_status.values()):
165
- logger.info("\n All systems operational!")
166
- return 0
167
- else:
168
- logger.error("\n❌ Some systems are not operational")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  return 1
170
 
171
 
172
  async def test_command(automation: ContentAutomation):
173
- """Run a quick test of the pipeline with minimal resources"""
174
- logger.info("\n🧪 Running test pipeline...")
175
-
176
- test_strategy = {
177
- 'gemini_prompt': 'A simple product shot of a modern massager device',
178
- 'runway_prompt': 'Static product shot of a sleek white massager on a clean background',
179
- 'style': 'minimal',
180
- 'aspect_ratio': '9:16',
181
- 'duration': 5,
182
- 'brand': 'Test'
183
- }
184
 
185
- test_script = "This is a test of the text-to-speech system. It should be brief."
186
 
187
- result = await automation.execute_pipeline(test_strategy, test_script)
188
-
189
- if result.get('success'):
190
- logger.info("\n✅ Test completed successfully!")
191
  return 0
192
  else:
193
- logger.error(f"\n❌ Test failed: {result.get('error', 'Unknown error')}")
194
  return 1
195
 
196
-
197
  def parse_arguments():
198
  """Parse command line arguments"""
199
  parser = argparse.ArgumentParser(
 
159
 
160
  async def health_check_command(automation: ContentAutomation):
161
  """Run health check on all services"""
162
+ try:
163
+ health_status = await automation.health_check()
164
+
165
+ print("\n" + "="*50)
166
+ print("🏥 SYSTEM HEALTH CHECK RESULTS")
167
+ print("="*50)
168
+
169
+ for service, status in health_status.items():
170
+ icon = "✅" if status else "❌"
171
+ print(f"{icon} {service.upper():<15} {'OPERATIONAL' if status else 'ISSUE DETECTED'}")
172
+
173
+ if all(health_status.values()):
174
+ print("\n🎉 All systems are ready for production!")
175
+ return 0
176
+ else:
177
+ print("\n⚠️ Some services need attention before running the pipeline.")
178
+ print(" Check the logs above for details.")
179
+ return 1
180
+
181
+ except Exception as e:
182
+ logger.error(f"Health check failed: {e}")
183
  return 1
184
 
185
 
186
  async def test_command(automation: ContentAutomation):
187
+ """Run simple demo test"""
188
+ logger.info("\n🧪 Running Simple Demo Test...")
 
 
 
 
 
 
 
 
 
189
 
190
+ success = await automation.simple_demo()
191
 
192
+ if success:
193
+ logger.info("\n✅ Demo test completed successfully!")
194
+ logger.info("🎉 Your video automation system is working!")
 
195
  return 0
196
  else:
197
+ logger.error(f"\n❌ Demo test failed")
198
  return 1
199
 
 
200
  def parse_arguments():
201
  """Parse command line arguments"""
202
  parser = argparse.ArgumentParser(
src/video_renderer.py CHANGED
@@ -1,62 +1,389 @@
1
  """
2
- Video rendering and subtitle engine
3
  """
 
 
 
 
 
4
  import os
5
- from utils import logger
 
 
 
 
 
 
 
 
 
 
6
 
7
  class VideoRenderer:
8
- def __init__(self, config):
9
  self.config = config
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- async def render_video(self, assets):
12
- """Render final video by merging all assets"""
13
- logger.info("Rendering video with assets...")
14
-
15
- # Simplified implementation - replace with actual video rendering
16
- # This would use moviepy or similar library
17
-
18
- hook_video = assets.get('hook_video')
19
- background_music = assets.get('background_music')
20
- selected_videos = assets.get('selected_videos', [])
21
- tts_audio = assets.get('tts_audio')
22
-
23
- logger.info(f"Merging {len(selected_videos)} selected videos")
24
- logger.info(f"Using hook video: {hook_video}")
25
- logger.info(f"Using background music: {background_music}")
26
-
27
- # Placeholder for actual video rendering logic
28
- output_path = "outputs/videos/rendered_video.mp4"
29
- logger.info(f"Video rendered to: {output_path}")
30
-
31
- return output_path
32
-
33
- async def add_subtitles(self, video_path, tts_script):
34
- """Add subtitles to video"""
35
- logger.info("Adding subtitles to video...")
36
-
37
- # Simplified implementation - replace with actual subtitle engine
38
- # This would add subtitles in the middle of the screen
39
-
40
- subtitles = self._generate_subtitle_segments(tts_script)
41
- logger.info(f"Generated {len(subtitles)} subtitle segments")
42
-
43
- # Placeholder for actual subtitle rendering
44
- output_path = video_path.replace('.mp4', '_subtitled.mp4')
45
- logger.info(f"Subtitled video saved to: {output_path}")
46
-
47
- return output_path
48
-
49
- def _generate_subtitle_segments(self, text):
50
- """Generate subtitle segments from text"""
51
- sentences = [s.strip() + '.' for s in text.split('.') if s.strip()]
52
- segments = []
53
-
54
- for i, sentence in enumerate(sentences):
55
- segments.append({
56
- 'text': sentence,
57
- 'start_time': i * 3, # 3 seconds per segment
58
- 'end_time': (i + 1) * 3,
59
- 'position': 'middle' # Your nuance: middle of screen
60
- })
61
-
62
- return segments
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Production video rendering engine with proper error handling and resource management
3
  """
4
+ # Compatibility shim: when the installed Pillow has no Image.ANTIALIAS, alias it to Image.LANCZOS. Keep this above the other imports.
5
+ import PIL.Image
6
+ if not hasattr(PIL.Image, 'ANTIALIAS'):
7
+ PIL.Image.ANTIALIAS = PIL.Image.LANCZOS
8
+
9
  import os
10
+ import tempfile
11
+ from typing import List, Dict, Optional
12
+ from pathlib import Path
13
+
14
+ # Third-party imports (video/audio composition and text wrapping)
15
+ from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, concatenate_videoclips, TextClip, CompositeAudioClip
16
+ import numpy as np
17
+ import textwrap
18
+
19
+ from utils import logger, format_duration
20
+
21
 
22
class VideoRenderer:
    """Compose video clips, audio tracks and subtitles into one video file.

    The pipeline (see ``render_video``) loads the hook and library clips,
    concatenates them into a vertical sequence, mixes the TTS and background
    audio on top, optionally overlays subtitles and writes the result into a
    per-instance temporary directory.
    """

    # Upper bound, in seconds, on the concatenated video sequence.
    MAX_DURATION_SECONDS = 15.0
    # Output frame size as (width, height): 9:16 vertical.
    TARGET_SIZE = (1080, 1920)
    # Maximum number of subtitle phrases taken from a script.
    MAX_SUBTITLE_PHRASES = 6

    def __init__(self, config: Dict):
        """Store the configuration and create a private temporary directory.

        Args:
            config: Configuration mapping; stored as ``self.config``.
        """
        self.config = config
        self.temp_dir = Path(tempfile.mkdtemp())
        logger.info(f"Initialized VideoRenderer with temp dir: {self.temp_dir}")

    async def render_video(self, assets: Dict, video_config: Optional[Dict] = None) -> str:
        """Render the final video composition from all assets.

        Args:
            assets: Mapping of assets. Keys read here and in the helpers:
                ``selected_videos``, ``tts_audio``, ``hook_video``,
                ``background_music_local`` and ``tts_script``.
            video_config: Optional video configuration; forwarded to
                ``_create_video_sequence``.

        Returns:
            Path to the rendered video file.

        Raises:
            ValueError: If ``_validate_assets`` rejects ``assets``.
            Exception: Any error raised by a pipeline stage is logged and
                re-raised.
        """
        video_clips: list = []
        audio_clips: list = []
        final_video = None

        try:
            logger.info("🎬 Starting video rendering pipeline")

            # Validate inputs
            if not self._validate_assets(assets):
                raise ValueError("Invalid assets provided for video rendering")

            # Load and prepare all assets
            video_clips = await self._prepare_video_clips(assets)
            audio_clips = await self._prepare_audio_clips(assets)

            # Create video sequence
            final_video = await self._create_video_sequence(video_clips, video_config)

            # Add audio
            final_video = await self._add_audio_track(final_video, audio_clips)

            # Add subtitles if script provided
            if assets.get('tts_script'):
                final_video = await self._add_subtitles(final_video, assets['tts_script'])

            # Render final video
            output_path = await self._render_final_video(final_video)

            logger.info(f"✅ Video rendering completed: {output_path}")
            return output_path

        except Exception as e:
            logger.error(f"❌ Video rendering failed: {e}")
            raise

        finally:
            # Release every clip opened by this call, on success and on
            # failure alike. Audio clips are included so their file handles
            # do not outlive the render; closing a clip twice is tolerated
            # by _cleanup_temp_files.
            leftovers = list(video_clips) + list(audio_clips)
            if final_video is not None:
                leftovers.append(final_video)
            self._cleanup_temp_files(leftovers)

    async def _prepare_video_clips(self, assets: Dict) -> "List[VideoFileClip]":
        """Open the hook video and the selected library videos without audio.

        Args:
            assets: Asset mapping; reads ``hook_video['local_path']`` and the
                ``local_path`` of each entry in ``selected_videos``.

        Returns:
            The opened clips, hook first, then library clips in input order.

        Raises:
            Exception: Any loading error; clips opened so far are closed
                before it is re-raised.
        """
        clips = []

        try:
            # Load RunwayML hook video
            hook_video = assets.get('hook_video')
            if hook_video:
                hook_clip = VideoFileClip(hook_video['local_path']).without_audio()
                clips.append(hook_clip)
                logger.info(f"✓ Loaded hook video: {hook_clip.duration:.2f}s")

            # Load library videos; entries without a local path are skipped
            for i, lib_video in enumerate(assets.get('selected_videos', [])):
                if lib_video.get('local_path'):
                    lib_clip = VideoFileClip(lib_video['local_path']).without_audio()
                    clips.append(lib_clip)
                    logger.info(f"✓ Loaded library video {i}: {lib_clip.duration:.2f}s")

            return clips

        except Exception as e:
            logger.error(f"❌ Failed to prepare video clips: {e}")
            # Cleanup on error
            self._cleanup_temp_files(clips)
            raise

    def _load_audio_clip(self, path: str, label: str, volume: Optional[float] = None):
        """Open one audio file; return None (after logging) when unusable.

        Args:
            path: Audio file path handed to ``AudioFileClip``.
            label: Human-readable name used in log messages.
            volume: Optional factor applied through ``volumex``.

        Returns:
            The opened (and possibly volume-scaled) clip, or None when the
            file cannot be loaded or has zero duration.
        """
        clip = None
        try:
            clip = AudioFileClip(path)
            if clip.duration > 0:
                scaled = clip.volumex(volume) if volume is not None else clip
                logger.info(f"✓ Loaded {label}: {scaled.duration:.2f}s")
                return scaled
            logger.warning(f"⚠️ {label[:1].upper()}{label[1:]} has zero duration")
        except Exception as e:
            logger.error(f"❌ Failed to load {label}: {e}")

        # Reaching this point means the clip is not going to be used: make
        # sure an already-opened file handle is not left behind.
        if clip is not None:
            self._cleanup_temp_files([clip])
        return None

    async def _prepare_audio_clips(self, assets: Dict) -> "List[AudioFileClip]":
        """Open the TTS audio and the background music, when available.

        A clip that fails to load or has zero duration is logged and skipped
        rather than failing the whole render.

        Args:
            assets: Asset mapping; reads ``tts_audio['local_path']`` and
                ``background_music_local``.

        Returns:
            The usable audio clips: TTS first, then background music at
            30% volume.
        """
        clips = []

        try:
            # Load TTS audio
            tts_audio = assets.get('tts_audio')
            if tts_audio and tts_audio.get('local_path'):
                tts_clip = self._load_audio_clip(tts_audio['local_path'], 'TTS audio')
                if tts_clip is not None:
                    clips.append(tts_clip)

            # Load background music, attenuated so it sits under the voice
            background_path = assets.get('background_music_local')
            if background_path:
                bg_clip = self._load_audio_clip(background_path, 'background music', volume=0.3)
                if bg_clip is not None:
                    clips.append(bg_clip)

            return clips

        except Exception as e:
            logger.error(f"❌ Failed to prepare audio clips: {e}")
            # Cleanup on error
            self._cleanup_temp_files(clips)
            raise

    async def _create_video_sequence(self, video_clips: "List[VideoFileClip]",
                                     video_config: Optional[Dict]) -> "VideoFileClip":
        """Trim, resize and concatenate the clips into one sequence.

        Args:
            video_clips: Clips to join, in playback order.
            video_config: Accepted for interface compatibility; not read by
                this method.

        Returns:
            The concatenated clip, at most ``MAX_DURATION_SECONDS`` long and
            sized to ``TARGET_SIZE``.

        Raises:
            ValueError: If ``video_clips`` is empty.
            Exception: Any composition error; the input clips are closed
                before it is re-raised.
        """
        try:
            if not video_clips:
                raise ValueError("No video clips available for sequence")

            # Calculate total available duration and trim when over budget
            max_duration = self.MAX_DURATION_SECONDS
            current_duration = sum(clip.duration for clip in video_clips)

            if current_duration > max_duration:
                logger.warning(f"⚠️ Video sequence too long ({current_duration:.1f}s), will trim to {max_duration}s")
                video_clips = self._trim_clips_to_fit(video_clips, max_duration)

            # Resize all clips to the target aspect ratio (9:16 vertical)
            resized_clips = [self._resize_for_vertical(clip, self.TARGET_SIZE) for clip in video_clips]

            # Create sequence
            final_sequence = concatenate_videoclips(resized_clips)
            logger.info(f"✓ Created video sequence: {final_sequence.duration:.2f}s")

            return final_sequence

        except Exception as e:
            logger.error(f"❌ Failed to create video sequence: {e}")
            self._cleanup_temp_files(video_clips)
            raise

    def _resize_for_vertical(self, clip: "VideoFileClip", target_size: tuple) -> "VideoFileClip":
        """Scale the clip to cover ``target_size`` and center-crop the excess.

        Args:
            clip: Clip to resize; its ``w`` and ``h`` are read.
            target_size: Desired ``(width, height)``.

        Returns:
            A clip cropped to the target width and height.
        """
        target_w, target_h = target_size
        clip_aspect = clip.w / clip.h
        target_aspect = target_w / target_h

        if clip_aspect > target_aspect:
            # Clip is wider, fit to height and crop width
            new_clip = clip.resize(height=target_h)
        else:
            # Clip is taller, fit to width and crop height
            new_clip = clip.resize(width=target_w)

        # Center crop to exact size; fall back to explicit corner
        # coordinates when the center-based call is rejected.
        try:
            new_clip = new_clip.crop(
                x_center=new_clip.w / 2,
                y_center=new_clip.h / 2,
                width=target_w,
                height=target_h
            )
        except Exception:
            x1 = (new_clip.w - target_w) // 2
            y1 = (new_clip.h - target_h) // 2
            new_clip = new_clip.crop(x1=x1, y1=y1, x2=x1 + target_w, y2=y1 + target_h)

        return new_clip

    def _trim_clips_to_fit(self, clips: "List[VideoFileClip]", max_duration: float) -> "List[VideoFileClip]":
        """Keep clips in order until ``max_duration`` seconds are used up.

        The clip that crosses the limit is shortened with ``subclip``; any
        clips after it are dropped.

        Args:
            clips: Clips in playback order.
            max_duration: Total duration budget in seconds.

        Returns:
            The clips (possibly with a shortened last one) that fit.
        """
        trimmed_clips = []
        remaining_duration = max_duration

        for clip in clips:
            if remaining_duration <= 0:
                break

            use_duration = min(clip.duration, remaining_duration)
            if use_duration < clip.duration:
                trimmed_clips.append(clip.subclip(0, use_duration))
                logger.info(f"Trimmed clip from {clip.duration:.1f}s to {use_duration:.1f}s")
            else:
                trimmed_clips.append(clip)

            remaining_duration -= use_duration

        return trimmed_clips

    async def _add_audio_track(self, video_clip: "VideoFileClip", audio_clips: "List[AudioFileClip]") -> "VideoFileClip":
        """Mix the audio clips and attach them to the video.

        Best effort: when mixing fails the error is logged, the audio clips
        are closed and the video is returned without the new audio track.

        Args:
            video_clip: Video to attach the audio to.
            audio_clips: Clips to mix; zero-duration clips are closed and
                ignored.

        Returns:
            The video with the mixed audio set, or ``video_clip`` unchanged
            when there is no usable audio.
        """
        if not audio_clips:
            return video_clip

        try:
            # Filter out invalid audio clips
            valid_audio_clips = []
            for clip in audio_clips:
                if clip.duration > 0:
                    valid_audio_clips.append(clip)
                else:
                    logger.warning("⚠️ Skipping audio clip with zero duration")
                    clip.close()

            if not valid_audio_clips:
                return video_clip

            # Mix all valid audio clips
            mixed_audio = CompositeAudioClip(valid_audio_clips)

            # Ensure audio doesn't exceed video duration
            video_duration = video_clip.duration
            if mixed_audio.duration > video_duration:
                logger.info(f"Trimming audio from {mixed_audio.duration:.2f}s to {video_duration:.2f}s")
                mixed_audio = mixed_audio.subclip(0, video_duration)

            # Add audio to video
            video_with_audio = video_clip.set_audio(mixed_audio)
            logger.info(f"✓ Added audio track: {mixed_audio.duration:.2f}s")

            return video_with_audio

        except Exception as e:
            logger.error(f"❌ Failed to add audio track: {e}")
            # Cleanup audio clips
            self._cleanup_temp_files(audio_clips)
            return video_clip

    async def _add_subtitles(self, video_clip: "VideoFileClip", script: str) -> "CompositeVideoClip":
        """Overlay the script as timed, fading subtitle phrases.

        The video duration is divided evenly between the phrases. Best
        effort: on any failure the error is logged and ``video_clip`` is
        returned without subtitles.

        Args:
            video_clip: Video to overlay; its ``duration`` and ``size`` are
                read.
            script: Text split into phrases by ``_split_script_into_phrases``.

        Returns:
            A composite of the video and the subtitle clips, or
            ``video_clip`` itself when there is nothing to show or
            subtitling fails.
        """
        try:
            phrases = self._split_script_into_phrases(script)
            if not phrases:
                # Without this guard the per-phrase duration below divides
                # by zero and is reported as a subtitle failure.
                logger.info("No subtitle phrases found in script; skipping subtitles")
                return video_clip

            duration_per_phrase = video_clip.duration / len(phrases)
            fade_duration = 0.3
            max_chars_per_line = 25  # word wrapping for vertical format

            target_width, target_height = video_clip.size
            caption_width = int(target_width * 0.85)
            # Position in center-upper area (safe zone for vertical video)
            vertical_position = int(target_height * 0.40)

            text_clips = []
            for i, phrase in enumerate(phrases):
                wrapped_text = '\n'.join(textwrap.wrap(phrase, width=max_chars_per_line))

                # Alternate colours so consecutive phrases are easy to tell apart
                text_clip = TextClip(
                    txt=wrapped_text,
                    fontsize=65,
                    color='yellow' if i % 2 == 1 else 'white',
                    font='Helvetica',
                    stroke_color='black',
                    stroke_width=4,
                    method='caption',
                    size=(caption_width, None)
                )

                text_clip = text_clip.set_position(('center', vertical_position))
                text_clip = text_clip.set_start(i * duration_per_phrase)
                text_clip = text_clip.set_duration(duration_per_phrase)
                text_clip = text_clip.crossfadein(fade_duration).crossfadeout(fade_duration)

                text_clips.append(text_clip)

            # Combine video with subtitles
            final_video = CompositeVideoClip([video_clip] + text_clips)
            logger.info(f"✓ Added {len(text_clips)} subtitle segments")

            return final_video

        except Exception as e:
            logger.error(f"❌ Failed to add subtitles: {e}")
            return video_clip

    def _split_script_into_phrases(self, script: str) -> List[str]:
        """Split the script on '.' into at most ``MAX_SUBTITLE_PHRASES`` phrases.

        Each non-empty piece is stripped and gets a trailing '.'; pieces
        beyond the limit are dropped.

        Args:
            script: Script text.

        Returns:
            The phrases in script order; empty when the script contains no
            text.
        """
        sentences = [piece.strip() + '.' for piece in script.split('.') if piece.strip()]
        return sentences[:self.MAX_SUBTITLE_PHRASES]

    async def _render_final_video(self, video_clip: "VideoFileClip") -> str:
        """Write the clip to ``final_video.mp4`` in the temporary directory.

        The clip is closed afterwards whether or not writing succeeded.

        NOTE(review): the returned path lives under ``self.temp_dir``, which
        ``__del__`` removes; callers need to consume or copy the file while
        this renderer is still alive.

        Args:
            video_clip: Fully composed clip to encode.

        Returns:
            The output file path as a string.

        Raises:
            Exception: Any encoding error, after it has been logged.
        """
        output_path = self.temp_dir / "final_video.mp4"

        try:
            logger.info("📹 Rendering final video file...")

            video_clip.write_videofile(
                str(output_path),
                codec='libx264',
                audio_codec='aac',
                temp_audiofile=str(self.temp_dir / 'temp_audio.m4a'),
                remove_temp=True,
                fps=24,
                verbose=False,
                logger=None  # Suppress moviepy progress bars
            )

            logger.info(f"✓ Final video rendered: {output_path}")
            return str(output_path)

        except Exception as e:
            logger.error(f"❌ Final video rendering failed: {e}")
            raise
        finally:
            video_clip.close()

    def _validate_assets(self, assets: Dict) -> bool:
        """Check that the required assets are present and non-empty.

        Args:
            assets: Asset mapping to validate.

        Returns:
            True when both ``selected_videos`` and ``tts_audio`` are truthy,
            False (after logging the missing key) otherwise.
        """
        for required_key in ('selected_videos', 'tts_audio'):
            if not assets.get(required_key):
                logger.error(f"Missing required asset: {required_key}")
                return False

        return True

    def _cleanup_temp_files(self, clips: List):
        """Close every clip, ignoring errors from individual closes.

        Args:
            clips: Objects expected to expose ``close()``; entries that fail
                (already closed, None, ...) are skipped.
        """
        for clip in clips:
            try:
                clip.close()
            except Exception:
                # Best-effort cleanup: one failing close must not prevent
                # the remaining clips from being released.
                pass

    def __del__(self):
        """Remove the temporary directory when the renderer is destroyed."""
        try:
            import shutil
            # temp_dir is missing when __init__ did not run to completion.
            temp_dir = getattr(self, 'temp_dir', None)
            if temp_dir is not None and temp_dir.exists():
                shutil.rmtree(temp_dir)
        except Exception:
            # Never raise from a finalizer.
            pass