Mustafa-albakkar committed on
Commit
6c4eb06
·
verified ·
1 Parent(s): 8c24408

Upload app_youtube_shorts.py

Browse files
Files changed (1) hide show
  1. app_youtube_shorts.py +1177 -0
app_youtube_shorts.py ADDED
@@ -0,0 +1,1177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # === FILE: app.py (Video Agent - FIXED: نص ثابت على شاشة متحركة) ===
2
+ #
3
+ # IMPORTANT: For proper text rendering, make sure Roboto-Bold.ttf font is installed
4
+ # Install with: apt-get install -y fonts-roboto
5
+ # Or download from: https://fonts.google.com/specimen/Roboto
6
+ #
7
+ # If font is not available, the system will fall back to Arial-Bold
8
+ #
9
+ # ✅ UPDATED: Video output dimensions set to YouTube Shorts (1080x1920 - 9:16 aspect ratio)
10
+ # ✅ UPDATED: Smart background color extraction from image for letterboxing
11
+
12
+ import os
13
+ import io
14
+ import json
15
+ import base64
16
+ import logging
17
+ import random
18
+ from typing import Optional, Dict, Any, Tuple, List
19
+ from datetime import datetime
20
+ import tempfile
21
+ from collections import Counter
22
+
23
+ import gradio as gr
24
+ import numpy as np
25
+ from PIL import Image, ImageDraw, ImageFont
26
+
27
# Fix for Pillow 10.0.0+ compatibility with MoviePy:
# Pillow 10 removed the deprecated `Image.ANTIALIAS` constant that MoviePy 1.x
# still references, so alias it to its modern replacement, LANCZOS.
if not hasattr(Image, 'ANTIALIAS'):
    Image.ANTIALIAS = Image.LANCZOS
30
+
31
+ # استيراد مكتبات معالجة الصوت والفيديو
32
+ from kokoro_engine import KokoroEngine
33
+
34
+ KOKORO_AVAILABLE = True
35
+
36
+ try:
37
+ from moviepy.editor import ImageClip, AudioFileClip, CompositeVideoClip, TextClip
38
+ MOVIEPY_AVAILABLE = True
39
+ except ImportError:
40
+ MOVIEPY_AVAILABLE = False
41
+ logging.warning("⚠️ MoviePy not available. Install with: pip install moviepy")
42
+
43
# ---------------- Logging Setup ----------------
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("video_agent")

# ---------------- Environment Variables ----------------
# Directory where finished videos are stored.
VIDEO_HISTORY_DIR = os.getenv("VIDEO_HISTORY_DIR", "video_history")
# Maximum number of entries kept in the in-memory history list.
MAX_HISTORY_COUNT = int(os.getenv("MAX_HISTORY_COUNT", "10"))
# JSON file persisting the voice-rotation index across restarts.
VOICE_STATE_FILE = os.getenv("VOICE_STATE_FILE", "voice_rotation_state.json")

# ---------------- YouTube Shorts Dimensions ----------------
# 9:16 portrait output resolution used for every rendered video.
YOUTUBE_SHORTS_WIDTH = 1080
YOUTUBE_SHORTS_HEIGHT = 1920

# ---------------- Kokoro Voices List ----------------
# Voice IDs cycled through round-robin by VoiceRotationManager.
KOKORO_VOICES = [
    # British Female
    "bf_alice",
    "bf_emma",
    "bf_isabella",
    "bf_lily",
    # American Female
    "af_alloy",
    "af_aoede",
    "af_bella",
    "af_heart",
    "af_jessica",
    "af_kore",
    "af_nicole",
    "af_nova",
    "af_river",
    "af_sarah",
    "af_sky",
    # British Male
    "bm_daniel",
    "bm_fable",
    "bm_george",
    "bm_lewis",
    # American Male
    "am_adam",
    "am_echo",
    "am_eric",
    "am_fenrir",
    "am_liam",
    "am_michael",
    "am_onyx",
    "am_puck"
]

# ---------------- Initialization Check ----------------
IS_SERVICE_READY = True
93
+
94
# ---------------- Color Extraction Function ----------------
def get_dominant_color(image: Image.Image, sample_size: int = 100) -> Tuple[int, int, int]:
    """Return the most frequent RGB color in a downscaled copy of the image.

    Args:
        image: source PIL image.
        sample_size: bounding box of the thumbnail used for sampling
            (smaller is faster but coarser).

    Returns:
        (R, G, B) of the most common pixel value, or (30, 30, 30) on error.
    """
    try:
        # Work on a shrunken copy so counting pixels stays cheap.
        sample = image.copy()
        sample.thumbnail((sample_size, sample_size))

        if sample.mode != 'RGB':
            sample = sample.convert('RGB')

        # Tally every pixel and pick the single most common value.
        counts = Counter(sample.getdata())
        dominant_color = counts.most_common(1)[0][0]

        log.info(f"Dominant color extracted: RGB{dominant_color}")
        return dominant_color

    except Exception as e:
        log.warning(f"Failed to extract dominant color: {e}, using default (30, 30, 30)")
        return (30, 30, 30)
128
+
129
def get_edge_average_color(image: "Image.Image", border_width: int = 50) -> Tuple[int, int, int]:
    """Average RGB color of the image's border strips (a good background proxy).

    The four edge strips (top, bottom, left, right) are averaged together;
    pixels in the corner regions are intentionally counted twice, matching
    the original per-pixel implementation. The sums are computed with a
    vectorized NumPy reduction instead of one `getpixel()` call per pixel,
    which is dramatically faster on large images.

    Args:
        image: PIL image (converted to RGB if needed).
        border_width: thickness of each sampled edge strip, in pixels.

    Returns:
        (R, G, B) tuple of the truncated per-channel mean; falls back to
        (30, 30, 30) on any error or if no pixels were sampled.
    """
    try:
        if image.mode != 'RGB':
            image = image.convert('RGB')

        width, height = image.size
        arr = np.asarray(image)  # shape (height, width, 3), uint8

        # The same four strips the per-pixel loops used to sample.
        strips = (
            arr[:min(border_width, height), :, :],       # top edge
            arr[max(0, height - border_width):, :, :],   # bottom edge
            arr[:, :min(border_width, width), :],        # left edge
            arr[:, max(0, width - border_width):, :],    # right edge
        )

        pixel_count = sum(s.shape[0] * s.shape[1] for s in strips)
        if not pixel_count:
            return (30, 30, 30)

        # Accumulate in int64 to avoid uint8 overflow.
        channel_sums = np.zeros(3, dtype=np.int64)
        for s in strips:
            channel_sums += s.reshape(-1, 3).sum(axis=0, dtype=np.int64)

        # int() truncates, matching the original int(sum / len) behavior.
        avg_r = int(channel_sums[0] / pixel_count)
        avg_g = int(channel_sums[1] / pixel_count)
        avg_b = int(channel_sums[2] / pixel_count)

        log.info(f"Edge average color: RGB({avg_r}, {avg_g}, {avg_b})")
        return (avg_r, avg_g, avg_b)

    except Exception as e:
        log.warning(f"Failed to extract edge color: {e}, using default")
        return (30, 30, 30)
183
+
184
def prepare_image_for_shorts(image: Image.Image) -> Image.Image:
    """Fit an image onto a 1080x1920 YouTube Shorts canvas.

    The image is scaled to fit while preserving its aspect ratio, centered,
    and letterboxed with a background color sampled from its own edges.

    Args:
        image: original PIL image.

    Returns:
        A YOUTUBE_SHORTS_WIDTH x YOUTUBE_SHORTS_HEIGHT RGB image.

    Raises:
        Re-raises any failure after logging it.
    """
    try:
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Letterbox color comes from the picture's own border pixels.
        bg_color = get_edge_average_color(image, border_width=30)
        canvas = Image.new('RGB', (YOUTUBE_SHORTS_WIDTH, YOUTUBE_SHORTS_HEIGHT), bg_color)

        src_w, src_h = image.size
        target_ratio = YOUTUBE_SHORTS_WIDTH / YOUTUBE_SHORTS_HEIGHT
        img_ratio = src_w / src_h

        if img_ratio > target_ratio:
            # Wider than 9:16 -> constrain to the canvas width.
            new_width = YOUTUBE_SHORTS_WIDTH
            new_height = int(new_width / img_ratio)
        else:
            # Taller than (or equal to) 9:16 -> constrain to the canvas height.
            new_height = YOUTUBE_SHORTS_HEIGHT
            new_width = int(new_height * img_ratio)

        # Scale, then paste centered on the colored canvas.
        resized = image.resize((new_width, new_height), Image.LANCZOS)
        offset = (
            (YOUTUBE_SHORTS_WIDTH - new_width) // 2,
            (YOUTUBE_SHORTS_HEIGHT - new_height) // 2,
        )
        canvas.paste(resized, offset)

        log.info(f"✅ Image prepared for YouTube Shorts: {YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT} with background color {bg_color}")
        return canvas

    except Exception as e:
        log.error(f"Failed to prepare image for Shorts: {e}")
        raise
236
+
237
# ---------------- Voice Rotation Manager ----------------
class VoiceRotationManager:
    """Round-robin voice selector whose position persists across restarts."""

    def __init__(self, state_file: str, voices: List[str]):
        self.state_file = state_file
        self.voices = voices
        self.current_index = 0
        self.load_state()

    def load_state(self):
        """Restore the rotation index from the state file, if one exists."""
        if not os.path.exists(self.state_file):
            log.info("No voice rotation state file found, starting from index 0")
            return
        try:
            with open(self.state_file, "r") as f:
                data = json.load(f)
            self.current_index = data.get("current_voice_index", 0)
            # Clamp stale indices (e.g. after the voice list shrank).
            if self.current_index >= len(self.voices):
                self.current_index = 0
            log.info(f"Voice rotation state loaded: index={self.current_index}")
        except Exception as e:
            log.warning(f"Could not load voice rotation state: {e}")
            self.current_index = 0

    def save_state(self):
        """Persist the current rotation index to the state file."""
        try:
            with open(self.state_file, "w") as f:
                json.dump({"current_voice_index": self.current_index}, f)
            log.info(f"Voice rotation state saved: index={self.current_index}")
        except Exception as e:
            log.error(f"Failed to save voice rotation state: {e}")

    def get_next_voice(self) -> str:
        """Return the current voice, then advance (and persist) the index."""
        voice = self.voices[self.current_index]
        log.info(f"Selected voice: {voice} (index: {self.current_index}/{len(self.voices)-1})")

        # Wrap around at the end of the list.
        self.current_index = (self.current_index + 1) % len(self.voices)
        self.save_state()
        return voice

    def get_current_voice(self) -> str:
        """Peek at the upcoming voice without advancing the rotation."""
        return self.voices[self.current_index]

    def reset(self):
        """Rewind the rotation to the first voice and persist that state."""
        self.current_index = 0
        self.save_state()
        log.info("Voice rotation reset to index 0")
293
+
294
# ---------------- Motion Effects Functions ----------------
def apply_zoom_in_effect(clip, duration):
    """Gradual zoom-in: scale grows linearly from 100% to 120%.

    Args:
        clip: MoviePy clip to animate.
        duration: total animation time in seconds.

    Returns:
        A new clip with the per-frame zoom applied via `clip.fl`.
    """
    w, h = clip.size
    # Hoisted out of the per-frame callback: the import used to execute once
    # per rendered frame.
    from PIL import Image as PILImage

    def effect(gf, t):
        frame = gf(t)
        progress = min(t / duration, 1.0)
        zoom_factor = 1.0 + (progress * 0.2)

        new_w = int(w * zoom_factor)
        new_h = int(h * zoom_factor)

        img = PILImage.fromarray(frame.astype('uint8'))
        img_resized = img.resize((new_w, new_h), PILImage.LANCZOS)

        # Center-crop back to the original size so frame dimensions stay
        # constant while the content appears to zoom.
        left = (new_w - w) // 2
        top = (new_h - h) // 2
        img_cropped = img_resized.crop((left, top, left + w, top + h))

        return np.array(img_cropped)

    return clip.fl(effect)
318
+
319
def apply_zoom_out_effect(clip, duration):
    """Gradual zoom-out: scale shrinks linearly from 120% to 100%.

    Args:
        clip: MoviePy clip to animate.
        duration: total animation time in seconds.

    Returns:
        A new clip with the per-frame zoom applied via `clip.fl`.
    """
    w, h = clip.size
    # Hoisted out of the per-frame callback: the import used to execute once
    # per rendered frame.
    from PIL import Image as PILImage

    def effect(gf, t):
        frame = gf(t)
        progress = min(t / duration, 1.0)
        zoom_factor = 1.2 - (progress * 0.2)

        new_w = int(w * zoom_factor)
        new_h = int(h * zoom_factor)

        img = PILImage.fromarray(frame.astype('uint8'))
        img_resized = img.resize((new_w, new_h), PILImage.LANCZOS)

        # Center-crop back to the original size.
        left = (new_w - w) // 2
        top = (new_h - h) // 2
        img_cropped = img_resized.crop((left, top, left + w, top + h))

        return np.array(img_cropped)

    return clip.fl(effect)
342
+
343
def apply_pan_right_effect(clip, duration):
    """Horizontal pan at a fixed 120% zoom: the crop window's left offset
    moves from its maximum to 0 over the clip's duration.

    Args:
        clip: MoviePy clip to animate.
        duration: total animation time in seconds.

    Returns:
        A new clip with the per-frame pan applied via `clip.fl`.
    """
    w, h = clip.size
    # Hoisted out of the per-frame callback: the import used to execute once
    # per rendered frame.
    from PIL import Image as PILImage

    def effect(gf, t):
        frame = gf(t)
        progress = min(t / duration, 1.0)

        zoom_factor = 1.2
        new_w = int(w * zoom_factor)
        new_h = int(h * zoom_factor)

        img = PILImage.fromarray(frame.astype('uint8'))
        img_resized = img.resize((new_w, new_h), PILImage.LANCZOS)

        # Slide the crop window horizontally; keep it vertically centered.
        max_offset = (new_w - w) // 2
        left = int(max_offset * (1 - progress))
        top = (new_h - h) // 2

        img_cropped = img_resized.crop((left, top, left + w, top + h))

        return np.array(img_cropped)

    return clip.fl(effect)
368
+
369
def apply_pan_left_effect(clip, duration):
    """Horizontal pan at a fixed 120% zoom: the crop window's left offset
    moves from 0 to its maximum over the clip's duration.

    Args:
        clip: MoviePy clip to animate.
        duration: total animation time in seconds.

    Returns:
        A new clip with the per-frame pan applied via `clip.fl`.
    """
    w, h = clip.size
    # Hoisted out of the per-frame callback: the import used to execute once
    # per rendered frame.
    from PIL import Image as PILImage

    def effect(gf, t):
        frame = gf(t)
        progress = min(t / duration, 1.0)

        zoom_factor = 1.2
        new_w = int(w * zoom_factor)
        new_h = int(h * zoom_factor)

        img = PILImage.fromarray(frame.astype('uint8'))
        img_resized = img.resize((new_w, new_h), PILImage.LANCZOS)

        # Slide the crop window horizontally; keep it vertically centered.
        max_offset = (new_w - w) // 2
        left = int(max_offset * progress)
        top = (new_h - h) // 2

        img_cropped = img_resized.crop((left, top, left + w, top + h))

        return np.array(img_cropped)

    return clip.fl(effect)
394
+
395
def apply_pan_down_effect(clip, duration):
    """Vertical pan at a fixed 120% zoom: the crop window's top offset moves
    from its maximum to 0 over the clip's duration.

    Args:
        clip: MoviePy clip to animate.
        duration: total animation time in seconds.

    Returns:
        A new clip with the per-frame pan applied via `clip.fl`.
    """
    w, h = clip.size
    # Hoisted out of the per-frame callback: the import used to execute once
    # per rendered frame.
    from PIL import Image as PILImage

    def effect(gf, t):
        frame = gf(t)
        progress = min(t / duration, 1.0)

        zoom_factor = 1.2
        new_w = int(w * zoom_factor)
        new_h = int(h * zoom_factor)

        img = PILImage.fromarray(frame.astype('uint8'))
        img_resized = img.resize((new_w, new_h), PILImage.LANCZOS)

        # Slide the crop window vertically; keep it horizontally centered.
        max_offset = (new_h - h) // 2
        left = (new_w - w) // 2
        top = int(max_offset * (1 - progress))

        img_cropped = img_resized.crop((left, top, left + w, top + h))

        return np.array(img_cropped)

    return clip.fl(effect)
420
+
421
def apply_pan_up_effect(clip, duration):
    """Vertical pan at a fixed 120% zoom: the crop window's top offset moves
    from 0 to its maximum over the clip's duration.

    Args:
        clip: MoviePy clip to animate.
        duration: total animation time in seconds.

    Returns:
        A new clip with the per-frame pan applied via `clip.fl`.
    """
    w, h = clip.size
    # Hoisted out of the per-frame callback: the import used to execute once
    # per rendered frame.
    from PIL import Image as PILImage

    def effect(gf, t):
        frame = gf(t)
        progress = min(t / duration, 1.0)

        zoom_factor = 1.2
        new_w = int(w * zoom_factor)
        new_h = int(h * zoom_factor)

        img = PILImage.fromarray(frame.astype('uint8'))
        img_resized = img.resize((new_w, new_h), PILImage.LANCZOS)

        # Slide the crop window vertically; keep it horizontally centered.
        max_offset = (new_h - h) // 2
        left = (new_w - w) // 2
        top = int(max_offset * progress)

        img_cropped = img_resized.crop((left, top, left + w, top + h))

        return np.array(img_cropped)

    return clip.fl(effect)
446
+
447
def apply_ken_burns_effect(clip, duration):
    """Ken Burns-style move: zoom grows from 100% to 130% while the crop
    window drifts diagonally toward the top-left.

    Args:
        clip: MoviePy clip to animate.
        duration: total animation time in seconds.

    Returns:
        A new clip with the per-frame motion applied via `clip.fl`.
    """
    w, h = clip.size
    # Hoisted out of the per-frame callback: the import used to execute once
    # per rendered frame.
    from PIL import Image as PILImage

    def effect(gf, t):
        frame = gf(t)
        progress = min(t / duration, 1.0)

        zoom_factor = 1.0 + (progress * 0.3)
        new_w = int(w * zoom_factor)
        new_h = int(h * zoom_factor)

        img = PILImage.fromarray(frame.astype('uint8'))
        img_resized = img.resize((new_w, new_h), PILImage.LANCZOS)

        # Crop window starts centered and drifts as the zoom increases.
        max_offset_w = (new_w - w) // 2
        max_offset_h = (new_h - h) // 2

        left = int(max_offset_w * (1 - progress * 0.5))
        top = int(max_offset_h * (1 - progress * 0.5))

        img_cropped = img_resized.crop((left, top, left + w, top + h))

        return np.array(img_cropped)

    return clip.fl(effect)
474
+
475
def get_random_motion_effect():
    """Pick a motion effect uniformly at random.

    Returns:
        (effect_name, effect_function) tuple.
    """
    catalog = {
        'zoom_in': apply_zoom_in_effect,
        'zoom_out': apply_zoom_out_effect,
        'pan_right': apply_pan_right_effect,
        'pan_left': apply_pan_left_effect,
        'pan_down': apply_pan_down_effect,
        'pan_up': apply_pan_up_effect,
        'ken_burns': apply_ken_burns_effect,
    }

    effect_name, effect_func = random.choice(list(catalog.items()))
    log.info(f"Selected motion effect: {effect_name}")
    return effect_name, effect_func
490
+
491
# ---------------- FIXED: Text Overlay Function ----------------
def create_text_overlay(text: str, video_size: Tuple[int, int], duration: float) -> Optional[ImageClip]:
    """Build a static, transparent text layer to composite over the video.

    The text keeps a fixed screen position while the background image
    animates beneath it.

    Args:
        text: caption to display.
        video_size: (width, height) of the video frame.
        duration: how long the overlay lasts, in seconds.

    Returns:
        A transparent ImageClip holding the rendered text, or None if the
        text is empty after cleaning or rendering fails.
    """
    # Strip characters that tend to render poorly.
    clean_text = text.replace("...", "").replace("—", "-").strip()

    if not clean_text:
        log.warning("Empty text after cleaning, skipping text overlay")
        return None

    log.info(f"Creating fixed text overlay: '{clean_text[:50]}...'")

    try:
        width, height = video_size

        # Fully transparent RGBA canvas the size of the video frame.
        img = Image.new('RGBA', (width, height), (0, 0, 0, 0))
        draw = ImageDraw.Draw(img)

        # Font size tuned for vertical (Shorts) frames; stroke scales with it.
        fontsize = int(width * 0.07)
        stroke_width = max(3, int(fontsize / 18))

        # Try common bold fonts in order; fall back to PIL's builtin font.
        # FIXED: the bare `except:` here also swallowed KeyboardInterrupt and
        # SystemExit; narrowed to Exception.
        font = None
        for font_name in ["Roboto-Bold.ttf", "DejaVuSans-Bold.ttf", "Arial.ttf", "LiberationSans-Bold.ttf"]:
            try:
                font = ImageFont.truetype(font_name, fontsize)
                log.info(f"✅ Font loaded: {font_name}")
                break
            except Exception:
                # Font not installed / unreadable -- try the next candidate.
                continue

        if not font:
            font = ImageFont.load_default()
            log.warning("⚠️ Using default font")

        def wrap_text(text, font, max_width):
            # Greedy word wrap based on the rendered line width.
            lines = []
            words = text.split()
            while words:
                line = ''
                while words and draw.textlength(line + words[0] + ' ', font=font) < max_width:
                    line += (words.pop(0) + ' ')
                if not line and words:
                    # A single word wider than the line: emit it anyway to
                    # guarantee progress.
                    line = words.pop(0)
                lines.append(line.strip())
            return lines

        wrapped_lines = wrap_text(clean_text, font, width * 0.9)

        # Vertically center the whole text block on the frame.
        line_height = fontsize + 12
        total_height = len(wrapped_lines) * line_height
        start_y = (height - total_height) // 2

        current_y = start_y
        for line in wrapped_lines:
            # Horizontally center each line.
            bbox = draw.textbbox((0, 0), line, font=font)
            line_width = bbox[2] - bbox[0]
            line_x = (width - line_width) // 2

            # White fill with a black stroke for readability on any background.
            draw.text(
                (line_x, current_y),
                line,
                font=font,
                fill=(255, 255, 255, 255),
                stroke_width=stroke_width,
                stroke_fill=(0, 0, 0, 255)
            )
            current_y += line_height

        # Keep the alpha channel so MoviePy derives a mask from it.
        img_array = np.array(img)
        text_clip = ImageClip(img_array, duration=duration, ismask=False, transparent=True)

        log.info(f"✅ Fixed text overlay created successfully (transparent layer)")
        return text_clip

    except Exception as e:
        log.error(f"❌ Failed to create text overlay: {e}")
        import traceback
        traceback.print_exc()
        return None
596
+
597
# ---------------- Video Agent Class ----------------
class VideoAgent:
    """Orchestrates TTS narration and YouTube Shorts video assembly."""

    def __init__(self):
        self.log = logging.getLogger("video_agent")
        # Most-recent-first list of generated-video records.
        self.history = []
        self._setup_history_dir()

        # Round-robin voice selection, persisted across restarts.
        self.voice_manager = VoiceRotationManager(VOICE_STATE_FILE, KOKORO_VOICES)

        # Kokoro TTS engine; stays None if initialization fails.
        self.tts = None
        self.kokoro_available = False

        self.log.info("Initializing Kokoro TTS (ONNX - NeuML model)...")

        try:
            self.tts = KokoroEngine()
            self.kokoro_available = True
            self.log.info("✅ Kokoro-ONNX initialized successfully (NeuML model)")
        except Exception as e:
            # Keep the agent usable for status reporting even without TTS.
            self.tts = None
            self.kokoro_available = False
            self.log.error("❌ Kokoro TTS initialization failed")
            self.log.error(str(e))
            import traceback
            traceback.print_exc()

    def _setup_history_dir(self):
        """Create the video history directory if it does not exist."""
        if not os.path.exists(VIDEO_HISTORY_DIR):
            os.makedirs(VIDEO_HISTORY_DIR)
            self.log.info(f"Created video history directory: {VIDEO_HISTORY_DIR}")

    def generate_audio(self, text: str, output_path: str, voice: Optional[str] = None, speed: float = 0.8) -> Tuple[bool, str]:
        """Synthesize `text` to a 16-bit PCM WAV file with Kokoro TTS.

        Args:
            text: narration text.
            output_path: destination WAV path.
            voice: explicit Kokoro voice ID; None (or an unknown ID) takes
                the next voice from the rotation instead.
            speed: TTS playback speed.

        Returns:
            (success, voice_used); voice_used is "" on failure.
        """
        if not self.kokoro_available or self.tts is None:
            self.log.error("Kokoro TTS not available.")
            return False, ""

        try:
            if voice is None:
                voice = self.voice_manager.get_next_voice()
            else:
                if voice not in KOKORO_VOICES:
                    self.log.warning(f"Voice '{voice}' not found, using next voice from rotation")
                    voice = self.voice_manager.get_next_voice()

            self.log.info(f"Generating audio with voice: {voice}, speed: {speed}")
            self.log.info(f"Text: {text[:50]}...")

            # Select the voice, then synthesize.
            self.tts.set_voice(voice)
            audio_data = self.tts.synthesize(text, speed=speed)

            # Write the samples as a WAV file.
            import scipy.io.wavfile as wavfile
            sample_rate = 24000  # presumably Kokoro's native rate -- TODO confirm against engine docs

            # Scale/clamp float samples into the int16 range before writing.
            audio_int16 = np.clip(audio_data * 32767, -32768, 32767).astype(np.int16)
            wavfile.write(output_path, sample_rate, audio_int16)

            self.log.info(f"✅ Audio generated successfully: {output_path}")
            return True, voice

        except Exception as e:
            self.log.error(f"Audio generation failed: {e}")
            import traceback
            traceback.print_exc()
            return False, ""

    def create_video(self, image_bytes: bytes, audio_path: str, output_path: str, display_text: str) -> bool:
        """Render a YouTube Shorts (1080x1920) video: animated image, audio,
        and a fixed-position caption overlay.

        Args:
            image_bytes: raw image data.
            audio_path: path to the narration audio file.
            output_path: where to write the MP4.
            display_text: caption drawn at a fixed screen position.

        Returns:
            True on success, False on any failure.
        """
        if not MOVIEPY_AVAILABLE:
            self.log.error("MoviePy not available.")
            return False

        # Track every clip so the `finally` block can close them all.
        audio = None
        video = None
        base_clip = None
        animated_clip = None
        text_clip = None

        try:
            image = Image.open(io.BytesIO(image_bytes))

            # Fit the image onto the 9:16 canvas with a sampled background color.
            self.log.info(f"Preparing image for YouTube Shorts ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT})...")
            shorts_image = prepare_image_for_shorts(image)
            img_array = np.array(shorts_image)

            if not os.path.exists(audio_path):
                raise FileNotFoundError(f"Audio file not found: {audio_path}")

            self.log.info(f"Loading audio from: {audio_path}")
            audio = AudioFileClip(audio_path)
            # Video length is driven by the narration length.
            audio_duration = audio.duration

            self.log.info(f"Creating YouTube Shorts video ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT}) with duration: {audio_duration:.2f}s")

            # Pick a random motion effect for the background image.
            effect_name, effect_func = get_random_motion_effect()
            self.log.info(f"Applying effect: {effect_name}")

            # Static image clip, then the motion effect on top of it.
            base_clip = ImageClip(img_array, duration=audio_duration)
            animated_clip = effect_func(base_clip, audio_duration)

            # Transparent, fixed-position caption layer.
            self.log.info(f"Creating fixed text overlay for: '{display_text[:50]}...'")
            text_clip = create_text_overlay(display_text, animated_clip.size, audio_duration)

            if text_clip:
                # Moving image below, static caption on top.
                self.log.info("Compositing video: moving image + fixed text overlay...")
                video = CompositeVideoClip([animated_clip, text_clip])
                self.log.info("✅ Text overlay composited successfully (fixed position)")
            else:
                self.log.warning("⚠️ Text overlay creation failed, using video without text")
                video = animated_clip

            # Attach the narration track.
            video = video.set_audio(audio)

            # Encode the final MP4.
            self.log.info(f"Writing YouTube Shorts video to: {output_path}")
            video.write_videofile(
                output_path,
                fps=24,
                codec='libx264',
                audio_codec='aac',
                temp_audiofile='temp-audio.m4a',
                remove_temp=True,
                verbose=False,
                logger=None,
                preset='ultrafast',
                threads=4
            )

            self.log.info(f"✅ YouTube Shorts video created successfully ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT}) with {effect_name} effect + fixed text: {output_path}")
            return True

        except Exception as e:
            self.log.error(f"Video creation failed: {e}")
            import traceback
            traceback.print_exc()
            return False

        finally:
            # Best-effort cleanup; each close is isolated so one failure
            # cannot prevent the others.
            try:
                if text_clip is not None:
                    text_clip.close()
                    self.log.debug("Text clip closed")
            except Exception as e:
                self.log.warning(f"Error closing text clip: {e}")

            try:
                if animated_clip is not None:
                    animated_clip.close()
                    self.log.debug("Animated clip closed")
            except Exception as e:
                self.log.warning(f"Error closing animated clip: {e}")

            try:
                if base_clip is not None:
                    base_clip.close()
                    self.log.debug("Base clip closed")
            except Exception as e:
                self.log.warning(f"Error closing base clip: {e}")

            try:
                if audio is not None:
                    audio.close()
                    self.log.debug("Audio closed")
            except Exception as e:
                self.log.warning(f"Error closing audio: {e}")

            try:
                if video is not None:
                    video.close()
                    self.log.debug("Video closed")
            except Exception as e:
                self.log.warning(f"Error closing video: {e}")

    def process_request_custom(
        self,
        image_base64: str,
        tts_text: str,
        voice: Optional[str] = None,
        speed: float = 0.8
    ) -> Dict[str, Any]:
        """Full pipeline: base64 image + TTS text -> Shorts video.

        Args:
            image_base64: source image, base64-encoded (no text baked in).
            tts_text: narration text, also used as the on-screen caption.
            voice: explicit Kokoro voice, or None for the rotation's next voice.
            speed: TTS playback speed.

        Returns:
            Dict with the base64 video, its path, TTS text, voice used,
            status, and a human-readable message.

        Raises:
            RuntimeError: if TTS is unavailable or any stage fails.
        """
        if not self.kokoro_available or self.tts is None:
            raise RuntimeError("Kokoro TTS not available")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Filename-safe prefix derived from the narration text.
        safe_text = tts_text[:30].replace(" ", "_").replace("/", "_").strip()

        temp_dir = tempfile.gettempdir()
        audio_path = os.path.join(temp_dir, f"audio_{timestamp}.wav")
        video_filename = f"{timestamp}_{safe_text}.mp4"
        video_path = os.path.join(VIDEO_HISTORY_DIR, video_filename)

        try:
            image_bytes = base64.b64decode(image_base64)

            # Narration audio (may advance the voice rotation).
            success, voice_used = self.generate_audio(tts_text, audio_path, voice, speed)
            if not success:
                raise RuntimeError("Failed to generate audio.")

            # Video with the fixed caption overlay.
            if not self.create_video(image_bytes, audio_path, video_path, tts_text):
                raise RuntimeError("Failed to create video.")

            with open(video_path, "rb") as f:
                video_bytes = f.read()
            video_base64 = base64.b64encode(video_bytes).decode('utf-8')

            entry = {
                "timestamp": timestamp,
                "tts_text": tts_text,
                "voice": voice_used,
                "video_path": video_path,
                "duration": self._get_video_duration(video_path)
            }

            # Keep only the newest MAX_HISTORY_COUNT entries.
            self.history.insert(0, entry)
            self.history = self.history[:MAX_HISTORY_COUNT]

            # Remove the temporary narration file.
            if os.path.exists(audio_path):
                os.remove(audio_path)

            self.log.info(f"✅ Video processing completed: {video_filename}")

            return {
                "video_base64": video_base64,
                "video_path": video_path,
                "tts_text": tts_text,
                "voice_used": voice_used,
                "status": "success",
                "message": f"YouTube Shorts video ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT}) generated successfully with voice: {voice_used} at speed: {speed}"
            }

        except Exception as e:
            self.log.error(f"Video processing failed: {e}")
            # Don't leave the temp audio behind on failure.
            if os.path.exists(audio_path):
                os.remove(audio_path)
            raise RuntimeError(f"Video generation failed: {str(e)}")

    def _get_video_duration(self, video_path: str) -> float:
        """Return the video's duration in seconds (0.0 if it can't be read)."""
        try:
            if MOVIEPY_AVAILABLE:
                from moviepy.editor import VideoFileClip
                clip = VideoFileClip(video_path)
                duration = clip.duration
                clip.close()
                return duration
        except Exception as e:
            self.log.warning(f"Could not get video duration: {e}")
        return 0.0

    def get_history(self) -> List[Dict[str, Any]]:
        """Return the in-memory video history (newest first)."""
        return self.history

    def get_voice_rotation_info(self) -> Dict[str, Any]:
        """Return the current state of the voice rotation."""
        return {
            "current_voice": self.voice_manager.get_current_voice(),
            "current_index": self.voice_manager.current_index,
            "total_voices": len(KOKORO_VOICES),
            "all_voices": KOKORO_VOICES
        }
895
+
896
# ---------------- Global Agent Instance ----------------
agent = VideoAgent()

# Human-readable service status, shown in the UI.
if not IS_SERVICE_READY:
    STATUS_MESSAGE = "❌ Service not ready"
elif agent.tts is None:
    STATUS_MESSAGE = "⚠️ Video Agent ready but Kokoro-ONNX failed to initialize."
else:
    current_voice = agent.voice_manager.get_current_voice()
    STATUS_MESSAGE = f"✅ Video Agent ready | Format: YouTube Shorts ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT}) | Voice: {current_voice} | Speed: 0.8"
907
+
908
# ---------------- Gradio Functions ----------------

def gradio_generate_video_custom(
    image_input,
    tts_text: str,
    voice_override: str,
    speed: float
) -> Tuple[Optional[str], str, str]:
    """Gradio handler: build a Shorts video from an image and custom TTS text.

    Args:
        image_input: file path or PIL image from the Gradio widget.
        tts_text: narration/caption text.
        voice_override: explicit voice ID, or "Auto"/empty for rotation.
        speed: TTS playback speed.

    Returns:
        (video_path_or_None, status_markdown, voice_rotation_markdown)
    """
    if not IS_SERVICE_READY:
        return None, f"❌ Service not ready: {STATUS_MESSAGE}", ""
    if not image_input:
        return None, "❌ Please provide an image.", ""
    if not tts_text:
        return None, "❌ Please provide TTS text.", ""

    try:
        # Accept either a file path or a PIL image from Gradio.
        if isinstance(image_input, str):
            with open(image_input, "rb") as f:
                image_bytes = f.read()
        else:
            buffered = io.BytesIO()
            image_input.save(buffered, format="JPEG")
            image_bytes = buffered.getvalue()

        encoded_image = base64.b64encode(image_bytes).decode('utf-8')
        # "Auto" (or empty) means: let the rotation choose.
        chosen_voice = voice_override if voice_override and voice_override != "Auto" else None

        result = agent.process_request_custom(
            image_base64=encoded_image,
            tts_text=tts_text,
            voice=chosen_voice,
            speed=speed
        )

        rotation = agent.get_voice_rotation_info()

        status_msg = (
            f"✅ {result['message']}\n\n"
            f"**Format:** YouTube Shorts ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT})\n"
            f"**TTS Text:** {result['tts_text'][:100]}...\n"
            f"**Duration:** {result.get('duration', 0):.2f}s\n"
            f"**Effect:** Moving image + fixed text overlay"
        )

        voice_rotation_msg = (
            f"**Voice Used:** {result['voice_used']}\n"
            f"**Next Voice:** {rotation['current_voice']}\n"
            f"**Progress:** {rotation['current_index']}/{rotation['total_voices']}"
        )

        return result["video_path"], status_msg, voice_rotation_msg

    except Exception as e:
        log.error(f"Video generation failed: {e}")
        import traceback
        traceback.print_exc()
        return None, f"❌ Error: {str(e)}", ""
+ return None, f"❌ Error: {str(e)}", ""
974
+
975
def gradio_api_endpoint_custom(
    image_base64: str,
    tts_text: str,
    voice: Optional[str] = None,
    speed: float = 0.8
) -> Dict[str, Any]:
    """Full API endpoint: generate a video with optional voice/speed overrides.

    Raises RuntimeError when the service has not initialized.
    """
    if not IS_SERVICE_READY:
        raise RuntimeError(f"Service not ready: {STATUS_MESSAGE}")

    log.info(f"API request received for TTS text: {tts_text[:50]}... with speed: {speed}")

    request_kwargs = {
        "image_base64": image_base64,
        "tts_text": tts_text,
        "voice": voice,
        "speed": speed,
    }
    return agent.process_request_custom(**request_kwargs)
993
+
994
def gradio_api_simple(
    image_base64: str,
    tts_text: str
) -> Dict[str, Any]:
    """Simplified API endpoint for the publisher agent.

    Accepts only the image and the TTS text; voice rotation and the
    default speed (0.8) are applied automatically.
    """
    if not IS_SERVICE_READY:
        raise RuntimeError(f"Service not ready: {STATUS_MESSAGE}")

    log.info(f"🎬 Simple API request received for TTS text: {tts_text[:50]}...")

    # voice=None selects automatic rotation; speed is pinned to 0.8.
    return agent.process_request_custom(
        image_base64=image_base64,
        tts_text=tts_text,
        voice=None,
        speed=0.8,
    )
1013
+
1014
def format_history_for_gallery() -> List[Tuple[str, str]]:
    """Format the agent's history as (video_path, caption) pairs for gr.Gallery.

    Entries whose video file no longer exists on disk are skipped.
    """
    formatted = []
    for entry in agent.get_history():
        video_path = entry.get("video_path")
        if not (video_path and os.path.exists(video_path)):
            continue
        # BUGFIX: use .get() for "tts_text" like the other keys — a history
        # entry missing the key would otherwise raise KeyError and break
        # the whole gallery refresh.
        caption = (
            f'{entry.get("tts_text", "")[:80]}...\n'
            f'Voice: {entry.get("voice", "N/A")} | Duration: {entry.get("duration", 0):.1f}s'
        )
        formatted.append((video_path, caption))
    return formatted
1025
+
1026
def gradio_refresh_history():
    """Button callback: rebuild the history gallery contents."""
    return format_history_for_gallery()
1028
+
1029
def gradio_reset_voice_rotation():
    """Reset the voice-rotation cycle and report the next voice to be used."""
    agent.voice_manager.reset()
    next_voice = agent.get_voice_rotation_info()["current_voice"]
    return f"✅ Voice rotation reset! Next voice: {next_voice}"
1033
+
1034
def gradio_get_voice_info():
    """Return a markdown summary of the voice-rotation state."""
    info = agent.get_voice_rotation_info()
    summary_lines = [
        f"**Current Voice:** {info['current_voice']}",
        f"**Index:** {info['current_index']}/{info['total_voices']}",
        f"**Total Voices:** {len(info['all_voices'])}",
    ]
    return "\n".join(summary_lines)
1041
+
1042
# ---------------- Gradio Interface ----------------

# Main interactive UI. Tabs: video generation, history gallery, voice
# management, and static API documentation. The live API endpoints are
# the separate gr.Interface objects defined after this block.
with gr.Blocks(title="Video Agent - YouTube Shorts") as demo:
    gr.Markdown("# 🎬 Video Agent - YouTube Shorts Format (1080x1920)")
    gr.Markdown(f"**Status:** {STATUS_MESSAGE}")

    if IS_SERVICE_READY:
        gr.Markdown(f"**Features:** YouTube Shorts ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT}) | {len(KOKORO_VOICES)} voices | Auto rotation | Smart background color | Fixed text overlay")
        gr.Markdown("✅ **NEW:** *Videos optimized for YouTube Shorts with intelligent background filling*")

    gr.Markdown("---")

    with gr.Tab("Generate Video"):
        with gr.Row():
            with gr.Column(scale=1):
                # Left column: all generation inputs.
                image_input = gr.Image(label="Upload Image (any size - will be adapted)", type="pil")
                tts_text_input = gr.Textbox(
                    label="TTS Text (will appear as fixed overlay)",
                    lines=4,
                    placeholder="Enter the text to display on screen..."
                )

                # "Auto" defers voice selection to the rotation manager.
                voice_dropdown = gr.Dropdown(
                    choices=["Auto"] + KOKORO_VOICES,
                    value="Auto",
                    label="Voice (Auto = rotation)"
                )

                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=0.8,
                    step=0.1,
                    label="Speech Speed"
                )

                generate_btn = gr.Button("🎬 Generate YouTube Shorts Video", variant="primary")
                status_output = gr.Textbox(label="Status", lines=5)
                voice_rotation_output = gr.Textbox(label="Voice Info", lines=3)

            with gr.Column(scale=1):
                # Right column: generated video preview.
                video_output = gr.Video(label="Generated YouTube Shorts Video (1080x1920)")

        generate_btn.click(
            fn=gradio_generate_video_custom,
            inputs=[image_input, tts_text_input, voice_dropdown, speed_slider],
            outputs=[video_output, status_output, voice_rotation_output]
        )

    with gr.Tab("History"):
        refresh_btn = gr.Button("🔄 Refresh")
        history_gallery = gr.Gallery(label="Recent Videos", columns=2)
        refresh_btn.click(fn=gradio_refresh_history, outputs=[history_gallery])

    with gr.Tab("Voice Management"):
        with gr.Row():
            voice_info_btn = gr.Button("📊 Get Info")
            reset_btn = gr.Button("🔄 Reset Rotation")

        voice_mgmt_output = gr.Textbox(label="Voice Info", lines=5)

        voice_info_btn.click(fn=gradio_get_voice_info, outputs=[voice_mgmt_output])
        reset_btn.click(fn=gradio_reset_voice_rotation, outputs=[voice_mgmt_output])

        # Voice catalog grouped by accent/gender prefix (bf_/af_/bm_/am_).
        gr.Markdown(f"### {len(KOKORO_VOICES)} Voices Available")
        gr.Markdown("**British Female (4):** " + ", ".join([v for v in KOKORO_VOICES if v.startswith("bf_")]))
        gr.Markdown("**American Female (11):** " + ", ".join([v for v in KOKORO_VOICES if v.startswith("af_")]))
        gr.Markdown("**British Male (4):** " + ", ".join([v for v in KOKORO_VOICES if v.startswith("bm_")]))
        gr.Markdown("**American Male (8):** " + ", ".join([v for v in KOKORO_VOICES if v.startswith("am_")]))

    with gr.Tab("API Endpoints") as api_tab:
        # Static documentation only; the callable endpoints live in the
        # gr.Interface objects mounted via TabbedInterface further down.
        gr.Markdown("### 🔌 API Endpoints for External Integration")
        gr.Markdown("Use these endpoints to integrate with other agents (e.g., Publisher Agent)")

        gr.Markdown("---")
        gr.Markdown("#### 📡 Endpoint 1: Simple API (Recommended for Publisher Agent)")
        gr.Markdown("- **API Name**: `generate_video_custom`")
        gr.Markdown("- **Parameters**: `image_base64` (str), `tts_text` (str)")
        gr.Markdown("- **Returns**: JSON with video_base64, video_path, status")
        gr.Markdown(f"- **Format**: YouTube Shorts ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT})")
        gr.Markdown("- **Auto-settings**: Voice rotation enabled, Speed = 0.8, Smart background filling")

        gr.Markdown("---")
        gr.Markdown("#### 📡 Endpoint 2: Full API (Advanced)")
        gr.Markdown("- **API Name**: `generate_video_full`")
        gr.Markdown("- **Parameters**: `image_base64` (str), `tts_text` (str), `voice` (str, optional), `speed` (float, optional)")
        gr.Markdown("- **Returns**: JSON with video_base64, video_path, status")
        gr.Markdown(f"- **Format**: YouTube Shorts ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT})")
1130
+
1131
# ✅ CRITICAL FIX: Define API endpoints as separate gr.Interface outside Blocks
# This is the CORRECT way to expose API endpoints in Gradio

# API Endpoint 1: Simple API (for Publisher Agent)
# Exposed under api_name "generate_video_custom"; voice rotation and the
# default speed (0.8) are applied automatically by gradio_api_simple.
simple_api = gr.Interface(
    fn=gradio_api_simple,
    inputs=[
        gr.Textbox(label="image_base64", placeholder="Base64 encoded image (any size - will be adapted to 1080x1920)"),
        gr.Textbox(label="tts_text", placeholder="Text for TTS audio synthesis")
    ],
    outputs=gr.JSON(label="API Response"),
    title="Simple YouTube Shorts Video API",
    description=f"Generate YouTube Shorts video ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT}) with automatic voice rotation and speed=0.8",
    api_name="generate_video_custom"
)
1146
+
1147
# API Endpoint 2: Full API (with all options)
def gradio_api_full_wrapper(image_base64: str, tts_text: str, voice: str = "Auto", speed: float = 0.8):
    """Adapter for the full API: map the UI's "Auto"/empty voice to None."""
    voice_to_use = None if voice in ("Auto", "") else voice
    return gradio_api_endpoint_custom(image_base64, tts_text, voice_to_use, speed)
1152
+
1153
+ full_api = gr.Interface(
1154
+ fn=gradio_api_full_wrapper,
1155
+ inputs=[
1156
+ gr.Textbox(label="image_base64", placeholder="Base64 encoded image (any size)"),
1157
+ gr.Textbox(label="tts_text", placeholder="Text for TTS"),
1158
+ gr.Dropdown(choices=["Auto"] + KOKORO_VOICES, value="Auto", label="voice"),
1159
+ gr.Slider(minimum=0.5, maximum=2.0, value=0.8, step=0.1, label="speed")
1160
+ ],
1161
+ outputs=gr.JSON(label="API Response"),
1162
+ title="Full YouTube Shorts Video API",
1163
+ description=f"Generate YouTube Shorts video ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT}) with custom voice and speed settings",
1164
+ api_name="generate_video_full"
1165
+ )
1166
+
1167
+ # Mount both APIs into the main demo using TabbedInterface
1168
+ combined_demo = gr.TabbedInterface(
1169
+ [demo, simple_api, full_api],
1170
+ ["Main Interface", "API: Simple", "API: Full"],
1171
+ title=f"🎬 Video Agent - YouTube Shorts ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT})"
1172
+ )
1173
+
1174
if __name__ == "__main__":
    # Bind to all interfaces; the port comes from the environment
    # (default 7860, the standard Hugging Face Spaces port).
    port = int(os.getenv("PORT", "7860"))
    log.info(f"Starting Video Agent with YouTube Shorts format ({YOUTUBE_SHORTS_WIDTH}x{YOUTUBE_SHORTS_HEIGHT})...")
    combined_demo.launch(server_name="0.0.0.0", server_port=port)