aliSaac510 committed
Commit 0faf659 · 1 Parent(s): 342e0fb

apply caption styles and test it

Files changed (11)
  1. core/analyze.py +23 -3
  2. core/config.py +178 -64
  3. core/database.py +144 -0
  4. core/security.py +63 -0
  5. core/stt.py +5 -4
  6. core/styles.py +307 -121
  7. core/subtitle_manager.py +489 -237
  8. firebase_key.json +13 -0
  9. main.py +56 -10
  10. processor.py +377 -370
  11. requirements.txt +2 -0
core/analyze.py CHANGED
@@ -2,14 +2,34 @@ import os
 import time
 from openai import OpenAI
 from dotenv import load_dotenv
+from .database import DatabaseManager
 
 load_dotenv()
 
-# Set up the OpenRouter API
-# The key is loaded from the .env file (make sure OPENROUTER_API_KEY is set)
+# Initialize Database Manager
+# We try to use Firebase if available, otherwise fall back to local SQLite
+try:
+    db = DatabaseManager(use_firebase=True)
+except Exception:
+    print("⚠️ Firebase not configured, falling back to local SQLite.")
+    db = DatabaseManager(use_firebase=False)
+
+# Retrieve API key from secure storage
+# 1. Try to get it from the database
+api_key = db.get_key("openrouter")
+
+# 2. If not in the DB, fall back to .env (legacy support)
+if not api_key:
+    api_key = os.getenv("OPENROUTER_API_KEY")
+
+if not api_key:
+    print("❌ ERROR: OPENROUTER_API_KEY not found in Database or .env")
+    # We don't raise an error here, to allow module import; client creation will fail if used.
+
+# Configure OpenAI client
 client = OpenAI(
     base_url="https://openrouter.ai/api/v1",
-    api_key=os.getenv("OPENROUTER_API_KEY")
+    api_key=api_key
 )
 
 def analyze_transcript_gemini(transcript):
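
A minimal sketch of the key-resolution order this hunk introduces (not part of the commit); the placeholder key value is hypothetical, and use_firebase=False exercises the SQLite fallback path:

    import os
    from core.database import DatabaseManager

    db = DatabaseManager(use_firebase=False)         # SQLite fallback path
    api_key = db.get_key("openrouter")               # 1. encrypted store
    if not api_key:
        api_key = os.getenv("OPENROUTER_API_KEY")    # 2. legacy .env variable
    print("key resolved:", bool(api_key))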
core/config.py CHANGED
@@ -1,105 +1,219 @@
+"""
+Config — Viral Shorts Engine Configuration
+
+Font choices based on analysis of 2M+ short-form videos (2024-2025):
+
+English / Latin
+───────────────
+• Montserrat-Bold → #1 most used font in viral Shorts (Alex Hormozi, MrBeast style)
+• Rubik-Bold      → Distinctive modern feel, high engagement, less saturated
+• Oswald-Bold     → Condensed, fits more words per line — great for fast speech
+• Roboto-Bold     → YouTube's native subtitle font, clean baseline
+
+Arabic
+──────
+• Tajawal-Bold    → Most used modern Arabic font on social media, youth-oriented
+• Cairo-Bold      → Clean, highly legible for captions, widely recognized
+• Almarai-Bold    → Rounded, friendly — popular in Gulf & Egyptian content
+• NotoSansArabic  → Fallback — covers all Arabic Unicode correctly
+
+Style → Font mapping (per caption style):
+    classic        → Montserrat-Bold (professional, universal)
+    modern_glow    → Rubik-Bold      (distinctive, modern)
+    tiktok_bold    → Montserrat-Bold (proven viral, MrBeast aesthetic)
+    tiktok_neon    → Oswald-Bold     (condensed punch)
+    youtube_clean  → Rubik-Bold      (clean educator look)
+    youtube_box    → Montserrat-Bold (karaoke / game-show energy)
+"""
 import os
+import re
 import requests
+from dotenv import load_dotenv
+
+load_dotenv()
+
 
 class Config:
     BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     TEMP_DIR = os.path.join(BASE_DIR, "temp")
     UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
     OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
     LOGS_DIR = os.path.join(BASE_DIR, "logs")
 
-    # Font URLs - Google Fonts CSS API
-    # We use the CSS API to get the correct WOFF2/TTF file
+    # ── Font registry ──────────────────────────────────────────────────────
+    # Google Fonts CSS2 API, wght@700 = Bold
     FONTS = {
-        "Roboto-Bold.ttf": "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",
-        "NotoSansArabic-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
-        "NotoSansSC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
-        "NotoSansJP-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
-        "NotoSansDevanagari-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
-        "Cairo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
-        "Montserrat-Bold.ttf": "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap"
+        # ── English / Latin ────────────────────────────────────────────────
+        "Montserrat-Bold.ttf": "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap",
+        "Rubik-Bold.ttf": "https://fonts.googleapis.com/css2?family=Rubik:wght@700&display=swap",
+        "Oswald-Bold.ttf": "https://fonts.googleapis.com/css2?family=Oswald:wght@700&display=swap",
+        "Roboto-Bold.ttf": "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",
+
+        # ── Arabic ─────────────────────────────────────────────────────────
+        "Tajawal-Bold.ttf": "https://fonts.googleapis.com/css2?family=Tajawal:wght@700&display=swap",
+        "Cairo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
+        "Almarai-Bold.ttf": "https://fonts.googleapis.com/css2?family=Almarai:wght@800&display=swap",
+        "NotoSansArabic-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
+
+        # ── CJK & other scripts ────────────────────────────────────────────
+        "NotoSansSC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
+        "NotoSansJP-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
+        "NotoSansDevanagari-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
     }
 
-    # Dynamic Language to Font Mapping
+    # ── Language → default caption font ────────────────────────────────────
+    # Used when no explicit style font is set, or for non-Latin scripts.
     LANGUAGE_FONT_MAP = {
-        "ar": "NotoSansArabic-Bold.ttf",      # Arabic
-        "zh": "NotoSansSC-Bold.ttf",          # Chinese
-        "ja": "NotoSansJP-Bold.ttf",          # Japanese
-        "hi": "NotoSansDevanagari-Bold.ttf",  # Hindi
-        "ru": "Roboto-Bold.ttf",              # Russian (supported by Roboto)
-        "en": "Roboto-Bold.ttf",              # English
-        "default": "Roboto-Bold.ttf"
+        # Arabic — Tajawal is the modern social-media standard
+        "ar": "Tajawal-Bold.ttf",
+
+        # CJK
+        "zh": "NotoSansSC-Bold.ttf",
+        "ja": "NotoSansJP-Bold.ttf",
+
+        # Devanagari
+        "hi": "NotoSansDevanagari-Bold.ttf",
+
+        # Cyrillic + Latin — Roboto covers both
+        "ru": "Roboto-Bold.ttf",
+
+        # Latin languages — Montserrat is #1 viral font
+        "en": "Montserrat-Bold.ttf",
+        "fr": "Montserrat-Bold.ttf",
+        "es": "Montserrat-Bold.ttf",
+        "de": "Montserrat-Bold.ttf",
+        "pt": "Montserrat-Bold.ttf",
+        "it": "Montserrat-Bold.ttf",
+        "tr": "Montserrat-Bold.ttf",
+
+        # Fallback for any unlisted language
+        "default": "Montserrat-Bold.ttf",
+    }
+
+    # ── Caption style → preferred font ──────────────────────────────────────
+    # SubtitleManager reads this via get_style_config()["font"].
+    # Only for Latin scripts — non-Latin always uses LANGUAGE_FONT_MAP.
+    STYLE_FONT_MAP = {
+        "classic":       "Montserrat-Bold.ttf",
+        "modern_glow":   "Rubik-Bold.ttf",
+        "tiktok_bold":   "Montserrat-Bold.ttf",
+        "tiktok_neon":   "Oswald-Bold.ttf",
+        "youtube_clean": "Rubik-Bold.ttf",
+        "youtube_box":   "Montserrat-Bold.ttf",
     }
 
-    # Video Settings
+    # ── Video settings ───────────────────────────────────────────────────────
     DEFAULT_SIZE = (1080, 1920)
     CHUNK_SIZE_SECONDS = 600
     OVERLAP_SECONDS = 60
 
-    # Styles
     STYLES = [
         "cinematic",
         "cinematic_blur",
         "vertical_full",
         "split_vertical",
-        "split_horizontal"
+        "split_horizontal",
    ]
 
+    # ── Directory setup ──────────────────────────────────────────────────────
     @classmethod
     def setup_dirs(cls):
         for d in [cls.TEMP_DIR, cls.UPLOADS_DIR, cls.OUTPUTS_DIR, cls.LOGS_DIR]:
             os.makedirs(d, exist_ok=True)
 
+    # ── Font URL extraction ──────────────────────────────────────────────────
     @staticmethod
-    def get_urls(content):
         """
-        Parses the CSS file and retrieves the font URLs.
+        Extracts font file URLs from a Google Fonts CSS response.
+
+        Google Fonts CSS contains multiple @font-face blocks, one per subset:
+            /* [0] cyrillic */
+            /* [1] latin-ext */
+            /* [2] latin */      ← we usually want this for Latin fonts
+
+        For Arabic fonts the arabic subset comes first — which is what we want.
+
+        Strategy:
+          - Parse all (comment, url) pairs.
+          - For Latin fonts (prefer_latin=True): prefer the 'latin' subset.
+          - For Arabic/CJK: prefer the script-specific subset (first one).
+          - Fallback: return the last URL found (most specific subset in Google's ordering).
         """
-        urls = []
-        for i in range(len(content)):
-            if content[i: i+3] == 'url':
-                j = i + 4
-                url = ''
-                while content[j] != ')':
-                    url += content[j]
-                    j += 1
-                urls.append(url)
-        return urls
+        # Extract (subset_comment, url) pairs using regex
+        pattern = re.compile(
+            r'/\*\s*\[?\d*\]?\s*([\w\-]+)\s*\*/[^}]*?url\(([^)]+)\)',
+            re.DOTALL,
+        )
+        pairs = pattern.findall(css_content)
+
+        if not pairs:
+            # Fallback: grab all bare urls
+            bare = re.findall(r'url\(([^)]+)\)', css_content)
+            return bare if bare else []
+
+        subset_map = {subset.lower(): url.strip() for subset, url in pairs}
+
+        if prefer_latin:
+            # Priority: latin > latin-ext > first available
+            for key in ("latin", "latin-ext"):
+                if key in subset_map:
+                    return [subset_map[key]]
+            # Return the last subset (Google puts most basic last for Latin)
+            return [list(subset_map.values())[-1]]
+        else:
+            # Arabic/CJK: first subset is the script-specific one
+            return [list(subset_map.values())[0]]
 
     @staticmethod
-    def download_font_from_css(css_url, output_path):
+    def download_font_from_css(css_url: str, output_path: str) -> bool:
         """
-        Downloads the first font file found in the CSS to output_path.
+        Downloads the correct font file for the given CSS URL.
+
+        Automatically detects whether this is a Latin or non-Latin font
+        based on the filename so it picks the right subset.
         """
+        NON_LATIN_KEYWORDS = ("arabic", "noto", "devanagari", "sc", "jp", "kr")
+        filename = os.path.basename(output_path).lower()
+        is_non_latin = any(kw in filename for kw in NON_LATIN_KEYWORDS)
+        prefer_latin = not is_non_latin
+
+        headers = {
+            "User-Agent": (
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/124.0.0.0 Safari/537.36"
+            )
+        }
+
         try:
-            # 1. Fetch CSS content
-            # Add User-Agent to avoid getting minimal CSS or being blocked
-            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
-            response = requests.get(css_url, headers=headers)
-            response.raise_for_status()
-            content = response.text
-
-            # 2. Extract URLs
-            urls = Config.get_urls(content)
-
+            # 1. Fetch CSS
+            resp = requests.get(css_url, headers=headers, timeout=15)
+            resp.raise_for_status()
+            css_content = resp.text
+
+            # 2. Extract correct URL
+            urls = Config.get_urls(css_content, prefer_latin=prefer_latin)
             if not urls:
                 print(f"❌ No font URLs found in CSS: {css_url}")
                 return False
 
-            # 3. Download the first font found (usually the most specific/relevant or primary subset)
-            # For Arabic fonts like Cairo/NotoSansArabic, the first subset is usually the Arabic one.
             font_url = urls[0]
-
-            print(f"⬇️ Downloading font from: {font_url}")
-            font_response = requests.get(font_url, headers=headers)
-            font_response.raise_for_status()
-
-            with open(output_path, 'wb') as f:
-                f.write(font_response.content)
-
-            print(f"✅ Font saved to: {output_path}")
+            print(f"⬇️ Downloading font ({('latin' if prefer_latin else 'script')}) → {font_url}")
+
+            # 3. Download font binary
+            font_resp = requests.get(font_url, headers=headers, timeout=30)
+            font_resp.raise_for_status()
+
+            with open(output_path, "wb") as f:
+                f.write(font_resp.content)
+
+            print(f"✅ Font saved: {output_path}")
             return True
 
-        except Exception as e:
-            print(f"❌ Failed to download font from CSS {css_url}: {e}")
+        except requests.RequestException as e:
+            print(f"❌ Network error downloading font from {css_url}: {e}")
+            return False
+        except Exception as e:
+            print(f"❌ Unexpected error: {e}")
             return False
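
An illustration of the subset selection in Config.get_urls (not part of the commit), using a trimmed Google-Fonts-style CSS snippet; the gstatic URLs are placeholders:

    from core.config import Config

    sample_css = """
    /* arabic */
    @font-face { font-family: 'Cairo'; src: url(https://fonts.gstatic.com/arabic.woff2); }
    /* latin */
    @font-face { font-family: 'Cairo'; src: url(https://fonts.gstatic.com/latin.woff2); }
    """
    print(Config.get_urls(sample_css, prefer_latin=True))   # ['https://fonts.gstatic.com/latin.woff2']
    print(Config.get_urls(sample_css, prefer_latin=False))  # ['https://fonts.gstatic.com/arabic.woff2']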
core/database.py ADDED
@@ -0,0 +1,144 @@
+import sqlite3
+import os
+import json
+from typing import Optional, List, Dict
+from .security import SecurityManager
+
+class DatabaseManager:
+    """
+    Manages secure storage of API keys.
+    Supports both SQLite (local) and Firebase (cloud/community).
+    """
+    def __init__(self, use_firebase: bool = False, db_name="secure_storage.db"):
+        self.security = SecurityManager()
+        self.use_firebase = use_firebase
+
+        if self.use_firebase:
+            self._init_firebase()
+        else:
+            # Local SQLite setup
+            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+            self.db_path = os.path.join(base_dir, db_name)
+            self._init_sqlite()
+
+    def _init_sqlite(self):
+        """Initialize the local SQLite table."""
+        conn = sqlite3.connect(self.db_path)
+        cursor = conn.cursor()
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS api_keys (
+                service_name TEXT,
+                encrypted_key TEXT NOT NULL,
+                added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                is_active BOOLEAN DEFAULT 1,
+                PRIMARY KEY (service_name, encrypted_key)
+            )
+        ''')
+        conn.commit()
+        conn.close()
+
+    def _init_firebase(self):
+        """Initialize the Firebase Admin SDK."""
+        import firebase_admin
+        from firebase_admin import credentials, firestore
+
+        # Check if already initialized
+        if not firebase_admin._apps:
+            # Look for a service account file
+            cred_path = os.getenv("FIREBASE_CREDENTIALS_PATH", "firebase_key.json")
+
+            if os.path.exists(cred_path):
+                cred = credentials.Certificate(cred_path)
+            else:
+                # Try to load from an environment variable (for Hugging Face / cloud)
+                firebase_json = os.getenv("FIREBASE_CREDENTIALS_JSON")
+                if firebase_json:
+                    cred_dict = json.loads(firebase_json)
+                    cred = credentials.Certificate(cred_dict)
+                else:
+                    raise ValueError("Firebase credentials not found! Set FIREBASE_CREDENTIALS_PATH or FIREBASE_CREDENTIALS_JSON.")
+
+            firebase_admin.initialize_app(cred)
+
+        self.db = firestore.client()
+        self.collection = self.db.collection('community_keys')
+
+    def save_key(self, service_name: str, api_key: str):
+        """Encrypts and saves an API key."""
+        encrypted = self.security.encrypt_data(api_key)
+
+        if self.use_firebase:
+            # Imported here as well: the earlier firestore import is local to _init_firebase
+            from firebase_admin import firestore
+            # Save to Firestore.
+            # We use a hash of the encrypted key as document ID to prevent duplicates.
+            doc_id = f"{service_name}_{hash(encrypted)}"
+            self.collection.document(doc_id).set({
+                'service': service_name.lower(),
+                'encrypted_key': encrypted,
+                'is_active': True,
+                'added_at': firestore.SERVER_TIMESTAMP
+            })
+        else:
+            # Save to SQLite
+            conn = sqlite3.connect(self.db_path)
+            cursor = conn.cursor()
+            try:
+                cursor.execute('''
+                    INSERT INTO api_keys (service_name, encrypted_key)
+                    VALUES (?, ?)
+                ''', (service_name.lower(), encrypted))
+                conn.commit()
+            except sqlite3.IntegrityError:
+                pass  # Key already exists
+            finally:
+                conn.close()
+
+    def get_key(self, service_name: str) -> Optional[str]:
+        """Retrieves a valid API key (round-robin or random selection could be implemented here)."""
+        # For now, just get the first available active key
+        if self.use_firebase:
+            docs = self.collection.where('service', '==', service_name.lower())\
+                                  .where('is_active', '==', True)\
+                                  .limit(1).stream()
+            for doc in docs:
+                data = doc.to_dict()
+                return self.security.decrypt_data(data['encrypted_key'])
+            return None
+        else:
+            conn = sqlite3.connect(self.db_path)
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT encrypted_key FROM api_keys
+                WHERE service_name = ? AND is_active = 1
+                LIMIT 1
+            ''', (service_name.lower(),))
+            row = cursor.fetchone()
+            conn.close()
+
+            if row:
+                return self.security.decrypt_data(row[0])
+            return None
+
+    def get_all_keys(self, service_name: str) -> List[str]:
+        """Returns ALL valid decrypted keys for a service (useful for rotation)."""
+        keys = []
+        if self.use_firebase:
+            docs = self.collection.where('service', '==', service_name.lower())\
+                                  .where('is_active', '==', True).stream()
+            for doc in docs:
+                decrypted = self.security.decrypt_data(doc.to_dict()['encrypted_key'])
+                if decrypted:
+                    keys.append(decrypted)
+        else:
+            conn = sqlite3.connect(self.db_path)
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT encrypted_key FROM api_keys
+                WHERE service_name = ? AND is_active = 1
+            ''', (service_name.lower(),))
+            rows = cursor.fetchall()
+            conn.close()
+            for row in rows:
+                decrypted = self.security.decrypt_data(row[0])
+                if decrypted:
+                    keys.append(decrypted)
+        return keys
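
A local round-trip sketch (not part of the commit), assuming FERNET_SECRET is set before the manager is constructed so the same cipher is reused across save and load; the service and key values are placeholders:

    import os
    from core.security import SecurityManager
    os.environ["FERNET_SECRET"] = SecurityManager.generate_key()  # demo only; persist this in real use

    from core.database import DatabaseManager
    db = DatabaseManager(use_firebase=False)
    db.save_key("openrouter", "sk-or-demo")       # stored encrypted at rest
    print(db.get_key("openrouter"))               # -> "sk-or-demo"
    print(db.get_all_keys("openrouter"))          # -> ["sk-or-demo"] (rotation pool)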
core/security.py ADDED
@@ -0,0 +1,63 @@
+import os
+from cryptography.fernet import Fernet
+from typing import Optional
+
+class SecurityManager:
+    """
+    Manages encryption and decryption of sensitive data like API keys.
+    """
+
+    def __init__(self, secret_key: Optional[str] = None):
+        """
+        Initialize with a secret key. If not provided, it attempts to read from the FERNET_SECRET env var.
+        """
+        if not secret_key:
+            secret_key = os.getenv("FERNET_SECRET")
+
+        if not secret_key:
+            # For development/testing, we can generate one if not present, but warn about it.
+            # In production, this should raise an error.
+            print("⚠ WARNING: FERNET_SECRET not found. Generating a temporary key for this session.")
+            self._cipher = Fernet(Fernet.generate_key())
+        else:
+            try:
+                self._cipher = Fernet(secret_key.encode() if isinstance(secret_key, str) else secret_key)
+            except Exception as e:
+                raise ValueError(f"Invalid FERNET_SECRET: {e}")
+
+    @staticmethod
+    def generate_key() -> str:
+        """
+        Generates a new Fernet key.
+        Run this once and store the output in your environment variables.
+        """
+        return Fernet.generate_key().decode()
+
+    def encrypt_data(self, data: str) -> str:
+        """
+        Encrypts a string.
+        """
+        if not data:
+            return ""
+        encrypted = self._cipher.encrypt(data.encode())
+        return encrypted.decode()
+
+    def decrypt_data(self, encrypted_data: str) -> str:
+        """
+        Decrypts an encrypted string.
+        """
+        if not encrypted_data:
+            return ""
+        try:
+            decrypted = self._cipher.decrypt(encrypted_data.encode())
+            return decrypted.decode()
+        except Exception as e:
+            print(f"❌ Decryption failed: {e}")
+            return ""
+
+# Helper instance.
+# You can import 'security' and use security.encrypt_data() directly if the env var is set.
+try:
+    security = SecurityManager()
+except Exception:
+    security = None
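
A quick encrypt/decrypt round trip (not part of the commit); the secret is generated inline for illustration, whereas in real use the generate_key() output would be stored as FERNET_SECRET:

    from core.security import SecurityManager

    secret = SecurityManager.generate_key()
    sm = SecurityManager(secret_key=secret)
    token = sm.encrypt_data("sk-or-example")      # placeholder, not a real credential
    assert sm.decrypt_data(token) == "sk-or-example"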
core/stt.py CHANGED
@@ -33,13 +33,14 @@ class STT:
             print(f"⚠️ GPU not available, using CPU with {model_size} model: {e}")
             self.model = WhisperModel(model_size, device="cpu", compute_type="int8")
 
-    def get_transcript(self, video_path: str, language: str = None, skip_ai: bool = False, timestamp_mode="segments"):
+    def get_transcript(self, video_path: str, language: str = None, skip_ai: bool = False, timestamp_mode="segments", vad_filter=True):
         """Convert the video to text with word timings using Faster-Whisper.
 
         Args:
             timestamp_mode: "words" for individual words, "segments" for whole sentences
+            vad_filter: filter out silent audio (True/False)
         """
-        print(f"🎙️ Transcribing: {video_path} (Language: {language if language else 'Auto'}, Mode: {timestamp_mode})")
+        print(f"🎙️ Transcribing: {video_path} (Language: {language if language else 'Auto'}, Mode: {timestamp_mode}, VAD: {vad_filter})")
 
         # Log the transcript to the logs directory
         log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs", "transcript.log")
@@ -104,8 +105,8 @@ class STT:
             beam_size=1,
             word_timestamps=word_timestamps,
             language=actual_stt_lang,
-            vad_filter=True,  # filter out silent audio
-            vad_parameters=dict(min_silence_duration_ms=500)
+            vad_filter=vad_filter,  # use the caller-supplied setting
+            vad_parameters=dict(min_silence_duration_ms=500) if vad_filter else None
         )
         detected_lang = info.language
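A hedged usage sketch of the new vad_filter switch (not part of the commit); the STT constructor arguments and the file path are assumptions, since neither appears in this hunk:

    from core.stt import STT

    stt = STT()                        # assumed default construction
    result = stt.get_transcript(
        "uploads/demo.mp4",            # hypothetical input path
        timestamp_mode="words",
        vad_filter=False,              # keep silent stretches instead of trimming them
    )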
core/styles.py CHANGED
@@ -1,6 +1,12 @@
+"""
+Video Styles — YouTube Shorts Production Engine
+SplitVertical & SplitHorizontal rebuilt with seamless gradient blending.
+All class/method names kept identical for drop-in integration.
+"""
 from abc import ABC, abstractmethod
 import os
 import cv2
+import numpy as np
 import moviepy.editor as mpe
 from .config import Config
 from .logger import Logger
@@ -8,34 +14,114 @@ from .subtitle_manager import SubtitleManager
 
 logger = Logger.get_logger(__name__)
 
+
+# ────────────────────────────────────────────────────────────────────────────
+# Gradient Mask Helpers
+# ────────────────────────────────────────────────────────────────────────────
+
+def _linear_gradient(length: int, fade_from_zero: bool) -> np.ndarray:
+    """
+    Returns a 1-D float32 array [0..1] of given length.
+    fade_from_zero=True  → 0 → 1 (clip fades IN at this edge)
+    fade_from_zero=False → 1 → 0 (clip fades OUT at this edge)
+    """
+    arr = np.linspace(0.0, 1.0, length, dtype=np.float32)
+    return arr if fade_from_zero else arr[::-1]
+
+
+def _make_vertical_mask(clip_w: int, clip_h: int,
+                        blend_top: int = 0, blend_bottom: int = 0) -> np.ndarray:
+    """
+    Float32 mask (clip_h × clip_w) in [0,1].
+    blend_top    → pixels from top that fade in (0→1)
+    blend_bottom → pixels from bottom that fade out (1→0)
+    """
+    mask = np.ones((clip_h, clip_w), dtype=np.float32)
+    if blend_top > 0:
+        grad = _linear_gradient(blend_top, fade_from_zero=True)
+        mask[:blend_top, :] = grad[:, np.newaxis]
+    if blend_bottom > 0:
+        grad = _linear_gradient(blend_bottom, fade_from_zero=False)
+        mask[clip_h - blend_bottom:, :] = grad[:, np.newaxis]
+    return mask
+
+
+def _make_horizontal_mask(clip_w: int, clip_h: int,
+                          blend_left: int = 0, blend_right: int = 0) -> np.ndarray:
+    """
+    Float32 mask (clip_h × clip_w) in [0,1].
+    blend_left  → pixels from left that fade in (0→1)
+    blend_right → pixels from right that fade out (1→0)
+    """
+    mask = np.ones((clip_h, clip_w), dtype=np.float32)
+    if blend_left > 0:
+        grad = _linear_gradient(blend_left, fade_from_zero=True)
+        mask[:, :blend_left] = grad[np.newaxis, :]
+    if blend_right > 0:
+        grad = _linear_gradient(blend_right, fade_from_zero=False)
+        mask[:, clip_w - blend_right:] = grad[np.newaxis, :]
+    return mask
+
+
+def _apply_mask(clip: mpe.VideoClip, mask_array: np.ndarray) -> mpe.VideoClip:
+    """Attach a static float32 numpy mask to a video clip."""
+    mask_clip = mpe.ImageClip(mask_array, ismask=True, duration=clip.duration)
+    return clip.set_mask(mask_clip)
+
+
+def _fit_to_width(clip: mpe.VideoClip, target_w: int) -> mpe.VideoClip:
+    """Resize clip so width == target_w, keeping aspect ratio."""
+    return clip.resize(width=target_w)
+
+
+def _fit_to_height(clip: mpe.VideoClip, target_h: int) -> mpe.VideoClip:
+    """Resize clip so height == target_h, keeping aspect ratio."""
+    return clip.resize(height=target_h)
+
+
+def _loop_or_cut(clip: mpe.VideoClip, duration: float) -> mpe.VideoClip:
+    if clip.duration < duration:
+        return clip.loop(duration=duration)
+    return clip.subclip(0, duration)
+
+
+# ────────────────────────────────────────────────────────────────────────────
+# Smart Face Cropper
+# ────────────────────────────────────────────────────────────────────────────
+
 class SmartFaceCropper:
     def __init__(self, output_size=(1080, 1920)):
         self.output_size = output_size
-        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+        self.face_cascade = cv2.CascadeClassifier(
+            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
+        )
         self.last_coords = None
         self.smoothed_x = None
         self.smoothing = 0.2
         self.frame_count = 0
 
     def get_crop_coordinates(self, frame):
         h, w = frame.shape[:2]
         target_w = int(h * self.output_size[0] / self.output_size[1])
         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-        small_gray = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
-        faces = self.face_cascade.detectMultiScale(small_gray, 1.1, 8, minSize=(50, 50))
+        small = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
+        faces = self.face_cascade.detectMultiScale(small, 1.1, 8, minSize=(50, 50))
 
         if len(faces) > 0:
-            faces = sorted(faces, key=lambda f: f[2]*f[3], reverse=True)
+            faces = sorted(faces, key=lambda f: f[2] * f[3], reverse=True)
             fx, fy, fw, fh = [v * 2 for v in faces[0]]
             current_center_x = fx + fw // 2
             self.last_coords = (fx, fy, fw, fh)
         else:
             current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x
 
         if self.smoothed_x is None:
             self.smoothed_x = current_center_x
         else:
-            self.smoothed_x = self.smoothed_x * (1 - self.smoothing) + current_center_x * self.smoothing
+            self.smoothed_x = (
+                self.smoothed_x * (1 - self.smoothing)
+                + current_center_x * self.smoothing
+            )
 
         left = int(self.smoothed_x - target_w // 2)
         left = max(0, min(left, w - target_w))
@@ -43,24 +129,27 @@
 
     def apply_to_clip(self, clip):
         frame_skip = 5
 
         def filter_frame(get_frame, t):
             frame = get_frame(t)
             self.frame_count += 1
 
             if self.frame_count % frame_skip == 0 or self.last_coords is None:
-                left, top, right, bottom = self.get_crop_coordinates(frame)
+                left, _, right, _ = self.get_crop_coordinates(frame)
             else:
                 h, w = frame.shape[:2]
                 target_w = int(h * self.output_size[0] / self.output_size[1])
                 left = int(self.smoothed_x - target_w // 2) if self.smoothed_x else w // 2 - target_w // 2
                 left = max(0, min(left, w - target_w))
                 right = left + target_w
-
-            cropped = frame[:, left:right]
-            return cv2.resize(cropped, self.output_size)
+            return cv2.resize(frame[:, left:right], self.output_size)
+
         return clip.fl(filter_frame)
 
+
+# ────────────────────────────────────────────────────────────────────────────
+# Base Style
+# ────────────────────────────────────────────────────────────────────────────
+
 class BaseStyle(ABC):
     def __init__(self, output_size=Config.DEFAULT_SIZE):
         self.output_size = output_size
@@ -68,162 +157,259 @@ class BaseStyle(ABC):
     @abstractmethod
     def apply(self, clip, **kwargs):
         pass
-
-    # --------------------------------------------------------------------------
-    # Refactored method to combine Style + Captions in ONE CompositeVideoClip
-    # --------------------------------------------------------------------------
-    def apply_with_captions(self, clip, transcript_data=None, language=None, caption_mode="sentence", **kwargs):
-        """
-        Applies style AND adds captions in a single composition step.
-        This prevents double rendering (CompositeVideoClip inside CompositeVideoClip).
-        """
-        # 1. Get the base styled clip (which might be a CompositeVideoClip itself)
+
+    def apply_with_captions(self, clip, transcript_data=None, language=None,
+                            caption_mode="sentence", **kwargs):
         styled_clip = self.apply(clip, **kwargs)
-
-        # 2. If no captions needed, just return the styled clip
         if not transcript_data:
             return styled_clip
 
-        # 3. Generate caption CLIPS (ImageClips) only, do not composite yet
         caption_clips = self._create_caption_clips(transcript_data, language, caption_mode)
-
         if not caption_clips:
             return styled_clip
 
-        # 4. Optimize Composition:
-        # If styled_clip is already a CompositeVideoClip, we can flatten the list
-        # instead of nesting composites.
         if isinstance(styled_clip, mpe.CompositeVideoClip):
-            # IMPORTANT: We must copy the list to avoid modifying the original list in place if it's reused
-            final_layers = list(styled_clip.clips) + caption_clips
-            return mpe.CompositeVideoClip(final_layers, size=self.output_size)
-        else:
-            # If styled_clip is just a simple VideoFileClip or similar, wrap it
-            return mpe.CompositeVideoClip([styled_clip] + caption_clips, size=self.output_size)
-    # --------------------------------------------------------------------------
+            return mpe.CompositeVideoClip(
+                list(styled_clip.clips) + caption_clips, size=self.output_size
+            )
+        return mpe.CompositeVideoClip([styled_clip] + caption_clips, size=self.output_size)
 
     def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
-        # This method is now DEPRECATED in favor of passing transcript_data to apply()
-        # but kept for backward compatibility if needed.
+        """Kept for backward compatibility."""
         if not transcript_data:
             return clip
         return SubtitleManager.create_captions(
-            clip,
-            transcript_data,
-            size=self.output_size,
-            language=language,
-            caption_mode=caption_mode
+            clip, transcript_data, size=self.output_size,
+            language=language, caption_mode=caption_mode,
         )
-
+
     def _create_caption_clips(self, transcript_data, language=None, caption_mode="sentence"):
-        """Helper to create just the caption clips list, not a full CompositeVideoClip"""
         return SubtitleManager.create_caption_clips(
-            transcript_data,
-            size=self.output_size,
-            language=language,
-            caption_mode=caption_mode
+            transcript_data, size=self.output_size,
+            language=language, caption_mode=caption_mode,
        )
 
+
+# ────────────────────────────────────────────────────────────────────────────
+# Cinematic Style
+# ────────────────────────────────────────────────────────────────────────────
+
 class CinematicStyle(BaseStyle):
     def apply(self, clip, background_path=None, **kwargs):
         if background_path and os.path.exists(background_path):
             ext = os.path.splitext(background_path)[1].lower()
-            video_extensions = ['.mp4', '.avi', '.mov', '.mkv', '.webm']
-
-            if ext in video_extensions:
-                bg = mpe.VideoFileClip(background_path).without_audio().resize(height=self.output_size[1])
-                if bg.duration < clip.duration:
-                    bg = bg.loop(duration=clip.duration)
-                else:
-                    bg = bg.subclip(0, clip.duration)
+            video_ext = {".mp4", ".avi", ".mov", ".mkv", ".webm"}
+            if ext in video_ext:
+                bg = _loop_or_cut(
+                    mpe.VideoFileClip(background_path).without_audio()
+                       .resize(height=self.output_size[1]),
+                    clip.duration,
+                )
             else:
-                bg = mpe.ImageClip(background_path).set_duration(clip.duration).resize(height=self.output_size[1])
-
+                bg = (
+                    mpe.ImageClip(background_path)
+                    .set_duration(clip.duration)
+                    .resize(height=self.output_size[1])
+                )
             if bg.w > self.output_size[0]:
-                bg = bg.crop(x_center=bg.w/2, width=self.output_size[0])
+                bg = bg.crop(x_center=bg.w / 2, width=self.output_size[0])
             else:
                 bg = bg.resize(width=self.output_size[0])
         else:
             bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)
-
-        main_video = clip.resize(width=self.output_size[0]).set_position("center")
-
-        if main_video.h > self.output_size[1]:
-            main_video = clip.resize(height=self.output_size[1]).set_position("center")
-
-        return mpe.CompositeVideoClip([bg, main_video], size=self.output_size)
+
+        main = clip.resize(width=self.output_size[0]).set_position("center")
+        if main.h > self.output_size[1]:
+            main = clip.resize(height=self.output_size[1]).set_position("center")
+
+        return mpe.CompositeVideoClip([bg, main], size=self.output_size)
+
+
+# ────────────────────────────────────────────────────────────────────────────
+# Cinematic Blur Style
+# ────────────────────────────────────────────────────────────────────────────
 
 class CinematicBlurStyle(BaseStyle):
     def apply(self, clip, **kwargs):
         bg = clip.resize(height=self.output_size[1])
         if bg.w < self.output_size[0]:
             bg = clip.resize(width=self.output_size[0])
-
+
         def make_blur(get_frame, t):
             frame = get_frame(t)
             small = cv2.resize(frame, (16, 16))
-            blurred = cv2.resize(small, (self.output_size[0], self.output_size[1]), interpolation=cv2.INTER_LINEAR)
-            blurred = cv2.GaussianBlur(blurred, (21, 21), 0)
-            return blurred
-
+            blurred = cv2.resize(
+                small, (self.output_size[0], self.output_size[1]),
+                interpolation=cv2.INTER_LINEAR,
+            )
+            return cv2.GaussianBlur(blurred, (21, 21), 0)
+
         bg_blurred = bg.fl(make_blur).set_opacity(0.6)
-
-        main_video = clip.resize(width=self.output_size[0]).set_position("center")
-
-        if main_video.h > self.output_size[1]:
-            main_video = clip.resize(height=self.output_size[1]).set_position("center")
-
-        return mpe.CompositeVideoClip([bg_blurred, main_video], size=self.output_size)
+        main = clip.resize(width=self.output_size[0]).set_position("center")
+        if main.h > self.output_size[1]:
+            main = clip.resize(height=self.output_size[1]).set_position("center")
+
+        return mpe.CompositeVideoClip([bg_blurred, main], size=self.output_size)
+
+
+# ────────────────────────────────────────────────────────────────────────────
+# Split Vertical (top / bottom, seamless gradient blend)
+# ────────────────────────────────────────────────────────────────────────────
 
 class SplitVerticalStyle(BaseStyle):
+    """
+    Splits the Shorts canvas (1080 × 1920) into top and bottom segments.
+
+    Layout
+    ──────
+    • Top segment   : 58 % of canvas height → ~1114 px
+    • Bottom segment: fills the rest        → ~926 px
+    • Blend zone    : 120 px overlap where the two clips cross-fade via
+      gradient masks — no hard dividing line visible.
+
+    The gradient is very subtle (linear alpha), so it doesn't destroy
+    content near the seam, it just dissolves one clip into the other.
+    """
+
+    SPLIT_RATIO: float = 0.58   # top segment fraction of total height
+    BLEND_PX: int = 120         # overlap / blend zone height in pixels
+
     def apply(self, clip, playground_path=None, **kwargs):
-        h_half = self.output_size[1] // 2
-        top = clip.resize(height=h_half).set_position(('center', 'top'))
-
-        bottom = None
+        W, H = self.output_size                 # 1080 × 1920
+        blend = self.BLEND_PX
+        h_top_seg = int(H * self.SPLIT_RATIO)   # ~1114
+        h_bot_seg = H - h_top_seg + blend       # ~926 (includes overlap)
+
+        # ── Prepare main clip for top segment ────────────────────────────
+        top_clip = _fit_to_width(clip, W)
+
+        # Crop to the top portion we need (+ blend zone so gradient has room)
+        top_h = min(top_clip.h, h_top_seg + blend // 2)
+        top_clip = top_clip.crop(x1=0, y1=0, x2=W, y2=top_h).resize((W, h_top_seg))
+
+        # Gradient: fade out the bottom `blend` rows → seamless merge
+        top_mask = _make_vertical_mask(W, h_top_seg, blend_bottom=blend)
+        top_clip = _apply_mask(top_clip, top_mask).set_position((0, 0))
+
+        # ── Prepare playground / fallback clip for bottom segment ────────
         if playground_path and os.path.exists(playground_path):
-            bottom = mpe.VideoFileClip(playground_path).without_audio().resize(height=h_half).set_position(('center', 'bottom'))
-            if bottom.duration < clip.duration:
-                bottom = bottom.loop(duration=clip.duration)
-            else:
-                bottom = bottom.subclip(0, clip.duration)
+            bot_src = _loop_or_cut(
+                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
+            )
         else:
-            bottom = clip.resize(height=h_half).set_position(('center', 'bottom')).set_opacity(0.5)
-
-        return mpe.CompositeVideoClip([top, bottom], size=self.output_size)
+            # Fallback: mirror/tint of the same source
+            bot_src = clip.set_opacity(0.85)
+
+        bot_clip = _fit_to_width(bot_src, W)
+
+        # We want the middle/lower portion of the source for the bottom panel
+        if bot_clip.h > h_bot_seg:
+            y_start = max(0, bot_clip.h - h_bot_seg)
+            bot_clip = bot_clip.crop(x1=0, y1=y_start, x2=W, y2=bot_clip.h)
+
+        bot_clip = bot_clip.resize((W, h_bot_seg))
+
+        # Gradient: fade in the top `blend` rows → seamless merge
+        bot_mask = _make_vertical_mask(W, h_bot_seg, blend_top=blend)
+        bot_y = h_top_seg - blend        # overlaps by `blend` px
+        bot_clip = _apply_mask(bot_clip, bot_mask).set_position((0, bot_y))
+
+        return mpe.CompositeVideoClip([bot_clip, top_clip], size=self.output_size)
+
+
+# ────────────────────────────────────────────────────────────────────────────
+# Split Horizontal (left / right, seamless gradient blend)
+# ────────────────────────────────────────────────────────────────────────────
 
 class SplitHorizontalStyle(BaseStyle):
+    """
+    Splits the Shorts canvas (1080 × 1920) into left and right panels.
+
+    Layout
+    ──────
+    • Each panel fills the full 1920 px height.
+    • Left panel : 52 % of canvas width → ~562 px
+    • Right panel: fills the rest       → ~518 px
+    • Blend zone : 80 px overlap with cross-fade gradient masks.
+
+    Both panels are individually cropped to portrait aspect ratio
+    (each showing a 540-wide slice of a 1080-wide source),
+    then blended at the seam — no visible dividing line.
+    """
+
+    SPLIT_RATIO: float = 0.52   # left panel fraction of total width
+    BLEND_PX: int = 80          # horizontal overlap / blend zone
+
     def apply(self, clip, playground_path=None, **kwargs):
-        w_half = self.output_size[0] // 2
-        left = clip.resize(width=w_half).set_position(('left', 'center'))
-
-        right = None
+        W, H = self.output_size                  # 1080 × 1920
+        blend = self.BLEND_PX
+        w_left_seg = int(W * self.SPLIT_RATIO)   # ~562
+        w_right_seg = W - w_left_seg + blend     # ~598 (includes overlap)
+
+        # ── Left panel from main clip ─────────────────────────────────────
+        left_src = _fit_to_height(clip, H)
+        lw = left_src.w
+
+        # Crop the left portion (slightly more than half for a natural look)
+        crop_w_l = min(lw, w_left_seg + blend)
+        left_clip = left_src.crop(x1=max(0, lw // 2 - crop_w_l),
+                                  y1=0, x2=lw // 2, y2=H)
+        left_clip = left_clip.resize((w_left_seg, H))
+
+        # Gradient: fade out the rightmost `blend` columns
+        left_mask = _make_horizontal_mask(w_left_seg, H, blend_right=blend)
+        left_clip = _apply_mask(left_clip, left_mask).set_position((0, 0))
+
+        # ── Right panel from playground or fallback ───────────────────────
         if playground_path and os.path.exists(playground_path):
-            right = mpe.VideoFileClip(playground_path).without_audio().resize(width=w_half).set_position(('right', 'center'))
-            if right.duration < clip.duration:
-                right = right.loop(duration=clip.duration)
-            else:
-                right = right.subclip(0, clip.duration)
+            right_src = _loop_or_cut(
+                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
+            )
         else:
-            right = clip.resize(width=w_half).set_position(('right', 'center')).set_opacity(0.5)
-
-        return mpe.CompositeVideoClip([left, right], size=self.output_size)
+            right_src = clip.set_opacity(0.85)
+
+        right_full = _fit_to_height(right_src, H)
+        rw = right_full.w
+
+        # Crop the right portion of the source
+        crop_w_r = min(rw, w_right_seg + blend)
+        right_clip = right_full.crop(x1=rw // 2, y1=0,
+                                     x2=rw // 2 + crop_w_r, y2=H)
+        right_clip = right_clip.resize((w_right_seg, H))
+
+        # Gradient: fade in the leftmost `blend` columns
+        right_mask = _make_horizontal_mask(w_right_seg, H, blend_left=blend)
+        right_x = w_left_seg - blend     # overlaps by `blend` px
+        right_clip = _apply_mask(right_clip, right_mask).set_position((right_x, 0))
+
+        return mpe.CompositeVideoClip([right_clip, left_clip], size=self.output_size)
+
+
+# ────────────────────────────────────────────────────────────────────────────
+# Vertical Full Style
+# ────────────────────────────────────────────────────────────────────────────
 
 class VerticalFullStyle(BaseStyle):
     def apply(self, clip, **kwargs):
         cropper = SmartFaceCropper(output_size=self.output_size)
         return cropper.apply_to_clip(clip)
 
+
+# ────────────────────────────────────────────────────────────────────────────
+# Style Factory (unchanged API)
+# ────────────────────────────────────────────────────────────────────────────
+
 class StyleFactory:
     _styles = {
         "cinematic": CinematicStyle,
         "cinematic_blur": CinematicBlurStyle,
         "split_vertical": SplitVerticalStyle,
         "split_horizontal": SplitHorizontalStyle,
-        "vertical_full": VerticalFullStyle
+        "vertical_full": VerticalFullStyle,
     }
 
     @staticmethod
     def get_style(style_name) -> BaseStyle:
         style_class = StyleFactory._styles.get(style_name, CinematicBlurStyle)
         return style_class()
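
A small sanity check of the new gradient helpers (not part of the commit); importing them from core.styles pulls in moviepy and cv2, but the functions themselves only need numpy:

    import numpy as np
    from core.styles import _make_vertical_mask

    mask = _make_vertical_mask(4, 10, blend_bottom=3)   # 10 rows, fade out the last 3
    assert mask.shape == (10, 4)
    assert mask[0, 0] == 1.0       # fully opaque at the top
    assert mask[-1, 0] == 0.0      # fully transparent at the bottom edge
    print(mask[:, 0])              # [1 1 1 1 1 1 1 1 0.5 0]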
core/subtitle_manager.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import os
2
  import numpy as np
3
  import urllib.request
@@ -10,307 +15,554 @@ from .logger import Logger
10
 
11
  logger = Logger.get_logger(__name__)
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  class SubtitleManager:
 
 
14
  @staticmethod
15
  def ensure_font(language=None, style_font=None, text_content=None):
16
- """Ensures a valid font exists dynamically based on language or content."""
17
-
18
- # 1. Determine Font Name
19
  font_name = Config.LANGUAGE_FONT_MAP.get("default", "Roboto-Bold.ttf")
20
-
21
- # Priority 1: Explicit Style Font (if language supports it or it's Latin)
22
- # However, if text is Arabic/CJK, style font (usually Latin) might break it.
23
- # So we should check language compatibility first.
24
-
25
  detected_lang = language
26
  if not detected_lang and text_content:
27
- # Simple script detection
28
  if any("\u0600" <= c <= "\u06FF" for c in text_content):
29
  detected_lang = "ar"
30
  elif any("\u4E00" <= c <= "\u9FFF" for c in text_content):
31
  detected_lang = "zh"
32
- elif any("\u3040" <= c <= "\u309F" for c in text_content) or any("\u30A0" <= c <= "\u30FF" for c in text_content):
33
  detected_lang = "ja"
34
  elif any("\u0900" <= c <= "\u097F" for c in text_content):
35
  detected_lang = "hi"
36
  elif any("\u0400" <= c <= "\u04FF" for c in text_content):
37
  detected_lang = "ru"
38
 
39
- # Priority 2: Language-specific font from Config Map
40
  if detected_lang in Config.LANGUAGE_FONT_MAP:
41
- font_name = Config.LANGUAGE_FONT_MAP[detected_lang]
42
- elif style_font and not detected_lang:
43
- # Only use style font if no specific non-Latin language detected
44
- font_name = style_font
45
 
46
- # Fallback: if detected language is known but not in map (shouldn't happen with default keys)
47
  if detected_lang and detected_lang not in Config.LANGUAGE_FONT_MAP:
48
- logger.warning(f"⚠️ Language {detected_lang} not in font map, using default.")
49
-
50
  font_path = os.path.join(Config.BASE_DIR, font_name)
51
-
52
  if not os.path.exists(font_path):
53
- logger.info(f"📥 Downloading font: {font_name}...")
54
- # We might need to add more fonts to Config.FONTS or download dynamically
55
  url = Config.FONTS.get(font_name)
56
  if url:
57
  try:
58
- # Use Config's CSS downloader for Google Fonts
59
  if "fonts.googleapis.com/css" in url:
60
- success = Config.download_font_from_css(url, font_path)
61
- if not success:
62
- raise Exception("CSS font download failed")
63
  else:
64
- # Fallback for direct links
65
  urllib.request.urlretrieve(url, font_path)
66
-
67
- logger.info(f"✅ Font downloaded: {font_name}")
68
- except Exception as e:
69
- logger.error(f"❌ Failed to download font: {e}")
70
  return "Arial"
71
  else:
72
- logger.warning(f"⚠️ No URL found for font: {font_name}")
73
- # Fallback for now if not in config
74
- if font_name == "Montserrat-Bold.ttf": # TikTok popular
75
- # Add logic to download or use system font
76
- pass
77
-
78
  return font_path
79
 
 
80
  @staticmethod
81
- def create_pil_text_clip(text, fontsize, color, font_path, stroke_color='black', stroke_width=2, bg_color=None, padding=10):
82
- """Creates a text clip using PIL."""
 
83
  try:
84
  try:
85
  font = ImageFont.truetype(font_path, fontsize)
86
- except:
87
- logger.warning(f"⚠️ Failed to load font {font_path}, using default.")
88
  font = ImageFont.load_default()
89
-
90
- dummy_img = Image.new('RGBA', (1, 1))
91
- draw = ImageDraw.Draw(dummy_img)
92
- bbox = draw.textbbox((0, 0), text, font=font)
93
- text_width = bbox[2] - bbox[0]
94
- text_height = bbox[3] - bbox[1]
95
-
96
  margin = int(stroke_width * 2) + padding
97
- img_width = text_width + margin * 2
98
- img_height = text_height + margin * 2
99
-
100
- img = Image.new('RGBA', (int(img_width), int(img_height)), (0, 0, 0, 0))
101
  draw = ImageDraw.Draw(img)
102
-
103
- # Draw Background if requested
104
  if bg_color:
105
- draw.rounded_rectangle(
106
- [(0, 0), (img_width, img_height)],
107
- radius=15,
108
- fill=bg_color
109
- )
110
-
111
- x = (img_width - text_width) / 2 - bbox[0]
112
- y = (img_height - text_height) / 2 - bbox[1]
113
-
114
- draw.text(
115
- (x, y),
116
- text,
117
- font=font,
118
- fill=color,
119
- stroke_width=stroke_width,
120
- stroke_fill=stroke_color
121
- )
122
-
123
  return mpe.ImageClip(np.array(img))
124
-
125
- except Exception as e:
126
- logger.error(f"⚠️ PIL Text Error: {e}")
127
  return None
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  @staticmethod
130
  def get_style_config(style_name):
131
- """Returns configuration for different caption styles."""
132
- styles = {
133
- "classic": {
134
- "fontsize": 75,
135
- "color": "white",
136
- "stroke_color": "black",
137
- "stroke_width": 2,
138
- "font": None, # Default based on language
139
- "bg_color": None,
140
- "position": ("center", 1350)
141
- },
142
- "tiktok_bold": {
143
- "fontsize": 85,
144
- "color": "white",
145
- "stroke_color": "black",
146
- "stroke_width": 4,
147
- "font": "Montserrat-Bold.ttf", # Popular on TikTok
148
- "bg_color": None, # Shadow usually used instead of BG
149
- "position": ("center", 1400)
150
- },
151
- "tiktok_neon": {
152
- "fontsize": 80,
153
- "color": "#00f2ea", # TikTok Cyan
154
- "stroke_color": "#ff0050", # TikTok Red
155
- "stroke_width": 3,
156
- "font": "Roboto-Bold.ttf",
157
- "bg_color": None,
158
- "position": ("center", 1400)
159
- },
160
- "youtube_clean": {
161
- "fontsize": 70,
162
- "color": "yellow",
163
- "stroke_color": "black",
164
- "stroke_width": 3,
165
- "font": "Roboto-Bold.ttf",
166
- "bg_color": None,
167
- "position": ("center", 1300)
168
- },
169
- "youtube_box": {
170
- "fontsize": 65,
171
- "color": "white",
172
- "stroke_color": None,
173
- "stroke_width": 0,
174
- "font": "Roboto-Bold.ttf",
175
- "bg_color": "red", # YouTube Red Box
176
- "position": ("center", 1300)
177
- }
178
- }
179
- return styles.get(style_name, styles["classic"])
180
 
 
181
  @staticmethod
182
- def create_caption_clips(transcript_data, size=(1080, 1920), language=None, caption_mode="sentence", caption_style="classic"):
183
- """Generates a list of caption ImageClips for the video, without composing them."""
184
- all_text_clips = []
185
-
186
- style_config = SubtitleManager.get_style_config(caption_style)
187
-
188
- # We need to peek at the first segment to determine language if not provided
189
- # Or better, check each chunk dynamically?
190
- # For simplicity and consistency, let's check the first non-empty text.
191
-
192
  sample_text = ""
193
- segments = []
194
  if isinstance(transcript_data, list):
195
- if len(transcript_data) > 0 and 'segments' in transcript_data[0]:
196
- segments = transcript_data[0]['segments']
197
- else:
198
  segments = transcript_data
199
- elif isinstance(transcript_data, dict) and 'segments' in transcript_data:
200
- segments = transcript_data['segments']
201
-
202
- if segments:
203
- for s in segments:
204
- if s.get('text'):
205
- sample_text = s['text']
206
- break
207
-
208
- font_path = SubtitleManager.ensure_font(language, style_config.get("font"), text_content=sample_text)
209
-
210
- for segment in segments:
211
- full_text = segment.get('text', '').strip()
212
- if not full_text:
213
- words = segment.get('words', [])
214
- full_text = " ".join([w['text'] for w in words])
215
-
 
 
 
 
 
 
 
 
216
  if not full_text:
217
  continue
218
 
219
- start_t = segment.get('start', 0)
220
- end_t = segment.get('end', 0)
221
-
222
  if end_t <= start_t:
223
- if segment.get('words'):
224
- start_t = segment['words'][0]['start']
225
- end_t = segment['words'][-1]['end']
226
  else:
227
  continue
228
 
229
- words_list = full_text.split()
230
- if not words_list:
231
- continue
232
-
233
  chunk_size = 1 if caption_mode == "word" else 4
234
- chunks = []
235
-
236
- # Use Word Timestamps if available (More Accurate)
237
- stt_words = segment.get('words')
238
- if stt_words and len(stt_words) > 0:
239
- valid_words = [w for w in stt_words if w.get('text', '').strip()]
240
-
241
- if valid_words:
242
- for i in range(0, len(valid_words), chunk_size):
243
- chunk_group = valid_words[i:i + chunk_size]
244
-
245
- chunk_text = " ".join([w['text'] for w in chunk_group])
246
- chunk_start = chunk_group[0]['start']
247
- chunk_end = chunk_group[-1]['end']
248
-
249
- chunks.append({
250
- "text": chunk_text,
251
- "start": chunk_start,
252
- "end": chunk_end
253
- })
254
  else:
255
- # Fallback to linear interpolation (Less Accurate)
256
- words_list = full_text.split()
257
- if not words_list:
258
- continue
259
-
260
- for i in range(0, len(words_list), chunk_size):
261
- chunk_words = words_list[i:i + chunk_size]
262
- chunk_text = " ".join(chunk_words)
263
-
264
- chunk_duration = (end_t - start_t) * (len(chunk_words) / len(words_list))
265
- chunk_start = start_t + (end_t - start_t) * (i / len(words_list))
266
- chunk_end = chunk_start + chunk_duration
267
-
268
- if chunk_end <= chunk_start:
269
- chunk_end = chunk_start + 0.5
270
-
271
- chunks.append({
272
- "text": chunk_text,
273
- "start": chunk_start,
274
- "end": chunk_end
275
- })
276
 
277
  for chunk in chunks:
278
- display_text = chunk["text"]
279
- is_arabic = language == "ar" or any("\u0600" <= c <= "\u06FF" for c in display_text)
280
-
281
- if is_arabic:
282
  try:
283
- display_text = get_display(reshape(display_text))
284
- except:
285
  pass
286
  else:
287
- display_text = display_text.upper()
288
-
289
- # Override size if provided in style, else use dynamic size based on mode
290
- f_size = style_config.get("fontsize", 75)
291
- if caption_mode == "word":
292
- f_size = int(f_size * 1.4) # Make word mode larger
293
-
294
- img_clip = SubtitleManager.create_pil_text_clip(
295
- display_text,
296
- fontsize=f_size,
297
- color=style_config.get("color", "white"),
298
- font_path=font_path,
299
- stroke_color=style_config.get("stroke_color", "black"),
300
- stroke_width=style_config.get("stroke_width", 2),
301
- bg_color=style_config.get("bg_color")
302
  )
303
-
304
- if img_clip:
305
- # Center horizontally, and place near bottom
306
- pos = style_config.get("position", ('center', 1350))
307
- txt_clip = img_clip.set_start(chunk["start"]).set_end(chunk["end"]).set_position(pos)
308
- all_text_clips.append(txt_clip)
309
 
310
- return all_text_clips
311
 
 
312
  @staticmethod
313
- def create_captions(video_clip, transcript_data, size=(1080, 1920), language=None, caption_mode="sentence"):
314
- """Generates caption clips and composites them onto the video."""
315
- text_clips = SubtitleManager.create_caption_clips(transcript_data, size, language, caption_mode)
316
- return mpe.CompositeVideoClip([video_clip] + text_clips, size=size)
 
 
 
1
+ """
2
+ SubtitleManager — Viral YouTube Shorts Caption Engine
3
+ Styles tuned for 2024-2025 Shorts/Reels/TikTok viral aesthetics.
4
+ All style names kept identical to the original for drop-in integration.
5
+ """
6
  import os
7
  import numpy as np
8
  import urllib.request
 
15
 
16
  logger = Logger.get_logger(__name__)
17
 
18
+
19
+ # ─────────────────────────────────────────────────────────────────────────────
20
+ # Style Registry (same 6 keys as original — drop-in compatible)
21
+ # ─────────────────────────────────────────────────────────────────────────────
22
+ #
23
+ # Extra keys consumed only by highlight_word mode:
24
+ # highlight_color → text color for the active word
25
+ # highlight_bg → RGBA fill of the box behind active word
26
+ # highlight_bg_radius → corner radius of that box
27
+ # shadow_layers → list of (off_x, off_y, blur_steps, RGBA)
28
+ # drawn UNDER the highlight box for depth/glow
29
+ #
30
+ STYLES = {
31
+
32
+ # ── 1. CLASSIC ────────────────────────────────────────────────────────────
33
+ # Clean, professional — news / podcast feel.
34
+ # Active word: crisp white on a near-black pill with a soft drop shadow.
35
+ "classic": {
36
+ "fontsize": 72,
37
+ "color": (255, 255, 255, 255),
38
+ "stroke_color": (0, 0, 0, 200),
39
+ "stroke_width": 3,
40
+ "font": None,
41
+ "bg_color": None,
42
+ "position": ("center", 0.80),
43
+ "highlight_color": (255, 255, 255, 255),
44
+ "highlight_bg": (18, 18, 18, 220),
45
+ "highlight_bg_radius": 20,
46
+ "shadow_layers": [
47
+ (0, 6, 8, (0, 0, 0, 160)), # soft drop-shadow
48
+ ],
49
+ },
50
+
51
+ # ── 2. MODERN GLOW ────────────────────────────────────────────────────────
52
+ # Apple / high-end documentary aesthetic.
53
+ # Dark frosted sentence bar; electric-blue glowing pill on active word.
54
+ "modern_glow": {
55
+ "fontsize": 78,
56
+ "color": (200, 225, 255, 200),
57
+ "stroke_color": (0, 10, 40, 255),
58
+ "stroke_width": 2,
59
+ "font": "Montserrat-Bold.ttf",
60
+ "bg_color": (10, 10, 30, 160), # dark frosted bar
61
+ "position": ("center", 0.83),
62
+ "highlight_color": (130, 230, 255, 255), # electric cyan text
63
+ "highlight_bg": (0, 130, 255, 210), # vivid blue pill
64
+ "highlight_bg_radius": 22,
65
+ "shadow_layers": [
66
+ (0, 0, 16, (0, 160, 255, 110)), # wide outer glow
67
+ (0, 3, 6, (0, 60, 160, 180)), # tight drop-shadow
68
+ ],
69
+ },
70
+
71
+ # ── 3. TIKTOK BOLD ────────────────────────────────────────────────────────
72
+ # MrBeast / Sidemen. High-contrast yellow box, heavy stroke.
73
+ # Active word: black text on pure yellow — impossible to miss.
74
+ "tiktok_bold": {
75
+ "fontsize": 90,
76
+ "color": (255, 255, 255, 255),
77
+ "stroke_color": (0, 0, 0, 255),
78
+ "stroke_width": 5,
79
+ "font": "Montserrat-Bold.ttf",
80
+ "bg_color": None,
81
+ "position": ("center", 0.84),
82
+ "highlight_color": (10, 10, 10, 255), # almost-black on yellow
83
+ "highlight_bg": (255, 220, 0, 255), # MrBeast yellow
84
+ "highlight_bg_radius": 12,
85
+ "shadow_layers": [
86
+ (4, 6, 0, (0, 0, 0, 230)), # hard pixel-offset (punchy feel)
87
+ (7, 10, 0, (0, 0, 0, 90)),
88
+ ],
89
+ },
90
+
91
+ # ── 4. TIKTOK NEON ────────────────────────────────────────────────────────
92
+ # Y2K / EDM / night-out. Hot-pink pill, cyan text — maximum vibe.
93
+ "tiktok_neon": {
94
+ "fontsize": 80,
95
+ "color": (255, 255, 255, 230),
96
+ "stroke_color": (100, 0, 60, 255),
97
+ "stroke_width": 3,
98
+ "font": "Roboto-Bold.ttf",
99
+ "bg_color": None,
100
+ "position": ("center", 0.85),
101
+ "highlight_color": (0, 242, 234, 255), # TikTok cyan
102
+ "highlight_bg": (255, 0, 80, 235), # TikTok pink-red
103
+ "highlight_bg_radius": 22,
104
+ "shadow_layers": [
105
+ (0, 0, 20, (255, 0, 80, 120)), # pink outer glow
106
+ (0, 0, 8, (0, 242, 234, 80)), # cyan inner glow
107
+ (3, 5, 0, (80, 0, 40, 210)), # hard dark offset
108
+ ],
109
+ },
110
+
111
+ # ── 5. YOUTUBE CLEAN ──────────────────────────────────────────────────────
112
+ # Educator / talking-head minimal style.
113
+ # Frosted dark pill under sentence; warm amber box on active word.
114
+ "youtube_clean": {
115
+ "fontsize": 70,
116
+ "color": (240, 240, 240, 220),
117
+ "stroke_color": (0, 0, 0, 160),
118
+ "stroke_width": 2,
119
+ "font": "Roboto-Bold.ttf",
120
+ "bg_color": (0, 0, 0, 140), # subtle sentence pill
121
+ "position": ("center", 0.76),
122
+ "highlight_color": (20, 20, 20, 255), # dark text on amber
123
+ "highlight_bg": (255, 200, 40, 248), # warm amber
124
+ "highlight_bg_radius": 16,
125
+ "shadow_layers": [
126
+ (0, 4, 10, (180, 130, 0, 170)), # amber drop-shadow
127
+ ],
128
+ },
129
+
130
+ # ── 6. YOUTUBE BOX ────────────────────────────────────────────────────────
131
+ # Karaoke / game-show energy.
132
+ # Solid dark sentence bar; bold YouTube-red box on active word.
133
+ "youtube_box": {
134
+ "fontsize": 68,
135
+ "color": (255, 255, 255, 255),
136
+ "stroke_color": (0, 0, 0, 255),
137
+ "stroke_width": 2,
138
+ "font": "Roboto-Bold.ttf",
139
+ "bg_color": (15, 15, 15, 210), # dark sentence bar
140
+ "position": ("center", 0.77),
141
+ "highlight_color": (255, 255, 255, 255),
142
+ "highlight_bg": (200, 0, 0, 255), # YouTube red
143
+ "highlight_bg_radius": 8,
144
+ "shadow_layers": [
145
+ (0, 5, 0, (110, 0, 0, 230)), # hard dark-red offset
146
+ (0, 9, 0, ( 0, 0, 0, 130)),
147
+ ],
148
+ },
149
+ }
150
+
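
A quick orientation sketch of how the extra highlight keys above are meant to be read (illustrative; assumes the project environment is importable, values copied from the registry):

    from core.subtitle_manager import STYLES

    style = STYLES["tiktok_bold"]
    print(style["highlight_color"])   # (10, 10, 10, 255)  - active-word text colour
    print(style["highlight_bg"])      # (255, 220, 0, 255) - yellow pill behind it
    for off_x, off_y, blur, rgba in style["shadow_layers"]:
        kind = "hard offset" if blur == 0 else "soft glow"
        print(f"shadow dx={off_x} dy={off_y} blur={blur}: {kind}")
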
151
+
152
+ # ─────────────────────────────────────────────────────────────────────────────
153
+ # Helpers
154
+ # ─────────────────────────────────────────────────────────────────────────────
155
+
156
+ def _rgba(c):
157
+ """Normalise any colour spec to an (R,G,B,A) tuple."""
158
+ if c is None:
159
+ return None
160
+ if isinstance(c, (tuple, list)):
161
+ return (*c[:3], c[3] if len(c) == 4 else 255)
162
+ tmp = Image.new("RGBA", (1, 1), c)
163
+ return tmp.getpixel((0, 0))
164
+
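
A quick sanity sketch of _rgba's contract (illustrative; the named-colour case follows Pillow's colour table):

    assert _rgba(None) is None
    assert _rgba((10, 20, 30)) == (10, 20, 30, 255)       # RGB gains full alpha
    assert _rgba((10, 20, 30, 128)) == (10, 20, 30, 128)  # RGBA passes through
    assert _rgba("red") == (255, 0, 0, 255)               # names resolved via PIL
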
165
+
166
+ def _draw_shadow_layers(draw, box, layers, base_radius):
167
+ """
168
+ Paint shadow / glow layers behind a rounded-rect.
169
+
170
+ layers: [(off_x, off_y, blur_steps, rgba)]
171
+ blur_steps == 0 → single hard-offset rectangle
172
+ blur_steps > 0 → concentric rects with fading alpha (soft glow)
173
+ """
174
+ x1, y1, x2, y2 = box
175
+ for (ox, oy, blur, color) in layers:
176
+ rgba = _rgba(color)
177
+ if blur == 0:
178
+ draw.rounded_rectangle(
179
+ [(x1 + ox, y1 + oy), (x2 + ox, y2 + oy)],
180
+ radius=base_radius, fill=rgba,
181
+ )
182
+ else:
183
+ steps = max(blur // 2, 3)
184
+ base_a = rgba[3]
185
+ for s in range(steps, 0, -1):
186
+ expand = s * (blur / steps)
187
+ step_alpha = int(base_a * (1 - s / (steps + 1)))
188
+ draw.rounded_rectangle(
189
+ [
190
+ (x1 + ox - expand, y1 + oy - expand),
191
+ (x2 + ox + expand, y2 + oy + expand),
192
+ ],
193
+ radius=int(base_radius + expand),
194
+ fill=(*rgba[:3], step_alpha),
195
+ )
196
+
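
To make the layer tuples concrete, a small standalone demo (file name and box geometry are arbitrary placeholders):

    from PIL import Image, ImageDraw

    img = Image.new("RGBA", (200, 100), (0, 0, 0, 0))
    # One soft layer from "classic": offset (0, 6), 8 blur steps, 63%-alpha black
    _draw_shadow_layers(ImageDraw.Draw(img), (40, 30, 160, 70),
                        [(0, 6, 8, (0, 0, 0, 160))], base_radius=12)
    img.save("shadow_demo.png")  # soft glow where the highlight pill would sit
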
197
+
198
+ # ─────────────────────────────────────────────────────────────────────────────
199
  class SubtitleManager:
200
+
201
+ # ── Font management ───────────────────────────────────────────────────────
202
  @staticmethod
203
  def ensure_font(language=None, style_font=None, text_content=None):
204
+ """Returns an absolute path to a valid font file."""
 
 
205
  font_name = Config.LANGUAGE_FONT_MAP.get("default", "Roboto-Bold.ttf")
206
+
 
 
 
 
207
  detected_lang = language
208
  if not detected_lang and text_content:
 
209
  if any("\u0600" <= c <= "\u06FF" for c in text_content):
210
  detected_lang = "ar"
211
  elif any("\u4E00" <= c <= "\u9FFF" for c in text_content):
212
  detected_lang = "zh"
213
+ elif any("\u3040" <= c <= "\u30FF" for c in text_content):
214
  detected_lang = "ja"
215
  elif any("\u0900" <= c <= "\u097F" for c in text_content):
216
  detected_lang = "hi"
217
  elif any("\u0400" <= c <= "\u04FF" for c in text_content):
218
  detected_lang = "ru"
219
 
 
220
  if detected_lang in Config.LANGUAGE_FONT_MAP:
221
+ font_name = Config.LANGUAGE_FONT_MAP[detected_lang]
222
+ elif style_font and not detected_lang:
223
+ font_name = style_font
 
224
 
 
225
  if detected_lang and detected_lang not in Config.LANGUAGE_FONT_MAP:
226
+ logger.warning(f"⚠️ Language '{detected_lang}' not in font map, using default.")
227
+
228
  font_path = os.path.join(Config.BASE_DIR, font_name)
 
229
  if not os.path.exists(font_path):
230
+ logger.info(f"📥 Downloading font: {font_name}")
 
231
  url = Config.FONTS.get(font_name)
232
  if url:
233
  try:
 
234
  if "fonts.googleapis.com/css" in url:
235
+ if not Config.download_font_from_css(url, font_path):
236
+ raise RuntimeError("CSS font download failed")
 
237
  else:
 
238
  urllib.request.urlretrieve(url, font_path)
239
+ logger.info(f"✅ Font ready: {font_name}")
240
+ except Exception as exc:
241
+ logger.error(f"❌ Font download failed: {exc}")
 
242
  return "Arial"
243
  else:
244
+ logger.warning(f"⚠️ No URL configured for font: {font_name}")
245
+
 
 
 
 
246
  return font_path
247
 
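Expected behaviour in brief (a sketch, assuming the Arabic mapping in Config.LANGUAGE_FONT_MAP from core/config.py; the download happens on first use):

    # Script detection wins over the style's preferred font.
    path = SubtitleManager.ensure_font(style_font="Montserrat-Bold.ttf",
                                       text_content="مرحبا بالعالم")
    # → <BASE_DIR>/NotoSansArabic-Bold.ttf, downloaded if missing
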
248
+ # ── Legacy single-text clip (sentence / word modes) ───────────────────────
249
  @staticmethod
250
+ def create_pil_text_clip(text, fontsize, color, font_path,
251
+ stroke_color="black", stroke_width=2,
252
+ bg_color=None, padding=12, bg_radius=18):
253
  try:
254
  try:
255
  font = ImageFont.truetype(font_path, fontsize)
256
+ except Exception:
257
+ logger.warning(f"⚠️ Could not load {font_path}, using default.")
258
  font = ImageFont.load_default()
259
+
260
+ dummy = Image.new("RGBA", (1, 1))
261
+ d = ImageDraw.Draw(dummy)
262
+ bbox = d.textbbox((0, 0), text, font=font)
263
+ tw = bbox[2] - bbox[0]
264
+ th = bbox[3] - bbox[1]
265
+
266
  margin = int(stroke_width * 2) + padding
267
+ iw, ih = tw + margin * 2, th + margin * 2
268
+
269
+ img = Image.new("RGBA", (int(iw), int(ih)), (0, 0, 0, 0))
 
270
  draw = ImageDraw.Draw(img)
271
+
 
272
  if bg_color:
273
+ draw.rounded_rectangle([(0, 0), (iw, ih)],
274
+ radius=bg_radius, fill=_rgba(bg_color))
275
+
276
+ x = (iw - tw) / 2 - bbox[0]
277
+ y = (ih - th) / 2 - bbox[1]
278
+ draw.text((x, y), text, font=font, fill=_rgba(color),
279
+ stroke_width=stroke_width, stroke_fill=_rgba(stroke_color))
280
+
 
 
 
281
  return mpe.ImageClip(np.array(img))
282
+
283
+ except Exception as exc:
284
+ logger.error(f"⚠️ create_pil_text_clip: {exc}")
285
  return None
286
 
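A minimal smoke test for the legacy builder (illustrative; a missing font path simply triggers the default-font fallback):

    clip = SubtitleManager.create_pil_text_clip(
        "HELLO", fontsize=72, color="white",
        font_path="does-not-exist.ttf",      # forces ImageFont.load_default()
        bg_color=(0, 0, 0, 140),
    )
    print(clip.size if clip else "render failed")  # (width, height) in pixels
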
287
+ # ── Highlight-word composite renderer ─────────────────────────────────────
288
+ @staticmethod
289
+ def create_sentence_highlight_clip(
290
+ sentence_words, active_word, font, fontsize, font_path,
291
+ style_config, is_arabic, padding=14, bg_radius=20,
292
+ ):
293
+ """
294
+ Renders the entire sentence as ONE image.
295
+ The active word gets a visible shadow-box underneath + highlight colour.
296
+ No floating clip artefacts — position is always correct.
297
+ """
298
+ try:
299
+ dummy = Image.new("RGBA", (1, 1))
300
+ d = ImageDraw.Draw(dummy)
301
+
302
+ sp_w = max(d.textbbox((0, 0), " ", font=font)[2], 4)
303
+
304
+ # ── 1. Measure each word ─────────────────────────────────────────
305
+ ordered = list(reversed(sentence_words)) if is_arabic else sentence_words
306
+ word_metrics = []
307
+ cursor = 0
308
+ for w in ordered:
309
+ raw = w.get("text", "")
310
+ display = get_display(reshape(raw)) if is_arabic else raw.upper()
311
+ bbox = d.textbbox((0, 0), display, font=font)
312
+ ww = bbox[2] - bbox[0]
313
+ word_metrics.append({"id": id(w), "display": display,
314
+ "bbox": bbox, "x": cursor, "width": ww})
315
+ cursor += ww + sp_w
316
+
317
+ total_w = cursor - sp_w
318
+ ref_bbox = d.textbbox((0, 0), "Ag", font=font)
319
+ text_h = ref_bbox[3] - ref_bbox[1]
320
+
321
+ stroke_w = style_config.get("stroke_width", 2)
322
+ margin = int(stroke_w * 2) + padding
323
+
324
+ # Extra vertical bleed for shadow layers
325
+ bleed = 14
326
+ iw = int(total_w + margin * 2)
327
+ ih = int(text_h + margin * 2 + bleed)
328
+
329
+ img = Image.new("RGBA", (iw, ih), (0, 0, 0, 0))
330
+ draw = ImageDraw.Draw(img)
331
+
332
+ # ── 2. Optional full-sentence background bar ──────────────────────
333
+ sentence_bg = style_config.get("bg_color")
334
+ if sentence_bg:
335
+ draw.rounded_rectangle(
336
+ [(0, bleed // 2), (iw, ih - bleed // 2)],
337
+ radius=bg_radius, fill=_rgba(sentence_bg),
338
+ )
339
+
340
+ # ── 3. Shadow + highlight box for active word ─────────────────────
341
+ active_id = id(active_word)
342
+ hl_bg = _rgba(style_config.get("highlight_bg"))
343
+ hl_radius = style_config.get("highlight_bg_radius", bg_radius)
344
+ shadows = style_config.get("shadow_layers", [])
345
+
346
+ active_wm = next((wm for wm in word_metrics if wm["id"] == active_id), None)
347
+
348
+ if active_wm and hl_bg:
349
+ bx1 = margin + active_wm["x"] - active_wm["bbox"][0] - padding
350
+ by1 = bleed // 2
351
+ bx2 = bx1 + active_wm["width"] + padding * 2
352
+ by2 = ih - bleed // 2
353
+
354
+ # Shadow / glow layers first
355
+ _draw_shadow_layers(draw, (bx1, by1, bx2, by2), shadows, hl_radius)
356
+
357
+ # Main highlight box
358
+ draw.rounded_rectangle([(bx1, by1), (bx2, by2)],
359
+ radius=hl_radius, fill=hl_bg)
360
+
361
+ # ── 4. Draw text words ────────────────────────────────────────────
362
+ rest_c = _rgba(style_config.get("color", (255, 255, 255, 255)))
363
+ hl_c = _rgba(style_config.get("highlight_color", rest_c))
364
+ stk_c = _rgba(style_config.get("stroke_color", (0, 0, 0, 255)))
365
+
366
+ for wm in word_metrics:
367
+ col = hl_c if (wm["id"] == active_id) else rest_c
368
+ tx = margin + wm["x"] - wm["bbox"][0]
369
+ ty = margin + bleed // 2 - wm["bbox"][1]
370
+ draw.text((tx, ty), wm["display"], font=font, fill=col,
371
+ stroke_width=stroke_w, stroke_fill=stk_c)
372
+
373
+ return mpe.ImageClip(np.array(img))
374
+
375
+ except Exception as exc:
376
+ logger.error(f"⚠️ create_sentence_highlight_clip: {exc}")
377
+ return None
378
+
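
An illustrative call (word timings and the default font are placeholders; fontsize/font_path are accepted by the signature, but the pre-built font object does the actual work):

    from PIL import ImageFont

    words = [{"text": "go",    "start": 0.0, "end": 0.4},
             {"text": "viral", "start": 0.4, "end": 1.0}]
    clip = SubtitleManager.create_sentence_highlight_clip(
        sentence_words=words, active_word=words[1],
        font=ImageFont.load_default(), fontsize=72, font_path=None,
        style_config=STYLES["tiktok_bold"], is_arabic=False,
    )
    # → an ImageClip of "GO VIRAL" with the yellow box under VIRAL
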
379
+ # ── Public style accessor ──────────────────────────────────────────────────
380
  @staticmethod
381
  def get_style_config(style_name):
382
+ """Returns the style dict for the given name (falls back to 'classic')."""
383
+ return STYLES.get(style_name, STYLES["classic"])
 
 
 
 
 
 
 
 
384
 
385
+ # ── Main generator ─────────────────────────────────────────────────────────
386
  @staticmethod
387
+ def create_caption_clips(transcript_data, size=(1080, 1920), language=None,
388
+ caption_mode="sentence", caption_style="classic"):
389
+ """Generates all caption ImageClips ready for compositing."""
390
+ all_clips = []
391
+ style_cfg = SubtitleManager.get_style_config(caption_style)
392
+
393
+ # ── Parse transcript ─────────────────────────────────────────────────
394
+ segments = []
 
 
395
  sample_text = ""
 
396
  if isinstance(transcript_data, list):
397
+ if transcript_data and "segments" in transcript_data[0]:
398
+ segments = transcript_data[0]["segments"]
399
+ else:
400
  segments = transcript_data
401
+ elif isinstance(transcript_data, dict) and "segments" in transcript_data:
402
+ segments = transcript_data["segments"]
403
+
404
+ for s in segments:
405
+ if s.get("text"):
406
+ sample_text = s["text"]
407
+ break
408
+
409
+ font_path = SubtitleManager.ensure_font(
410
+ language, style_cfg.get("font"), text_content=sample_text
411
+ )
412
+ pos_cfg = style_cfg.get("position", ("center", 0.80))
413
+ pos = (pos_cfg[0], int(pos_cfg[1] * size[1]))
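+ # e.g. ("center", 0.80) on a 1080x1920 canvas gives y = int(0.80 * 1920) = 1536 px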
414
+
415
+ # ════════════════════════════════════════════════════════════════════
416
+ # MODE: highlight_word
417
+ # ════════════════════════════════════════════════════════════════════
418
+ if caption_mode == "highlight_word":
419
+ all_words = []
420
+ for seg in segments:
421
+ if "words" in seg and seg["words"]:
422
+ all_words.extend(seg["words"])
423
+
424
+ if not all_words:
425
+ logger.warning("⚠️ highlight_word needs word-level timestamps — none found.")
426
+ return []
427
+
428
+ fontsize = style_cfg.get("fontsize", 75)
429
+ try:
430
+ font = ImageFont.truetype(font_path, fontsize)
431
+ except Exception:
432
+ logger.warning("⚠️ TrueType load failed — falling back to default font.")
433
+ font = ImageFont.load_default()
434
+
435
+ # Group words into sentences (gap > 0.7 s = new sentence)
436
+ sentences, cur = [], []
437
+ for i, word in enumerate(all_words):
438
+ if not word.get("text", "").strip():
439
+ continue
440
+ cur.append(word)
441
+ is_last = (i == len(all_words) - 1)
442
+ pause = (all_words[i + 1]["start"] - word["end"]) if not is_last else 1.0
443
+ if pause > 0.7 or is_last:
444
+ sentences.append(cur)
445
+ cur = []
446
+
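+ # Worked example (assumed timings): a word ending at 1.20 s followed by one
+ # starting at 2.10 s (0.90 s pause) closes the sentence; a 0.30 s pause
+ # keeps both words in the same group.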
447
+ for sw in sentences:
448
+ sent_text = " ".join(w["text"] for w in sw)
449
+ sent_start = sw[0]["start"]
450
+ sent_end = sw[-1]["end"]
451
+ is_ar = language == "ar" or any("\u0600" <= c <= "\u06FF" for c in sent_text)
452
+
453
+ # One frame per word (active highlight moves)
454
+ for active in sw:
455
+ clip = SubtitleManager.create_sentence_highlight_clip(
456
+ sentence_words=sw, active_word=active,
457
+ font=font, fontsize=fontsize, font_path=font_path,
458
+ style_config=style_cfg, is_arabic=is_ar,
459
+ padding=style_cfg.get("padding", 14),
460
+ bg_radius=style_cfg.get("highlight_bg_radius", 20),
461
+ )
462
+ if clip:
463
+ all_clips.append(
464
+ clip.set_start(active["start"])
465
+ .set_end(active["end"])
466
+ .set_position(pos)
467
+ )
468
+
469
+ # Fill inter-word gaps (no active word) with plain sentence
470
+ covered = [(w["start"], w["end"]) for w in sw]
471
+ gaps = []
472
+ if sent_start < covered[0][0]:
473
+ gaps.append((sent_start, covered[0][0]))
474
+ for j in range(len(covered) - 1):
475
+ if covered[j][1] < covered[j + 1][0]:
476
+ gaps.append((covered[j][1], covered[j + 1][0]))
477
+ if covered[-1][1] < sent_end:
478
+ gaps.append((covered[-1][1], sent_end))
479
+
480
+ for gs, ge in gaps:
481
+ plain_cfg = {**style_cfg, "highlight_bg": None, "shadow_layers": []}
482
+ dummy_w = {"text": "", "start": gs, "end": ge}
483
+ gc = SubtitleManager.create_sentence_highlight_clip(
484
+ sentence_words=sw, active_word=dummy_w,
485
+ font=font, fontsize=fontsize, font_path=font_path,
486
+ style_config=plain_cfg, is_arabic=is_ar,
487
+ )
488
+ if gc:
489
+ all_clips.append(gc.set_start(gs).set_end(ge).set_position(pos))
490
+
491
+ return all_clips
492
+
493
+ # ════════════════════════════════════════════════════════════════════
494
+ # LEGACY MODES: sentence / word
495
+ # ════════════════════════════════════════════════════════════════════
496
+ for seg in segments:
497
+ full_text = seg.get("text", "").strip() or " ".join(
498
+ w["text"] for w in seg.get("words", [])
499
+ )
500
  if not full_text:
501
  continue
502
 
503
+ start_t = seg.get("start", 0)
504
+ end_t = seg.get("end", 0)
 
505
  if end_t <= start_t:
506
+ ws = seg.get("words", [])
507
+ if ws:
508
+ start_t, end_t = ws[0]["start"], ws[-1]["end"]
509
  else:
510
  continue
511
 
 
 
 
 
512
  chunk_size = 1 if caption_mode == "word" else 4
513
+ chunks = []
514
+ stt_words = seg.get("words")
515
+
516
+ if stt_words:
517
+ valid = [w for w in stt_words if w.get("text", "").strip()]
518
+ for i in range(0, len(valid), chunk_size):
519
+ grp = valid[i:i + chunk_size]
520
+ chunks.append({"text": " ".join(w["text"] for w in grp),
521
+ "start": grp[0]["start"], "end": grp[-1]["end"]})
 
 
 
 
522
  else:
523
+ wl = full_text.split()
524
+ for i in range(0, len(wl), chunk_size):
525
+ cw = wl[i:i + chunk_size]
526
+ cs = start_t + (end_t - start_t) * (i / len(wl))
527
+ ce = cs + (end_t - start_t) * (len(cw) / len(wl))
528
+ chunks.append({"text": " ".join(cw),
529
+ "start": cs, "end": max(ce, cs + 0.1)})
 
 
 
 
530
 
531
  for chunk in chunks:
532
+ disp = chunk["text"]
533
+ is_ar = language == "ar" or any("\u0600" <= c <= "\u06FF" for c in disp)
534
+ if is_ar:
 
535
  try:
536
+ disp = get_display(reshape(disp))
537
+ except Exception:
538
  pass
539
  else:
540
+ disp = disp.upper()
541
+
542
+ clip = SubtitleManager.create_pil_text_clip(
543
+ disp,
544
+ fontsize = style_cfg.get("fontsize", 72),
545
+ color = style_cfg.get("color", (255, 255, 255, 255)),
546
+ font_path = font_path,
547
+ stroke_color = style_cfg.get("stroke_color", (0, 0, 0, 200)),
548
+ stroke_width = style_cfg.get("stroke_width", 2),
549
+ bg_color = style_cfg.get("bg_color"),
550
+ bg_radius = style_cfg.get("highlight_bg_radius", 18),
 
 
 
 
551
  )
552
+ if clip:
553
+ all_clips.append(
554
+ clip.set_start(chunk["start"])
555
+ .set_end(chunk["end"])
556
+ .set_position(pos)
557
+ )
558
 
559
+ return all_clips
560
 
561
+ # ── Convenience compositor ─────────────────────────────────────────────────
562
  @staticmethod
563
+ def create_captions(video_clip, transcript_data, size=(1080, 1920),
564
+ language=None, caption_mode="sentence"):
565
+ clips = SubtitleManager.create_caption_clips(
566
+ transcript_data, size, language, caption_mode
567
+ )
568
+ return mpe.CompositeVideoClip([video_clip] + clips, size=size)
firebase_key.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "clippingcommunity-caf5a",
4
+ "private_key_id": "787bbcf48f5a4924137010157aa70faac25d6b3c",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDP8JezwCe7gxDn\nNTqbRWCn2Y9w9XtvLT3Rh/SS+XIgzzGZupsCz/gtPJzrPEMhR2NsEwyk0uPvumd5\nICLIBaYrmfpB6h/CtvD9bvNyea8BvPuMxkJ/COSwm4kUDlutExI5WCr3XKxdrAQB\n7pZv//vI9xgWrR08/EP/YeP74L3nb5+z1DeW1C5lxvGalYJQu80iWMSMgr7kHaQL\ner5K2kl6cpQS0+mS+A3jsj8gtTZRrviZEMpAuRbd9PTVq3FDDCZK978KJ3tt+Hpv\nOlnjbzdH10+WtU4Br9H1xLD4VKfakoXYU9lUMyVdfvQoPhpuNrcdXUWIRSpHjHKU\nvCEYnxtxAgMBAAECggEAGo/FmRD1Ilc6LAaZH2dr0tUQJSI+a7OwP1kqDTIu+XDu\nCZCUJ3ZQYdrpwoS2oIQhd5cxWMaVljRN7mOD/d/223/2P0A8YV+EBiOLlnZjvAmb\nal6S9O46ZDLh/j1dSYzzmX6hMmrUm7yS4HpNHi5pR8EEgzUG1Jj2yKME/9Iz/+F/\nuih50z9UiKA7TmCUwfn63l8kT1trBoRYUTqDFoDmul/gbDazfwqCSwtw3BXeOfHY\n98WjhPq3krhuct/nKwY3XzjCchdrej4UaOYMQNN9jRU5haq/L+JKXClEyjgp4IUR\nKywcyxj/QgfbPRFyZMrgmSsGdRBKhOhI83FyMjNLSQKBgQD5W77I2KlIqSqHuzhg\nNOMqYt3FHa86c7kcuZjFquWxEtgFYVvmN9o6IyDcOlo6yYwR0dGmBrADf1Izi/BP\nGyanYvZW2djYpb2j+V/ovPx1br9or54icVjR7eAXJQfRAuJ6pcsK916U32GuAvU+\nTMxp0kBExt+8sBI3E6mMelIAHQKBgQDVem8fw2hDRFEREKWWme9z/usRVmazw3On\nyyMsWa7dsL9tfvZVGp+NZsND4CvZbPLRM1QEyXK6TrgAv1C+lvtNah5/qdIY0NYa\nTXlI7RaP/DS8UszVtOYbVxdRt98Rz3K1vANjCS9v6Kqq3O1CQp3kxQ/07v+j5Udh\n4ALbsmxQZQKBgQDOfTjJnRDhyKQdbd3LXUBYEzLOTjySzM2XieGGnnRCY/ZazjqJ\ns+qUhg5qEDAzyj6havyO3a7X1pE9ej5vY25o4jxXw2IcVXNq29CeLBFrNWBv0i3D\nG9WPUcploBaO8DKXmb8/v4SlBy4eKPjotDP51U+/JGiWGb+buD6Iw6ovyQKBgBad\nrO4hhocx0qDLMa+9ySdxxzeD/sdmmncZRzWonqTv16fi/nfPpT2WuHMVaa/UIflV\nxb3oFZZ1RnsVyZkXZ7Iw3uBJfm+QmE4bDRFTxMMmRfP5lafCTWpyFI9cum4pmw5z\nx6wTSgpCDOqjEyOk6RNWaTVaqIyVerV8xPC/e0gdAoGBAPkFU9UePr4pMEdchvwd\nd+H7kbywlodwiPh7SoeInYMZSBxK6rzZAaRRv6nNGlBe8HoqDud5SOR1X8T0VYk+\n6Ou0s6ploSL9vbM3YkhiYqKXj1tCg+emBtucp19bILUPGBW56Aje0merJmDg5kyf\nvUTLlRBbkNySxP08n86/Fcrc\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "firebase-adminsdk-fbsvc@clippingcommunity-caf5a.iam.gserviceaccount.com",
7
+ "client_id": "110147267785886278722",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-fbsvc%40clippingcommunity-caf5a.iam.gserviceaccount.com",
12
+ "universe_domain": "googleapis.com"
13
+ }
main.py CHANGED
@@ -12,13 +12,26 @@ from processor import VideoProcessor
12
  from core.config import Config
13
  from core.logger import Logger
14
  from core.task_queue import TaskManager
 
 
15
 
16
  logger = Logger.get_logger(__name__)
17
  task_manager = TaskManager()
18
 
 
 
 
 
 
 
19
  # Ensure directories exist
20
  Config.setup_dirs()
21
 
 
 
 
 
 
22
  class VideoStyle(str, Enum):
23
  cinematic = "cinematic"
24
  cinematic_blur = "cinematic_blur"
@@ -29,9 +42,11 @@ class VideoStyle(str, Enum):
29
  class CaptionMode(str, Enum):
30
  word = "word"
31
  sentence = "sentence"
 
32
 
33
  class CaptionStyle(str, Enum):
34
  classic = "classic"
 
35
  tiktok_bold = "tiktok_bold"
36
  tiktok_neon = "tiktok_neon"
37
  youtube_clean = "youtube_clean"
@@ -52,6 +67,37 @@ class Language(str, Enum):
52
  app = FastAPI(title="Auto-Clipping API")
53
  clipper = VideoProcessor()
54
 
 
 
 
 
55
  def process_video_task(
56
  task_id: str,
57
  video_path: str,
@@ -69,9 +115,6 @@ def process_video_task(
69
  from moviepy.editor import VideoFileClip
70
  full_video_clip = None
71
  try:
72
- # Optimization: Open video once
73
- full_video_clip = VideoFileClip(video_path)
74
-
75
  # Helper for progress updates
76
  def update_progress(progress, message):
77
  task_manager.update_task_progress(task_id, progress, message)
@@ -79,11 +122,15 @@ def process_video_task(
79
  update_progress(1, "Starting video analysis...")
80
 
81
  # 1. Analyze video
82
- timestamp_mode = "words" if caption_mode == CaptionMode.word else "segments"
 
83
  scored_segments, total_duration, llm_moments = clipper.analyze_impact(
84
  video_path,
85
- video_clip=full_video_clip,
86
- language=language,
 
 
 
87
  timestamp_mode=timestamp_mode,
88
  progress_callback=update_progress
89
  )
@@ -101,8 +148,8 @@ def process_video_task(
101
  llm_moments,
102
  style=style,
103
  task_id=task_id,
104
- language=language,
105
- video_clip=full_video_clip,
106
  playground_path=playground_path,
107
  audio_path=audio_path,
108
  bg_music_volume=bg_music_volume,
@@ -135,8 +182,7 @@ def process_video_task(
135
  "traceback": traceback.format_exc()
136
  }
137
  finally:
138
- if full_video_clip:
139
- full_video_clip.close()
140
 
141
  # Send webhook
142
  if webhook_url and webhook_url.strip() and webhook_url.startswith(('http://', 'https://')):
 
12
  from core.config import Config
13
  from core.logger import Logger
14
  from core.task_queue import TaskManager
15
+ from core.database import DatabaseManager
16
+ from pydantic import BaseModel
17
 
18
  logger = Logger.get_logger(__name__)
19
  task_manager = TaskManager()
20
 
21
+ # Initialize Database Manager (Try Firebase, fallback to Local)
22
+ try:
23
+ db_manager = DatabaseManager(use_firebase=True)
24
+ except Exception:
25
+ db_manager = DatabaseManager(use_firebase=False)
26
+
27
  # Ensure directories exist
28
  Config.setup_dirs()
29
 
30
+ class APIKeyInput(BaseModel):
31
+ service: str
32
+ key: str
33
+ use_firebase: bool = False
34
+
35
  class VideoStyle(str, Enum):
36
  cinematic = "cinematic"
37
  cinematic_blur = "cinematic_blur"
 
42
  class CaptionMode(str, Enum):
43
  word = "word"
44
  sentence = "sentence"
45
+ highlight_word = "highlight_word"
46
 
47
  class CaptionStyle(str, Enum):
48
  classic = "classic"
49
+ modern_glow = "modern_glow"
50
  tiktok_bold = "tiktok_bold"
51
  tiktok_neon = "tiktok_neon"
52
  youtube_clean = "youtube_clean"
 
67
  app = FastAPI(title="Auto-Clipping API")
68
  clipper = VideoProcessor()
69
 
70
+ @app.post("/api/keys")
71
+ async def add_api_key(input_data: APIKeyInput):
72
+ """
73
+ Securely adds an API key to the database.
74
+ - service: Service name (e.g., 'openrouter', 'openai')
75
+ - key: The API key string
76
+ - use_firebase: If true, saves to community database (Firebase). If false, saves to local SQLite.
77
+ """
78
+ try:
79
+ # If user explicitly requested Firebase but it wasn't initialized globally
80
+ target_db = db_manager
81
+ if input_data.use_firebase and not db_manager.use_firebase:
82
+ # Try to init a temporary firebase manager
83
+ try:
84
+ target_db = DatabaseManager(use_firebase=True)
85
+ except Exception as e:
86
+ return JSONResponse(
87
+ status_code=400,
88
+ content={"error": f"Firebase not configured: {str(e)}"}
89
+ )
90
+
91
+ # Save key
92
+ target_db.save_key(input_data.service, input_data.key)
93
+
94
+ dest = "Firebase (Community)" if input_data.use_firebase else "Local SQLite"
95
+ return {"message": f"API Key for {input_data.service} saved securely to {dest}."}
96
+
97
+ except Exception as e:
98
+ logger.error(f"Error saving API key: {e}")
99
+ return JSONResponse(status_code=500, content={"error": str(e)})
100
+
101
  def process_video_task(
102
  task_id: str,
103
  video_path: str,
 
115
  from moviepy.editor import VideoFileClip
116
  full_video_clip = None
117
  try:
 
 
 
118
  # Helper for progress updates
119
  def update_progress(progress, message):
120
  task_manager.update_task_progress(task_id, progress, message)
 
122
  update_progress(1, "Starting video analysis...")
123
 
124
  # 1. Analyze video
125
+ # Fix: Ensure 'words' mode is used for highlight_word too
126
+ timestamp_mode = "words" if caption_mode in (CaptionMode.word, CaptionMode.highlight_word) else "segments"
127
  scored_segments, total_duration, llm_moments = clipper.analyze_impact(
128
  video_path,
129
+ # video_clip removed: analyze_impact no longer accepts it
+ # main.py's 'language' is the output language, so it is passed
+ # to analyze_impact as target_language
133
+ target_language=language,
134
  timestamp_mode=timestamp_mode,
135
  progress_callback=update_progress
136
  )
 
148
  llm_moments,
149
  style=style,
150
  task_id=task_id,
151
+ language=language, # target language
152
+ # video_clip removed
153
  playground_path=playground_path,
154
  audio_path=audio_path,
155
  bg_music_volume=bg_music_volume,
 
182
  "traceback": traceback.format_exc()
183
  }
184
  finally:
185
+ pass  # nothing to close: full_video_clip is no longer opened here
 
186
 
187
  # Send webhook
188
  if webhook_url and webhook_url.strip() and webhook_url.startswith(('http://', 'https://')):
processor.py CHANGED
@@ -1,9 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
2
  import json
3
  import traceback
4
- from datetime import datetime
5
  import moviepy.editor as mpe
6
- import core # Applies monkey patches
 
 
7
  from core.config import Config
8
  from core.logger import Logger
9
  from core.stt import STT
@@ -11,455 +25,448 @@ from core.analyze import analyze_transcript_gemini
11
  from core.styles import StyleFactory
12
  from core.subtitle_manager import SubtitleManager
13
  from core.free_translator import FreeTranslator
14
- import json_repair
15
 
16
  logger = Logger.get_logger(__name__)
17
 
 
 
18
  class VideoProcessor:
 
19
  def __init__(self, model_size="base"):
20
  self.stt = STT(model_size=model_size)
21
  Config.setup_dirs()
22
-
 
 
23
  def _clean_json_response(self, content):
24
- """Cleans AI JSON response using json_repair."""
 
 
 
25
  if not isinstance(content, str):
26
  return content
27
-
28
- # Remove markdown blocks if present
29
  content = content.strip()
30
- if content.startswith("```json"):
31
- content = content[7:]
32
- if content.startswith("```"):
33
- content = content[3:]
34
  if content.endswith("```"):
35
  content = content[:-3]
36
-
37
  content = content.strip()
38
-
39
- # Use json_repair to fix truncated or malformed JSON
40
  try:
41
- repaired_json = json_repair.loads(content)
42
- return json.dumps(repaired_json)
43
  except Exception as e:
44
- logger.warning(f"⚠️ json_repair failed, falling back to manual fix: {e}")
45
-
46
- # Fallback manual fix (though json_repair is usually sufficient)
47
- if content and not content.endswith('}'):
48
- open_braces = content.count('{')
49
- close_braces = content.count('}')
50
- if open_braces > close_braces:
51
- content += '}' * (open_braces - close_braces)
52
- logger.info(f"🔧 Fixed truncated JSON with {open_braces - close_braces} closing braces")
53
-
54
  return content
55
 
56
  def parse_ai_response(self, ai_res):
57
- """
58
- Parses the JSON response from the AI and returns a list of segments.
59
- Handles both string and dictionary responses, and various potential key names.
60
- """
61
  if not isinstance(ai_res, dict):
62
- logger.error(f"❌ Invalid AI response format: expected dict, got {type(ai_res)}")
63
  return []
64
-
65
  res_content = ai_res.get("content")
66
- segments_data = {}
67
-
68
  try:
69
  if isinstance(res_content, str):
70
- cleaned_content = self._clean_json_response(res_content)
71
- segments_data = json.loads(cleaned_content)
72
  else:
73
  segments_data = res_content
74
-
75
- chunk_segments = []
 
 
76
  if isinstance(segments_data, dict):
77
- for key in ["segments", "clips", "moments"]:
78
  if key in segments_data and isinstance(segments_data[key], list):
79
- chunk_segments = segments_data[key]
80
- break
81
- if not chunk_segments and any(isinstance(v, list) for v in segments_data.values()):
82
- for v in segments_data.values():
83
- if isinstance(v, list):
84
- chunk_segments = v
85
- break
86
- elif isinstance(segments_data, list):
87
- chunk_segments = segments_data
88
-
89
- return chunk_segments
90
-
91
  except Exception as e:
92
  logger.error(f"❌ Failed to parse AI response: {e}")
93
- logger.error(f"Raw Content: {res_content}")
94
- return []
 
 
 
 
 
 
 
 
95
 
96
- def _clean_json_response(self, text):
97
- """Clean markdown and other noise from AI JSON response."""
98
- text = text.strip()
99
- if text.startswith("```json"):
100
- text = text[7:]
101
- if text.startswith("```"):
102
- text = text[3:]
103
- if text.endswith("```"):
104
- text = text[:-3]
105
- return text.strip()
106
-
107
- def analyze_impact(self, video_path, video_clip=None, language=None, timestamp_mode="segments", progress_callback=None):
108
- """Analyzes video content and suggests viral clips using AI."""
109
- if progress_callback: progress_callback(5, "Starting speech-to-text...")
110
- logger.info("🎙️ Phase 1: Speech-to-Text (STT)...")
111
- # Always transcribe in source language (auto detect)
112
  full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
113
- video_path,
114
- language=None, # Always auto-detect source language
115
  skip_ai=True,
116
- timestamp_mode=timestamp_mode
117
  )
118
-
119
- # Check if translation is needed
120
- target_lang = None
121
- if language:
122
- if hasattr(language, 'value'):
123
- target_lang = language.value
124
- else:
125
- target_lang = str(language)
126
-
127
-
128
  data = {
129
- "segments": full_segments,
130
- "detected_language": detected_lang,
131
- "duration": duration
 
132
  }
133
-
134
- logger.info("🤖 Phase 2: AI Viral Moment Analysis...")
135
- if progress_callback: progress_callback(20, "Analyzing content for viral moments...")
136
-
137
- chunk_size = Config.CHUNK_SIZE_SECONDS
138
- overlap = Config.OVERLAP_SECONDS
139
-
140
- all_ai_segments = []
141
- max_time = full_segments[-1]["end"] if full_segments else 0
142
-
143
  current_start = 0
 
144
  while current_start < max_time:
145
- current_end = current_start + chunk_size
146
-
147
  chunk_transcript = ""
 
148
  for seg in full_segments:
149
  if seg["start"] >= current_start and seg["start"] < current_end:
150
- chunk_transcript += f"[{seg['start']:.2f} - {seg['end']:.2f}] {seg['text']}\n"
151
-
 
 
152
  if chunk_transcript.strip():
153
- transcript_len = len(chunk_transcript)
154
-
155
- # Calculate progress
156
- current_progress = 20 + int((current_start / max_time) * 40) # 20% to 60%
157
- if progress_callback:
158
- progress_callback(current_progress, f"Analyzing chunk {current_start/60:.1f}m - {min(current_end, max_time)/60:.1f}m")
159
-
160
- logger.info(f"🧠 Analyzing chunk: {current_start/60:.1f}m to {min(current_end, max_time)/60:.1f}m (Length: {transcript_len} chars)...")
161
-
 
 
 
162
  ai_res = analyze_transcript_gemini(chunk_transcript)
163
-
164
- # Log debug info
165
- logger.info(f"🤖 AI Response Type: {type(ai_res)}")
166
- if isinstance(ai_res, dict) and "content" in ai_res:
167
- logger.info(f"🤖 Raw AI Response (First 500 chars): {ai_res['content'][:500]}...")
168
- else:
169
- logger.info(f"🤖 Raw AI Response (Structure): {str(ai_res)[:500]}...")
170
-
171
  try:
172
- chunk_segments = self.parse_ai_response(ai_res)
173
- logger.info(f"✅ Found {len(chunk_segments)} segments in chunk")
174
- all_ai_segments.extend(chunk_segments)
175
  except Exception as e:
176
- logger.error(f"❌ Error processing chunk: {e}")
177
  logger.error(traceback.format_exc())
178
-
179
- current_start += (chunk_size - overlap)
180
- if current_end >= max_time: break
181
-
182
- # Deduplicate
183
- unique_segments = []
184
- seen_starts = set()
185
- for s in all_ai_segments:
186
- start_t = s.get("start_time")
187
- if start_t not in seen_starts:
188
- unique_segments.append(s)
189
- seen_starts.add(start_t)
190
-
191
- return unique_segments, duration, data
 
 
 
192
 
193
  def get_best_segments(self, segments, video_duration=0):
194
- """Sorts segments by viral score."""
195
  return sorted(segments, key=lambda x: x.get("viral_score", 0), reverse=True)
196
 
197
- def process_clips(self, input_video_path, best_clips, data, style="cinematic", language=None, video_clip=None, progress_callback=None, **kwargs):
198
- """Processes the selected viral clips with styles and captions."""
199
- logger.info("🎨 Phase 3: Style & Captions...")
200
- if progress_callback: progress_callback(60, "Generating clips...")
201
-
202
- # Determine video duration safely
203
- video_duration = 0
204
- if "duration" in data and data["duration"]:
205
- video_duration = data["duration"]
206
- elif video_clip:
207
- video_duration = video_clip.duration
 
 
 
 
 
 
 
208
  else:
209
- try:
210
- with mpe.VideoFileClip(input_video_path) as temp_vid:
211
- video_duration = temp_vid.duration
212
- except Exception as e:
213
- logger.error(f"❌ Failed to get video duration: {e}")
214
 
 
 
 
 
 
 
215
  output_files = []
216
-
217
- # Initialize Translator if needed
218
- translator = None
219
- target_language = None
220
- if language:
221
- target_language = language.value if hasattr(language, 'value') else language
222
-
223
- detected_lang = data.get("detected_language", "en")
224
- needs_translation = (target_language and
225
- target_language != "auto" and
226
- target_language != detected_lang)
227
-
228
- if needs_translation:
229
- logger.info(f"🌍 Translating from {detected_lang} to {target_language}...")
230
- translator = FreeTranslator()
231
 
232
- try:
233
- if not best_clips:
234
- logger.warning("⚠️ No best clips provided to process_clips!")
235
- return []
236
-
237
- logger.info(f"📊 Starting processing for {len(best_clips)} clips...")
238
-
239
- for i, seg in enumerate(best_clips):
240
- # Update progress
241
- current_progress = 60 + int((i / len(best_clips)) * 35) # 60% to 95%
242
- if progress_callback:
243
- progress_callback(current_progress, f"Rendering clip {i+1} of {len(best_clips)}...")
244
 
245
- clip = None
246
- final_clip = None
247
- current_video_clip = None # Local handle for this iteration
248
-
249
- try:
250
- start = max(0, seg.get("start_time", 0))
251
- end = min(video_duration, seg.get("end_time", 0))
252
-
253
- # Ensure valid duration
254
- if end - start < 1.0:
255
- logger.warning(f"⚠️ Clip {i+1} too short ({end-start:.2f}s), skipping.")
 
 
 
 
 
 
256
  continue
257
-
258
- # TRANSLATION STEP: Translate only the current segment if needed
 
 
 
259
  if needs_translation and translator:
 
260
  try:
261
- # Find matching transcript segments for this clip
262
- # Note: segments in 'data' use 'start' and 'end' keys
263
- matching_segs = [
264
- s for s in data.get('segments', [])
265
- if s['start'] >= start and s['end'] <= end
 
 
 
 
 
 
266
  ]
267
-
268
- if matching_segs:
269
- logger.info(f"🌍 Translating {len(matching_segs)} segments for Clip {i+1}...")
270
- for match_s in matching_segs:
271
- # Skip if already translated (heuristic check if needed, but safe to re-translate if simple)
272
- # Or better, check if text is already in target language?
273
- # Since we modify in place, subsequent clips covering same segment might re-translate.
274
- # But clips usually don't overlap much.
275
-
276
- tr_text, _ = translator.translate_text(match_s['text'], target_language)
277
- match_s['text'] = tr_text
278
- # Clear words to force interpolation since word-level timing is lost
279
- if 'words' in match_s:
280
- match_s['words'] = []
281
-
282
- logger.info(f"✅ Translated clip {i+1} content to {target_language}")
283
-
284
- except Exception as e:
285
- logger.warning(f"⚠️ Translation failed for clip {i+1}: {e}")
286
-
287
- logger.info(f"\n🎬 Processing Clip {i+1}/{len(best_clips)} ({start:.2f} - {end:.2f})...")
288
-
289
- # Ensure style is a clean string
290
- style_str = style.value if hasattr(style, "value") else str(style)
291
- if "." in style_str:
292
- style_str = style_str.split(".")[-1] # Handle VideoStyle.split_vertical
293
-
294
- output_filename = f"viral_{i+1}_{style_str}.mp4"
295
- # Add task_id to filename if provided to avoid collisions
296
- task_id = kwargs.get("task_id")
297
- if task_id:
298
- output_filename = f"viral_{task_id}_{i+1}_{style_str}.mp4"
299
-
300
- final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", output_filename)
301
- os.makedirs(os.path.dirname(final_output), exist_ok=True)
302
-
303
- if start >= video_duration:
304
- logger.warning(f"⚠️ Clip start time {start} is beyond video duration {video_duration}, skipping.")
305
- continue
306
 
307
- # 1. Cut the clip
308
- # ALWAYS open a fresh VideoFileClip for each iteration to avoid shared reader issues
309
- # and allow full cleanup (closing reader) after each clip.
310
- current_video_clip = mpe.VideoFileClip(input_video_path)
311
- clip = current_video_clip.subclip(start, end)
312
-
313
- # 2. Get the style strategy
314
- style_strategy = StyleFactory.get_style(style)
315
- logger.info(f"✨ Applying style: {style}")
316
-
317
- # 3. Handle Translation and Captions PREPARATION
318
- segment_transcript = {"segments": []}
319
-
320
- # Filter relevant segments for this clip
321
- for s in data["segments"]:
322
- if s["start"] < end and s["end"] > start:
323
- # Clone the segment to avoid modifying original data
324
- new_seg = s.copy()
325
-
326
- # Adjust timestamps relative to clip start
327
- new_seg["start"] = max(0, s["start"] - start)
328
- new_seg["end"] = min(end - start, s["end"] - start)
329
-
330
- if needs_translation and translator:
331
- logger.info(f"🌍 Translating segment: {s['text'][:30]}...")
332
- translated_text, _ = translator.translate_text(s['text'], target_language)
333
- new_seg["text"] = translated_text
334
- # Simple word distribution for translated text
335
- words = translated_text.split()
336
- seg_duration = new_seg["end"] - new_seg["start"]
337
- word_duration = seg_duration / len(words) if words else seg_duration
338
- new_seg["words"] = []
339
- for idx, w in enumerate(words):
340
- new_seg["words"].append({
341
- "text": w,
342
- "start": new_seg["start"] + (idx * word_duration),
343
- "end": new_seg["start"] + ((idx + 1) * word_duration)
344
- })
345
- else:
346
- # Adjust word timestamps if they exist
347
- if "words" in s:
348
- new_words = []
349
- for w in s["words"]:
350
- if w["start"] < end and w["end"] > start:
351
- nw = w.copy()
352
- nw["start"] = max(0, w["start"] - start)
353
- nw["end"] = min(end - start, w["end"] - start)
354
- new_words.append(nw)
355
- new_seg["words"] = new_words
356
-
357
- segment_transcript["segments"].append(new_seg)
358
-
359
- # 4. Use the optimized apply_with_captions method
360
- final_clip = style_strategy.apply_with_captions(
361
- clip,
362
- transcript_data=segment_transcript,
363
- language=target_language if needs_translation else detected_lang,
364
- caption_mode=kwargs.get('caption_mode', 'sentence'),
365
- caption_style=kwargs.get('caption_style', 'classic'),
366
- background_path=kwargs.get("background_path"),
367
- playground_path=kwargs.get("playground_path")
368
- )
369
-
370
- # 5. Write Output
371
- # Automatically use all available CPU cores
372
- # os.cpu_count() returns None if undetermined, so we default to 4 in that case
373
- cpu_count = os.cpu_count() or 4
374
- logger.info(f"⚙️ Using {cpu_count} threads for video rendering")
375
-
376
- final_clip.write_videofile(
377
- final_output,
378
- codec="libx264",
379
- audio_codec="aac",
380
- threads=cpu_count,
381
- logger=None
382
- )
383
-
384
- output_files.append(final_output)
385
-
386
- except Exception as e:
387
- logger.error(f"❌ Error processing clip {i+1}: {e}")
388
- logger.error(traceback.format_exc())
389
- finally:
390
- # 🧹 Explicit Cleanup
391
- if final_clip:
392
- try: final_clip.close()
393
- except: pass
394
- if clip:
395
- try: clip.close()
396
- except: pass
397
- if current_video_clip:
398
- try: current_video_clip.close()
399
- except: pass
400
-
401
- # Force garbage collection
402
- import gc
403
- gc.collect()
404
-
405
- except Exception as e:
406
- logger.error(f"❌ Error in processing loop: {e}")
407
- logger.error(traceback.format_exc())
408
-
409
- # Note: We don't close passed video_clip here because we didn't open it (or we treated it as read-only for duration)
410
- # The caller is responsible for closing video_clip if they passed it.
411
 
412
  return output_files
413
 
414
- # -----------------------------------------------------------------------------
415
- # Module Level Function to wrap the class usage
416
- # -----------------------------------------------------------------------------
 
 
417
  def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
418
  """
419
- Main entry point to process a video end-to-end.
 
 
 
 
 
 
 
 
420
  """
421
- video_clip = None
422
  try:
423
  processor = VideoProcessor(model_size=model_size)
424
-
425
- # 1. Open Video Clip ONCE
426
- video_clip = mpe.VideoFileClip(video_path)
427
-
428
- # 2. Analyze (Reuse video_clip)
429
  caption_mode = kwargs.get("caption_mode", "sentence")
430
- timestamp_mode = "words" if caption_mode == "word" else "segments"
431
-
 
 
 
 
 
 
 
432
  viral_segments, duration, stt_data = processor.analyze_impact(
433
- video_path,
434
- video_clip=video_clip,
435
- language=kwargs.get("language"),
436
- timestamp_mode=timestamp_mode
437
  )
438
-
439
  if not viral_segments:
440
  logger.warning("⚠️ No viral segments found.")
441
  return []
442
 
443
- # 3. Process Clips (Reuse video_clip and STT data)
444
- output_files = processor.process_clips(
445
- video_path,
446
- viral_segments,
447
- stt_data,
448
- style=style,
449
- language=kwargs.get("language"),
450
- video_clip=video_clip,
451
- **kwargs
 
452
  )
453
-
454
- return output_files
455
 
456
  except Exception as e:
457
  logger.error(f"❌ Processing failed: {e}")
458
  logger.error(traceback.format_exc())
459
  return []
460
- finally:
461
- if video_clip:
462
- video_clip.close()
463
 
464
  if __name__ == "__main__":
465
  import sys
 
1
+ """
2
+ VideoProcessor — Core pipeline for viral clip extraction.
3
+
4
+ Fixes applied:
5
+ - source_language (for Whisper) separated from target_language (for translation/captions)
6
+ - Removed duplicate _clean_json_response (json_repair version kept)
7
+ - Single translation pass only (no double-translate on data in-place)
8
+ - timestamp_mode handles highlight_word correctly
9
+ - style string normalised once
10
+ - get_best_segments wired into process_video
11
+ - detected_lang used correctly for captions
12
+ """
13
  import os
14
+ import gc
15
  import json
16
  import traceback
 
17
  import moviepy.editor as mpe
18
+ import json_repair
19
+
20
+ import core # Applies monkey patches
21
  from core.config import Config
22
  from core.logger import Logger
23
  from core.stt import STT
 
25
  from core.styles import StyleFactory
26
  from core.subtitle_manager import SubtitleManager
27
  from core.free_translator import FreeTranslator
 
28
 
29
  logger = Logger.get_logger(__name__)
30
 
31
+
32
+ # ─────────────────────────────────────────────────────────────────────────────
33
  class VideoProcessor:
34
+
35
  def __init__(self, model_size="base"):
36
  self.stt = STT(model_size=model_size)
37
  Config.setup_dirs()
38
+
39
+ # ── JSON helpers ──────────────────────────────────────────────────────────
40
+
41
  def _clean_json_response(self, content):
42
+ """
43
+ Strips markdown fences then uses json_repair to fix malformed JSON.
44
+ Single definition — json_repair version only.
45
+ """
46
  if not isinstance(content, str):
47
  return content
48
+
 
49
  content = content.strip()
50
+ for fence in ("```json", "```"):
51
+ if content.startswith(fence):
52
+ content = content[len(fence):]
 
53
  if content.endswith("```"):
54
  content = content[:-3]
 
55
  content = content.strip()
56
+
 
57
  try:
58
+ repaired = json_repair.loads(content)
59
+ return json.dumps(repaired)
60
  except Exception as e:
61
+ logger.warning(f"⚠️ json_repair failed, using raw content: {e}")
62
+
63
+ # Last-resort brace balancing
64
+ open_b = content.count("{")
65
+ close_b = content.count("}")
66
+ if open_b > close_b:
67
+ content += "}" * (open_b - close_b)
68
+ logger.info(f"🔧 Appended {open_b - close_b} closing brace(s)")
69
+
 
70
  return content
71
 
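For intuition, the same repair shown with json_repair directly (the truncated payload is made up; requires Python 3.9+ for removeprefix/removesuffix):

    import json, json_repair

    fence = "`" * 3                      # markdown code fence
    raw = fence + 'json\n{"segments": [{"start_time": 1.5}]\n' + fence
    stripped = raw.strip().removeprefix(fence + "json").removesuffix(fence).strip()
    print(json.dumps(json_repair.loads(stripped)))
    # {"segments": [{"start_time": 1.5}]}
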
72
  def parse_ai_response(self, ai_res):
73
+ """Parses AI JSON response → list of segment dicts."""
 
 
 
74
  if not isinstance(ai_res, dict):
75
+ logger.error(f"❌ Invalid AI response type: {type(ai_res)}")
76
  return []
77
+
78
  res_content = ai_res.get("content")
 
 
79
  try:
80
  if isinstance(res_content, str):
81
+ segments_data = json.loads(self._clean_json_response(res_content))
 
82
  else:
83
  segments_data = res_content
84
+
85
+ if isinstance(segments_data, list):
86
+ return segments_data
87
+
88
  if isinstance(segments_data, dict):
89
+ for key in ("segments", "clips", "moments"):
90
  if key in segments_data and isinstance(segments_data[key], list):
91
+ return segments_data[key]
92
+ # Fallback: first list value found
93
+ for v in segments_data.values():
94
+ if isinstance(v, list):
95
+ return v
96
+
 
 
 
 
 
 
97
  except Exception as e:
98
  logger.error(f"❌ Failed to parse AI response: {e}")
99
+ logger.error(f"Raw content: {res_content}")
100
+
101
+ return []
102
+
103
+ # ── Analysis ──────────────────────────────────────────────────────────────
104
+
105
+ def analyze_impact(self,
106
+ video_path,
107
+ source_language=None, # original video language, passed to Whisper
108
+ target_language=None, # output language (translation/captions)
109
+ timestamp_mode="segments",
110
+ progress_callback=None):
111
+ """
112
+ STT + AI viral-moment detection.
113
+
114
+ source_language : passed straight to Whisper.
115
+ If None, Whisper auto-detects the language (slower but safe).
116
+ target_language : stored in data so process_clips can use it for translation and captions.
117
+
118
+ Returns (unique_segments, duration, data)
119
+ """
120
+ if progress_callback:
121
+ progress_callback(5, "Starting speech-to-text...")
122
+
123
+ logger.info(
124
+ f"🎙️ Phase 1: STT | source_language={source_language or 'auto-detect'}"
125
+ )
126
 
 
 
 
 
127
  full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
128
+ video_path,
129
+ language=source_language, # None = let Whisper auto-detect
130
  skip_ai=True,
131
+ timestamp_mode=timestamp_mode,
132
  )
133
+
134
+ logger.info(f"🔍 Whisper detected language: {detected_lang}")
135
+
 
 
 
 
 
 
 
136
  data = {
137
+ "segments": full_segments,
138
+ "detected_language": detected_lang, # اللغة الفعلية اللي Whisper اكتشفها
139
+ "target_language": target_language, # اللغة المطلوبة للـ output
140
+ "duration": duration,
141
  }
142
+
143
+ # ── AI Viral Analysis ─────────────────────────────────────────────────
144
+ logger.info("🤖 Phase 2: AI Viral Moment Analysis ")
145
+ if progress_callback:
146
+ progress_callback(20, "Analysing content for viral moments …")
147
+
148
+ chunk_size = Config.CHUNK_SIZE_SECONDS
149
+ overlap = Config.OVERLAP_SECONDS
150
+ max_time = full_segments[-1]["end"] if full_segments else 0
151
+ all_ai_segs = []
152
  current_start = 0
153
+
154
  while current_start < max_time:
155
+ current_end = current_start + chunk_size
 
156
  chunk_transcript = ""
157
+
158
  for seg in full_segments:
159
  if seg["start"] >= current_start and seg["start"] < current_end:
160
+ chunk_transcript += (
161
+ f"[{seg['start']:.2f} - {seg['end']:.2f}] {seg['text']}\n"
162
+ )
163
+
164
  if chunk_transcript.strip():
165
+ pct = 20 + int((current_start / max_time) * 40)
166
+ if progress_callback:
167
+ progress_callback(
168
+ pct,
169
+ f"Analysing {current_start/60:.1f}m – "
170
+ f"{min(current_end, max_time)/60:.1f}m",
171
+ )
172
+ logger.info(
173
+ f"🧠 Chunk {current_start/60:.1f}m → "
174
+ f"{min(current_end, max_time)/60:.1f}m …"
175
+ )
176
+
177
  ai_res = analyze_transcript_gemini(chunk_transcript)
178
+ logger.info(f"🤖 AI response type: {type(ai_res)}")
179
+
 
 
 
 
 
 
180
  try:
181
+ chunk_segs = self.parse_ai_response(ai_res)
182
+ logger.info(f"✅ {len(chunk_segs)} segments in chunk")
183
+ all_ai_segs.extend(chunk_segs)
184
  except Exception as e:
185
+ logger.error(f"❌ Chunk processing error: {e}")
186
  logger.error(traceback.format_exc())
187
+
188
+ current_start += chunk_size - overlap
189
+ if current_end >= max_time:
190
+ break
191
+
192
+ # Deduplicate by start_time
193
+ seen, unique = set(), []
194
+ for s in all_ai_segs:
195
+ st = s.get("start_time")
196
+ if st not in seen:
197
+ unique.append(s)
198
+ seen.add(st)
199
+
200
+ logger.info(f"📊 Total unique viral segments found: {len(unique)}")
201
+ return unique, duration, data
202
+
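Note: analyze_impact slides a fixed window with an overlap, so a moment that straddles
a chunk boundary is analysed twice and later de-duplicated by start_time. The window
arithmetic in isolation, with assumed values (the real ones live in
Config.CHUNK_SIZE_SECONDS / Config.OVERLAP_SECONDS):

    chunk_size, overlap, max_time = 300, 30, 700   # assumed values, in seconds
    start = 0
    while start < max_time:
        end = start + chunk_size
        print(f"analyse [{start}s, {min(end, max_time)}s)")
        start += chunk_size - overlap
        if end >= max_time:
            break
    # -> analyse [0s, 300s) / [270s, 570s) / [540s, 700s)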
+     # ── Sorting ───────────────────────────────────────────────────────────────
 
      def get_best_segments(self, segments, video_duration=0):
+         """Sort segments by viral_score descending."""
          return sorted(segments, key=lambda x: x.get("viral_score", 0), reverse=True)
 
+     # ── Processing ────────────────────────────────────────────────────────────
+
+     def process_clips(self,
+                       input_video_path,
+                       best_clips,
+                       data,
+                       style="cinematic",
+                       progress_callback=None,
+                       **kwargs):
+         """
+         Cuts, styles, captions, and exports each viral clip.
+
+         target_language comes from data["target_language"] (set by analyze_impact).
+         Translation happens exactly once, inside the segment_transcript loop.
+         """
+         logger.info("🎨 Phase 3: Style & Captions …")
+         if progress_callback:
+             progress_callback(60, "Generating clips …")
+
+         # ── Video duration ────────────────────────────────────────────────────
+         video_duration = data.get("duration") or 0
+         if not video_duration:
+             try:
+                 with mpe.VideoFileClip(input_video_path) as tmp:
+                     video_duration = tmp.duration
+             except Exception as e:
+                 logger.error(f"❌ Could not determine video duration: {e}")
+
+         # ── Language resolution ───────────────────────────────────────────────
+         #
+         # detected_lang     = the video's actual language (from Whisper)
+         # target_language   = the requested output language (from the request)
+         #
+         # needs_translation = True → translate the text
+         # caption_lang      = the language captions are rendered in
+         #
+         detected_lang = data.get("detected_language", "en")
+         target_language = data.get("target_language")   # set by analyze_impact
+
+         # normalize enum values to plain strings
+         if hasattr(target_language, "value"):
+             target_language = target_language.value
+
+         needs_translation = bool(
+             target_language
+             and target_language != "auto"
+             and target_language != detected_lang
+         )
+
+         # Captions use the output language when translating,
+         # otherwise the original video language.
+         caption_lang = target_language if needs_translation else detected_lang
+
+         translator = FreeTranslator() if needs_translation else None
+         if needs_translation:
+             logger.info(f"🌍 Will translate: {detected_lang} → {target_language}")
          else:
+             logger.info(f"🗣️ No translation needed - captions in: {caption_lang}")
 
+         # ── Normalise style string once ───────────────────────────────────────
+         style_str = style.value if hasattr(style, "value") else str(style)
+         if "." in style_str:
+             style_str = style_str.split(".")[-1]
+
+         # ── Main loop ─────────────────────────────────────────────────────────
          output_files = []
 
+         if not best_clips:
+             logger.warning("⚠️ No clips to process.")
+             return []
 
+         logger.info(f"📊 Processing {len(best_clips)} clip(s) …")
+
+         for i, seg in enumerate(best_clips):
+             pct = 60 + int((i / len(best_clips)) * 35)
+             if progress_callback:
+                 progress_callback(pct, f"Rendering clip {i+1}/{len(best_clips)} …")
+
+             clip = None
+             final_clip = None
+             current_video_clip = None
+
+             try:
+                 start = max(0, seg.get("start_time", 0))
+                 end = min(video_duration, seg.get("end_time", 0))
+
+                 if end - start < 1.0:
+                     logger.warning(
+                         f"⚠️ Clip {i+1} too short ({end-start:.2f}s), skipping."
+                     )
+                     continue
+
+                 if start >= video_duration:
+                     logger.warning(
+                         f"⚠️ Clip {i+1} start {start}s ≥ duration {video_duration}s, skipping."
+                     )
+                     continue
+
+                 logger.info(f"\n🎬 Clip {i+1}/{len(best_clips)} ({start:.2f}s – {end:.2f}s)")
+
+                 # ── Output path ───────────────────────────────────────────────
+                 task_id = kwargs.get("task_id")
+                 prefix = f"viral_{task_id}_{i+1}" if task_id else f"viral_{i+1}"
+                 out_name = f"{prefix}_{style_str}.mp4"
+                 final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", out_name)
+                 os.makedirs(os.path.dirname(final_output), exist_ok=True)
+
+                 # ── Cut clip (fresh VideoFileClip per iteration) ───────────────
+                 current_video_clip = mpe.VideoFileClip(input_video_path)
+                 clip = current_video_clip.subclip(start, end)
+
+                 # ── Build segment_transcript ──────────────────────────────────
+                 # Translation happens here and only here; nothing else mutates
+                 # data. (See the word-timing sketch after this method.)
+                 segment_transcript = {"segments": []}
+
+                 for s in data["segments"]:
+                     if s["start"] >= end or s["end"] <= start:
                          continue
+
+                     new_seg = s.copy()
+                     new_seg["start"] = max(0, s["start"] - start)
+                     new_seg["end"] = min(end - start, s["end"] - start)
+
                      if needs_translation and translator:
+                         # Translate the text, then redistribute timestamps
+                         # across the new words.
                          try:
+                             translated_text, _ = translator.translate_text(
+                                 s["text"], target_language
+                             )
+                         except Exception as te:
+                             logger.warning(f"⚠️ Translation error: {te}")
+                             translated_text = s["text"]
+
+                         new_seg["text"] = translated_text
+                         words = translated_text.split()
+                         seg_dur = new_seg["end"] - new_seg["start"]
+                         word_dur = seg_dur / len(words) if words else seg_dur
+                         new_seg["words"] = [
+                             {
+                                 "text": w,
+                                 "start": new_seg["start"] + idx * word_dur,
+                                 "end": new_seg["start"] + (idx + 1) * word_dur,
+                             }
+                             for idx, w in enumerate(words)
+                         ]
+                     else:
+                         # No translation: shift the existing word timestamps
+                         # into clip-relative time.
+                         if "words" in s:
+                             new_seg["words"] = [
+                                 {
+                                     **w,
+                                     "start": max(0, w["start"] - start),
+                                     "end": min(end - start, w["end"] - start),
+                                 }
+                                 for w in s["words"]
+                                 if w["start"] < end and w["end"] > start
                              ]
 
+                     segment_transcript["segments"].append(new_seg)
+
+                 # ── Apply style + captions ────────────────────────────────────
+                 style_strategy = StyleFactory.get_style(style_str)
+                 logger.info(f"✨ Style: {style_str} | Caption lang: {caption_lang}")
+
+                 final_clip = style_strategy.apply_with_captions(
+                     clip,
+                     transcript_data=segment_transcript,
+                     language=caption_lang,
+                     caption_mode=kwargs.get("caption_mode", "sentence"),
+                     caption_style=kwargs.get("caption_style", "classic"),
+                     background_path=kwargs.get("background_path"),
+                     playground_path=kwargs.get("playground_path"),
+                 )
+
+                 # ── Export ────────────────────────────────────────────────────
+                 cpu_count = os.cpu_count() or 4
+                 logger.info(f"⚙️ Rendering with {cpu_count} thread(s) …")
+
+                 final_clip.write_videofile(
+                     final_output,
+                     codec="libx264",
+                     audio_codec="aac",
+                     threads=cpu_count,
+                     logger=None,
+                 )
+
+                 output_files.append(final_output)
+                 logger.info(f"✅ Saved: {final_output}")
+
+             except Exception as e:
+                 logger.error(f"❌ Clip {i+1} error: {e}")
+                 logger.error(traceback.format_exc())
+
+             finally:
+                 for obj in (final_clip, clip, current_video_clip):
+                     if obj:
+                         try:
+                             obj.close()
+                         except Exception:
+                             pass
+                 gc.collect()
 
          return output_files
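Note: translated text no longer matches Whisper's word-level timings, so the loop above
spreads each segment's duration evenly across the translated words. The same idea in
isolation, with a made-up 2-second segment:

    seg = {"start": 0.0, "end": 2.0, "text": "hola mundo feliz"}   # sample data
    words = seg["text"].split()
    word_dur = (seg["end"] - seg["start"]) / len(words)            # ≈ 0.667 s per word
    timed = [
        {"text": w,
         "start": seg["start"] + i * word_dur,
         "end": seg["start"] + (i + 1) * word_dur}
        for i, w in enumerate(words)
    ]
    # timed[1] -> {"text": "mundo", "start": 0.667, "end": 1.333} (rounded)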
+
+ # ─────────────────────────────────────────────────────────────────────────────
+ # Module-level convenience wrapper
+ # ─────────────────────────────────────────────────────────────────────────────
+
  def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
      """
+     End-to-end pipeline: STT → AI analysis → clip export.
+
+     Important kwargs:
+         source_language : the original video's language, passed to Whisper.
+                           If unset, Whisper auto-detects.
+         language        : the requested output language (translation + captions).
+                           If it equals the video's language, nothing is translated.
+         caption_mode    : sentence | word | highlight_word
+         caption_style   : classic | modern_glow | tiktok_bold | ...
      """
 
      try:
          processor = VideoProcessor(model_size=model_size)
+
          caption_mode = kwargs.get("caption_mode", "sentence")
+
+         # Both "word" and "highlight_word" need word-level
+         # timestamps from Whisper.
+         timestamp_mode = (
+             "words"
+             if caption_mode in ("word", "highlight_word")
+             else "segments"
+         )
+
+         # Phase 1 + 2: STT + AI analysis
          viral_segments, duration, stt_data = processor.analyze_impact(
+             video_path,
+             source_language=kwargs.get("source_language"),   # for Whisper
+             target_language=kwargs.get("language"),          # for translation + captions
+             timestamp_mode=timestamp_mode,
          )
+
          if not viral_segments:
              logger.warning("⚠️ No viral segments found.")
              return []
 
+         # Sort by viral score
+         best_clips = processor.get_best_segments(viral_segments, duration)
+
+         # Phase 3: render
+         return processor.process_clips(
+             video_path,
+             best_clips,
+             stt_data,
+             style=style,
+             **kwargs,
          )
 
      except Exception as e:
          logger.error(f"❌ Processing failed: {e}")
          logger.error(traceback.format_exc())
          return []
+
 
  if __name__ == "__main__":
  import sys
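
Note: a hypothetical end-to-end invocation of the wrapper above (file names and option
values are examples only):

    clips = process_video(
        "uploads/podcast.mp4",
        style="cinematic_blur",
        model_size="base",
        source_language="en",            # what Whisper transcribes; omit to auto-detect
        language="ar",                   # requested output -> translation + Arabic captions
        caption_mode="highlight_word",   # forces word-level timestamps upstream
        caption_style="tiktok_bold",
    )
    print(clips)   # e.g. ["outputs/viral_clips/viral_1_cinematic_blur.mp4"]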
requirements.txt CHANGED
@@ -15,3 +15,5 @@ imageio-ffmpeg==0.4.8
  openai>=1.0.0
  scipy
  json_repair
+ cryptography
+ firebase-admin