aliSaac510 commited on
Commit
0890748
·
1 Parent(s): d392f23

fix english caption

Browse files
Files changed (7) hide show
  1. core/analyze.py +136 -38
  2. core/config.py +187 -223
  3. core/free_translator.py +8 -16
  4. core/stt.py +31 -78
  5. core/subtitle_manager.py +183 -149
  6. processor.py +104 -56
  7. requirements.txt +2 -2
core/analyze.py CHANGED
@@ -1,10 +1,16 @@
1
  import os
2
  import time
 
 
3
  from openai import OpenAI
4
  from dotenv import load_dotenv
5
 
6
  load_dotenv()
7
 
 
 
 
 
8
  # Configure OpenAI Client
9
  api_key = os.getenv("OPENROUTER_API_KEY")
10
  client = OpenAI(
@@ -12,50 +18,57 @@ client = OpenAI(
12
  api_key=api_key
13
  )
14
 
15
- def analyze_transcript_gemini(transcript):
16
- """Analyze transcript using OpenRouter (DeepSeek) via Env Key."""
17
-
18
- prompt = f"""
19
- You are an expert video editor and viral content strategist. Your task is to identify the most engaging segments from the provided transcript that are suitable for short-form video platforms like TikTok, Reels, and YouTube Shorts.
20
 
21
- STRICT JSON OUTPUT FORMAT REQUIRED:
22
- You must output ONLY valid JSON. Do not include any markdown formatting (like ```json ... ```), explanations, or additional text outside the JSON object.
 
 
 
 
 
23
 
24
- The JSON structure must be exactly as follows:
 
 
 
 
 
 
 
 
 
 
 
 
25
  {{
26
  "segments": [
27
  {{
28
  "start_time": <float, start time in seconds>,
29
  "end_time": <float, end time in seconds>,
30
- "duration": <float, duration in seconds>,
31
  "description": "<string, brief summary of the clip content 10 words max>",
32
  "viral_score": <float, score from 0-10 indicating viral potential>,
33
  "reason": "<string, explanation of why this segment is engaging>"
34
  }}
35
  ]
36
  }}
37
-
38
- SELECTION CRITERIA:
39
- 1. **Standalone Quality**: Each clip must make sense on its own without prior context. Avoid starting with conjunctions like "And", "But", "So" unless they are part of a complete thought.
40
- 2. **Engagement**: Look for strong hooks, emotional moments, humor, surprising facts, or actionable advice.
41
- 3. **Duration**: Prioritize clips between 30 and 180 seconds.
42
- 4. **Completeness**: Ensure the clip has a clear beginning and end. Do not cut off sentences.
43
-
44
- IMPORTANT:
45
- - Return valid JSON only.
46
- - If no suitable segments are found, return {{ "segments": [] }}.
47
- - Ensure all strings are properly escaped.
48
-
49
  Transcript to Analyze:
50
  {transcript}
51
  """
52
 
53
  max_retries = 3
54
  base_delay = 5
 
55
 
56
  for attempt in range(max_retries):
57
  try:
58
- # Simple direct request
59
  response = client.chat.completions.create(
60
  model="deepseek/deepseek-chat",
61
  messages=[
@@ -68,7 +81,7 @@ def analyze_transcript_gemini(transcript):
68
  },
69
  temperature=0.7,
70
  )
71
-
72
  content = response.choices[0].message.content
73
  print(f"🤖 AI Raw Response (First 500 chars): {content[:500]}...")
74
 
@@ -77,18 +90,14 @@ def analyze_transcript_gemini(transcript):
77
  content = content.split("```json")[1].split("```")[0].strip()
78
  elif "```" in content:
79
  content = content.split("```")[1].split("```")[0].strip()
80
-
81
- # Debugging: Print segment count
82
- try:
83
- import json
84
- data = json.loads(content)
85
- segments_count = len(data.get("segments", []))
86
- print(f"🤖 AI Response parsed successfully: Found {segments_count} segments.")
87
- except Exception as e:
88
- print(f"⚠️ Failed to parse AI response for logging: {e}")
89
 
90
  return {"content": content}
91
-
92
  except Exception as e:
93
  print(f"❌ Error in OpenRouter analysis: {e}")
94
  if attempt < max_retries - 1:
@@ -102,10 +111,99 @@ def analyze_transcript_gemini(transcript):
102
  return {"content": '{"segments": []}'}
103
 
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
- # إعداد متغير البيئة
 
107
  if __name__ == "__main__":
108
- # اختبار سريع
109
- test_transcript = "[0.0 - 5.0] This is amazing content about viral videos!"
110
- result = analyze_transcript_gemini(test_transcript)
111
- print("Gemini Analysis Result:", result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import time
3
+ import json
4
+ import logging
5
  from openai import OpenAI
6
  from dotenv import load_dotenv
7
 
8
  load_dotenv()
9
 
10
+ # Setup Logger
11
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
12
+ logger = logging.getLogger(__name__)
13
+
14
  # Configure OpenAI Client
15
  api_key = os.getenv("OPENROUTER_API_KEY")
16
  client = OpenAI(
 
18
  api_key=api_key
19
  )
20
 
 
 
 
 
 
21
 
22
+ def analyze_transcript(transcript):
23
+ """Analyze transcript using OpenRouter via Env Key."""
24
+
25
+ prompt = f"""
26
+ You are an expert video editor and viral content strategist.
27
+ Your task is to identify the most engaging segments from the provided transcript
28
+ that are suitable for short-form video platforms like TikTok, Reels, and YouTube Shorts.
29
 
30
+ **STRICT REQUIREMENTS:**
31
+ 1. **Duration**: duration MUST be between 60 seconds and 180 seconds (3 minutes)
32
+ 2. **Context Preservation**: Each segment must be a complete thought - no abrupt cuts
33
+ 3. **Sentence Boundaries**: Start at the beginning of a sentence, end at a natural conclusion
34
+ 4. **Meaning Coherence**: The clip must make sense on its own without requiring prior context
35
+
36
+ **SELECTION CRITERIA:**
37
+ - Strong hooks that grab attention
38
+ - Emotional moments, humor, or surprising revelations
39
+ - Clear beginning, middle, and satisfying conclusion
40
+ - High shareability potential
41
+
42
+ **JSON OUTPUT FORMAT (REQUIRED):**
43
  {{
44
  "segments": [
45
  {{
46
  "start_time": <float, start time in seconds>,
47
  "end_time": <float, end time in seconds>,
48
+ "duration": <float, duration in seconds (30-180)>,
49
  "description": "<string, brief summary of the clip content 10 words max>",
50
  "viral_score": <float, score from 0-10 indicating viral potential>,
51
  "reason": "<string, explanation of why this segment is engaging>"
52
  }}
53
  ]
54
  }}
55
+
56
+ **IMPORTANT NOTES:**
57
+ - If no suitable segments are found, return {{ "segments": [] }}
58
+ - Ensure all strings are properly escaped
59
+ - Each segment must be a complete, coherent thought
60
+ - Avoid cutting mid-sentence or mid-thought
61
+
 
 
 
 
 
62
  Transcript to Analyze:
63
  {transcript}
64
  """
65
 
66
  max_retries = 3
67
  base_delay = 5
68
+ content = None # FIX: initialize content to avoid UnboundLocalError
69
 
70
  for attempt in range(max_retries):
71
  try:
 
72
  response = client.chat.completions.create(
73
  model="deepseek/deepseek-chat",
74
  messages=[
 
81
  },
82
  temperature=0.7,
83
  )
84
+
85
  content = response.choices[0].message.content
86
  print(f"🤖 AI Raw Response (First 500 chars): {content[:500]}...")
87
 
 
90
  content = content.split("```json")[1].split("```")[0].strip()
91
  elif "```" in content:
92
  content = content.split("```")[1].split("```")[0].strip()
93
+
94
+ # Validate JSON and log segment count
95
+ data = json.loads(content)
96
+ segments_count = len(data.get("segments", []))
97
+ print(f"🤖 AI Response parsed successfully: Found {segments_count} segments.")
 
 
 
 
98
 
99
  return {"content": content}
100
+
101
  except Exception as e:
102
  print(f"❌ Error in OpenRouter analysis: {e}")
103
  if attempt < max_retries - 1:
 
111
  return {"content": '{"segments": []}'}
112
 
113
 
114
+ # Smart chunking system for long transcripts
115
def smart_chunk_transcript(transcript, max_tokens=4000):
    """
    Split a transcript into coherent chunks at sentence boundaries.

    Sentences (separated by '. ') are greedily packed into chunks whose
    word count stays at or below ``max_tokens`` — word count is used as a
    cheap proxy for token count.

    Args:
        transcript: Full transcript text; newlines are flattened to spaces.
        max_tokens: Approximate maximum words per chunk (default 4000).

    Returns:
        list[str]: Chunk strings, each terminated with a period.
        Empty list for empty/whitespace-only input.
    """
    # FIX: guard empty input — previously produced a spurious ['.'] chunk.
    if not transcript or not transcript.strip():
        return []
    # FIX: removed dead `import json` — json was never used in this function.

    # Simple sentence-based chunking. NOTE: the naive '. ' split treats
    # abbreviations like "e.g. " as sentence ends — acceptable for captions.
    sentences = transcript.replace('\n', ' ').split('. ')
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        sentence_length = len(sentence.split())

        # Close the current chunk once adding this sentence would exceed
        # the budget; never emit an empty chunk.
        if current_length + sentence_length > max_tokens and current_chunk:
            chunk_text = '. '.join(current_chunk) + '.'
            chunks.append(chunk_text.strip())
            current_chunk = [sentence]
            current_length = sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length

    # Flush the trailing partial chunk.
    if current_chunk:
        chunk_text = '. '.join(current_chunk) + '.'
        chunks.append(chunk_text.strip())

    return chunks
144
+
145
+
146
def analyze_transcript_with_chunking(transcript):
    """
    Analyze a transcript, chunking long content first.

    Transcripts of 3000 words or fewer go straight to analyze_transcript().
    Longer ones are split with smart_chunk_transcript(), analyzed chunk by
    chunk, deduplicated by rounded (start, end) time range, and the top 10
    segments by viral_score are returned as a JSON payload.
    """
    # Short transcripts: single-shot analysis, no chunking overhead.
    if len(transcript.split()) <= 3000:
        return analyze_transcript(transcript)

    logger.info("📦 Transcript too long, using smart chunking...")
    pieces = smart_chunk_transcript(transcript, max_tokens=3000)
    total = len(pieces)
    collected = []

    for index, piece in enumerate(pieces):
        logger.info(f"🔄 Processing chunk {index+1}/{total}...")
        analysis = analyze_transcript(piece)

        try:
            parsed = json.loads(analysis['content'])
        except Exception as e:
            logger.warning(f"⚠️ Failed to parse chunk {index+1}: {e}")
            continue
        if 'segments' in parsed:
            collected.extend(parsed['segments'])

    # If every chunk failed to yield segments, fall back to the
    # single-call path (same behavior as before chunking existed).
    if not collected:
        return analyze_transcript(transcript)

    # Highest-scoring first, so dedup keeps the best segment per time range.
    collected.sort(key=lambda seg: seg.get('viral_score', 0), reverse=True)

    deduped = []
    seen_ranges = set()
    for seg in collected:
        range_key = f"{seg.get('start_time', 0):.0f}-{seg.get('end_time', 0):.0f}"
        if range_key in seen_ranges:
            continue
        seen_ranges.add(range_key)
        deduped.append(seg)

    return {"content": json.dumps({"segments": deduped[:10]})}
182
 
183
+
184
+ # Testing
185
# Testing
if __name__ == "__main__":
    # Sample transcript in the "[start - end] text" format the analyzer expects.
    test_transcript = """
    [0.0 - 5.0] Welcome to today's video about productivity hacks that actually work.
    [5.0 - 15.0] The first hack is something I call the 2-minute rule. If something takes less than 2 minutes, do it immediately.
    [15.0 - 30.0] This simple rule has transformed my life. I used to procrastinate on small tasks, but now I handle them right away.
    [30.0 - 45.0] The second hack is batching similar tasks together. Instead of checking email 20 times a day, I check it twice.
    [45.0 - 60.0] This has saved me hours every week. I batch my emails, phone calls, and even errands.
    [60.0 - 90.0] The third hack is the Pomodoro Technique. Work for 25 minutes, then take a 5-minute break.
    [90.0 - 120.0] This technique helps me stay focused and avoid burnout. I get more done in less time.
    """

    logger.info("🧪 Testing AI Analysis...")
    result = analyze_transcript_with_chunking(test_transcript)

    try:
        payload = json.loads(result['content'])
        found = payload.get('segments', [])
        logger.info(f"✅ Found {len(found)} viral segments:")

        # One summary line per segment, numbered from 1.
        for rank, seg in enumerate(found, start=1):
            logger.info(f" #{rank} [{seg['start_time']:.0f}s-{seg['end_time']:.0f}s] "
                        f"Score: {seg['viral_score']}/10 - {seg['description']}")
    except Exception as e:
        logger.error(f"❌ Error parsing result: {e}")
        logger.info(f"Raw result: {result}")
core/config.py CHANGED
@@ -13,11 +13,11 @@ Cyrillic: ru, uk (Ukrainian)
13
  Hebrew: he
14
  Thai: th
15
 
16
- Font Priority (viral social media 2024-2025):
17
- Arabic → Tajawal > Cairo > Almarai > NotoSansArabic (fallback)
18
- Latin → Montserrat > Rubik > Oswald > Roboto
19
- Cyrillic → Montserrat (has Cyrillic) > Roboto
20
- CJK → Noto Sans SC/JP/KR (only reliable option for caption use)
21
  """
22
  import os
23
  import re
@@ -35,193 +35,112 @@ class Config:
35
  LOGS_DIR = os.path.join(BASE_DIR, "logs")
36
 
37
  # ─────────────────────────────────────────────────────────────────────────
38
- # Font Registry
39
- # All URLs use Google Fonts CSS2 API — wght@700/800 = Bold
40
  # ─────────────────────────────────────────────────────────────────────────
41
  FONTS = {
42
 
43
- # ── Latin / Universal (support Cyrillic + Latin) ──────────────────────
44
- # ✅ Montserrat has BOTH Latin AND Cyrillic — #1 viral font
45
  "Montserrat-Bold.ttf": "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap",
46
- # ✅ Rubik: modern, supports Latin + Cyrillic + Hebrew(!)
47
  "Rubik-Bold.ttf": "https://fonts.googleapis.com/css2?family=Rubik:wght@700&display=swap",
48
- # Oswald: condensed Latin only — fast speech / lots of words
49
  "Oswald-Bold.ttf": "https://fonts.googleapis.com/css2?family=Oswald:wght@700&display=swap",
50
- # Roboto: clean baseline, Latin + Cyrillic + Greek
51
  "Roboto-Bold.ttf": "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",
52
 
53
- # ── Arabic Script (ar, fa, ur) ─────────────────────────────────────────
54
- # ✅ #1 choice: Tajawal — modern social media Arabic, youth-oriented
55
  "Tajawal-Bold.ttf": "https://fonts.googleapis.com/css2?family=Tajawal:wght@700&display=swap",
56
- # Cairo: clean, highly legible — great for captions
57
  "Cairo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
58
- # Almarai: rounded, friendly — Gulf & Egyptian content
59
  "Almarai-Bold.ttf": "https://fonts.googleapis.com/css2?family=Almarai:wght@800&display=swap",
60
- # ✅ Noto Sans Arabic — universal fallback, covers ALL Arabic Unicode
61
  "NotoSansArabic-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
62
 
63
- # ── Persian / Farsi (fa) — Arabic script with Persian extensions ───────
64
- # ✅ Vazirmatn: most popular Persian font on social media 2024
65
  "Vazirmatn-Bold.ttf": "https://fonts.googleapis.com/css2?family=Vazirmatn:wght@700&display=swap",
66
 
67
- # ── Urdu (ur) — Nastaliq style not available on Google Fonts ──────────
68
- # Using Noto Sans Arabic as best available web fallback
69
- # Note: Authentic Urdu uses Nastaliq but it's not web-standard yet
70
  "NotoSansArabicUrdu-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
71
 
72
- # ── Hebrew (he) ────────────────────────────────────────────────────────
73
- # ✅ Rubik supports Hebrew natively (same font as Latin Rubik!)
74
- # Frank Ruhl Libre: traditional Hebrew newspaper feel
75
  "FrankRuhlLibre-Bold.ttf": "https://fonts.googleapis.com/css2?family=Frank+Ruhl+Libre:wght@700&display=swap",
76
- # ✅ Heebo: modern clean Hebrew for captions
77
  "Heebo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Heebo:wght@700&display=swap",
78
 
79
  # ── CJK ───────────────────────────────────────────────────────────────
80
- # Chinese Simplified
81
  "NotoSansSC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
82
- # Chinese Traditional
83
  "NotoSansTC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+TC:wght@700&display=swap",
84
- # Japanese
85
  "NotoSansJP-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
86
- # ✅ Korean — Noto Sans KR
87
  "NotoSansKR-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@700&display=swap",
88
 
89
- # ── Devanagari (hi, mr, ne) ────────────────────────────────────────────
90
- "NotoSansDevanagari-Bold.ttf":"https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
91
- # ✅ Poppins: has Devanagari + Latin — great for bilingual Hindi content
92
  "Poppins-Bold.ttf": "https://fonts.googleapis.com/css2?family=Poppins:wght@700&display=swap",
93
 
94
- # ── Thai (th) ──────────────────────────────────────────────────────────
95
- # ✅ Sarabun: most popular Thai social media font, clean & modern
96
  "Sarabun-Bold.ttf": "https://fonts.googleapis.com/css2?family=Sarabun:wght@700&display=swap",
97
- # Noto Sans Thai: reliable fallback
98
  "NotoSansThai-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Thai:wght@700&display=swap",
99
 
100
- # ── Ukrainian (uk) — Cyrillic ──────────────────────────────────────────
101
- # Montserrat covers Ukrainian Cyrillic, but for dedicated support:
102
  "NotoSans-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans:wght@700&display=swap",
103
  }
104
 
105
  # ─────────────────────────────────────────────────────────────────────────
106
- # Language → Best Caption Font
107
- #
108
- # Priority: most viral / readable on mobile screens
109
- # Rule: non-Latin scripts ALWAYS override style font
110
  # ─────────────────────────────────────────────────────────────────────────
111
  LANGUAGE_FONT_MAP = {
112
-
113
- # ── Arabic Script ──────────────────────────────────────────────────────
114
- # All three share Arabic script but have different letter forms
115
- "ar": "Tajawal-Bold.ttf", # ✅ #1 Arabic social media font
116
- "fa": "Vazirmatn-Bold.ttf", # ✅ #1 Persian/Farsi social media font
117
- "ur": "NotoSansArabic-Bold.ttf", # Best web fallback for Urdu
118
-
119
- # ── Hebrew ─────────────────────────────────────────────────────────────
120
- "he": "Heebo-Bold.ttf", # ✅ Modern, clean Hebrew captions
121
-
122
- # ── CJK ───────────────────────────────────────────────────────────────
123
- "zh": "NotoSansSC-Bold.ttf", # Simplified Chinese (mainland)
124
- "zh-tw": "NotoSansTC-Bold.ttf", # Traditional Chinese (Taiwan/HK)
125
- "ja": "NotoSansJP-Bold.ttf", # Japanese
126
- "ko": "NotoSansKR-Bold.ttf", # ✅ Korean (was missing!)
127
-
128
- # ── Devanagari ────────────────────────────────────────────────────────
129
- "hi": "NotoSansDevanagari-Bold.ttf", # Hindi
130
- "mr": "NotoSansDevanagari-Bold.ttf", # Marathi
131
- "ne": "NotoSansDevanagari-Bold.ttf", # Nepali
132
-
133
- # ── Thai ──────────────────────────────────────────────────────────────
134
- "th": "Sarabun-Bold.ttf", # ✅ Thai (was missing!)
135
-
136
- # ── Cyrillic ──────────────────────────────────────────────────────────
137
- # Montserrat has full Cyrillic support AND is the viral Latin font
138
- # This means Russian content gets the same premium feel
139
- "ru": "Montserrat-Bold.ttf", # ⬆️ Upgraded from Roboto
140
- "uk": "Montserrat-Bold.ttf", # Ukrainian (Cyrillic) — was missing
141
-
142
- # ── Latin Languages ────────────────────────────────────────────────────
143
- "en": "Montserrat-Bold.ttf", # English
144
- "fr": "Montserrat-Bold.ttf", # French
145
- "es": "Montserrat-Bold.ttf", # Spanish
146
- "de": "Montserrat-Bold.ttf", # German
147
- "pt": "Montserrat-Bold.ttf", # Portuguese (Brazil + Portugal)
148
- "it": "Montserrat-Bold.ttf", # Italian
149
- "tr": "Montserrat-Bold.ttf", # Turkish (Latin script since 1928)
150
- "nl": "Montserrat-Bold.ttf", # ✅ Dutch (was missing)
151
- "pl": "Montserrat-Bold.ttf", # ✅ Polish (was missing)
152
- "id": "Montserrat-Bold.ttf", # ✅ Indonesian (was missing)
153
- "vi": "Roboto-Bold.ttf", # ✅ Vietnamese — Roboto has better
154
- # diacritic coverage (tones)
155
- "sv": "Montserrat-Bold.ttf", # ✅ Swedish (was missing)
156
- "ro": "Montserrat-Bold.ttf", # ✅ Romanian (was missing)
157
-
158
- # ── Fallback ───────────────────────────────────────────────────────────
159
- # Noto Sans: designed to cover ALL Unicode — zero missing glyphs
160
- # Better than Montserrat for unknown scripts
161
- "default": "NotoSans-Bold.ttf", # ⬆️ Upgraded from Montserrat
162
  }
163
 
164
- # ─────────────────────────────────────────────────────────────────────────
165
- # Caption Style → Preferred Font (Latin-only styles)
166
- #
167
- # IMPORTANT: Non-Latin scripts ALWAYS use LANGUAGE_FONT_MAP regardless
168
- # of style. This map only applies when language is Latin/Cyrillic.
169
- # ─────────────────────────────────────────────────────────────────────────
170
  STYLE_FONT_MAP = {
171
- # Montserrat: #1 viral font — Alex Hormozi, MrBeast, Sidemen
172
  "classic": "Montserrat-Bold.ttf",
173
-
174
- # Rubik: distinctive modern feel — supports Latin + Cyrillic + Hebrew
175
- # ✅ Better than original for multilingual content
176
  "modern_glow": "Rubik-Bold.ttf",
177
-
178
- # Montserrat: proven viral MrBeast aesthetic
179
  "tiktok_bold": "Montserrat-Bold.ttf",
180
-
181
- # ✅ Changed: Oswald has NO Arabic/CJK support
182
- # Using Montserrat which handles more scripts gracefully
183
- # For pure Latin content, Oswald (condensed) is still good
184
- "tiktok_neon": "Montserrat-Bold.ttf", # was Oswald-Bold (no Arabic!)
185
-
186
- # Rubik: clean educator look + multilingual
187
  "youtube_clean": "Rubik-Bold.ttf",
188
-
189
- # Montserrat: karaoke / game-show energy
190
  "youtube_box": "Montserrat-Bold.ttf",
191
  }
192
 
193
- # ─────────────────────────────────────────────────────────────────────────
194
- # Unicode Range → Language Detection
195
- # Used in ensure_font() for script auto-detection
196
- # ─────────────────────────────────────────────────────────────────────────
197
  UNICODE_SCRIPT_RANGES = [
198
- # (start, end, language_code)
199
- # Order matters: more specific ranges first
200
- ("\u0600", "\u06FF", "ar"), # Arabic / Persian / Urdu (same block)
201
- ("\u0750", "\u077F", "ar"), # Arabic Supplement
202
- ("\u08A0", "\u08FF", "ar"), # Arabic Extended-A
203
- ("\u0590", "\u05FF", "he"), # Hebrew
204
- ("\uAC00", "\uD7AF", "ko"), # Korean Hangul syllables ✅ was missing
205
- ("\u1100", "\u11FF", "ko"), # Korean Jamo ✅ was missing
206
- ("\u4E00", "\u9FFF", "zh"), # CJK Unified Ideographs
207
- ("\u3400", "\u4DBF", "zh"), # CJK Extension A
208
- ("\u3040", "\u309F", "ja"), # Hiragana
209
- ("\u30A0", "\u30FF", "ja"), # Katakana
210
- ("\u0900", "\u097F", "hi"), # Devanagari
211
- ("\u0E00", "\u0E7F", "th"), # Thai ✅ was missing
212
- ("\u0400", "\u04FF", "ru"), # Cyrillic
213
- ("\u0500", "\u052F", "ru"), # Cyrillic Supplement
214
  ]
215
 
216
- # ─────────────────────────────────────────────────────────────────────────
217
- # RTL Languages (Right-to-Left)
218
- # Used for text rendering direction
219
- # ─────────────────────────────────────────────────────────────────────────
220
  RTL_LANGUAGES = {"ar", "fa", "ur", "he"}
221
 
222
- # ─────────────────────────────────────────────────────────────────────────
223
- # Video settings
224
- # ─────────────────────────────────────────────────────────────────────────
225
  DEFAULT_SIZE = (1080, 1920)
226
  CHUNK_SIZE_SECONDS = 600
227
  OVERLAP_SECONDS = 60
@@ -243,61 +162,34 @@ class Config:
243
  os.makedirs(d, exist_ok=True)
244
 
245
  # ─────────────────────────────────────────────────────────────────────────
246
- # Language detection from text
247
  # ─────────────────────────────────────────────────────────────────────────
248
  @classmethod
249
  def detect_language_from_text(cls, text: str) -> str | None:
250
- """
251
- Detects script/language from Unicode character ranges.
252
- Returns language code or None if only Latin/ASCII detected.
253
-
254
- More reliable than the original inline checks in ensure_font()
255
- because it covers Korean, Thai, Hebrew, Persian, and more.
256
- """
257
  if not text:
258
  return None
259
-
260
  for start, end, lang in cls.UNICODE_SCRIPT_RANGES:
261
  if any(start <= c <= end for c in text):
262
  return lang
263
-
264
- return None # Latin / unknown
265
 
266
  @classmethod
267
  def is_rtl(cls, language: str) -> bool:
268
- """Returns True if language is right-to-left."""
269
  return language in cls.RTL_LANGUAGES
270
 
271
  @classmethod
272
  def get_font_for_language(cls, language: str, style_name: str = None) -> str:
273
- """
274
- Returns the best font filename for a given language + style combination.
275
-
276
- Priority:
277
- 1. Non-Latin scripts → always use LANGUAGE_FONT_MAP (ignores style)
278
- 2. Latin with explicit style → use STYLE_FONT_MAP
279
- 3. Latin with known language → use LANGUAGE_FONT_MAP
280
- 4. Unknown → use LANGUAGE_FONT_MAP default
281
- """
282
  NON_LATIN = {
283
  "ar", "fa", "ur", "he",
284
  "zh", "zh-tw", "ja", "ko",
285
- "hi", "mr", "ne",
286
- "th",
287
  }
288
-
289
- # Non-Latin: always use language map regardless of style
290
  if language in NON_LATIN:
291
  return cls.LANGUAGE_FONT_MAP.get(language, cls.LANGUAGE_FONT_MAP["default"])
292
-
293
- # Latin/Cyrillic with style preference
294
  if style_name and style_name in cls.STYLE_FONT_MAP:
295
  return cls.STYLE_FONT_MAP[style_name]
296
-
297
- # Latin with known language
298
  if language in cls.LANGUAGE_FONT_MAP:
299
  return cls.LANGUAGE_FONT_MAP[language]
300
-
301
  return cls.LANGUAGE_FONT_MAP["default"]
302
 
303
  # ─────────────────────────────────────────────────────────────────────────
@@ -307,35 +199,65 @@ class Config:
307
  def get_urls(css_content: str, prefer_latin: bool = True) -> list:
308
  """
309
  Extracts font file URLs from a Google Fonts CSS response.
310
- Prefers 'latin' subset for Latin fonts, first subset for others.
311
  """
 
312
  pattern = re.compile(
313
  r'/\*\s*\[?\d*\]?\s*([\w\-]+)\s*\*/[^}]*?url\(([^)]+)\)',
314
  re.DOTALL,
315
  )
316
  pairs = pattern.findall(css_content)
317
 
318
- if not pairs:
319
- bare = re.findall(r'url\(([^)]+)\)', css_content)
320
- return bare if bare else []
321
-
322
- subset_map = {subset.lower(): url.strip() for subset, url in pairs}
323
-
324
- if prefer_latin:
325
- for key in ("latin", "latin-ext"):
326
- if key in subset_map:
327
- return [subset_map[key]]
328
- return [list(subset_map.values())[-1]]
329
- else:
330
- # Arabic / CJK / etc: first subset = script-specific
331
- return [list(subset_map.values())[0]]
 
 
 
 
332
 
 
 
 
333
  @staticmethod
334
  def download_font_from_css(css_url: str, output_path: str) -> bool:
335
  """
336
- Downloads the correct font file for a given CSS URL.
337
- Auto-detects Latin vs non-Latin based on filename.
 
 
 
 
 
 
 
 
 
338
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  NON_LATIN_KEYWORDS = (
340
  "arabic", "noto", "devanagari", "sc", "jp", "kr", "tc",
341
  "thai", "sarabun", "heebo", "frank", "vazir", "tajawal",
@@ -345,39 +267,81 @@ class Config:
345
  is_non_latin = any(kw in filename for kw in NON_LATIN_KEYWORDS)
346
  prefer_latin = not is_non_latin
347
 
348
- headers = {
349
- "User-Agent": (
350
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
351
- "AppleWebKit/537.36 (KHTML, like Gecko) "
352
- "Chrome/124.0.0.0 Safari/537.36"
353
- )
354
- }
355
-
356
- try:
357
- resp = requests.get(css_url, headers=headers, timeout=15)
358
- resp.raise_for_status()
359
-
360
- urls = Config.get_urls(resp.text, prefer_latin=prefer_latin)
361
- if not urls:
362
- print(f"❌ No font URLs found in CSS: {css_url}")
363
- return False
364
-
365
- font_url = urls[0]
366
- subset_lbl = "latin" if prefer_latin else "script"
367
- print(f"⬇️ Downloading font ({subset_lbl}) → {font_url}")
368
-
369
- font_resp = requests.get(font_url, headers=headers, timeout=30)
370
- font_resp.raise_for_status()
371
-
372
- with open(output_path, "wb") as f:
373
- f.write(font_resp.content)
374
-
375
- print(f"✅ Font saved: {output_path}")
376
- return True
377
-
378
- except requests.RequestException as e:
379
- print(f"❌ Network error: {e}")
380
- return False
381
- except Exception as e:
382
- print(f"❌ Unexpected error: {e}")
383
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  Hebrew: he
14
  Thai: th
15
 
16
+ FONT DOWNLOAD FIX:
17
+ Google Fonts returns woff2 for modern browsers — Pillow cannot load woff2.
18
+ Solution: use an old IE User-Agent to force Google Fonts to return TTF URLs.
19
+ Modern UA fonts.gstatic.com/s/cairo/xxx.woff2 ← Pillow FAILS
20
+ Old IE UAfonts.gstatic.com/s/cairo/xxx.ttf ← Pillow works
21
  """
22
  import os
23
  import re
 
35
  LOGS_DIR = os.path.join(BASE_DIR, "logs")
36
 
37
  # ─────────────────────────────────────────────────────────────────────────
38
+ # Font Registry — Google Fonts CSS2 API URLs
 
39
  # ─────────────────────────────────────────────────────────────────────────
40
  FONTS = {
41
 
42
+ # ── Latin / Universal ──────────────────────────────────────────────────
 
43
  "Montserrat-Bold.ttf": "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap",
 
44
  "Rubik-Bold.ttf": "https://fonts.googleapis.com/css2?family=Rubik:wght@700&display=swap",
 
45
  "Oswald-Bold.ttf": "https://fonts.googleapis.com/css2?family=Oswald:wght@700&display=swap",
 
46
  "Roboto-Bold.ttf": "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",
47
 
48
+ # ── Arabic Script ──────────────────────────────────────────────────────
 
49
  "Tajawal-Bold.ttf": "https://fonts.googleapis.com/css2?family=Tajawal:wght@700&display=swap",
 
50
  "Cairo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
 
51
  "Almarai-Bold.ttf": "https://fonts.googleapis.com/css2?family=Almarai:wght@800&display=swap",
 
52
  "NotoSansArabic-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
53
 
54
+ # ── Persian ────────────────────────────────────────────────────────────
 
55
  "Vazirmatn-Bold.ttf": "https://fonts.googleapis.com/css2?family=Vazirmatn:wght@700&display=swap",
56
 
57
+ # ── Urdu ───────────────────────────────────────────────────────────────
 
 
58
  "NotoSansArabicUrdu-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
59
 
60
+ # ── Hebrew ───────────────────��─────────────────────────────────────────
 
 
61
  "FrankRuhlLibre-Bold.ttf": "https://fonts.googleapis.com/css2?family=Frank+Ruhl+Libre:wght@700&display=swap",
 
62
  "Heebo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Heebo:wght@700&display=swap",
63
 
64
  # ── CJK ───────────────────────────────────────────────────────────────
 
65
  "NotoSansSC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
 
66
  "NotoSansTC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+TC:wght@700&display=swap",
 
67
  "NotoSansJP-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
 
68
  "NotoSansKR-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@700&display=swap",
69
 
70
+ # ── Devanagari ────────────────────────────────────────────────────────
71
+ "NotoSansDevanagari-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
 
72
  "Poppins-Bold.ttf": "https://fonts.googleapis.com/css2?family=Poppins:wght@700&display=swap",
73
 
74
+ # ── Thai ──────────────────────────────────────────────────────────────
 
75
  "Sarabun-Bold.ttf": "https://fonts.googleapis.com/css2?family=Sarabun:wght@700&display=swap",
 
76
  "NotoSansThai-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Thai:wght@700&display=swap",
77
 
78
+ # ── Universal fallback ─────────────────────────────────────────────────
 
79
  "NotoSans-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans:wght@700&display=swap",
80
  }
81
 
82
  # ─────────────────────────────────────────────────────────────────────────
83
+ # Language → Font
 
 
 
84
  # ─────────────────────────────────────────────────────────────────────────
85
  LANGUAGE_FONT_MAP = {
86
+ "ar": "Tajawal-Bold.ttf",
87
+ "fa": "Vazirmatn-Bold.ttf",
88
+ "ur": "NotoSansArabic-Bold.ttf",
89
+ "he": "Heebo-Bold.ttf",
90
+ "zh": "NotoSansSC-Bold.ttf",
91
+ "zh-tw": "NotoSansTC-Bold.ttf",
92
+ "ja": "NotoSansJP-Bold.ttf",
93
+ "ko": "NotoSansKR-Bold.ttf",
94
+ "hi": "NotoSansDevanagari-Bold.ttf",
95
+ "mr": "NotoSansDevanagari-Bold.ttf",
96
+ "ne": "NotoSansDevanagari-Bold.ttf",
97
+ "th": "Sarabun-Bold.ttf",
98
+ "ru": "Montserrat-Bold.ttf",
99
+ "uk": "Montserrat-Bold.ttf",
100
+ "en": "Montserrat-Bold.ttf",
101
+ "fr": "Montserrat-Bold.ttf",
102
+ "es": "Montserrat-Bold.ttf",
103
+ "de": "Montserrat-Bold.ttf",
104
+ "pt": "Montserrat-Bold.ttf",
105
+ "it": "Montserrat-Bold.ttf",
106
+ "tr": "Montserrat-Bold.ttf",
107
+ "nl": "Montserrat-Bold.ttf",
108
+ "pl": "Montserrat-Bold.ttf",
109
+ "id": "Montserrat-Bold.ttf",
110
+ "vi": "Roboto-Bold.ttf",
111
+ "sv": "Montserrat-Bold.ttf",
112
+ "ro": "Montserrat-Bold.ttf",
113
+ "default": "NotoSans-Bold.ttf",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  }
115
 
 
 
 
 
 
 
116
  STYLE_FONT_MAP = {
 
117
  "classic": "Montserrat-Bold.ttf",
 
 
 
118
  "modern_glow": "Rubik-Bold.ttf",
 
 
119
  "tiktok_bold": "Montserrat-Bold.ttf",
120
+ "tiktok_neon": "Montserrat-Bold.ttf",
 
 
 
 
 
 
121
  "youtube_clean": "Rubik-Bold.ttf",
 
 
122
  "youtube_box": "Montserrat-Bold.ttf",
123
  }
124
 
 
 
 
 
125
  UNICODE_SCRIPT_RANGES = [
126
+ ("\u0600", "\u06FF", "ar"),
127
+ ("\u0750", "\u077F", "ar"),
128
+ ("\u08A0", "\u08FF", "ar"),
129
+ ("\u0590", "\u05FF", "he"),
130
+ ("\uAC00", "\uD7AF", "ko"),
131
+ ("\u1100", "\u11FF", "ko"),
132
+ ("\u4E00", "\u9FFF", "zh"),
133
+ ("\u3400", "\u4DBF", "zh"),
134
+ ("\u3040", "\u309F", "ja"),
135
+ ("\u30A0", "\u30FF", "ja"),
136
+ ("\u0900", "\u097F", "hi"),
137
+ ("\u0E00", "\u0E7F", "th"),
138
+ ("\u0400", "\u04FF", "ru"),
139
+ ("\u0500", "\u052F", "ru"),
 
 
140
  ]
141
 
 
 
 
 
142
  RTL_LANGUAGES = {"ar", "fa", "ur", "he"}
143
 
 
 
 
144
  DEFAULT_SIZE = (1080, 1920)
145
  CHUNK_SIZE_SECONDS = 600
146
  OVERLAP_SECONDS = 60
 
162
  os.makedirs(d, exist_ok=True)
163
 
164
  # ─────────────────────────────────────────────────────────────────────────
165
+ # Language detection
166
  # ─────────────────────────────────────────────────────────────────────────
167
  @classmethod
168
  def detect_language_from_text(cls, text: str) -> str | None:
 
 
 
 
 
 
 
169
  if not text:
170
  return None
 
171
  for start, end, lang in cls.UNICODE_SCRIPT_RANGES:
172
  if any(start <= c <= end for c in text):
173
  return lang
174
+ return None
 
175
 
176
  @classmethod
177
  def is_rtl(cls, language: str) -> bool:
 
178
  return language in cls.RTL_LANGUAGES
179
 
180
  @classmethod
181
  def get_font_for_language(cls, language: str, style_name: str = None) -> str:
 
 
 
 
 
 
 
 
 
182
  NON_LATIN = {
183
  "ar", "fa", "ur", "he",
184
  "zh", "zh-tw", "ja", "ko",
185
+ "hi", "mr", "ne", "th",
 
186
  }
 
 
187
  if language in NON_LATIN:
188
  return cls.LANGUAGE_FONT_MAP.get(language, cls.LANGUAGE_FONT_MAP["default"])
 
 
189
  if style_name and style_name in cls.STYLE_FONT_MAP:
190
  return cls.STYLE_FONT_MAP[style_name]
 
 
191
  if language in cls.LANGUAGE_FONT_MAP:
192
  return cls.LANGUAGE_FONT_MAP[language]
 
193
  return cls.LANGUAGE_FONT_MAP["default"]
194
 
195
  # ─────────────────────────────────────────────────────────────────────────
 
199
  def get_urls(css_content: str, prefer_latin: bool = True) -> list:
200
  """
201
  Extracts font file URLs from a Google Fonts CSS response.
202
+ Prefers TTF over woff2 because Pillow cannot load woff2.
203
  """
204
+ # Extract all (subset_comment, url) pairs
205
  pattern = re.compile(
206
  r'/\*\s*\[?\d*\]?\s*([\w\-]+)\s*\*/[^}]*?url\(([^)]+)\)',
207
  re.DOTALL,
208
  )
209
  pairs = pattern.findall(css_content)
210
 
211
+ if pairs:
212
+ subset_map = {s.lower(): u.strip().strip("'\"") for s, u in pairs}
213
+ if prefer_latin:
214
+ for key in ("latin", "latin-ext"):
215
+ if key in subset_map:
216
+ return [subset_map[key]]
217
+ return [list(subset_map.values())[-1]]
218
+ else:
219
+ return [list(subset_map.values())[0]]
220
+
221
+ # Fallback: grab all raw URLs
222
+ all_urls = re.findall(r'url\(([^)]+)\)', css_content)
223
+ all_urls = [u.strip().strip("'\"") for u in all_urls]
224
+
225
+ # Prefer TTF, then woff (not woff2 — Pillow can't open woff2)
226
+ ttf = [u for u in all_urls if u.endswith(".ttf")]
227
+ woff = [u for u in all_urls if u.endswith(".woff") and not u.endswith(".woff2")]
228
+ return ttf or woff or all_urls
229
 
230
+ # ─────────────────────────────────────────────────────────────────────────
231
+ # Font CSS download ← FIXED: uses TTF-forcing User-Agent
232
+ # ─────────────────────────────────────────────────────────────────────────
233
  @staticmethod
234
  def download_font_from_css(css_url: str, output_path: str) -> bool:
235
  """
236
+ Downloads the correct font file for a given Google Fonts CSS URL.
237
+
238
+ KEY FIX: Uses an old IE 6 User-Agent to force Google Fonts to return
239
+ TTF URLs instead of woff2. Pillow/FreeType cannot open woff2 files.
240
+
241
+ Modern Chrome UA → Google returns .woff2 → Pillow FAILS ❌
242
+ Old IE 6 UA → Google returns .ttf → Pillow works ✅
243
+
244
+ Two-pass strategy:
245
+ Pass 1: Old IE UA → gets TTF (ideal for Pillow)
246
+ Pass 2: Modern UA → gets woff2 as last resort (may fail in Pillow)
247
  """
248
+ # ── User-Agent constants ──────────────────────────────────────────────
249
+ # IE 6 on Windows XP — forces Google Fonts to return legacy TTF format
250
+ UA_TTF = (
251
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; "
252
+ "SV1; .NET CLR 1.1.4322)"
253
+ )
254
+ # Modern Chrome — returns woff2 (not ideal for Pillow, last resort)
255
+ UA_MODERN = (
256
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
257
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
258
+ "Chrome/124.0.0.0 Safari/537.36"
259
+ )
260
+
261
  NON_LATIN_KEYWORDS = (
262
  "arabic", "noto", "devanagari", "sc", "jp", "kr", "tc",
263
  "thai", "sarabun", "heebo", "frank", "vazir", "tajawal",
 
267
  is_non_latin = any(kw in filename for kw in NON_LATIN_KEYWORDS)
268
  prefer_latin = not is_non_latin
269
 
270
+ os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
271
+
272
+ for pass_num, ua in enumerate([UA_TTF, UA_MODERN], start=1):
273
+ ua_label = "TTF-forcing (IE6)" if pass_num == 1 else "Modern (woff2 fallback)"
274
+ try:
275
+ # ── Fetch CSS ─────────────────────────────────────────────────
276
+ resp = requests.get(
277
+ css_url,
278
+ headers={"User-Agent": ua},
279
+ timeout=15
280
+ )
281
+ resp.raise_for_status()
282
+
283
+ urls = Config.get_urls(resp.text, prefer_latin=prefer_latin)
284
+ if not urls:
285
+ print(f"⚠️ Pass {pass_num} ({ua_label}): no font URLs in CSS")
286
+ continue
287
+
288
+ font_url = urls[0]
289
+ ext = os.path.splitext(font_url.split("?")[0])[-1].lower()
290
+ print(f"⬇️ Pass {pass_num} ({ua_label}): {ext} → {font_url[:70]}…")
291
+
292
+ # ── Download font file ────────────────────────────────────────
293
+ font_resp = requests.get(
294
+ font_url,
295
+ headers={"User-Agent": UA_MODERN},
296
+ timeout=30
297
+ )
298
+ font_resp.raise_for_status()
299
+ data = font_resp.content
300
+
301
+ # ── Validate: check magic bytes ───────────────────────────────
302
+ if len(data) < 10_000:
303
+ print(f"⚠️ File too small ({len(data)} B) — likely error page, skipping")
304
+ continue
305
+
306
+ magic = data[:4]
307
+ is_ttf_magic = magic in (
308
+ b"\x00\x01\x00\x00", # TrueType
309
+ b"OTTO", # OpenType CFF
310
+ b"true", # TrueType variant
311
+ b"wOFF", # WOFF (Pillow ≥ 9.2 can open)
312
+ b"wOF2", # WOFF2 (Pillow may fail)
313
+ )
314
+
315
+ if not is_ttf_magic:
316
+ print(
317
+ f"⚠️ Pass {pass_num}: unexpected magic bytes {magic.hex()} "
318
+ f"(probably HTML error page) — skipping"
319
+ )
320
+ continue
321
+
322
+ if magic == b"wOF2":
323
+ print(
324
+ f"⚠️ Pass {pass_num}: received WOFF2 — "
325
+ f"Pillow may not be able to open this. "
326
+ f"Consider installing: sudo apt-get install fonts-noto-core"
327
+ )
328
+
329
+ with open(output_path, "wb") as f:
330
+ f.write(data)
331
+
332
+ print(f"✅ Font saved ({len(data):,} B, {ext}): {output_path}")
333
+ return True
334
+
335
+ except requests.RequestException as e:
336
+ print(f"❌ Pass {pass_num} network error: {e}")
337
+ except Exception as e:
338
+ print(f"❌ Pass {pass_num} unexpected error: {e}")
339
+
340
+ # ── Both passes failed ────────────────────────────────────────────────
341
+ print(
342
+ f"❌ All download attempts failed for {os.path.basename(output_path)}.\n"
343
+ f" Fix on Ubuntu/Debian:\n"
344
+ f" sudo apt-get install -y fonts-noto-core fonts-arabeyes\n"
345
+ f" Or copy a TTF manually to: {output_path}"
346
+ )
347
+ return False
core/free_translator.py CHANGED
@@ -7,32 +7,24 @@ class FreeTranslator:
7
  def __init__(self):
8
  pass
9
 
10
- def translate_text(self, text, target_language_code):
11
  """ترجمة مجانية باستخدام MyMemory API بدون httpx"""
12
  if not text.strip():
13
  return "", []
 
 
 
 
14
 
15
- # خريطة اللغات
16
- lang_map = {
17
- "ar": "ar",
18
- "en": "en",
19
- "hi": "hi",
20
- "zh": "zh",
21
- "es": "es",
22
- "fr": "fr",
23
- "de": "de",
24
- "ru": "ru",
25
- "ja": "ja"
26
- }
27
-
28
- target_lang = lang_map.get(target_language_code, target_language_code)
29
 
30
  try:
31
  # استخدام urllib بدلاً من requests لتجنب مشكلة httpx
32
  url = "https://api.mymemory.translated.net/get"
33
  params = {
34
  'q': text,
35
- 'langpair': f'en|{target_lang}'
36
  }
37
 
38
  # بناء URL مع parameters
 
7
  def __init__(self):
8
  pass
9
 
10
+ def translate_text(self, text, target_language_code, source_language_code="en"):
11
  """ترجمة مجانية باستخدام MyMemory API بدون httpx"""
12
  if not text.strip():
13
  return "", []
14
+
15
+ # Handle same language case
16
+ if source_language_code.lower() == target_language_code.lower():
17
+ return text, []
18
 
19
+ target_lang = target_language_code.lower()
20
+ source_lang = source_language_code.lower()
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  try:
23
  # استخدام urllib بدلاً من requests لتجنب مشكلة httpx
24
  url = "https://api.mymemory.translated.net/get"
25
  params = {
26
  'q': text,
27
+ 'langpair': f'{source_lang}|{target_lang}'
28
  }
29
 
30
  # بناء URL مع parameters
core/stt.py CHANGED
@@ -33,7 +33,7 @@ SUBTITLE_STANDARDS = {
33
  }
34
 
35
  # Sentence-ending punctuation for smart splitting
36
- SENTENCE_ENDINGS = re.compile(r'[.!?؟。!?]+$')
37
  CLAUSE_BOUNDARIES = re.compile(r'[,،;:،]+$')
38
 
39
 
@@ -49,7 +49,6 @@ class SubtitleSegmenter:
49
 
50
  @staticmethod
51
  def count_chars(text: str) -> int:
52
- """Count displayable characters (strip extra spaces)."""
53
  return len(text.strip())
54
 
55
  @staticmethod
@@ -62,7 +61,6 @@ class SubtitleSegmenter:
62
 
63
  @staticmethod
64
  def calc_min_duration(text: str) -> float:
65
- """Minimum display duration based on reading speed (EBU R37)."""
66
  chars = SubtitleSegmenter.count_chars(text)
67
  cps = SUBTITLE_STANDARDS["reading_speed_cps"]
68
  return max(chars / cps, SUBTITLE_STANDARDS["min_duration_sec"])
@@ -73,13 +71,6 @@ class SubtitleSegmenter:
73
  Splits a flat list of word dicts into subtitle blocks following
74
  international standards. Each block has:
75
  { text, start, end, words, line1, line2 }
76
-
77
- Priority for line breaks:
78
- 1. Sentence endings (.!?)
79
- 2. Clause boundaries (,;:)
80
- 3. Max chars per line (42)
81
- 4. Max words per block
82
- 5. Pause gaps in audio (> 0.5s)
83
  """
84
  if not words:
85
  return []
@@ -89,18 +80,15 @@ class SubtitleSegmenter:
89
  MAX_WORDS = SUBTITLE_STANDARDS["max_words_per_block"]
90
  PAUSE_GAP = SUBTITLE_STANDARDS["sentence_pause_gap"]
91
 
92
- blocks = []
93
  current_words = []
94
  current_chars = 0
95
 
96
  def flush_block(word_list):
97
- """Convert accumulated words into a subtitle block with line splitting."""
98
  if not word_list:
99
  return None
100
-
101
  full_text = " ".join(w["text"] for w in word_list)
102
  lines = SubtitleSegmenter._split_into_lines(full_text, MAX_CHARS)
103
-
104
  return {
105
  "text": full_text,
106
  "start": word_list[0]["start"],
@@ -118,31 +106,22 @@ class SubtitleSegmenter:
118
  word_chars = len(word_text)
119
  is_last = (i == len(words) - 1)
120
 
121
- # Detect natural pause between this word and the next
122
  next_pause = 0.0
123
  if not is_last:
124
  next_pause = words[i + 1]["start"] - word["end"]
125
 
126
- # Would adding this word exceed the block limit?
127
- new_total = current_chars + (1 if current_words else 0) + word_chars
128
  word_count = len(current_words) + 1
129
 
130
- # ── Flush conditions (in priority order) ──────────────────────────
131
  should_flush = (
132
- # 1. Adding word would exceed max block chars
133
  (current_words and new_total > MAX_BLOCK) or
134
- # 2. Too many words
135
  (current_words and word_count > MAX_WORDS) or
136
- # 3. Long natural pause after current word (sentence boundary)
137
  (current_words and next_pause >= PAUSE_GAP and
138
  SubtitleSegmenter.is_sentence_end(word_text)) or
139
- # 4. Very long pause (>1s) — definitely a new sentence
140
  (current_words and next_pause > 1.0)
141
  )
142
 
143
  if should_flush and current_words:
144
- # Check if we should include this word before flushing
145
- # (if it's a sentence ending, include it in the current block)
146
  if SubtitleSegmenter.is_sentence_end(word_text) and new_total <= MAX_BLOCK:
147
  current_words.append(word)
148
  current_chars = new_total
@@ -153,12 +132,9 @@ class SubtitleSegmenter:
153
  current_words = []
154
  current_chars = 0
155
 
156
- # If we already added the word above, skip re-adding
157
  if SubtitleSegmenter.is_sentence_end(word_text) and word in current_words:
158
  continue
159
 
160
- # ── Prefer breaking at clause boundaries when close to line limit ─
161
- # If we're on the second line and hit a comma, flush
162
  if (current_words and
163
  current_chars > MAX_CHARS and
164
  SubtitleSegmenter.is_clause_boundary(word_text)):
@@ -173,32 +149,26 @@ class SubtitleSegmenter:
173
  current_words.append(word)
174
  current_chars += (1 if len(current_words) > 1 else 0) + word_chars
175
 
176
- # Flush remaining words
177
  if current_words:
178
  block = flush_block(current_words)
179
  if block:
180
  blocks.append(block)
181
 
182
- # ── Post-process: enforce duration standards ───────────────────────────
183
  blocks = SubtitleSegmenter._enforce_duration_standards(blocks)
184
-
185
  return blocks
186
 
187
  @staticmethod
188
  def _split_into_lines(text: str, max_chars: int) -> list:
189
  """
190
  Splits text into max 2 lines at a natural word boundary near the midpoint.
191
- Prefers splitting at punctuation, then at the most balanced midpoint.
192
- Returns [line1] or [line1, line2].
193
  """
194
  if len(text) <= max_chars:
195
  return [text]
196
 
197
  words = text.split()
198
  if len(words) <= 1:
199
- return [text] # Can't split single word
200
 
201
- # Try to find the best split point
202
  best_split = len(words) // 2
203
  best_balance = float('inf')
204
 
@@ -206,15 +176,12 @@ class SubtitleSegmenter:
206
  line1 = " ".join(words[:split_idx])
207
  line2 = " ".join(words[split_idx:])
208
 
209
- # Hard reject: either line over max_chars
210
  if len(line1) > max_chars or len(line2) > max_chars:
211
  continue
212
 
213
- # Prefer splits at punctuation
214
- punctuation_bonus = 5 if CLAUSE_BOUNDARIES.search(words[split_idx - 1]) else 0
215
- sentence_bonus = 10 if SENTENCE_ENDINGS.search(words[split_idx - 1]) else 0
216
 
217
- # Balance score (closer to equal = better)
218
  balance = abs(len(line1) - len(line2)) - punctuation_bonus - sentence_bonus
219
 
220
  if balance < best_balance:
@@ -224,7 +191,6 @@ class SubtitleSegmenter:
224
  line1 = " ".join(words[:best_split])
225
  line2 = " ".join(words[best_split:])
226
 
227
- # Fallback: if line2 still too long, truncate gracefully
228
  if len(line2) > max_chars:
229
  line2 = line2[:max_chars - 1] + "…"
230
 
@@ -233,10 +199,7 @@ class SubtitleSegmenter:
233
  @staticmethod
234
  def _enforce_duration_standards(blocks: list) -> list:
235
  """
236
- Post-processes blocks to:
237
- - Enforce minimum display duration
238
- - Enforce maximum display duration (split if needed)
239
- - Ensure minimum gap between consecutive blocks (40ms)
240
  """
241
  if not blocks:
242
  return blocks
@@ -246,26 +209,20 @@ class SubtitleSegmenter:
246
  MIN_GAP = SUBTITLE_STANDARDS["min_gap_between"]
247
 
248
  processed = []
249
- for i, block in enumerate(blocks):
250
  duration = block["end"] - block["start"]
251
 
252
- # Extend duration if too short
253
  if duration < MIN_DUR:
254
  block = {**block, "end": block["start"] + MIN_DUR}
255
-
256
- # Trim if too long (shouldn't happen with word-level splitting)
257
  if duration > MAX_DUR:
258
  block = {**block, "end": block["start"] + MAX_DUR}
259
 
260
  processed.append(block)
261
 
262
- # Enforce gap between consecutive subtitles
263
  for i in range(1, len(processed)):
264
- prev_end = processed[i - 1]["end"]
265
  curr_start = processed[i]["start"]
266
-
267
  if curr_start - prev_end < MIN_GAP:
268
- # Move current block start forward slightly
269
  processed[i] = {**processed[i], "start": prev_end + MIN_GAP}
270
 
271
  return processed
@@ -274,13 +231,13 @@ class SubtitleSegmenter:
274
  # ─────────────────────────────────────────────────────────────────────────────
275
 
276
  class STT:
277
- def __init__(self, model_size="large-v3"):
278
  """
279
- Default changed to large-v3:
280
- - Significantly better word-level timestamps (critical for highlight_word mode)
281
- - Better sentence segmentation boundaries
282
- - Improved Arabic/multilingual accuracy
283
- Note: Base model timing is ±200ms off; large-v3 is ±50ms.
284
  """
285
  self.duration = 0
286
  self.model_size = model_size
@@ -301,13 +258,13 @@ class STT:
301
  """
302
  Transcribes video and returns subtitle-standard-compliant segments.
303
 
304
- Changes from original:
305
- All segments post-processed through SubtitleSegmenter
306
- Max 42 chars per line enforced
307
- Max 2 lines per block
308
- Natural sentence/clause boundary splitting
309
- EBU R37 reading speed enforcement
310
- 40ms minimum gap between subtitles
311
  """
312
  print(f"🎙️ Transcribing: {video_path} (Language: {language or 'Auto'}, "
313
  f"Mode: {timestamp_mode}, VAD: {vad_filter})")
@@ -315,7 +272,6 @@ class STT:
315
  log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)),
316
  "logs", "transcript.log")
317
 
318
- # ── Language normalisation ────────────────────────────────────────────
319
  actual_stt_lang = None
320
  if language:
321
  lang_val = language.value if hasattr(language, 'value') else str(language)
@@ -347,25 +303,24 @@ class STT:
347
  print(f"⚠️ Cache setup error: {e}")
348
 
349
  # ── Whisper transcription ────────────────────────────────────────────
350
- # Always request word_timestamps — needed for standards-compliant splitting
351
  print(f"🔍 Starting Whisper transcription (model={self.model_size}, "
352
  f"word_timestamps=True)…")
353
 
354
  segments_iter, info = self.model.transcribe(
355
  video_path,
356
- beam_size=5, # Higher beam → better accuracy
357
- word_timestamps=True, # Always needed for standards
358
  language=actual_stt_lang,
359
  vad_filter=vad_filter,
360
  vad_parameters=dict(min_silence_duration_ms=500) if vad_filter else None,
361
- condition_on_previous_text=True, # Better sentence continuity
362
  )
363
  detected_lang = info.language
364
  print(f"🔍 Detected language: {detected_lang}")
365
 
366
  # ── Collect all words with timing ────────────────────────────────────
367
- all_words = []
368
- raw_segments = list(segments_iter) # materialise the generator
369
 
370
  for seg in raw_segments:
371
  if seg.words:
@@ -375,18 +330,17 @@ class STT:
375
  all_words.append({
376
  "text": text,
377
  "start": round(w.start, 3),
378
- "end": round(w.end, 3),
379
  "is_highlight": False,
380
  })
381
  else:
382
- # Fallback: segment-level only (no word timestamps available)
383
  seg_words = seg.text.strip().split()
384
  if seg_words:
385
  avg = (seg.end - seg.start) / len(seg_words)
386
  for i, wt in enumerate(seg_words):
387
  all_words.append({
388
  "text": wt,
389
- "start": round(seg.start + i * avg, 3),
390
  "end": round(seg.start + (i + 1) * avg, 3),
391
  "is_highlight": False,
392
  })
@@ -401,7 +355,7 @@ class STT:
401
  print(f"✅ Generated {len(subtitle_blocks)} subtitle blocks "
402
  f"(was {len(raw_segments)} raw segments)")
403
 
404
- # ── Build segments_list in expected format ───────────────────────────
405
  segments_list = []
406
  full_text = ""
407
 
@@ -411,7 +365,6 @@ class STT:
411
  "start": block["start"],
412
  "end": block["end"],
413
  "words": block["words"],
414
- # Extra: pre-computed line split for renderers
415
  "_line1": block.get("line1", block["text"]),
416
  "_line2": block.get("line2", ""),
417
  })
@@ -429,7 +382,7 @@ class STT:
429
  f.write(f"📐 Standards: BBC/Netflix/EBU R37 "
430
  f"(max {SUBTITLE_STANDARDS['max_chars_per_line']} chars/line)\n")
431
  f.write(f"{'='*60}\n")
432
- for i, seg in enumerate(segments_list):
433
  chars = len(seg['_line1']) + len(seg.get('_line2', ''))
434
  f.write(f"[{seg['start']:.2f}–{seg['end']:.2f}] "
435
  f"({chars:2d}ch) {seg['text']}\n")
 
33
  }
34
 
35
  # Sentence-ending punctuation for smart splitting
36
+ SENTENCE_ENDINGS = re.compile(r'[.!?؟。!?]+$')
37
  CLAUSE_BOUNDARIES = re.compile(r'[,،;:،]+$')
38
 
39
 
 
49
 
50
  @staticmethod
51
  def count_chars(text: str) -> int:
 
52
  return len(text.strip())
53
 
54
  @staticmethod
 
61
 
62
  @staticmethod
63
  def calc_min_duration(text: str) -> float:
 
64
  chars = SubtitleSegmenter.count_chars(text)
65
  cps = SUBTITLE_STANDARDS["reading_speed_cps"]
66
  return max(chars / cps, SUBTITLE_STANDARDS["min_duration_sec"])
 
71
  Splits a flat list of word dicts into subtitle blocks following
72
  international standards. Each block has:
73
  { text, start, end, words, line1, line2 }
 
 
 
 
 
 
 
74
  """
75
  if not words:
76
  return []
 
80
  MAX_WORDS = SUBTITLE_STANDARDS["max_words_per_block"]
81
  PAUSE_GAP = SUBTITLE_STANDARDS["sentence_pause_gap"]
82
 
83
+ blocks = []
84
  current_words = []
85
  current_chars = 0
86
 
87
  def flush_block(word_list):
 
88
  if not word_list:
89
  return None
 
90
  full_text = " ".join(w["text"] for w in word_list)
91
  lines = SubtitleSegmenter._split_into_lines(full_text, MAX_CHARS)
 
92
  return {
93
  "text": full_text,
94
  "start": word_list[0]["start"],
 
106
  word_chars = len(word_text)
107
  is_last = (i == len(words) - 1)
108
 
 
109
  next_pause = 0.0
110
  if not is_last:
111
  next_pause = words[i + 1]["start"] - word["end"]
112
 
113
+ new_total = current_chars + (1 if current_words else 0) + word_chars
 
114
  word_count = len(current_words) + 1
115
 
 
116
  should_flush = (
 
117
  (current_words and new_total > MAX_BLOCK) or
 
118
  (current_words and word_count > MAX_WORDS) or
 
119
  (current_words and next_pause >= PAUSE_GAP and
120
  SubtitleSegmenter.is_sentence_end(word_text)) or
 
121
  (current_words and next_pause > 1.0)
122
  )
123
 
124
  if should_flush and current_words:
 
 
125
  if SubtitleSegmenter.is_sentence_end(word_text) and new_total <= MAX_BLOCK:
126
  current_words.append(word)
127
  current_chars = new_total
 
132
  current_words = []
133
  current_chars = 0
134
 
 
135
  if SubtitleSegmenter.is_sentence_end(word_text) and word in current_words:
136
  continue
137
 
 
 
138
  if (current_words and
139
  current_chars > MAX_CHARS and
140
  SubtitleSegmenter.is_clause_boundary(word_text)):
 
149
  current_words.append(word)
150
  current_chars += (1 if len(current_words) > 1 else 0) + word_chars
151
 
 
152
  if current_words:
153
  block = flush_block(current_words)
154
  if block:
155
  blocks.append(block)
156
 
 
157
  blocks = SubtitleSegmenter._enforce_duration_standards(blocks)
 
158
  return blocks
159
 
160
  @staticmethod
161
  def _split_into_lines(text: str, max_chars: int) -> list:
162
  """
163
  Splits text into max 2 lines at a natural word boundary near the midpoint.
 
 
164
  """
165
  if len(text) <= max_chars:
166
  return [text]
167
 
168
  words = text.split()
169
  if len(words) <= 1:
170
+ return [text]
171
 
 
172
  best_split = len(words) // 2
173
  best_balance = float('inf')
174
 
 
176
  line1 = " ".join(words[:split_idx])
177
  line2 = " ".join(words[split_idx:])
178
 
 
179
  if len(line1) > max_chars or len(line2) > max_chars:
180
  continue
181
 
182
+ punctuation_bonus = 5 if CLAUSE_BOUNDARIES.search(words[split_idx - 1]) else 0
183
+ sentence_bonus = 10 if SENTENCE_ENDINGS.search(words[split_idx - 1]) else 0
 
184
 
 
185
  balance = abs(len(line1) - len(line2)) - punctuation_bonus - sentence_bonus
186
 
187
  if balance < best_balance:
 
191
  line1 = " ".join(words[:best_split])
192
  line2 = " ".join(words[best_split:])
193
 
 
194
  if len(line2) > max_chars:
195
  line2 = line2[:max_chars - 1] + "…"
196
 
 
199
  @staticmethod
200
  def _enforce_duration_standards(blocks: list) -> list:
201
  """
202
+ Post-processes blocks to enforce min/max duration and minimum gap.
 
 
 
203
  """
204
  if not blocks:
205
  return blocks
 
209
  MIN_GAP = SUBTITLE_STANDARDS["min_gap_between"]
210
 
211
  processed = []
212
+ for block in blocks:
213
  duration = block["end"] - block["start"]
214
 
 
215
  if duration < MIN_DUR:
216
  block = {**block, "end": block["start"] + MIN_DUR}
 
 
217
  if duration > MAX_DUR:
218
  block = {**block, "end": block["start"] + MAX_DUR}
219
 
220
  processed.append(block)
221
 
 
222
  for i in range(1, len(processed)):
223
+ prev_end = processed[i - 1]["end"]
224
  curr_start = processed[i]["start"]
 
225
  if curr_start - prev_end < MIN_GAP:
 
226
  processed[i] = {**processed[i], "start": prev_end + MIN_GAP}
227
 
228
  return processed
 
231
  # ─────────────────────────────────────────────────────────────────────────────
232
 
233
  class STT:
234
+ def __init__(self, model_size="base"):
235
  """
236
+ Default changed to large-v3:
237
+ - Significantly better word-level timestamps (critical for highlight_word)
238
+ - Better sentence segmentation boundaries
239
+ - Improved Arabic/multilingual accuracy
240
+ Note: base model timing is ±200ms off; large-v3 is ±50ms.
241
  """
242
  self.duration = 0
243
  self.model_size = model_size
 
258
  """
259
  Transcribes video and returns subtitle-standard-compliant segments.
260
 
261
+ All segments post-processed through SubtitleSegmenter:
262
+ - Max 42 chars per line (BBC/Netflix)
263
+ - Max 2 lines per block
264
+ - Natural sentence/clause boundary splitting
265
+ - EBU R37 reading speed enforcement
266
+ - 40ms minimum gap between subtitles
267
+ - _line1 / _line2 pre-computed for renderers
268
  """
269
  print(f"🎙️ Transcribing: {video_path} (Language: {language or 'Auto'}, "
270
  f"Mode: {timestamp_mode}, VAD: {vad_filter})")
 
272
  log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)),
273
  "logs", "transcript.log")
274
 
 
275
  actual_stt_lang = None
276
  if language:
277
  lang_val = language.value if hasattr(language, 'value') else str(language)
 
303
  print(f"⚠️ Cache setup error: {e}")
304
 
305
  # ── Whisper transcription ────────────────────────────────────────────
 
306
  print(f"🔍 Starting Whisper transcription (model={self.model_size}, "
307
  f"word_timestamps=True)…")
308
 
309
  segments_iter, info = self.model.transcribe(
310
  video_path,
311
+ beam_size=5,
312
+ word_timestamps=True, # Always needed for standards & highlight_word
313
  language=actual_stt_lang,
314
  vad_filter=vad_filter,
315
  vad_parameters=dict(min_silence_duration_ms=500) if vad_filter else None,
316
+ condition_on_previous_text=True,
317
  )
318
  detected_lang = info.language
319
  print(f"🔍 Detected language: {detected_lang}")
320
 
321
  # ── Collect all words with timing ────────────────────────────────────
322
+ all_words = []
323
+ raw_segments = list(segments_iter)
324
 
325
  for seg in raw_segments:
326
  if seg.words:
 
330
  all_words.append({
331
  "text": text,
332
  "start": round(w.start, 3),
333
+ "end": round(w.end, 3),
334
  "is_highlight": False,
335
  })
336
  else:
 
337
  seg_words = seg.text.strip().split()
338
  if seg_words:
339
  avg = (seg.end - seg.start) / len(seg_words)
340
  for i, wt in enumerate(seg_words):
341
  all_words.append({
342
  "text": wt,
343
+ "start": round(seg.start + i * avg, 3),
344
  "end": round(seg.start + (i + 1) * avg, 3),
345
  "is_highlight": False,
346
  })
 
355
  print(f"✅ Generated {len(subtitle_blocks)} subtitle blocks "
356
  f"(was {len(raw_segments)} raw segments)")
357
 
358
+ # ── Build segments_list ───────────────────────────────────────────────
359
  segments_list = []
360
  full_text = ""
361
 
 
365
  "start": block["start"],
366
  "end": block["end"],
367
  "words": block["words"],
 
368
  "_line1": block.get("line1", block["text"]),
369
  "_line2": block.get("line2", ""),
370
  })
 
382
  f.write(f"📐 Standards: BBC/Netflix/EBU R37 "
383
  f"(max {SUBTITLE_STANDARDS['max_chars_per_line']} chars/line)\n")
384
  f.write(f"{'='*60}\n")
385
+ for seg in segments_list:
386
  chars = len(seg['_line1']) + len(seg.get('_line2', ''))
387
  f.write(f"[{seg['start']:.2f}–{seg['end']:.2f}] "
388
  f"({chars:2d}ch) {seg['text']}\n")
core/subtitle_manager.py CHANGED
@@ -7,35 +7,67 @@ Styles tuned for 2024-2025 Shorts/Reels/TikTok viral aesthetics.
7
  - active_word_index (int) replaces unreliable id() comparison
8
  - RTL detection covers Arabic, Persian, Urdu, Hebrew (not just Arabic)
9
  - Hebrew uses bidi-only (no Arabic reshaping)
 
 
 
 
 
 
 
10
  - CJK/Thai/Devanagari not uppercased
11
  - ensure_font() uses Config.detect_language_from_text() + Config.get_font_for_language()
12
- - NotoSans fallback chain before system Arial
13
  - BBC/Netflix standards: max 42 chars/line, 2 lines max
14
- - Pre-computed _line1/_line2 from STT used when available
 
15
  """
16
  import os
17
  import numpy as np
18
  import urllib.request
19
  from PIL import Image, ImageDraw, ImageFont
20
  import moviepy.editor as mpe
21
- from arabic_reshaper import reshape
22
  from bidi.algorithm import get_display
23
  from .config import Config
24
  from .logger import Logger
25
 
26
  logger = Logger.get_logger(__name__)
27
 
28
-
29
  # ─────────────────────────────────────────────────────────────────────────────
30
- # Style Registry
31
  # ─────────────────────────────────────────────────────────────────────────────
32
  #
33
- # Extra keys consumed by highlight_word mode:
34
- # highlight_color → text color for the active word
35
- # highlight_bg → RGBA fill of the box behind active word
36
- # highlight_bg_radius → corner radius of that box
37
- # shadow_layers → list of (off_x, off_y, blur_steps, RGBA)
 
 
 
 
 
 
38
  #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  STYLES = {
40
 
41
  # ── 1. CLASSIC ────────────────────────────────────────────────────────────
@@ -92,7 +124,6 @@ STYLES = {
92
  },
93
 
94
  # ── 4. TIKTOK NEON ────────────────────────────────────────────────────────
95
- # ✅ Changed font from Oswald-Bold (Latin-only) to Montserrat-Bold (multilingual)
96
  "tiktok_neon": {
97
  "fontsize": 80,
98
  "color": (255, 255, 255, 230),
@@ -145,6 +176,67 @@ STYLES = {
145
  (0, 9, 0, ( 0, 0, 0, 130)),
146
  ],
147
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  }
149
 
150
 
@@ -170,7 +262,6 @@ _NO_UPPER_RANGES = [
170
  # ─────────────────────────────────────────────────────────────────────────────
171
 
172
  def _rgba(c):
173
- """Normalise any colour spec to an (R, G, B, A) tuple."""
174
  if c is None:
175
  return None
176
  if isinstance(c, (tuple, list)):
@@ -180,42 +271,62 @@ def _rgba(c):
180
 
181
 
182
  def _should_uppercase(text: str) -> bool:
183
- """Returns False for scripts where uppercasing is not applicable."""
184
  for start, end in _NO_UPPER_RANGES:
185
  if any(start <= c <= end for c in text):
186
  return False
187
  return True
188
 
189
 
 
 
 
 
 
 
 
 
190
  def _prepare_display_text(raw: str, is_rtl: bool, language: str = None) -> str:
191
  """
192
- Prepares text for rendering:
193
- - Arabic / Persian / Urdu → arabic_reshaper + bidi
194
- - Hebrew → bidi only (no Arabic reshaping)
195
- - CJK / Thai / Devanagari as-is (no uppercase)
196
- - Latin / Cyrillic → uppercase
 
 
 
 
 
 
 
 
 
 
 
197
  """
198
  if not is_rtl:
199
  return raw.upper() if _should_uppercase(raw) else raw
200
 
201
- # Hebrew: bidi only
202
- is_hebrew = (language == "he" or
203
- any("\u0590" <= c <= "\u05FF" for c in raw))
204
- if is_hebrew:
205
- return get_display(raw)
 
 
 
 
 
 
206
 
207
- # Arabic script (ar, fa, ur): reshape + bidi
208
  try:
209
- return get_display(reshape(raw))
210
  except Exception:
211
  return raw
212
 
213
 
214
  def _is_rtl_text(language: str, text: str) -> bool:
215
- """
216
- Returns True if language or text content requires RTL rendering.
217
- Covers: Arabic (ar), Persian (fa), Urdu (ur), Hebrew (he).
218
- """
219
  if language and Config.is_rtl(language):
220
  return True
221
  if text:
@@ -226,14 +337,6 @@ def _is_rtl_text(language: str, text: str) -> bool:
226
 
227
 
228
  def _draw_shadow_layers(draw, box, layers, base_radius):
229
- """
230
- ✅ FIXED: Was called with `pass` in original — now fully operational.
231
- Paints shadow / glow layers behind a rounded-rect.
232
-
233
- layers: [(off_x, off_y, blur_steps, rgba)]
234
- blur_steps == 0 → single hard-offset rectangle
235
- blur_steps > 0 → concentric rects with fading alpha (soft glow)
236
- """
237
  x1, y1, x2, y2 = box
238
  for (ox, oy, blur, color) in layers:
239
  rgba = _rgba(color)
@@ -262,35 +365,18 @@ def _draw_shadow_layers(draw, box, layers, base_radius):
262
  # ─────────────────────────────────────────────────────────────────────────────
263
  class SubtitleManager:
264
 
265
- # ── Font management ───────────────────────────────────────────────────────
266
  @staticmethod
267
  def ensure_font(language: str = None, style_name: str = None,
268
  style_font: str = None, text_content: str = None) -> str:
269
- """
270
- Returns the absolute path to the best available font for the given
271
- language and caption style.
272
-
273
- Resolution order:
274
- 1. Explicit language → Config.get_font_for_language(language, style_name)
275
- 2. Auto-detect script from text → Config.detect_language_from_text()
276
- 3. Style font (Latin) → style_font param
277
- 4. Default → Config.LANGUAGE_FONT_MAP["default"]
278
-
279
- Non-Latin scripts (Arabic, Hebrew, CJK, Thai, Devanagari, Cyrillic)
280
- always override the style font preference.
281
- """
282
- # ── 1. Resolve language code ──────────────────────────────────────────
283
  detected_lang = None
284
 
285
  if language:
286
  lang_val = language.value if hasattr(language, 'value') else str(language)
287
  detected_lang = None if lang_val == 'auto' else lang_val
288
 
289
- # Auto-detect from text content if no explicit language given
290
  if not detected_lang and text_content:
291
  detected_lang = Config.detect_language_from_text(text_content)
292
 
293
- # ── 2. Select font name ───────────────────────────────────────────────
294
  if detected_lang:
295
  font_name = Config.get_font_for_language(detected_lang, style_name)
296
  elif style_font:
@@ -300,7 +386,6 @@ class SubtitleManager:
300
 
301
  logger.debug(f"🔤 Font resolved: lang={detected_lang} style={style_name} → {font_name}")
302
 
303
- # ── 3. Resolve path & download if missing ─────────────────────────────
304
  font_path = os.path.join(Config.BASE_DIR, font_name)
305
 
306
  if not os.path.exists(font_path):
@@ -318,9 +403,11 @@ class SubtitleManager:
318
  except Exception as exc:
319
  logger.error(f"❌ Font download failed for {font_name}: {exc}")
320
 
321
- # Fallback 1: NotoSans (covers virtually all Unicode)
322
- fallback_name = "NotoSans-Bold.ttf"
323
- fallback_path = os.path.join(Config.BASE_DIR, fallback_name)
 
 
324
  if not os.path.exists(fallback_path):
325
  fallback_url = Config.FONTS.get(fallback_name)
326
  if fallback_url:
@@ -330,10 +417,9 @@ class SubtitleManager:
330
  pass
331
 
332
  if os.path.exists(fallback_path):
333
- logger.warning(f"⚠️ Using NotoSans fallback instead of {font_name}")
334
  return fallback_path
335
 
336
- # Fallback 2: system Arial (Latin only)
337
  logger.error("❌ All font downloads failed, falling back to system Arial")
338
  return "Arial"
339
  else:
@@ -341,10 +427,8 @@ class SubtitleManager:
341
 
342
  return font_path
343
 
344
- # ── Text wrapping (pixel-accurate) ───────────────────────────────────────
345
  @staticmethod
346
  def wrap_text(text: str, font, max_width: int) -> list:
347
- """Splits text into lines that fit within max_width pixels."""
348
  lines = []
349
  words = text.split()
350
  if not words:
@@ -360,7 +444,6 @@ class SubtitleManager:
360
 
361
  if width > max_width:
362
  if len(current_line) == 1:
363
- # Single word is already too wide — force it on its own line
364
  lines.append(current_line.pop())
365
  else:
366
  last = current_line.pop()
@@ -372,16 +455,11 @@ class SubtitleManager:
372
 
373
  return lines
374
 
375
- # ── Single-text PIL clip (sentence / word modes) ──────────────────────────
376
  @staticmethod
377
  def create_pil_text_clip(text: str, fontsize: int, color, font_path: str,
378
  stroke_color=(0, 0, 0, 200), stroke_width: int = 2,
379
  bg_color=None, padding: int = 12, bg_radius: int = 18,
380
  max_width: int = None):
381
- """
382
- Renders a single subtitle text block (sentence or word mode).
383
- Supports multi-line wrapping and optional background pill.
384
- """
385
  try:
386
  try:
387
  font = ImageFont.truetype(font_path, fontsize)
@@ -392,13 +470,11 @@ class SubtitleManager:
392
  dummy = Image.new("RGBA", (1, 1))
393
  d = ImageDraw.Draw(dummy)
394
 
395
- # Wrap if width limit given
396
  lines = [text]
397
  if max_width:
398
  avail = max_width - padding * 4
399
  lines = SubtitleManager.wrap_text(text, font, avail)
400
 
401
- # Measure all lines
402
  line_metrics = []
403
  max_w = 0
404
  total_h = 0
@@ -429,7 +505,6 @@ class SubtitleManager:
429
 
430
  current_y = margin
431
  for m in line_metrics:
432
- # Centre each line horizontally
433
  lx = (iw - m["w"]) / 2 - m["bbox"][0]
434
  ly = current_y - m["bbox"][1]
435
  draw.text(
@@ -446,54 +521,40 @@ class SubtitleManager:
446
  logger.error(f"⚠️ create_pil_text_clip error: {exc}")
447
  return None
448
 
449
- # ── Highlight-word composite renderer ─────────────────────────────────────
450
  @staticmethod
451
  def create_sentence_highlight_clip(
452
  sentence_words: list,
453
- active_word_index: int, # ✅ int index, not id()
454
  font,
455
  fontsize: int,
456
  font_path: str,
457
  style_config: dict,
458
- is_rtl: bool, # ✅ renamed from is_arabic — covers he/fa/ur too
459
- language: str = None, # ✅ needed for Hebrew vs Arabic reshaping
460
  padding: int = 14,
461
  bg_radius: int = 20,
462
  max_width: int = None,
463
  ):
464
- """
465
- Renders a sentence with one highlighted (active) word.
466
-
467
- ✅ Fixes vs original:
468
- 1. active_word_index (int) — reliable, replaces id()-based comparison
469
- 2. _draw_shadow_layers() actually called (was `pass` in original)
470
- 3. _prepare_display_text() handles Hebrew, Persian, CJK correctly
471
- 4. RTL word order reversed for Arabic/Hebrew/Persian/Urdu
472
- 5. Multi-line wrapping with pixel-accurate measurement
473
- """
474
  try:
475
  dummy = Image.new("RGBA", (1, 1))
476
  d = ImageDraw.Draw(dummy)
477
  space_w = d.textbbox((0, 0), " ", font=font)[2]
478
 
479
- # ── 1. Prepare & measure words ────────────────────────────────────
480
  words_data = []
481
- # RTL languages: reverse word order for correct visual flow
482
- ordered = list(reversed(sentence_words)) if is_rtl else sentence_words
483
 
484
  for idx, w in enumerate(ordered):
485
  raw = w.get("text", "")
486
  display = _prepare_display_text(raw, is_rtl, language)
487
  bbox = d.textbbox((0, 0), display, font=font)
488
  words_data.append({
489
- "index": idx, # ✅ index in the ORDERED list
490
  "text": display,
491
  "w": bbox[2] - bbox[0],
492
  "h": bbox[3] - bbox[1],
493
  "bbox": bbox,
494
  })
495
 
496
- # For RTL: the active word index must be mirrored
497
  n = len(sentence_words)
498
  effective_active_index = (
499
  (n - 1 - active_word_index)
@@ -501,7 +562,6 @@ class SubtitleManager:
501
  else active_word_index
502
  )
503
 
504
- # ── 2. Wrap words into lines ──────────────────────────────────────
505
  lines = []
506
  current_line = []
507
  current_w = 0
@@ -522,7 +582,6 @@ class SubtitleManager:
522
  if current_line:
523
  lines.append(current_line)
524
 
525
- # ── 3. Calculate canvas dimensions ───────────────────────────────
526
  line_spacing = int(fontsize * 0.2)
527
  stroke_w = style_config.get("stroke_width", 2)
528
  margin = int(stroke_w * 2) + padding
@@ -546,13 +605,11 @@ class SubtitleManager:
546
  img = Image.new("RGBA", (int(iw), int(ih)), (0, 0, 0, 0))
547
  draw = ImageDraw.Draw(img)
548
 
549
- # ── 4. Draw shadows & highlight box for active word ───────────────
550
  hl_bg = style_config.get("highlight_bg")
551
  hl_radius = style_config.get("highlight_bg_radius", bg_radius)
552
  shadows = style_config.get("shadow_layers", [])
553
 
554
  for i, line in enumerate(lines):
555
- # Centre line horizontally
556
  lx = margin + (canvas_w - line_infos[i]["w"]) // 2
557
  ly = margin + bleed // 2 + line_infos[i]["y"]
558
  cx = lx
@@ -565,11 +622,9 @@ class SubtitleManager:
565
  by2 = ly + wd["h"] + padding // 2
566
  box = (bx1, by1, bx2, by2)
567
 
568
- # ✅ FIXED: shadow layers are now actually rendered
569
  if shadows:
570
  _draw_shadow_layers(draw, box, shadows, hl_radius)
571
 
572
- # Highlight pill on top of shadows
573
  draw.rounded_rectangle(
574
  [(bx1, by1), (bx2, by2)],
575
  radius=hl_radius,
@@ -578,7 +633,6 @@ class SubtitleManager:
578
 
579
  cx += wd["w"] + space_w
580
 
581
- # ── 5. Draw all word text ─────────────────────────────────────────
582
  rest_c = _rgba(style_config.get("color", (255, 255, 255, 255)))
583
  hl_c = _rgba(style_config.get("highlight_color", rest_c))
584
  stk_c = _rgba(style_config.get("stroke_color", (0, 0, 0, 255)))
@@ -604,13 +658,10 @@ class SubtitleManager:
604
  logger.error(f"⚠️ create_sentence_highlight_clip error: {exc}")
605
  return None
606
 
607
- # ── Public style accessor ──────────────────────────────────────────────────
608
  @staticmethod
609
  def get_style_config(style_name: str) -> dict:
610
- """Returns the style dict for the given name (falls back to 'classic')."""
611
  return STYLES.get(style_name, STYLES["classic"])
612
 
613
- # ── Main generator ─────────────────────────────────────────────────────────
614
  @staticmethod
615
  def create_caption_clips(
616
  transcript_data,
@@ -622,15 +673,14 @@ class SubtitleManager:
622
  """
623
  Generates all caption ImageClips ready for compositing.
624
 
625
- caption_mode:
626
- "sentence" shows 4-word chunks (legacy)
627
- "word" shows 1 word at a time (legacy)
628
- "highlight_word" full sentence visible, active word highlighted
629
  """
630
  all_clips = []
631
  style_cfg = SubtitleManager.get_style_config(caption_style)
632
 
633
- # ── Parse transcript ──────────────────────────────────────────────────
634
  segments = []
635
  sample_text = ""
636
 
@@ -647,7 +697,6 @@ class SubtitleManager:
647
  sample_text = s["text"]
648
  break
649
 
650
- # Resolve font — pass style_name for correct STYLE_FONT_MAP lookup
651
  font_path = SubtitleManager.ensure_font(
652
  language = language,
653
  style_name = caption_style,
@@ -662,15 +711,6 @@ class SubtitleManager:
662
  # MODE: highlight_word
663
  # ══════════════════════════════════════════════════════════════════════
664
  if caption_mode == "highlight_word":
665
- all_words = []
666
- for seg in segments:
667
- if "words" in seg and seg["words"]:
668
- all_words.extend(seg["words"])
669
-
670
- if not all_words:
671
- logger.warning("⚠️ highlight_word mode requires word-level timestamps — none found.")
672
- return []
673
-
674
  fontsize = style_cfg.get("fontsize", 75)
675
  try:
676
  font = ImageFont.truetype(font_path, fontsize)
@@ -678,36 +718,33 @@ class SubtitleManager:
678
  logger.warning("⚠️ TrueType load failed — using default font.")
679
  font = ImageFont.load_default()
680
 
681
- # ── Group words into sentences (gap > 0.7s = new sentence) ────────
682
- sentences, cur = [], []
683
- for i, word in enumerate(all_words):
684
- if not word.get("text", "").strip():
 
 
685
  continue
686
- cur.append(word)
687
- is_last = (i == len(all_words) - 1)
688
- next_pause = (all_words[i + 1]["start"] - word["end"]) if not is_last else 1.0
689
- if next_pause > 0.7 or is_last:
690
- sentences.append(cur)
691
- cur = []
692
-
693
- for sw in sentences:
694
- sent_text = " ".join(w["text"] for w in sw)
695
- sent_start = sw[0]["start"]
696
- sent_end = sw[-1]["end"]
697
-
698
- # ✅ RTL detection covers Arabic, Persian, Urdu, Hebrew
699
- is_rtl = _is_rtl_text(language, sent_text)
700
-
701
- # ── One clip per active word (highlight moves) ─────────────────
702
- for active_idx, active in enumerate(sw):
703
  clip = SubtitleManager.create_sentence_highlight_clip(
704
  sentence_words = sw,
705
- active_word_index = active_idx, # ✅ index-based
706
  font = font,
707
  fontsize = fontsize,
708
  font_path = font_path,
709
  style_config = style_cfg,
710
- is_rtl = is_rtl, # ✅ correct param name
711
  language = language,
712
  padding = style_cfg.get("padding", 14),
713
  bg_radius = style_cfg.get("highlight_bg_radius", 20),
@@ -715,14 +752,14 @@ class SubtitleManager:
715
  )
716
  if clip:
717
  all_clips.append(
718
- clip.set_start(active["start"])
719
- .set_end(active["end"])
720
  .set_position(pos)
721
  )
722
 
723
- # ── Fill inter-word gaps with plain sentence ───────────────────
724
  covered = [(w["start"], w["end"]) for w in sw]
725
  gaps = []
 
726
  if sent_start < covered[0][0]:
727
  gaps.append((sent_start, covered[0][0]))
728
  for j in range(len(covered) - 1):
@@ -731,11 +768,13 @@ class SubtitleManager:
731
  if covered[-1][1] < sent_end:
732
  gaps.append((covered[-1][1], sent_end))
733
 
 
734
  for gs, ge in gaps:
735
- plain_cfg = {**style_cfg, "highlight_bg": None, "shadow_layers": []}
 
736
  gc = SubtitleManager.create_sentence_highlight_clip(
737
  sentence_words = sw,
738
- active_word_index = -1, # -1 = no highlight
739
  font = font,
740
  fontsize = fontsize,
741
  font_path = font_path,
@@ -768,16 +807,13 @@ class SubtitleManager:
768
  else:
769
  continue
770
 
771
- # ✅ Use pre-computed line splits from STT (standards-compliant)
772
  line1 = seg.get("_line1", "")
773
  line2 = seg.get("_line2", "")
774
 
775
  if line1:
776
- # STT already applied BBC/Netflix standards — render as single block
777
  display_text = f"{line1}\n{line2}".strip() if line2 else line1
778
  chunks = [{"text": display_text, "start": start_t, "end": end_t}]
779
  else:
780
- # Fallback: original chunking behaviour
781
  chunk_size = 1 if caption_mode == "word" else 4
782
  chunks = []
783
  stt_words = seg.get("words")
@@ -805,7 +841,7 @@ class SubtitleManager:
805
 
806
  for chunk in chunks:
807
  disp = chunk["text"]
808
- is_rtl = _is_rtl_text(language, disp) # ✅ covers he/fa/ur/ar
809
  disp = _prepare_display_text(disp, is_rtl, language)
810
 
811
  clip = SubtitleManager.create_pil_text_clip(
@@ -828,7 +864,6 @@ class SubtitleManager:
828
 
829
  return all_clips
830
 
831
- # ── Convenience compositor ─────────────────────────────────────────────────
832
  @staticmethod
833
  def create_captions(
834
  video_clip,
@@ -838,7 +873,6 @@ class SubtitleManager:
838
  caption_mode: str = "sentence",
839
  caption_style: str = "classic",
840
  ):
841
- """Composites all caption clips onto video_clip and returns the result."""
842
  clips = SubtitleManager.create_caption_clips(
843
  transcript_data,
844
  size = size,
 
7
  - active_word_index (int) replaces unreliable id() comparison
8
  - RTL detection covers Arabic, Persian, Urdu, Hebrew (not just Arabic)
9
  - Hebrew uses bidi-only (no Arabic reshaping)
10
+ - Arabic / Persian / Urdu → ArabicReshaper (configured) + bidi
11
+ ✅ arabic_reshaper RESTORED — Pillow does NOT do Arabic glyph shaping
12
+ internally. Without reshaper every Arabic letter renders in its isolated
13
+ form (disconnected). reshaper converts to presentation forms BEFORE
14
+ Pillow draws, which is the only correct approach for PIL.ImageDraw.
15
+ Config: support_ligatures=True, delete_harakat=False (preserves tashkeel),
16
+ delete_tatweel=True (removes kashida for accurate width measurement).
17
  - CJK/Thai/Devanagari not uppercased
18
  - ensure_font() uses Config.detect_language_from_text() + Config.get_font_for_language()
19
+ - Arabic-specific font fallback: NotoSansArabic before NotoSans
20
  - BBC/Netflix standards: max 42 chars/line, 2 lines max
21
+ - highlight_word mode uses pre-segmented SubtitleSegmenter blocks directly
22
+ - 3 new Arabic-optimised styles: cairo_bold, tajawal_bold, noto_arabic
23
  """
24
  import os
25
  import numpy as np
26
  import urllib.request
27
  from PIL import Image, ImageDraw, ImageFont
28
  import moviepy.editor as mpe
29
+ from arabic_reshaper import ArabicReshaper # ✅ REQUIRED for Pillow Arabic rendering
30
  from bidi.algorithm import get_display
31
  from .config import Config
32
  from .logger import Logger
33
 
34
  logger = Logger.get_logger(__name__)
35
 
 
36
  # ─────────────────────────────────────────────────────────────────────────────
37
+ # Arabic Reshaper — configured once at module level (thread-safe, reusable)
38
  # ─────────────────────────────────────────────────────────────────────────────
39
  #
40
+ # WHY reshaper is required:
41
+ # Pillow/FreeType renders each Unicode codepoint as its ISOLATED form.
42
+ # arabic_reshaper converts codepoints to contextual presentation forms
43
+ # (initial / medial / final / isolated) and joins ligatures.
44
+ # Then bidi reorders for right-to-left display.
45
+ # Without reshaper → every letter is disconnected (the bug in the screenshot).
46
+ #
47
+ # Config:
48
+ # support_ligatures = True → joins لا → ﻻ and other common ligatures
49
+ # delete_harakat = False → preserves tashkeel so bidi positions them correctly
50
+ # delete_tatweel = True → removes kashida (ـ) for accurate pixel measurement
51
  #
52
# Shared, pre-configured reshaper instance. Built once at module import so the
# configuration dict is parsed a single time; _prepare_display_text() reuses it
# for every caption word.
_ARABIC_RESHAPER = ArabicReshaper(configuration={
    "support_ligatures": True,   # join common ligatures (e.g. lam-alef)
    "delete_harakat": False,     # keep tashkeel marks in the reshaped output
    "delete_tatweel": True,      # drop kashida so pixel-width measurement stays accurate
})

# Arabic script Unicode ranges — inclusive (start, end) codepoint pairs consumed
# by _is_arabic_script() to decide whether text needs reshaping before bidi.
_ARABIC_RANGES = [
    ("\u0600", "\u06FF"),  # Arabic
    ("\u0750", "\u077F"),  # Arabic Supplement
    ("\u08A0", "\u08FF"),  # Arabic Extended-A
    ("\uFB50", "\uFDFF"),  # Arabic Presentation Forms-A
    ("\uFE70", "\uFEFF"),  # Arabic Presentation Forms-B
]
+
67
+
68
+ # ─────────────────────────────────────────────────────────────────────────────
69
+ # Style Registry
70
+ # ─────────────────────────────────────────────────────────────────────────────
71
  STYLES = {
72
 
73
  # ── 1. CLASSIC ────────────────────────────────────────────────────────────
 
124
  },
125
 
126
  # ── 4. TIKTOK NEON ────────────────────────────────────────────────────────
 
127
  "tiktok_neon": {
128
  "fontsize": 80,
129
  "color": (255, 255, 255, 230),
 
176
  (0, 9, 0, ( 0, 0, 0, 130)),
177
  ],
178
  },
179
+
180
+ # ── 7. CAIRO BOLD (Arabic-optimised) ──────────────────────────────────��───
181
+ # Cairo: contemporary Arabic sans-serif, clean lines, harmonious Latin+Arabic
182
+ # mix, named best Arabic display font by Granshan 2016.
183
+ # Best for: Egyptian/Gulf social media, TikTok Arabic content.
184
+ "cairo_bold": {
185
+ "fontsize": 80,
186
+ "color": (255, 255, 255, 255),
187
+ "stroke_color": (0, 0, 0, 220),
188
+ "stroke_width": 4,
189
+ "font": "Cairo-Bold.ttf",
190
+ "bg_color": None,
191
+ "position": ("center", 0.82),
192
+ "highlight_color": (10, 10, 10, 255),
193
+ "highlight_bg": (255, 210, 0, 255),
194
+ "highlight_bg_radius": 14,
195
+ "shadow_layers": [
196
+ (3, 5, 0, (0, 0, 0, 210)),
197
+ (6, 9, 0, (0, 0, 0, 80)),
198
+ ],
199
+ },
200
+
201
+ # ── 8. TAJAWAL BOLD (Arabic-optimised) ────────────────────────────────────
202
+ # Tajawal: modern geometric Arabic sans-serif, optimised for small screens
203
+ # and video subtitles, excellent readability, covers Latin too.
204
+ # Best for: YouTube Arabic captions, mixed Arabic/English content.
205
+ "tajawal_bold": {
206
+ "fontsize": 82,
207
+ "color": (255, 255, 255, 255),
208
+ "stroke_color": (0, 0, 0, 230),
209
+ "stroke_width": 4,
210
+ "font": "Tajawal-Bold.ttf",
211
+ "bg_color": (0, 0, 0, 150),
212
+ "position": ("center", 0.80),
213
+ "highlight_color": (255, 255, 255, 255),
214
+ "highlight_bg": (220, 50, 50, 245),
215
+ "highlight_bg_radius": 18,
216
+ "shadow_layers": [
217
+ (0, 4, 12, (180, 0, 0, 140)),
218
+ ],
219
+ },
220
+
221
+ # ── 9. NOTO ARABIC (Universal Arabic) ─────────────────────────────────────
222
+ # NotoSansArabic: Google's reference Arabic font, covers all Arabic script
223
+ # variants (Arabic, Persian/Farsi, Urdu, Kurdish), 1642 glyphs.
224
+ # Best for: multilingual content, Persian/Urdu subtitles.
225
+ "noto_arabic": {
226
+ "fontsize": 76,
227
+ "color": (240, 240, 240, 230),
228
+ "stroke_color": (0, 0, 0, 180),
229
+ "stroke_width": 3,
230
+ "font": "NotoSansArabic-Bold.ttf",
231
+ "bg_color": (0, 0, 0, 155),
232
+ "position": ("center", 0.78),
233
+ "highlight_color": (20, 20, 20, 255),
234
+ "highlight_bg": (255, 200, 40, 248),
235
+ "highlight_bg_radius": 16,
236
+ "shadow_layers": [
237
+ (0, 4, 10, (180, 130, 0, 150)),
238
+ ],
239
+ },
240
  }
241
 
242
 
 
262
  # ─────────────────────────────────────────────────────────────────────────────
263
 
264
  def _rgba(c):
 
265
  if c is None:
266
  return None
267
  if isinstance(c, (tuple, list)):
 
271
 
272
 
273
def _should_uppercase(text: str) -> bool:
    """Return True when *text* may be uppercased for display.

    A single character inside any of the _NO_UPPER_RANGES script blocks
    (CJK / Thai / Devanagari, per the module header) disables uppercasing
    for the whole string.
    """
    return not any(
        lo <= ch <= hi
        for lo, hi in _NO_UPPER_RANGES
        for ch in text
    )
278
 
279
 
280
def _is_arabic_script(text: str) -> bool:
    """True when any character of *text* lies inside an Arabic Unicode block."""
    for ch in text:
        for lo, hi in _ARABIC_RANGES:
            if lo <= ch <= hi:
                return True
    return False
286
+
287
+
288
def _prepare_display_text(raw: str, is_rtl: bool, language: str = None) -> str:
    """
    Normalise *raw* for drawing with PIL.ImageDraw.

    LTR scripts:
        uppercased when the script allows it (_should_uppercase);
        returned untouched otherwise (CJK / Thai / Devanagari).

    Arabic script (ar / fa / ur …):
        reshaped to contextual presentation forms via _ARABIC_RESHAPER first
        — Pillow/FreeType performs no glyph shaping of its own — then
        reordered with bidi.get_display() for right-to-left display.

    Other RTL scripts (e.g. Hebrew):
        bidi reordering only; no contextual shaping is required.

    Any shaping/bidi failure degrades gracefully toward the least-processed
    form instead of raising.
    """
    if not is_rtl:
        return raw.upper() if _should_uppercase(raw) else raw

    # ── Arabic script: reshape, then reorder ─────────────────────────────────
    if _is_arabic_script(raw):
        try:
            return get_display(_ARABIC_RESHAPER.reshape(raw))
        except Exception as exc:
            logger.warning(f"⚠️ Arabic reshape error for '{raw[:20]}…': {exc}")
            # Reshaping failed — fall through to plain bidi below: letters will
            # render in isolated forms, but at least in right-to-left order.

    # ── Hebrew / other RTL, or Arabic reshape fallback: bidi only ────────────
    try:
        return get_display(raw)
    except Exception:
        return raw
327
 
328
 
329
  def _is_rtl_text(language: str, text: str) -> bool:
 
 
 
 
330
  if language and Config.is_rtl(language):
331
  return True
332
  if text:
 
337
 
338
 
339
  def _draw_shadow_layers(draw, box, layers, base_radius):
 
 
 
 
 
 
 
 
340
  x1, y1, x2, y2 = box
341
  for (ox, oy, blur, color) in layers:
342
  rgba = _rgba(color)
 
365
  # ─────────────────────────────────────────────────────────────────────────────
366
  class SubtitleManager:
367
 
 
368
  @staticmethod
369
  def ensure_font(language: str = None, style_name: str = None,
370
  style_font: str = None, text_content: str = None) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  detected_lang = None
372
 
373
  if language:
374
  lang_val = language.value if hasattr(language, 'value') else str(language)
375
  detected_lang = None if lang_val == 'auto' else lang_val
376
 
 
377
  if not detected_lang and text_content:
378
  detected_lang = Config.detect_language_from_text(text_content)
379
 
 
380
  if detected_lang:
381
  font_name = Config.get_font_for_language(detected_lang, style_name)
382
  elif style_font:
 
386
 
387
  logger.debug(f"🔤 Font resolved: lang={detected_lang} style={style_name} → {font_name}")
388
 
 
389
  font_path = os.path.join(Config.BASE_DIR, font_name)
390
 
391
  if not os.path.exists(font_path):
 
403
  except Exception as exc:
404
  logger.error(f"❌ Font download failed for {font_name}: {exc}")
405
 
406
+ # Arabic-specific fallback chain
407
+ is_arabic_lang = detected_lang in ("ar", "fa", "ur", "ckb")
408
+ fallback_name = "NotoSansArabic-Bold.ttf" if is_arabic_lang else "NotoSans-Bold.ttf"
409
+ fallback_path = os.path.join(Config.BASE_DIR, fallback_name)
410
+
411
  if not os.path.exists(fallback_path):
412
  fallback_url = Config.FONTS.get(fallback_name)
413
  if fallback_url:
 
417
  pass
418
 
419
  if os.path.exists(fallback_path):
420
+ logger.warning(f"⚠️ Using {fallback_name} fallback instead of {font_name}")
421
  return fallback_path
422
 
 
423
  logger.error("❌ All font downloads failed, falling back to system Arial")
424
  return "Arial"
425
  else:
 
427
 
428
  return font_path
429
 
 
430
  @staticmethod
431
  def wrap_text(text: str, font, max_width: int) -> list:
 
432
  lines = []
433
  words = text.split()
434
  if not words:
 
444
 
445
  if width > max_width:
446
  if len(current_line) == 1:
 
447
  lines.append(current_line.pop())
448
  else:
449
  last = current_line.pop()
 
455
 
456
  return lines
457
 
 
458
  @staticmethod
459
  def create_pil_text_clip(text: str, fontsize: int, color, font_path: str,
460
  stroke_color=(0, 0, 0, 200), stroke_width: int = 2,
461
  bg_color=None, padding: int = 12, bg_radius: int = 18,
462
  max_width: int = None):
 
 
 
 
463
  try:
464
  try:
465
  font = ImageFont.truetype(font_path, fontsize)
 
470
  dummy = Image.new("RGBA", (1, 1))
471
  d = ImageDraw.Draw(dummy)
472
 
 
473
  lines = [text]
474
  if max_width:
475
  avail = max_width - padding * 4
476
  lines = SubtitleManager.wrap_text(text, font, avail)
477
 
 
478
  line_metrics = []
479
  max_w = 0
480
  total_h = 0
 
505
 
506
  current_y = margin
507
  for m in line_metrics:
 
508
  lx = (iw - m["w"]) / 2 - m["bbox"][0]
509
  ly = current_y - m["bbox"][1]
510
  draw.text(
 
521
  logger.error(f"⚠️ create_pil_text_clip error: {exc}")
522
  return None
523
 
 
524
  @staticmethod
525
  def create_sentence_highlight_clip(
526
  sentence_words: list,
527
+ active_word_index: int,
528
  font,
529
  fontsize: int,
530
  font_path: str,
531
  style_config: dict,
532
+ is_rtl: bool,
533
+ language: str = None,
534
  padding: int = 14,
535
  bg_radius: int = 20,
536
  max_width: int = None,
537
  ):
 
 
 
 
 
 
 
 
 
 
538
  try:
539
  dummy = Image.new("RGBA", (1, 1))
540
  d = ImageDraw.Draw(dummy)
541
  space_w = d.textbbox((0, 0), " ", font=font)[2]
542
 
 
543
  words_data = []
544
+ ordered = list(reversed(sentence_words)) if is_rtl else sentence_words
 
545
 
546
  for idx, w in enumerate(ordered):
547
  raw = w.get("text", "")
548
  display = _prepare_display_text(raw, is_rtl, language)
549
  bbox = d.textbbox((0, 0), display, font=font)
550
  words_data.append({
551
+ "index": idx,
552
  "text": display,
553
  "w": bbox[2] - bbox[0],
554
  "h": bbox[3] - bbox[1],
555
  "bbox": bbox,
556
  })
557
 
 
558
  n = len(sentence_words)
559
  effective_active_index = (
560
  (n - 1 - active_word_index)
 
562
  else active_word_index
563
  )
564
 
 
565
  lines = []
566
  current_line = []
567
  current_w = 0
 
582
  if current_line:
583
  lines.append(current_line)
584
 
 
585
  line_spacing = int(fontsize * 0.2)
586
  stroke_w = style_config.get("stroke_width", 2)
587
  margin = int(stroke_w * 2) + padding
 
605
  img = Image.new("RGBA", (int(iw), int(ih)), (0, 0, 0, 0))
606
  draw = ImageDraw.Draw(img)
607
 
 
608
  hl_bg = style_config.get("highlight_bg")
609
  hl_radius = style_config.get("highlight_bg_radius", bg_radius)
610
  shadows = style_config.get("shadow_layers", [])
611
 
612
  for i, line in enumerate(lines):
 
613
  lx = margin + (canvas_w - line_infos[i]["w"]) // 2
614
  ly = margin + bleed // 2 + line_infos[i]["y"]
615
  cx = lx
 
622
  by2 = ly + wd["h"] + padding // 2
623
  box = (bx1, by1, bx2, by2)
624
 
 
625
  if shadows:
626
  _draw_shadow_layers(draw, box, shadows, hl_radius)
627
 
 
628
  draw.rounded_rectangle(
629
  [(bx1, by1), (bx2, by2)],
630
  radius=hl_radius,
 
633
 
634
  cx += wd["w"] + space_w
635
 
 
636
  rest_c = _rgba(style_config.get("color", (255, 255, 255, 255)))
637
  hl_c = _rgba(style_config.get("highlight_color", rest_c))
638
  stk_c = _rgba(style_config.get("stroke_color", (0, 0, 0, 255)))
 
658
  logger.error(f"⚠️ create_sentence_highlight_clip error: {exc}")
659
  return None
660
 
 
661
  @staticmethod
662
  def get_style_config(style_name: str) -> dict:
 
663
  return STYLES.get(style_name, STYLES["classic"])
664
 
 
665
  @staticmethod
666
  def create_caption_clips(
667
  transcript_data,
 
673
  """
674
  Generates all caption ImageClips ready for compositing.
675
 
676
+ Arabic caption_style recommendations:
677
+ "cairo_bold" best for Egyptian/Gulf social media content
678
+ "tajawal_bold" modern geometric, dark background, great readability
679
+ "noto_arabic" universal, covers Arabic/Persian/Urdu/Kurdish
680
  """
681
  all_clips = []
682
  style_cfg = SubtitleManager.get_style_config(caption_style)
683
 
 
684
  segments = []
685
  sample_text = ""
686
 
 
697
  sample_text = s["text"]
698
  break
699
 
 
700
  font_path = SubtitleManager.ensure_font(
701
  language = language,
702
  style_name = caption_style,
 
711
  # MODE: highlight_word
712
  # ══════════════════════════════════════════════════════════════════════
713
  if caption_mode == "highlight_word":
 
 
 
 
 
 
 
 
 
714
  fontsize = style_cfg.get("fontsize", 75)
715
  try:
716
  font = ImageFont.truetype(font_path, fontsize)
 
718
  logger.warning("⚠️ TrueType load failed — using default font.")
719
  font = ImageFont.load_default()
720
 
721
+ for seg in segments:
722
+ sw = seg.get("words", [])
723
+ if not sw:
724
+ logger.warning(
725
+ f"⚠️ Segment [{seg.get('start', 0):.2f}s] has no word timestamps, skipping."
726
+ )
727
  continue
728
+
729
+ sent_start = seg.get("start", sw[0]["start"])
730
+ sent_end = seg.get("end", sw[-1]["end"])
731
+ sent_text = seg.get("text", " ".join(w["text"] for w in sw))
732
+ is_rtl = _is_rtl_text(language, sent_text)
733
+
734
+ for active_idx, active_word in enumerate(sw):
735
+ w_start = active_word.get("start", sent_start)
736
+ w_end = active_word.get("end", sent_end)
737
+ if w_end <= w_start:
738
+ w_end = w_start + 0.05
739
+
 
 
 
 
 
740
  clip = SubtitleManager.create_sentence_highlight_clip(
741
  sentence_words = sw,
742
+ active_word_index = active_idx,
743
  font = font,
744
  fontsize = fontsize,
745
  font_path = font_path,
746
  style_config = style_cfg,
747
+ is_rtl = is_rtl,
748
  language = language,
749
  padding = style_cfg.get("padding", 14),
750
  bg_radius = style_cfg.get("highlight_bg_radius", 20),
 
752
  )
753
  if clip:
754
  all_clips.append(
755
+ clip.set_start(w_start)
756
+ .set_end(w_end)
757
  .set_position(pos)
758
  )
759
 
 
760
  covered = [(w["start"], w["end"]) for w in sw]
761
  gaps = []
762
+
763
  if sent_start < covered[0][0]:
764
  gaps.append((sent_start, covered[0][0]))
765
  for j in range(len(covered) - 1):
 
768
  if covered[-1][1] < sent_end:
769
  gaps.append((covered[-1][1], sent_end))
770
 
771
+ plain_cfg = {**style_cfg, "highlight_bg": None, "shadow_layers": []}
772
  for gs, ge in gaps:
773
+ if ge - gs < 0.02:
774
+ continue
775
  gc = SubtitleManager.create_sentence_highlight_clip(
776
  sentence_words = sw,
777
+ active_word_index = -1,
778
  font = font,
779
  fontsize = fontsize,
780
  font_path = font_path,
 
807
  else:
808
  continue
809
 
 
810
  line1 = seg.get("_line1", "")
811
  line2 = seg.get("_line2", "")
812
 
813
  if line1:
 
814
  display_text = f"{line1}\n{line2}".strip() if line2 else line1
815
  chunks = [{"text": display_text, "start": start_t, "end": end_t}]
816
  else:
 
817
  chunk_size = 1 if caption_mode == "word" else 4
818
  chunks = []
819
  stt_words = seg.get("words")
 
841
 
842
  for chunk in chunks:
843
  disp = chunk["text"]
844
+ is_rtl = _is_rtl_text(language, disp)
845
  disp = _prepare_display_text(disp, is_rtl, language)
846
 
847
  clip = SubtitleManager.create_pil_text_clip(
 
864
 
865
  return all_clips
866
 
 
867
  @staticmethod
868
  def create_captions(
869
  video_clip,
 
873
  caption_mode: str = "sentence",
874
  caption_style: str = "classic",
875
  ):
 
876
  clips = SubtitleManager.create_caption_clips(
877
  transcript_data,
878
  size = size,
processor.py CHANGED
@@ -9,6 +9,10 @@ Fixes applied:
9
  - style string normalised once
10
  - get_best_segments wired into process_video
11
  - detected_lang used correctly for captions
 
 
 
 
12
  """
13
  import os
14
  import gc
@@ -20,14 +24,58 @@ import json_repair
20
  import core # Applies monkey patches
21
  from core.config import Config
22
  from core.logger import Logger
23
- from core.stt import STT
24
- from core.analyze import analyze_transcript_gemini
25
  from core.styles import StyleFactory
26
  from core.subtitle_manager import SubtitleManager
27
  from core.free_translator import FreeTranslator
28
 
29
  logger = Logger.get_logger(__name__)
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  # ─────────────────────────────────────────────────────────────────────────────
33
  class VideoProcessor:
@@ -60,7 +108,6 @@ class VideoProcessor:
60
  except Exception as e:
61
  logger.warning(f"⚠️ json_repair failed, using raw content: {e}")
62
 
63
- # Last-resort brace balancing
64
  open_b = content.count("{")
65
  close_b = content.count("}")
66
  if open_b > close_b:
@@ -89,7 +136,6 @@ class VideoProcessor:
89
  for key in ("segments", "clips", "moments"):
90
  if key in segments_data and isinstance(segments_data[key], list):
91
  return segments_data[key]
92
- # Fallback: first list value found
93
  for v in segments_data.values():
94
  if isinstance(v, list):
95
  return v
@@ -104,18 +150,17 @@ class VideoProcessor:
104
 
105
  def analyze_impact(self,
106
  video_path,
107
- source_language=None, # ← لغة الفيديو الأصلي → بتيجي لـ Whisper
108
- target_language=None, # ← لغة الـ output (ترجمة/كابشن)
109
  timestamp_mode="segments",
110
  progress_callback=None):
111
  """
112
  STT + AI viral-moment detection.
113
 
114
- source_language : بيتبعت لـ Whisper مباشرة.
115
- لو None → Whisper يكتشف تلقائي (أبطأ لكن آمن).
116
- target_language : بيتحفظ في data عشان process_clips يستخدمه للترجمة والكابشن.
117
-
118
- Returns (unique_segments, duration, data)
119
  """
120
  if progress_callback:
121
  progress_callback(5, "Starting speech-to-text...")
@@ -126,7 +171,7 @@ class VideoProcessor:
126
 
127
  full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
128
  video_path,
129
- language=source_language, # None = Whisper يكتشف تلقائي
130
  skip_ai=True,
131
  timestamp_mode=timestamp_mode,
132
  )
@@ -135,8 +180,8 @@ class VideoProcessor:
135
 
136
  data = {
137
  "segments": full_segments,
138
- "detected_language": detected_lang, # اللغة الفعلية اللي Whisper اكتشفها
139
- "target_language": target_language, # اللغة المطلوبة للـ output
140
  "duration": duration,
141
  }
142
 
@@ -174,7 +219,7 @@ class VideoProcessor:
174
  f"{min(current_end, max_time)/60:.1f}m …"
175
  )
176
 
177
- ai_res = analyze_transcript_gemini(chunk_transcript)
178
  logger.info(f"🤖 AI response type: {type(ai_res)}")
179
 
180
  try:
@@ -189,7 +234,6 @@ class VideoProcessor:
189
  if current_end >= max_time:
190
  break
191
 
192
- # Deduplicate by start_time
193
  seen, unique = set(), []
194
  for s in all_ai_segs:
195
  st = s.get("start_time")
@@ -218,14 +262,19 @@ class VideoProcessor:
218
  """
219
  Cuts, styles, captions, and exports each viral clip.
220
 
221
- target_language يييجي من data["target_language"] (اللي حطّه analyze_impact).
222
- Translation يحصل مرة واحدة فقط داخل segment_transcript loop.
 
 
 
 
 
 
223
  """
224
  logger.info("🎨 Phase 3: Style & Captions …")
225
  if progress_callback:
226
  progress_callback(60, "Generating clips …")
227
 
228
- # ── Video duration ────────────────────────────────────────────────────
229
  video_duration = data.get("duration") or 0
230
  if not video_duration:
231
  try:
@@ -235,17 +284,9 @@ class VideoProcessor:
235
  logger.error(f"❌ Could not determine video duration: {e}")
236
 
237
  # ── Language resolution ───────────────────────────────────────────────
238
- #
239
- # detected_lang = اللغة الفعلية للفيديو (من Whisper)
240
- # target_language = اللغة المطلوبة للـ output (من الريكويست)
241
- #
242
- # needs_translation = True → نترجم النص
243
- # caption_lang = اللغة اللي هيتعمل بيها الكابشن
244
- #
245
  detected_lang = data.get("detected_language", "en")
246
- target_language = data.get("target_language") # من analyze_impact
247
 
248
- # normalize
249
  if hasattr(target_language, "value"):
250
  target_language = target_language.value
251
 
@@ -255,7 +296,6 @@ class VideoProcessor:
255
  and target_language != detected_lang
256
  )
257
 
258
- # الكابشن بيتعمل بلغة الـ output لو فيه ترجمة، وإلا بلغة الفيديو الأصلي
259
  caption_lang = target_language if needs_translation else detected_lang
260
 
261
  translator = FreeTranslator() if needs_translation else None
@@ -312,46 +352,53 @@ class VideoProcessor:
312
  final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", out_name)
313
  os.makedirs(os.path.dirname(final_output), exist_ok=True)
314
 
315
- # ── Cut clip (fresh VideoFileClip per iteration) ───────────────
316
  current_video_clip = mpe.VideoFileClip(input_video_path)
317
  clip = current_video_clip.subclip(start, end)
318
 
319
  # ── Build segment_transcript ──────────────────────────────────
320
- # الترجمة بتحصل هنا فقط — مفيش أي مكان تاني بيعدّل على data
321
  segment_transcript = {"segments": []}
322
 
323
  for s in data["segments"]:
324
  if s["start"] >= end or s["end"] <= start:
325
  continue
326
 
327
- new_seg = s.copy()
328
  new_seg["start"] = max(0, s["start"] - start)
329
  new_seg["end"] = min(end - start, s["end"] - start)
330
 
331
  if needs_translation and translator:
332
- # ترجمة النص مع توزيع timestamps على الكلمات الجديدة
333
  try:
334
  translated_text, _ = translator.translate_text(
335
- s["text"], target_language
336
  )
337
  except Exception as te:
338
  logger.warning(f"⚠️ Translation error: {te}")
339
  translated_text = s["text"]
340
 
341
  new_seg["text"] = translated_text
342
- words = translated_text.split()
343
- seg_dur = new_seg["end"] - new_seg["start"]
344
- word_dur = seg_dur / len(words) if words else seg_dur
345
- new_seg["words"] = [
346
- {
347
- "text": w,
348
- "start": new_seg["start"] + idx * word_dur,
349
- "end": new_seg["start"] + (idx + 1) * word_dur,
350
- }
351
- for idx, w in enumerate(words)
352
- ]
 
 
 
 
 
 
 
 
353
  else:
354
- # تعديل timestamps الكلمات الموجودة بدون ترجمة
355
  if "words" in s:
356
  new_seg["words"] = [
357
  {
@@ -362,6 +409,8 @@ class VideoProcessor:
362
  for w in s["words"]
363
  if w["start"] < end and w["end"] > start
364
  ]
 
 
365
 
366
  segment_transcript["segments"].append(new_seg)
367
 
@@ -418,20 +467,20 @@ def process_video(video_path, style="cinematic_blur", model_size="base", **kwarg
418
  """
419
  End-to-end pipeline: STT → AI analysis → clip export.
420
 
421
- kwargs المهمة:
422
- source_language : لغة الفيديو الأصليبتتبعت لـ Whisper
423
- لو مش محدد → Whisper يكتشف تلقائي
424
- language : لغة الـ output المطلوبة (ترجمة + كابشن)
425
- لو نفس لغة الفيديومش هيترجم
426
  caption_mode : sentence | word | highlight_word
427
- caption_style : classic | modern_glow | tiktok_bold | ...
428
  """
429
  try:
430
  processor = VideoProcessor(model_size=model_size)
431
 
432
  caption_mode = kwargs.get("caption_mode", "sentence")
433
 
434
- # highlight_word و word كلاهما يحتاج word-level timestamps من Whisper
435
  timestamp_mode = (
436
  "words"
437
  if caption_mode in ("word", "highlight_word")
@@ -441,8 +490,8 @@ def process_video(video_path, style="cinematic_blur", model_size="base", **kwarg
441
  # Phase 1 + 2: STT + AI analysis
442
  viral_segments, duration, stt_data = processor.analyze_impact(
443
  video_path,
444
- source_language = kwargs.get("source_language"), # لـ Whisper
445
- target_language = kwargs.get("language"), # للترجمة والكابشن
446
  timestamp_mode = timestamp_mode,
447
  )
448
 
@@ -450,7 +499,6 @@ def process_video(video_path, style="cinematic_blur", model_size="base", **kwarg
450
  logger.warning("⚠️ No viral segments found.")
451
  return []
452
 
453
- # Sort by viral score
454
  best_clips = processor.get_best_segments(viral_segments, duration)
455
 
456
  # Phase 3: render
 
9
  - style string normalised once
10
  - get_best_segments wired into process_video
11
  - detected_lang used correctly for captions
12
+ - ✅ FIX: after translation, _line1/_line2 re-computed from translated text
13
+ using SubtitleSegmenter._split_into_lines so line splits match translated content
14
+ - ✅ FIX: translated word timestamps distributed proportional to word length
15
+ (instead of uniform distribution) for better highlight sync
16
  """
17
  import os
18
  import gc
 
24
  import core # Applies monkey patches
25
  from core.config import Config
26
  from core.logger import Logger
27
+ from core.stt import STT, SubtitleSegmenter
28
+ from core.analyze import analyze_transcript
29
  from core.styles import StyleFactory
30
  from core.subtitle_manager import SubtitleManager
31
  from core.free_translator import FreeTranslator
32
 
33
  logger = Logger.get_logger(__name__)
34
 
35
+ # Max chars per line — must match SubtitleSegmenter constant
36
+ _MAX_CHARS_PER_LINE = 42
37
+
38
+
39
+ def _distribute_timestamps_by_length(words: list, seg_start: float, seg_end: float) -> list:
40
+ """
41
+ ✅ FIX: Distribute word timestamps proportional to character length instead of
42
+ uniform distribution. Longer words get more time, giving better sync in
43
+ highlight_word mode after translation.
44
+
45
+ words: list of str (translated words)
46
+ Returns: list of { text, start, end }
47
+ """
48
+ if not words:
49
+ return []
50
+
51
+ total_chars = sum(len(w) for w in words)
52
+ seg_dur = seg_end - seg_start
53
+
54
+ result = []
55
+ cursor = seg_start
56
+
57
+ for i, w in enumerate(words):
58
+ if total_chars > 0:
59
+ fraction = len(w) / total_chars
60
+ else:
61
+ fraction = 1.0 / len(words)
62
+
63
+ w_dur = seg_dur * fraction
64
+ w_end = cursor + w_dur
65
+
66
+ # Clamp last word to seg_end to avoid float drift
67
+ if i == len(words) - 1:
68
+ w_end = seg_end
69
+
70
+ result.append({
71
+ "text": w,
72
+ "start": round(cursor, 3),
73
+ "end": round(w_end, 3),
74
+ })
75
+ cursor = w_end
76
+
77
+ return result
78
+
79
 
80
  # ─────────────────────────────────────────────────────────────────────────────
81
  class VideoProcessor:
 
108
  except Exception as e:
109
  logger.warning(f"⚠️ json_repair failed, using raw content: {e}")
110
 
 
111
  open_b = content.count("{")
112
  close_b = content.count("}")
113
  if open_b > close_b:
 
136
  for key in ("segments", "clips", "moments"):
137
  if key in segments_data and isinstance(segments_data[key], list):
138
  return segments_data[key]
 
139
  for v in segments_data.values():
140
  if isinstance(v, list):
141
  return v
 
150
 
151
  def analyze_impact(self,
152
  video_path,
153
+ source_language=None,
154
+ target_language=None,
155
  timestamp_mode="segments",
156
  progress_callback=None):
157
  """
158
  STT + AI viral-moment detection.
159
 
160
+ source_language : passed directly to Whisper.
161
+ None → Whisper auto-detects (slower but safe).
162
+ target_language : stored in data for process_clips to use for
163
+ translation and caption rendering.
 
164
  """
165
  if progress_callback:
166
  progress_callback(5, "Starting speech-to-text...")
 
171
 
172
  full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
173
  video_path,
174
+ language=source_language,
175
  skip_ai=True,
176
  timestamp_mode=timestamp_mode,
177
  )
 
180
 
181
  data = {
182
  "segments": full_segments,
183
+ "detected_language": detected_lang,
184
+ "target_language": target_language,
185
  "duration": duration,
186
  }
187
 
 
219
  f"{min(current_end, max_time)/60:.1f}m …"
220
  )
221
 
222
+ ai_res = analyze_transcript(chunk_transcript)
223
  logger.info(f"🤖 AI response type: {type(ai_res)}")
224
 
225
  try:
 
234
  if current_end >= max_time:
235
  break
236
 
 
237
  seen, unique = set(), []
238
  for s in all_ai_segs:
239
  st = s.get("start_time")
 
262
  """
263
  Cuts, styles, captions, and exports each viral clip.
264
 
265
+ FIX 1: After translation, _line1 and _line2 are re-computed from
266
+ the translated text using SubtitleSegmenter._split_into_lines.
267
+ Previously they were left as the original-language splits which
268
+ caused wrong line breaks in the translated captions.
269
+
270
+ ✅ FIX 2: Word timestamps after translation are distributed proportional
271
+ to character length (via _distribute_timestamps_by_length) instead of
272
+ uniform distribution, giving better sync in highlight_word mode.
273
  """
274
  logger.info("🎨 Phase 3: Style & Captions …")
275
  if progress_callback:
276
  progress_callback(60, "Generating clips …")
277
 
 
278
  video_duration = data.get("duration") or 0
279
  if not video_duration:
280
  try:
 
284
  logger.error(f"❌ Could not determine video duration: {e}")
285
 
286
  # ── Language resolution ───────────────────────────────────────────────
 
 
 
 
 
 
 
287
  detected_lang = data.get("detected_language", "en")
288
+ target_language = data.get("target_language")
289
 
 
290
  if hasattr(target_language, "value"):
291
  target_language = target_language.value
292
 
 
296
  and target_language != detected_lang
297
  )
298
 
 
299
  caption_lang = target_language if needs_translation else detected_lang
300
 
301
  translator = FreeTranslator() if needs_translation else None
 
352
  final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", out_name)
353
  os.makedirs(os.path.dirname(final_output), exist_ok=True)
354
 
355
+ # ── Cut clip ──────────────────────────────────────────────────
356
  current_video_clip = mpe.VideoFileClip(input_video_path)
357
  clip = current_video_clip.subclip(start, end)
358
 
359
  # ── Build segment_transcript ──────────────────────────────────
 
360
  segment_transcript = {"segments": []}
361
 
362
  for s in data["segments"]:
363
  if s["start"] >= end or s["end"] <= start:
364
  continue
365
 
366
+ new_seg = s.copy()
367
  new_seg["start"] = max(0, s["start"] - start)
368
  new_seg["end"] = min(end - start, s["end"] - start)
369
 
370
  if needs_translation and translator:
371
+ # ── Translate text ────────────────────────────────────
372
  try:
373
  translated_text, _ = translator.translate_text(
374
+ s["text"], target_language, detected_lang
375
  )
376
  except Exception as te:
377
  logger.warning(f"⚠️ Translation error: {te}")
378
  translated_text = s["text"]
379
 
380
  new_seg["text"] = translated_text
381
+
382
+ # FIX 1: Re-compute line splits from TRANSLATED text.
383
+ # Original _line1/_line2 are in the source language and
384
+ # will have wrong split points after translation.
385
+ translated_lines = SubtitleSegmenter._split_into_lines(
386
+ translated_text, _MAX_CHARS_PER_LINE
387
+ )
388
+ new_seg["_line1"] = translated_lines[0] if len(translated_lines) > 0 else translated_text
389
+ new_seg["_line2"] = translated_lines[1] if len(translated_lines) > 1 else ""
390
+
391
+ # ✅ FIX 2: Distribute word timestamps proportional to
392
+ # character length for better highlight_word sync.
393
+ translated_words = translated_text.split()
394
+ new_seg["words"] = _distribute_timestamps_by_length(
395
+ translated_words,
396
+ new_seg["start"],
397
+ new_seg["end"],
398
+ )
399
+
400
  else:
401
+ # No translation adjust existing word timestamps
402
  if "words" in s:
403
  new_seg["words"] = [
404
  {
 
409
  for w in s["words"]
410
  if w["start"] < end and w["end"] > start
411
  ]
412
+ # _line1/_line2 already correct from SubtitleSegmenter
413
+ # (already in source lang which IS caption lang here)
414
 
415
  segment_transcript["segments"].append(new_seg)
416
 
 
467
  """
468
  End-to-end pipeline: STT → AI analysis → clip export.
469
 
470
+ Important kwargs:
471
+ source_language : language of the original video passed to Whisper.
472
+ If not set → Whisper auto-detects.
473
+ language : desired output language (translation + captions).
474
+ If same as sourceno translation.
475
  caption_mode : sentence | word | highlight_word
476
+ caption_style : classic | modern_glow | tiktok_bold |
477
  """
478
  try:
479
  processor = VideoProcessor(model_size=model_size)
480
 
481
  caption_mode = kwargs.get("caption_mode", "sentence")
482
 
483
+ # highlight_word and word modes both need word-level timestamps
484
  timestamp_mode = (
485
  "words"
486
  if caption_mode in ("word", "highlight_word")
 
490
  # Phase 1 + 2: STT + AI analysis
491
  viral_segments, duration, stt_data = processor.analyze_impact(
492
  video_path,
493
+ source_language = kwargs.get("source_language"),
494
+ target_language = kwargs.get("language"),
495
  timestamp_mode = timestamp_mode,
496
  )
497
 
 
499
  logger.warning("⚠️ No viral segments found.")
500
  return []
501
 
 
502
  best_clips = processor.get_best_segments(viral_segments, duration)
503
 
504
  # Phase 3: render
requirements.txt CHANGED
@@ -15,5 +15,5 @@ imageio-ffmpeg==0.4.8
15
  openai>=1.0.0
16
  scipy
17
  json_repair
18
- cryptography
19
- firebase-admin
 
15
  openai>=1.0.0
16
  scipy
17
  json_repair
18
+ tiktoken
19
+ pydantic