Spaces:

ex510
/

auto_cliper

Sleeping

App Files Files Community

aliSaac510 committed on Feb 19

Commit

0890748

1 Parent(s): d392f23

fix english caption

Browse files

Files changed (7) hide show

core/analyze.py +136 -38
core/config.py +187 -223
core/free_translator.py +8 -16
core/stt.py +31 -78
core/subtitle_manager.py +183 -149
processor.py +104 -56
requirements.txt +2 -2

core/analyze.py CHANGED Viewed

@@ -1,10 +1,16 @@
 import os
 import time
 from openai import OpenAI
 from dotenv import load_dotenv
 load_dotenv()
 # Configure OpenAI Client
 api_key = os.getenv("OPENROUTER_API_KEY")
 client = OpenAI(
@@ -12,50 +18,57 @@ client = OpenAI(
     api_key=api_key
 )
-def analyze_transcript_gemini(transcript):
-    """Analyze transcript using OpenRouter (DeepSeek) via Env Key."""
-    prompt = f"""
-    You are an expert video editor and viral content strategist. Your task is to identify the most engaging segments from the provided transcript that are suitable for short-form video platforms like TikTok, Reels, and YouTube Shorts.
-    STRICT JSON OUTPUT FORMAT REQUIRED:
-    You must output ONLY valid JSON. Do not include any markdown formatting (like ```json ... ```), explanations, or additional text outside the JSON object.
-    The JSON structure must be exactly as follows:
     {{
       "segments": [
         {{
           "start_time": <float, start time in seconds>,
           "end_time": <float, end time in seconds>,
-          "duration": <float, duration in seconds>,
           "description": "<string, brief summary of the clip content 10 words max>",
           "viral_score": <float, score from 0-10 indicating viral potential>,
           "reason": "<string, explanation of why this segment is engaging>"
         }}
       ]
     }}
-    SELECTION CRITERIA:
-    1.  **Standalone Quality**: Each clip must make sense on its own without prior context. Avoid starting with conjunctions like "And", "But", "So" unless they are part of a complete thought.
-    2.  **Engagement**: Look for strong hooks, emotional moments, humor, surprising facts, or actionable advice.
-    3.  **Duration**: Prioritize clips between 30 and 180 seconds.
-    4.  **Completeness**: Ensure the clip has a clear beginning and end. Do not cut off sentences.
-    IMPORTANT:
-    - Return valid JSON only.
-    - If no suitable segments are found, return {{ "segments": [] }}.
-    - Ensure all strings are properly escaped.
     Transcript to Analyze:
     {transcript}
     """
     max_retries = 3
     base_delay = 5
     for attempt in range(max_retries):
         try:
-            # Simple direct request
             response = client.chat.completions.create(
                 model="deepseek/deepseek-chat",
                 messages=[
@@ -68,7 +81,7 @@ def analyze_transcript_gemini(transcript):
                 },
                 temperature=0.7,
             )
             content = response.choices[0].message.content
             print(f"🤖 AI Raw Response (First 500 chars): {content[:500]}...")
@@ -77,18 +90,14 @@ def analyze_transcript_gemini(transcript):
                 content = content.split("```json")[1].split("```")[0].strip()
             elif "```" in content:
                 content = content.split("```")[1].split("```")[0].strip()
-            # Debugging: Print segment count
-            try:
-                import json
-                data = json.loads(content)
-                segments_count = len(data.get("segments", []))
-                print(f"🤖 AI Response parsed successfully: Found {segments_count} segments.")
-            except Exception as e:
-                print(f"⚠️ Failed to parse AI response for logging: {e}")
             return {"content": content}
         except Exception as e:
             print(f"❌ Error in OpenRouter analysis: {e}")
             if attempt < max_retries - 1:
@@ -102,10 +111,99 @@ def analyze_transcript_gemini(transcript):
     return {"content": '{"segments": []}'}
-# إعداد متغير البيئة
 if __name__ == "__main__":
-    # اختبار سريع
-    test_transcript = "[0.0 - 5.0] This is amazing content about viral videos!"
-    result = analyze_transcript_gemini(test_transcript)
-    print("Gemini Analysis Result:", result)

 import os
 import time
+import json
+import logging
 from openai import OpenAI
 from dotenv import load_dotenv
 load_dotenv()
+# Setup Logger
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
 # Configure OpenAI Client
 api_key = os.getenv("OPENROUTER_API_KEY")
 client = OpenAI(
     api_key=api_key
 )
+def analyze_transcript(transcript):
+    """Analyze transcript using OpenRouter via Env Key."""
+    prompt = f"""
+    You are an expert video editor and viral content strategist.
+    Your task is to identify the most engaging segments from the provided transcript
+    that are suitable for short-form video platforms like TikTok, Reels, and YouTube Shorts.
+    **STRICT REQUIREMENTS:**
+    1. **Duration**: duration MUST be between 60 seconds and 180 seconds (3 minutes)
+    2. **Context Preservation**: Each segment must be a complete thought - no abrupt cuts
+    3. **Sentence Boundaries**: Start at the beginning of a sentence, end at a natural conclusion
+    4. **Meaning Coherence**: The clip must make sense on its own without requiring prior context
+    **SELECTION CRITERIA:**
+    - Strong hooks that grab attention
+    - Emotional moments, humor, or surprising revelations
+    - Clear beginning, middle, and satisfying conclusion
+    - High shareability potential
+    **JSON OUTPUT FORMAT (REQUIRED):**
     {{
       "segments": [
         {{
           "start_time": <float, start time in seconds>,
           "end_time": <float, end time in seconds>,
+          "duration": <float, duration in seconds (30-180)>,
           "description": "<string, brief summary of the clip content 10 words max>",
           "viral_score": <float, score from 0-10 indicating viral potential>,
           "reason": "<string, explanation of why this segment is engaging>"
         }}
       ]
     }}
+    **IMPORTANT NOTES:**
+    - If no suitable segments are found, return {{ "segments": [] }}
+    - Ensure all strings are properly escaped
+    - Each segment must be a complete, coherent thought
+    - Avoid cutting mid-sentence or mid-thought
     Transcript to Analyze:
     {transcript}
     """
     max_retries = 3
     base_delay = 5
+    content = None  # FIX: initialize content to avoid UnboundLocalError
     for attempt in range(max_retries):
         try:
             response = client.chat.completions.create(
                 model="deepseek/deepseek-chat",
                 messages=[
                 },
                 temperature=0.7,
             )
             content = response.choices[0].message.content
             print(f"🤖 AI Raw Response (First 500 chars): {content[:500]}...")
                 content = content.split("```json")[1].split("```")[0].strip()
             elif "```" in content:
                 content = content.split("```")[1].split("```")[0].strip()
+            # Validate JSON and log segment count
+            data = json.loads(content)
+            segments_count = len(data.get("segments", []))
+            print(f"🤖 AI Response parsed successfully: Found {segments_count} segments.")
             return {"content": content}
         except Exception as e:
             print(f"❌ Error in OpenRouter analysis: {e}")
             if attempt < max_retries - 1:
     return {"content": '{"segments": []}'}
+# Smart chunking system for long transcripts
def smart_chunk_transcript(transcript, max_tokens=4000):
    """Split a transcript into chunks that end on sentence boundaries.

    Sentences are detected by splitting on ``'. '`` after newlines have been
    replaced with spaces. Sentences are packed greedily into a chunk until
    adding the next one would exceed ``max_tokens``.

    Args:
        transcript: Full transcript text. ``None`` or blank text is accepted.
        max_tokens: Size budget per chunk. NOTE: despite the name, this is
            measured in whitespace-separated words, not model tokens.

    Returns:
        A list of chunk strings in original order. Empty list when the
        transcript contains no text. A single sentence longer than
        ``max_tokens`` is kept whole in its own chunk rather than being cut.
    """
    if not transcript:
        return []

    # Drop empty fragments so blank input does not produce a bogus "." chunk.
    pieces = (part.strip() for part in transcript.replace('\n', ' ').split('. '))
    sentences = [part for part in pieces if part]

    def join_sentences(parts):
        text = '. '.join(parts)
        # split('. ') strips the period from every sentence except possibly
        # the last one; only restore it when no terminator is already there.
        if not text.endswith(('.', '!', '?')):
            text += '.'
        return text

    chunks = []
    current_chunk = []
    current_length = 0
    for sentence in sentences:
        sentence_length = len(sentence.split())
        # Close the running chunk only if it already holds something;
        # otherwise an oversized sentence would yield an empty chunk.
        if current_chunk and current_length + sentence_length > max_tokens:
            chunks.append(join_sentences(current_chunk))
            current_chunk = [sentence]
            current_length = sentence_length
        else:
            current_chunk.append(sentence)
            current_length += sentence_length

    if current_chunk:
        chunks.append(join_sentences(current_chunk))
    return chunks
def analyze_transcript_with_chunking(transcript):
    """Analyze a transcript, chunking it first when it is long.

    Transcripts of at most 3000 whitespace-separated words are passed straight
    to ``analyze_transcript``. Longer ones are split with
    ``smart_chunk_transcript``, each chunk is analyzed separately, and the
    resulting segments are merged: sorted by ``viral_score`` (highest first),
    de-duplicated on start/end time rounded to whole seconds, and capped at 10.

    Args:
        transcript: Full transcript text.

    Returns:
        A dict ``{"content": <JSON string>}`` whose JSON holds a ``"segments"``
        list. For long transcripts where no chunk yields a usable segment the
        list is empty; the over-long transcript is not re-sent in one request.
    """

    def to_float(value):
        # Model output is untrusted: scores/timestamps may be strings or null.
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    if len(transcript.split()) <= 3000:
        return analyze_transcript(transcript)

    logger.info("📦 Transcript too long, using smart chunking...")
    chunks = smart_chunk_transcript(transcript, max_tokens=3000)

    all_segments = []
    for number, chunk in enumerate(chunks, start=1):
        logger.info("🔄 Processing chunk %d/%d...", number, len(chunks))
        result = analyze_transcript(chunk)
        try:
            data = json.loads(result['content'])
        except (KeyError, TypeError, ValueError) as e:
            # One bad chunk must not discard the segments from the others.
            logger.warning("⚠️ Failed to parse chunk %d: %s", number, e)
            continue
        segments = data.get('segments') if isinstance(data, dict) else None
        if isinstance(segments, list):
            # Skip malformed entries so the merge below can rely on dicts.
            all_segments.extend(seg for seg in segments if isinstance(seg, dict))

    all_segments.sort(key=lambda seg: to_float(seg.get('viral_score', 0)), reverse=True)

    unique_segments = []
    seen_times = set()
    for seg in all_segments:
        # The list is already sorted, so the first segment seen for a given
        # time window is the highest-scoring one.
        time_key = (
            f"{to_float(seg.get('start_time', 0)):.0f}-"
            f"{to_float(seg.get('end_time', 0)):.0f}"
        )
        if time_key not in seen_times:
            seen_times.add(time_key)
            unique_segments.append(seg)

    return {"content": json.dumps({"segments": unique_segments[:10]})}
+# Testing
 if __name__ == "__main__":
+    test_transcript = """
+    [0.0 - 5.0] Welcome to today's video about productivity hacks that actually work.
+    [5.0 - 15.0] The first hack is something I call the 2-minute rule. If something takes less than 2 minutes, do it immediately.
+    [15.0 - 30.0] This simple rule has transformed my life. I used to procrastinate on small tasks, but now I handle them right away.
+    [30.0 - 45.0] The second hack is batching similar tasks together. Instead of checking email 20 times a day, I check it twice.
+    [45.0 - 60.0] This has saved me hours every week. I batch my emails, phone calls, and even errands.
+    [60.0 - 90.0] The third hack is the Pomodoro Technique. Work for 25 minutes, then take a 5-minute break.
+    [90.0 - 120.0] This technique helps me stay focused and avoid burnout. I get more done in less time.
+    """
+    logger.info("🧪 Testing AI Analysis...")
+    result = analyze_transcript_with_chunking(test_transcript)
+    try:
+        data = json.loads(result['content'])
+        segments = data.get('segments', [])
+        logger.info(f"✅ Found {len(segments)} viral segments:")
+        for i, seg in enumerate(segments):
+            logger.info(f"  #{i+1} [{seg['start_time']:.0f}s-{seg['end_time']:.0f}s] "
+                        f"Score: {seg['viral_score']}/10 - {seg['description']}")
+    except Exception as e:
+        logger.error(f"❌ Error parsing result: {e}")
+        logger.info(f"Raw result: {result}")

core/config.py CHANGED Viewed

@@ -13,11 +13,11 @@ Cyrillic:       ru, uk (Ukrainian)
 Hebrew:         he
 Thai:           th
-Font Priority (viral social media 2024-2025):
-  Arabic   → Tajawal  > Cairo  > Almarai  > NotoSansArabic (fallback)
-  Latin    → Montserrat > Rubik > Oswald > Roboto
-  Cyrillic → Montserrat (has Cyrillic) > Roboto
-  CJK      → Noto Sans SC/JP/KR (only reliable option for caption use)
 """
 import os
 import re
@@ -35,193 +35,112 @@ class Config:
     LOGS_DIR    = os.path.join(BASE_DIR, "logs")
     # ─────────────────────────────────────────────────────────────────────────
-    # Font Registry
-    # All URLs use Google Fonts CSS2 API — wght@700/800 = Bold
     # ─────────────────────────────────────────────────────────────────────────
     FONTS = {
-        # ── Latin / Universal (support Cyrillic + Latin) ──────────────────────
-        # ✅ Montserrat has BOTH Latin AND Cyrillic — #1 viral font
         "Montserrat-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap",
-        # ✅ Rubik: modern, supports Latin + Cyrillic + Hebrew(!)
         "Rubik-Bold.ttf":             "https://fonts.googleapis.com/css2?family=Rubik:wght@700&display=swap",
-        # Oswald: condensed Latin only — fast speech / lots of words
         "Oswald-Bold.ttf":            "https://fonts.googleapis.com/css2?family=Oswald:wght@700&display=swap",
-        # Roboto: clean baseline, Latin + Cyrillic + Greek
         "Roboto-Bold.ttf":            "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",
-        # ── Arabic Script (ar, fa, ur) ─────────────────────────────────────────
-        # ✅ #1 choice: Tajawal — modern social media Arabic, youth-oriented
         "Tajawal-Bold.ttf":           "https://fonts.googleapis.com/css2?family=Tajawal:wght@700&display=swap",
-        # Cairo: clean, highly legible — great for captions
         "Cairo-Bold.ttf":             "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
-        # Almarai: rounded, friendly — Gulf & Egyptian content
         "Almarai-Bold.ttf":           "https://fonts.googleapis.com/css2?family=Almarai:wght@800&display=swap",
-        # ✅ Noto Sans Arabic — universal fallback, covers ALL Arabic Unicode
         "NotoSansArabic-Bold.ttf":    "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
-        # ── Persian / Farsi (fa) — Arabic script with Persian extensions ───────
-        # ✅ Vazirmatn: most popular Persian font on social media 2024
         "Vazirmatn-Bold.ttf":         "https://fonts.googleapis.com/css2?family=Vazirmatn:wght@700&display=swap",
-        # ── Urdu (ur) — Nastaliq style not available on Google Fonts ──────────
-        # Using Noto Sans Arabic as best available web fallback
-        # Note: Authentic Urdu uses Nastaliq but it's not web-standard yet
         "NotoSansArabicUrdu-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
-        # ── Hebrew (he) ────────────────────────────────────────────────────────
-        # ✅ Rubik supports Hebrew natively (same font as Latin Rubik!)
-        # Frank Ruhl Libre: traditional Hebrew newspaper feel
         "FrankRuhlLibre-Bold.ttf":    "https://fonts.googleapis.com/css2?family=Frank+Ruhl+Libre:wght@700&display=swap",
-        # ✅ Heebo: modern clean Hebrew for captions
         "Heebo-Bold.ttf":             "https://fonts.googleapis.com/css2?family=Heebo:wght@700&display=swap",
         # ── CJK ───────────────────────────────────────────────────────────────
-        # Chinese Simplified
         "NotoSansSC-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
-        # Chinese Traditional
         "NotoSansTC-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Noto+Sans+TC:wght@700&display=swap",
-        # Japanese
         "NotoSansJP-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
-        # ✅ Korean — Noto Sans KR
         "NotoSansKR-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@700&display=swap",
-        # ── Devanagari (hi, mr, ne) ────────────────────────────────────────────
-        "NotoSansDevanagari-Bold.ttf":"https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
-        # ✅ Poppins: has Devanagari + Latin — great for bilingual Hindi content
         "Poppins-Bold.ttf":           "https://fonts.googleapis.com/css2?family=Poppins:wght@700&display=swap",
-        # ── Thai (th) ──────────────────────────────────────────────────────────
-        # ✅ Sarabun: most popular Thai social media font, clean & modern
         "Sarabun-Bold.ttf":           "https://fonts.googleapis.com/css2?family=Sarabun:wght@700&display=swap",
-        # Noto Sans Thai: reliable fallback
         "NotoSansThai-Bold.ttf":      "https://fonts.googleapis.com/css2?family=Noto+Sans+Thai:wght@700&display=swap",
-        # ── Ukrainian (uk) — Cyrillic ──────────────────────────────────────────
-        # Montserrat covers Ukrainian Cyrillic, but for dedicated support:
         "NotoSans-Bold.ttf":          "https://fonts.googleapis.com/css2?family=Noto+Sans:wght@700&display=swap",
     }
     # ─────────────────────────────────────────────────────────────────────────
-    # Language → Best Caption Font
-    #
-    # Priority: most viral / readable on mobile screens
-    # Rule: non-Latin scripts ALWAYS override style font
     # ─────────────────────────────────────────────────────────────────────────
     LANGUAGE_FONT_MAP = {
-        # ── Arabic Script ──────────────────────────────────────────────────────
-        # All three share Arabic script but have different letter forms
-        "ar": "Tajawal-Bold.ttf",           # ✅ #1 Arabic social media font
-        "fa": "Vazirmatn-Bold.ttf",         # ✅ #1 Persian/Farsi social media font
-        "ur": "NotoSansArabic-Bold.ttf",    # Best web fallback for Urdu
-        # ── Hebrew ─────────────────────────────────────────────────────────────
-        "he": "Heebo-Bold.ttf",             # ✅ Modern, clean Hebrew captions
-        # ── CJK ───────────────────────────────────────────────────────────────
-        "zh":    "NotoSansSC-Bold.ttf",     # Simplified Chinese (mainland)
-        "zh-tw": "NotoSansTC-Bold.ttf",     # Traditional Chinese (Taiwan/HK)
-        "ja":    "NotoSansJP-Bold.ttf",     # Japanese
-        "ko":    "NotoSansKR-Bold.ttf",     # ✅ Korean (was missing!)
-        # ── Devanagari ────────────────────────────────────────────────────────
-        "hi": "NotoSansDevanagari-Bold.ttf", # Hindi
-        "mr": "NotoSansDevanagari-Bold.ttf", # Marathi
-        "ne": "NotoSansDevanagari-Bold.ttf", # Nepali
-        # ── Thai ──────────────────────────────────────────────────────────────
-        "th": "Sarabun-Bold.ttf",           # ✅ Thai (was missing!)
-        # ── Cyrillic ──────────────────────────────────────────────────────────
-        # ✅ Montserrat has full Cyrillic support AND is the viral Latin font
-        # This means Russian content gets the same premium feel
-        "ru": "Montserrat-Bold.ttf",        # ⬆️ Upgraded from Roboto
-        "uk": "Montserrat-Bold.ttf",        # Ukrainian (Cyrillic) — was missing
-        # ── Latin Languages ────────────────────────────────────────────────────
-        "en": "Montserrat-Bold.ttf",        # English
-        "fr": "Montserrat-Bold.ttf",        # French
-        "es": "Montserrat-Bold.ttf",        # Spanish
-        "de": "Montserrat-Bold.ttf",        # German
-        "pt": "Montserrat-Bold.ttf",        # Portuguese (Brazil + Portugal)
-        "it": "Montserrat-Bold.ttf",        # Italian
-        "tr": "Montserrat-Bold.ttf",        # Turkish (Latin script since 1928)
-        "nl": "Montserrat-Bold.ttf",        # ✅ Dutch (was missing)
-        "pl": "Montserrat-Bold.ttf",        # ✅ Polish (was missing)
-        "id": "Montserrat-Bold.ttf",        # ✅ Indonesian (was missing)
-        "vi": "Roboto-Bold.ttf",            # ✅ Vietnamese — Roboto has better
-                                            #    diacritic coverage (tones)
-        "sv": "Montserrat-Bold.ttf",        # ✅ Swedish (was missing)
-        "ro": "Montserrat-Bold.ttf",        # ✅ Romanian (was missing)
-        # ── Fallback ───────────────────────────────────────────────────────────
-        # Noto Sans: designed to cover ALL Unicode — zero missing glyphs
-        # Better than Montserrat for unknown scripts
-        "default": "NotoSans-Bold.ttf",     # ⬆️ Upgraded from Montserrat
     }
-    # ─────────────────────────────────────────────────────────────────────────
-    # Caption Style → Preferred Font (Latin-only styles)
-    #
-    # IMPORTANT: Non-Latin scripts ALWAYS use LANGUAGE_FONT_MAP regardless
-    # of style. This map only applies when language is Latin/Cyrillic.
-    # ─────────────────────────────────────────────────────────────────────────
     STYLE_FONT_MAP = {
-        # Montserrat: #1 viral font — Alex Hormozi, MrBeast, Sidemen
         "classic":       "Montserrat-Bold.ttf",
-        # Rubik: distinctive modern feel — supports Latin + Cyrillic + Hebrew
-        # ✅ Better than original for multilingual content
         "modern_glow":   "Rubik-Bold.ttf",
-        # Montserrat: proven viral MrBeast aesthetic
         "tiktok_bold":   "Montserrat-Bold.ttf",
-        # ✅ Changed: Oswald has NO Arabic/CJK support
-        # Using Montserrat which handles more scripts gracefully
-        # For pure Latin content, Oswald (condensed) is still good
-        "tiktok_neon":   "Montserrat-Bold.ttf",  # was Oswald-Bold (no Arabic!)
-        # Rubik: clean educator look + multilingual
         "youtube_clean": "Rubik-Bold.ttf",
-        # Montserrat: karaoke / game-show energy
         "youtube_box":   "Montserrat-Bold.ttf",
     }
-    # ─────────────────────────────────────────────────────────────────────────
-    # Unicode Range → Language Detection
-    # Used in ensure_font() for script auto-detection
-    # ─────────────────────────────────────────────────────────────────────────
     UNICODE_SCRIPT_RANGES = [
-        # (start, end, language_code)
-        # Order matters: more specific ranges first
-        ("\u0600", "\u06FF", "ar"),   # Arabic / Persian / Urdu (same block)
-        ("\u0750", "\u077F", "ar"),   # Arabic Supplement
-        ("\u08A0", "\u08FF", "ar"),   # Arabic Extended-A
-        ("\u0590", "\u05FF", "he"),   # Hebrew
-        ("\uAC00", "\uD7AF", "ko"),   # Korean Hangul syllables  ✅ was missing
-        ("\u1100", "\u11FF", "ko"),   # Korean Jamo              ✅ was missing
-        ("\u4E00", "\u9FFF", "zh"),   # CJK Unified Ideographs
-        ("\u3400", "\u4DBF", "zh"),   # CJK Extension A
-        ("\u3040", "\u309F", "ja"),   # Hiragana
-        ("\u30A0", "\u30FF", "ja"),   # Katakana
-        ("\u0900", "\u097F", "hi"),   # Devanagari
-        ("\u0E00", "\u0E7F", "th"),   # Thai                     ✅ was missing
-        ("\u0400", "\u04FF", "ru"),   # Cyrillic
-        ("\u0500", "\u052F", "ru"),   # Cyrillic Supplement
     ]
-    # ─────────────────────────────────────────────────────────────────────────
-    # RTL Languages (Right-to-Left)
-    # Used for text rendering direction
-    # ─────────────────────────────────────────────────────────────────────────
     RTL_LANGUAGES = {"ar", "fa", "ur", "he"}
-    # ─────────────────────────────────────────────────────────────────────────
-    # Video settings
-    # ─────────────────────────────────────────────────────────────────────────
     DEFAULT_SIZE         = (1080, 1920)
     CHUNK_SIZE_SECONDS   = 600
     OVERLAP_SECONDS      = 60
@@ -243,61 +162,34 @@ class Config:
             os.makedirs(d, exist_ok=True)
     # ─────────────────────────────────────────────────────────────────────────
-    # Language detection from text
     # ─────────────────────────────────────────────────────────────────────────
     @classmethod
     def detect_language_from_text(cls, text: str) -> str | None:
-        """
-        Detects script/language from Unicode character ranges.
-        Returns language code or None if only Latin/ASCII detected.
-        More reliable than the original inline checks in ensure_font()
-        because it covers Korean, Thai, Hebrew, Persian, and more.
-        """
         if not text:
             return None
         for start, end, lang in cls.UNICODE_SCRIPT_RANGES:
             if any(start <= c <= end for c in text):
                 return lang
-        return None  # Latin / unknown
     @classmethod
     def is_rtl(cls, language: str) -> bool:
-        """Returns True if language is right-to-left."""
         return language in cls.RTL_LANGUAGES
     @classmethod
     def get_font_for_language(cls, language: str, style_name: str = None) -> str:
-        """
-        Returns the best font filename for a given language + style combination.
-        Priority:
-          1. Non-Latin scripts → always use LANGUAGE_FONT_MAP (ignores style)
-          2. Latin with explicit style → use STYLE_FONT_MAP
-          3. Latin with known language → use LANGUAGE_FONT_MAP
-          4. Unknown → use LANGUAGE_FONT_MAP default
-        """
         NON_LATIN = {
             "ar", "fa", "ur", "he",
             "zh", "zh-tw", "ja", "ko",
-            "hi", "mr", "ne",
-            "th",
         }
-        # Non-Latin: always use language map regardless of style
         if language in NON_LATIN:
             return cls.LANGUAGE_FONT_MAP.get(language, cls.LANGUAGE_FONT_MAP["default"])
-        # Latin/Cyrillic with style preference
         if style_name and style_name in cls.STYLE_FONT_MAP:
             return cls.STYLE_FONT_MAP[style_name]
-        # Latin with known language
         if language in cls.LANGUAGE_FONT_MAP:
             return cls.LANGUAGE_FONT_MAP[language]
         return cls.LANGUAGE_FONT_MAP["default"]
     # ─────────────────────────────────────────────────────────────────────────
@@ -307,35 +199,65 @@ class Config:
     def get_urls(css_content: str, prefer_latin: bool = True) -> list:
         """
         Extracts font file URLs from a Google Fonts CSS response.
-        Prefers 'latin' subset for Latin fonts, first subset for others.
         """
         pattern = re.compile(
             r'/\*\s*\[?\d*\]?\s*([\w\-]+)\s*\*/[^}]*?url\(([^)]+)\)',
             re.DOTALL,
         )
         pairs = pattern.findall(css_content)
-        if not pairs:
-            bare = re.findall(r'url\(([^)]+)\)', css_content)
-            return bare if bare else []
-        subset_map = {subset.lower(): url.strip() for subset, url in pairs}
-        if prefer_latin:
-            for key in ("latin", "latin-ext"):
-                if key in subset_map:
-                    return [subset_map[key]]
-            return [list(subset_map.values())[-1]]
-        else:
-            # Arabic / CJK / etc: first subset = script-specific
-            return [list(subset_map.values())[0]]
     @staticmethod
     def download_font_from_css(css_url: str, output_path: str) -> bool:
         """
-        Downloads the correct font file for a given CSS URL.
-        Auto-detects Latin vs non-Latin based on filename.
         """
         NON_LATIN_KEYWORDS = (
             "arabic", "noto", "devanagari", "sc", "jp", "kr", "tc",
             "thai", "sarabun", "heebo", "frank", "vazir", "tajawal",
@@ -345,39 +267,81 @@ class Config:
         is_non_latin = any(kw in filename for kw in NON_LATIN_KEYWORDS)
         prefer_latin = not is_non_latin
-        headers = {
-            "User-Agent": (
-                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-                "AppleWebKit/537.36 (KHTML, like Gecko) "
-                "Chrome/124.0.0.0 Safari/537.36"
-            )
-        }
-        try:
-            resp = requests.get(css_url, headers=headers, timeout=15)
-            resp.raise_for_status()
-            urls = Config.get_urls(resp.text, prefer_latin=prefer_latin)
-            if not urls:
-                print(f"❌ No font URLs found in CSS: {css_url}")
-                return False
-            font_url  = urls[0]
-            subset_lbl = "latin" if prefer_latin else "script"
-            print(f"⬇️  Downloading font ({subset_lbl}) → {font_url}")
-            font_resp = requests.get(font_url, headers=headers, timeout=30)
-            font_resp.raise_for_status()
-            with open(output_path, "wb") as f:
-                f.write(font_resp.content)
-            print(f"✅ Font saved: {output_path}")
-            return True
-        except requests.RequestException as e:
-            print(f"❌ Network error: {e}")
-            return False
-        except Exception as e:
-            print(f"❌ Unexpected error: {e}")
-            return False

 Hebrew:         he
 Thai:           th
+FONT DOWNLOAD FIX:
+  Google Fonts returns woff2 for modern browsers — Pillow cannot load woff2.
+  Solution: use an old IE User-Agent to force Google Fonts to return TTF URLs.
+    Modern UA  → fonts.gstatic.com/s/cairo/xxx.woff2   ← Pillow FAILS
+    Old IE UA  → fonts.gstatic.com/s/cairo/xxx.ttf     ← Pillow works ✅
 """
 import os
 import re
     LOGS_DIR    = os.path.join(BASE_DIR, "logs")
     # ─────────────────────────────────────────────────────────────────────────
+    # Font Registry — Google Fonts CSS2 API URLs
     # ─────────────────────────────────────────────────────────────────────────
     FONTS = {
+        # ── Latin / Universal ──────────────────────────────────────────────────
         "Montserrat-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap",
         "Rubik-Bold.ttf":             "https://fonts.googleapis.com/css2?family=Rubik:wght@700&display=swap",
         "Oswald-Bold.ttf":            "https://fonts.googleapis.com/css2?family=Oswald:wght@700&display=swap",
         "Roboto-Bold.ttf":            "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",
+        # ── Arabic Script ──────────────────────────────────────────────────────
         "Tajawal-Bold.ttf":           "https://fonts.googleapis.com/css2?family=Tajawal:wght@700&display=swap",
         "Cairo-Bold.ttf":             "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
         "Almarai-Bold.ttf":           "https://fonts.googleapis.com/css2?family=Almarai:wght@800&display=swap",
         "NotoSansArabic-Bold.ttf":    "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
+        # ── Persian ────────────────────────────────────────────────────────────
         "Vazirmatn-Bold.ttf":         "https://fonts.googleapis.com/css2?family=Vazirmatn:wght@700&display=swap",
+        # ── Urdu ───────────────────────────────────────────────────────────────
         "NotoSansArabicUrdu-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
+        # ── Hebrew ─────────────────────────────────────────────────────────────
         "FrankRuhlLibre-Bold.ttf":    "https://fonts.googleapis.com/css2?family=Frank+Ruhl+Libre:wght@700&display=swap",
         "Heebo-Bold.ttf":             "https://fonts.googleapis.com/css2?family=Heebo:wght@700&display=swap",
         # ── CJK ───────────────────────────────────────────────────────────────
         "NotoSansSC-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
         "NotoSansTC-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Noto+Sans+TC:wght@700&display=swap",
         "NotoSansJP-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
         "NotoSansKR-Bold.ttf":        "https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@700&display=swap",
+        # ── Devanagari ────────────────────────────────────────────────────────
+        "NotoSansDevanagari-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
         "Poppins-Bold.ttf":           "https://fonts.googleapis.com/css2?family=Poppins:wght@700&display=swap",
+        # ── Thai ──────────────────────────────────────────────────────────────
         "Sarabun-Bold.ttf":           "https://fonts.googleapis.com/css2?family=Sarabun:wght@700&display=swap",
         "NotoSansThai-Bold.ttf":      "https://fonts.googleapis.com/css2?family=Noto+Sans+Thai:wght@700&display=swap",
+        # ── Universal fallback ─────────────────────────────────────────────────
         "NotoSans-Bold.ttf":          "https://fonts.googleapis.com/css2?family=Noto+Sans:wght@700&display=swap",
     }
     # ─────────────────────────────────────────────────────────────────────────
+    # Language → Font
     # ─────────────────────────────────────────────────────────────────────────
     LANGUAGE_FONT_MAP = {
+        "ar": "Tajawal-Bold.ttf",
+        "fa": "Vazirmatn-Bold.ttf",
+        "ur": "NotoSansArabic-Bold.ttf",
+        "he": "Heebo-Bold.ttf",
+        "zh":    "NotoSansSC-Bold.ttf",
+        "zh-tw": "NotoSansTC-Bold.ttf",
+        "ja":    "NotoSansJP-Bold.ttf",
+        "ko":    "NotoSansKR-Bold.ttf",
+        "hi": "NotoSansDevanagari-Bold.ttf",
+        "mr": "NotoSansDevanagari-Bold.ttf",
+        "ne": "NotoSansDevanagari-Bold.ttf",
+        "th": "Sarabun-Bold.ttf",
+        "ru": "Montserrat-Bold.ttf",
+        "uk": "Montserrat-Bold.ttf",
+        "en": "Montserrat-Bold.ttf",
+        "fr": "Montserrat-Bold.ttf",
+        "es": "Montserrat-Bold.ttf",
+        "de": "Montserrat-Bold.ttf",
+        "pt": "Montserrat-Bold.ttf",
+        "it": "Montserrat-Bold.ttf",
+        "tr": "Montserrat-Bold.ttf",
+        "nl": "Montserrat-Bold.ttf",
+        "pl": "Montserrat-Bold.ttf",
+        "id": "Montserrat-Bold.ttf",
+        "vi": "Roboto-Bold.ttf",
+        "sv": "Montserrat-Bold.ttf",
+        "ro": "Montserrat-Bold.ttf",
+        "default": "NotoSans-Bold.ttf",
     }
     STYLE_FONT_MAP = {
         "classic":       "Montserrat-Bold.ttf",
         "modern_glow":   "Rubik-Bold.ttf",
         "tiktok_bold":   "Montserrat-Bold.ttf",
+        "tiktok_neon":   "Montserrat-Bold.ttf",
         "youtube_clean": "Rubik-Bold.ttf",
         "youtube_box":   "Montserrat-Bold.ttf",
     }
     # (first_char, last_char, language_code) triples. detect_language_from_text
     # walks this table top to bottom and returns the code of the first range
     # that contains any character of the text, so ORDER IS SIGNIFICANT for
     # mixed-script input: the kana ranges must come before the CJK ideograph
     # ranges, otherwise Japanese text that contains kanji is reported as "zh".
     # Hangul is listed ahead of the ideographs for the same reason.
     UNICODE_SCRIPT_RANGES = [
         ("\u0600", "\u06FF", "ar"),
         ("\u0750", "\u077F", "ar"),
         ("\u08A0", "\u08FF", "ar"),
         ("\u0590", "\u05FF", "he"),
         ("\uAC00", "\uD7AF", "ko"),
         ("\u1100", "\u11FF", "ko"),
         ("\u3040", "\u309F", "ja"),
         ("\u30A0", "\u30FF", "ja"),
         ("\u4E00", "\u9FFF", "zh"),
         ("\u3400", "\u4DBF", "zh"),
         ("\u0900", "\u097F", "hi"),
         ("\u0E00", "\u0E7F", "th"),
         ("\u0400", "\u04FF", "ru"),
         ("\u0500", "\u052F", "ru"),
     ]
     RTL_LANGUAGES = {"ar", "fa", "ur", "he"}
     DEFAULT_SIZE         = (1080, 1920)
     CHUNK_SIZE_SECONDS   = 600
     OVERLAP_SECONDS      = 60
             os.makedirs(d, exist_ok=True)
     # ─────────────────────────────────────────────────────────────────────────
+    # Language detection
     # ─────────────────────────────────────────────────────────────────────────
     @classmethod
     def detect_language_from_text(cls, text: str) -> str | None:
         """
         Guess a language code from the Unicode ranges the characters of text fall in.

         Walks cls.UNICODE_SCRIPT_RANGES in table order and returns the language
         code of the first range that contains at least one character of text.
         Precedence is therefore decided by the order of the table, not by which
         character appears first in the text.

         Args:
             text: The text to inspect.

         Returns:
             The language code of the first matching range, or None when text is
             empty/falsy or none of its characters fall in any listed range.
         """
         if not text:
             return None
         for start, end, lang in cls.UNICODE_SCRIPT_RANGES:
             # Single-character string comparison, i.e. a code point range test.
             if any(start <= c <= end for c in text):
                 return lang
+        return None
     @classmethod
     def is_rtl(cls, language: str) -> bool:
         """Report whether language is one of the codes in RTL_LANGUAGES."""
         rtl_codes = cls.RTL_LANGUAGES
         return language in rtl_codes
     @classmethod
     def get_font_for_language(cls, language: str, style_name: str = None) -> str:
         """
         Pick the font filename to use for a language and an optional caption style.

         Resolution order:
           1. A language listed in NON_LATIN always takes its LANGUAGE_FONT_MAP
              entry (or the "default" entry if it has none); style_name is
              ignored for these languages.
           2. Otherwise a truthy style_name that is a key of STYLE_FONT_MAP wins.
           3. Otherwise the language's own LANGUAGE_FONT_MAP entry is used.
           4. Otherwise the "default" entry of LANGUAGE_FONT_MAP.

         Args:
             language: Language code, e.g. "ar" or "en".
             style_name: Optional key into STYLE_FONT_MAP.

         Returns:
             A font filename taken from LANGUAGE_FONT_MAP or STYLE_FONT_MAP.
         """
         NON_LATIN = {
             "ar", "fa", "ur", "he",
             "zh", "zh-tw", "ja", "ko",
+            "hi", "mr", "ne", "th",
         }
         # Non-Latin: always use the language map, regardless of style.
         if language in NON_LATIN:
             return cls.LANGUAGE_FONT_MAP.get(language, cls.LANGUAGE_FONT_MAP["default"])
         # Remaining languages: an explicit, known style takes precedence.
         if style_name and style_name in cls.STYLE_FONT_MAP:
             return cls.STYLE_FONT_MAP[style_name]
         # No usable style: fall back to the per-language font, then the default.
         if language in cls.LANGUAGE_FONT_MAP:
             return cls.LANGUAGE_FONT_MAP[language]
         return cls.LANGUAGE_FONT_MAP["default"]
     # ─────────────────────────────────────────────────────────────────────────
     def get_urls(css_content: str, prefer_latin: bool = True) -> list:
         """
         Extract font file URLs from a Google Fonts CSS response.

         Two cases, depending on whether the CSS labels its url(...) entries
         with "/* subset */" comments:

         * Labelled: exactly one URL is returned. With prefer_latin true it is
           the "latin" subset, else "latin-ext", else the last subset in the
           map; with prefer_latin false it is the first subset in the map. The
           file extension is NOT considered in this branch.
         * Unlabelled: every url(...) is collected and the ".ttf" ones are
           returned if any exist, otherwise the plain ".woff" ones, otherwise
           all of them. TTF is preferred because Pillow cannot load woff2.

         Args:
             css_content: Text of the CSS response.
             prefer_latin: Selects which labelled subset to return (see above);
                 it has no effect in the unlabelled case.

         Returns:
             A list of URL strings with surrounding whitespace and quotes
             removed; empty when css_content contains no url(...) at all.
         """
+        # Extract all (subset_comment, url) pairs
         pattern = re.compile(
             r'/\*\s*\[?\d*\]?\s*([\w\-]+)\s*\*/[^}]*?url\(([^)]+)\)',
             re.DOTALL,
         )
         pairs = pattern.findall(css_content)
+        if pairs:
             # Keyed by lower-cased subset name; a repeated subset name keeps
             # its original position but takes the URL seen last.
+            subset_map = {s.lower(): u.strip().strip("'\"") for s, u in pairs}
+            if prefer_latin:
+                for key in ("latin", "latin-ext"):
+                    if key in subset_map:
+                        return [subset_map[key]]
+                return [list(subset_map.values())[-1]]
+            else:
+                return [list(subset_map.values())[0]]
+        # Fallback: grab all raw URLs
+        all_urls = re.findall(r'url\(([^)]+)\)', css_content)
+        all_urls = [u.strip().strip("'\"") for u in all_urls]
+        # Prefer TTF, then woff (not woff2 — Pillow can't open woff2)
+        ttf   = [u for u in all_urls if u.endswith(".ttf")]
+        woff  = [u for u in all_urls if u.endswith(".woff") and not u.endswith(".woff2")]
+        return ttf or woff or all_urls
+    # ─────────────────────────────────────────────────────────────────────────
+    # Font CSS download  ← FIXED: uses TTF-forcing User-Agent
+    # ─────────────────────────────────────────────────────────────────────────
     @staticmethod
     def download_font_from_css(css_url: str, output_path: str) -> bool:
         """
+        Downloads the correct font file for a given Google Fonts CSS URL.
+        KEY FIX: Uses an old IE 6 User-Agent to force Google Fonts to return
+        TTF URLs instead of woff2. Pillow/FreeType cannot open woff2 files.
+          Modern Chrome UA → Google returns .woff2  → Pillow FAILS ❌
+          Old IE 6 UA      → Google returns .ttf    → Pillow works ✅
+        Two-pass strategy:
+          Pass 1: Old IE UA → gets TTF (ideal for Pillow)
+          Pass 2: Modern UA → gets woff2 as last resort (may fail in Pillow)
         """
+        # ── User-Agent constants ──────────────────────────────────────────────
+        # IE 6 on Windows XP — forces Google Fonts to return legacy TTF format
+        UA_TTF = (
+            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; "
+            "SV1; .NET CLR 1.1.4322)"
+        )
+        # Modern Chrome — returns woff2 (not ideal for Pillow, last resort)
+        UA_MODERN = (
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+            "AppleWebKit/537.36 (KHTML, like Gecko) "
+            "Chrome/124.0.0.0 Safari/537.36"
+        )
         NON_LATIN_KEYWORDS = (
             "arabic", "noto", "devanagari", "sc", "jp", "kr", "tc",
             "thai", "sarabun", "heebo", "frank", "vazir", "tajawal",
         is_non_latin = any(kw in filename for kw in NON_LATIN_KEYWORDS)
         prefer_latin = not is_non_latin
+        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
+        for pass_num, ua in enumerate([UA_TTF, UA_MODERN], start=1):
+            ua_label = "TTF-forcing (IE6)" if pass_num == 1 else "Modern (woff2 fallback)"
+            try:
+                # ── Fetch CSS ─────────────────────────────────────────────────
+                resp = requests.get(
+                    css_url,
+                    headers={"User-Agent": ua},
+                    timeout=15
+                )
+                resp.raise_for_status()
+                urls = Config.get_urls(resp.text, prefer_latin=prefer_latin)
+                if not urls:
+                    print(f"⚠️  Pass {pass_num} ({ua_label}): no font URLs in CSS")
+                    continue
+                font_url = urls[0]
+                ext = os.path.splitext(font_url.split("?")[0])[-1].lower()
+                print(f"⬇️  Pass {pass_num} ({ua_label}): {ext} → {font_url[:70]}…")
+                # ── Download font file ────────────────────────────────────────
+                font_resp = requests.get(
+                    font_url,
+                    headers={"User-Agent": UA_MODERN},
+                    timeout=30
+                )
+                font_resp.raise_for_status()
+                data = font_resp.content
+                # ── Validate: check magic bytes ───────────────────────────────
+                if len(data) < 10_000:
+                    print(f"⚠️  File too small ({len(data)} B) — likely error page, skipping")
+                    continue
+                magic = data[:4]
+                is_ttf_magic = magic in (
+                    b"\x00\x01\x00\x00",   # TrueType
+                    b"OTTO",               # OpenType CFF
+                    b"true",               # TrueType variant
+                    b"wOFF",               # WOFF (Pillow ≥ 9.2 can open)
+                    b"wOF2",               # WOFF2 (Pillow may fail)
+                )
+                if not is_ttf_magic:
+                    print(
+                        f"⚠️  Pass {pass_num}: unexpected magic bytes {magic.hex()} "
+                        f"(probably HTML error page) — skipping"
+                    )
+                    continue
+                if magic == b"wOF2":
+                    print(
+                        f"⚠️  Pass {pass_num}: received WOFF2 — "
+                        f"Pillow may not be able to open this. "
+                        f"Consider installing: sudo apt-get install fonts-noto-core"
+                    )
+                with open(output_path, "wb") as f:
+                    f.write(data)
+                print(f"✅ Font saved ({len(data):,} B, {ext}): {output_path}")
+                return True
+            except requests.RequestException as e:
+                print(f"❌ Pass {pass_num} network error: {e}")
+            except Exception as e:
+                print(f"❌ Pass {pass_num} unexpected error: {e}")
+        # ── Both passes failed ────────────────────────────────────────────────
+        print(
+            f"❌ All download attempts failed for {os.path.basename(output_path)}.\n"
+            f"   Fix on Ubuntu/Debian:\n"
+            f"     sudo apt-get install -y fonts-noto-core fonts-arabeyes\n"
+            f"   Or copy a TTF manually to: {output_path}"
+        )
+        return False

core/free_translator.py CHANGED Viewed

@@ -7,32 +7,24 @@ class FreeTranslator:
     def __init__(self):
         pass
-    def translate_text(self, text, target_language_code):
         """ترجمة مجانية باستخدام MyMemory API بدون httpx"""
         if not text.strip():
             return "", []
-        # خريطة اللغات
-        lang_map = {
-            "ar": "ar",
-            "en": "en",
-            "hi": "hi",
-            "zh": "zh",
-            "es": "es",
-            "fr": "fr",
-            "de": "de",
-            "ru": "ru",
-            "ja": "ja"
-        }
-        target_lang = lang_map.get(target_language_code, target_language_code)
         try:
             # استخدام urllib بدلاً من requests لتجنب مشكلة httpx
             url = "https://api.mymemory.translated.net/get"
             params = {
                 'q': text,
-                'langpair': f'en|{target_lang}'
             }
             # بناء URL مع parameters

     def __init__(self):
         pass
+    def translate_text(self, text, target_language_code, source_language_code="en"):
         """ترجمة مجانية باستخدام MyMemory API بدون httpx"""
         if not text.strip():
             return "", []
+        # Handle same language case
+        if source_language_code.lower() == target_language_code.lower():
+            return text, []
+        target_lang = target_language_code.lower()
+        source_lang = source_language_code.lower()
         try:
             # استخدام urllib بدلاً من requests لتجنب مشكلة httpx
             url = "https://api.mymemory.translated.net/get"
             params = {
                 'q': text,
+                'langpair': f'{source_lang}|{target_lang}'
             }
             # بناء URL مع parameters

core/stt.py CHANGED Viewed

@@ -33,7 +33,7 @@ SUBTITLE_STANDARDS = {
 }
 # Sentence-ending punctuation for smart splitting
-SENTENCE_ENDINGS = re.compile(r'[.!?؟。！？]+$')
 CLAUSE_BOUNDARIES = re.compile(r'[,،;:،]+$')
@@ -49,7 +49,6 @@ class SubtitleSegmenter:
     @staticmethod
     def count_chars(text: str) -> int:
-        """Count displayable characters (strip extra spaces)."""
         return len(text.strip())
     @staticmethod
@@ -62,7 +61,6 @@ class SubtitleSegmenter:
     @staticmethod
     def calc_min_duration(text: str) -> float:
-        """Minimum display duration based on reading speed (EBU R37)."""
         chars = SubtitleSegmenter.count_chars(text)
         cps   = SUBTITLE_STANDARDS["reading_speed_cps"]
         return max(chars / cps, SUBTITLE_STANDARDS["min_duration_sec"])
@@ -73,13 +71,6 @@ class SubtitleSegmenter:
         Splits a flat list of word dicts into subtitle blocks following
         international standards. Each block has:
           { text, start, end, words, line1, line2 }
-        Priority for line breaks:
-          1. Sentence endings (.!?)
-          2. Clause boundaries (,;:)
-          3. Max chars per line (42)
-          4. Max words per block
-          5. Pause gaps in audio (> 0.5s)
         """
         if not words:
             return []
@@ -89,18 +80,15 @@ class SubtitleSegmenter:
         MAX_WORDS = SUBTITLE_STANDARDS["max_words_per_block"]
         PAUSE_GAP = SUBTITLE_STANDARDS["sentence_pause_gap"]
-        blocks       = []
         current_words = []
         current_chars = 0
         def flush_block(word_list):
-            """Convert accumulated words into a subtitle block with line splitting."""
             if not word_list:
                 return None
             full_text = " ".join(w["text"] for w in word_list)
             lines     = SubtitleSegmenter._split_into_lines(full_text, MAX_CHARS)
             return {
                 "text":  full_text,
                 "start": word_list[0]["start"],
@@ -118,31 +106,22 @@ class SubtitleSegmenter:
             word_chars = len(word_text)
             is_last    = (i == len(words) - 1)
-            # Detect natural pause between this word and the next
             next_pause = 0.0
             if not is_last:
                 next_pause = words[i + 1]["start"] - word["end"]
-            # Would adding this word exceed the block limit?
-            new_total = current_chars + (1 if current_words else 0) + word_chars
             word_count = len(current_words) + 1
-            # ── Flush conditions (in priority order) ──────────────────────────
             should_flush = (
-                # 1. Adding word would exceed max block chars
                 (current_words and new_total > MAX_BLOCK) or
-                # 2. Too many words
                 (current_words and word_count > MAX_WORDS) or
-                # 3. Long natural pause after current word (sentence boundary)
                 (current_words and next_pause >= PAUSE_GAP and
                  SubtitleSegmenter.is_sentence_end(word_text)) or
-                # 4. Very long pause (>1s) — definitely a new sentence
                 (current_words and next_pause > 1.0)
             )
             if should_flush and current_words:
-                # Check if we should include this word before flushing
-                # (if it's a sentence ending, include it in the current block)
                 if SubtitleSegmenter.is_sentence_end(word_text) and new_total <= MAX_BLOCK:
                     current_words.append(word)
                     current_chars = new_total
@@ -153,12 +132,9 @@ class SubtitleSegmenter:
                 current_words = []
                 current_chars = 0
-                # If we already added the word above, skip re-adding
                 if SubtitleSegmenter.is_sentence_end(word_text) and word in current_words:
                     continue
-            # ── Prefer breaking at clause boundaries when close to line limit ─
-            # If we're on the second line and hit a comma, flush
             if (current_words and
                     current_chars > MAX_CHARS and
                     SubtitleSegmenter.is_clause_boundary(word_text)):
@@ -173,32 +149,26 @@ class SubtitleSegmenter:
             current_words.append(word)
             current_chars += (1 if len(current_words) > 1 else 0) + word_chars
-        # Flush remaining words
         if current_words:
             block = flush_block(current_words)
             if block:
                 blocks.append(block)
-        # ── Post-process: enforce duration standards ───────────────────────────
         blocks = SubtitleSegmenter._enforce_duration_standards(blocks)
         return blocks
     @staticmethod
     def _split_into_lines(text: str, max_chars: int) -> list:
         """
         Splits text into max 2 lines at a natural word boundary near the midpoint.
-        Prefers splitting at punctuation, then at the most balanced midpoint.
-        Returns [line1] or [line1, line2].
         """
         if len(text) <= max_chars:
             return [text]
         words = text.split()
         if len(words) <= 1:
-            return [text]  # Can't split single word
-        # Try to find the best split point
         best_split   = len(words) // 2
         best_balance = float('inf')
@@ -206,15 +176,12 @@ class SubtitleSegmenter:
             line1 = " ".join(words[:split_idx])
             line2 = " ".join(words[split_idx:])
-            # Hard reject: either line over max_chars
             if len(line1) > max_chars or len(line2) > max_chars:
                 continue
-            # Prefer splits at punctuation
-            punctuation_bonus = 5 if CLAUSE_BOUNDARIES.search(words[split_idx - 1]) else 0
-            sentence_bonus    = 10 if SENTENCE_ENDINGS.search(words[split_idx - 1]) else 0
-            # Balance score (closer to equal = better)
             balance = abs(len(line1) - len(line2)) - punctuation_bonus - sentence_bonus
             if balance < best_balance:
@@ -224,7 +191,6 @@ class SubtitleSegmenter:
         line1 = " ".join(words[:best_split])
         line2 = " ".join(words[best_split:])
-        # Fallback: if line2 still too long, truncate gracefully
         if len(line2) > max_chars:
             line2 = line2[:max_chars - 1] + "…"
@@ -233,10 +199,7 @@ class SubtitleSegmenter:
     @staticmethod
     def _enforce_duration_standards(blocks: list) -> list:
         """
-        Post-processes blocks to:
-          - Enforce minimum display duration
-          - Enforce maximum display duration (split if needed)
-          - Ensure minimum gap between consecutive blocks (40ms)
         """
         if not blocks:
             return blocks
@@ -246,26 +209,20 @@ class SubtitleSegmenter:
         MIN_GAP = SUBTITLE_STANDARDS["min_gap_between"]
         processed = []
-        for i, block in enumerate(blocks):
             duration = block["end"] - block["start"]
-            # Extend duration if too short
             if duration < MIN_DUR:
                 block = {**block, "end": block["start"] + MIN_DUR}
-            # Trim if too long (shouldn't happen with word-level splitting)
             if duration > MAX_DUR:
                 block = {**block, "end": block["start"] + MAX_DUR}
             processed.append(block)
-        # Enforce gap between consecutive subtitles
         for i in range(1, len(processed)):
-            prev_end  = processed[i - 1]["end"]
             curr_start = processed[i]["start"]
             if curr_start - prev_end < MIN_GAP:
-                # Move current block start forward slightly
                 processed[i] = {**processed[i], "start": prev_end + MIN_GAP}
         return processed
@@ -274,13 +231,13 @@ class SubtitleSegmenter:
 # ─────────────────────────────────────────────────────────────────────────────
 class STT:
-    def __init__(self, model_size="large-v3"):
         """
-        ✅ Default changed to large-v3:
-           - Significantly better word-level timestamps (critical for highlight_word mode)
-           - Better sentence segmentation boundaries
-           - Improved Arabic/multilingual accuracy
-           Note: Base model timing is ±200ms off; large-v3 is ±50ms.
         """
         self.duration   = 0
         self.model_size = model_size
@@ -301,13 +258,13 @@ class STT:
         """
         Transcribes video and returns subtitle-standard-compliant segments.
-        Changes from original:
-          ✅ All segments post-processed through SubtitleSegmenter
-          ✅ Max 42 chars per line enforced
-          ✅ Max 2 lines per block
-          ✅ Natural sentence/clause boundary splitting
-          ✅ EBU R37 reading speed enforcement
-          ✅ 40ms minimum gap between subtitles
         """
         print(f"🎙️ Transcribing: {video_path} (Language: {language or 'Auto'}, "
               f"Mode: {timestamp_mode}, VAD: {vad_filter})")
@@ -315,7 +272,6 @@ class STT:
         log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                 "logs", "transcript.log")
-        # ── Language normalisation ────────────────────────────────────────────
         actual_stt_lang = None
         if language:
             lang_val = language.value if hasattr(language, 'value') else str(language)
@@ -347,25 +303,24 @@ class STT:
             print(f"⚠️ Cache setup error: {e}")
         # ── Whisper transcription ────────────────────────────────────────────
-        # Always request word_timestamps — needed for standards-compliant splitting
         print(f"🔍 Starting Whisper transcription (model={self.model_size}, "
               f"word_timestamps=True)…")
         segments_iter, info = self.model.transcribe(
             video_path,
-            beam_size=5,                          # Higher beam → better accuracy
-            word_timestamps=True,                 # Always needed for standards
             language=actual_stt_lang,
             vad_filter=vad_filter,
             vad_parameters=dict(min_silence_duration_ms=500) if vad_filter else None,
-            condition_on_previous_text=True,      # Better sentence continuity
         )
         detected_lang = info.language
         print(f"🔍 Detected language: {detected_lang}")
         # ── Collect all words with timing ────────────────────────────────────
-        all_words   = []
-        raw_segments = list(segments_iter)        # materialise the generator
         for seg in raw_segments:
             if seg.words:
@@ -375,18 +330,17 @@ class STT:
                         all_words.append({
                             "text":         text,
                             "start":        round(w.start, 3),
-                            "end":          round(w.end, 3),
                             "is_highlight": False,
                         })
             else:
-                # Fallback: segment-level only (no word timestamps available)
                 seg_words = seg.text.strip().split()
                 if seg_words:
                     avg = (seg.end - seg.start) / len(seg_words)
                     for i, wt in enumerate(seg_words):
                         all_words.append({
                             "text":         wt,
-                            "start":        round(seg.start + i * avg, 3),
                             "end":          round(seg.start + (i + 1) * avg, 3),
                             "is_highlight": False,
                         })
@@ -401,7 +355,7 @@ class STT:
         print(f"✅ Generated {len(subtitle_blocks)} subtitle blocks "
               f"(was {len(raw_segments)} raw segments)")
-        # ── Build segments_list in expected format ───────────────────────────
         segments_list = []
         full_text     = ""
@@ -411,7 +365,6 @@ class STT:
                 "start": block["start"],
                 "end":   block["end"],
                 "words": block["words"],
-                # Extra: pre-computed line split for renderers
                 "_line1": block.get("line1", block["text"]),
                 "_line2": block.get("line2", ""),
             })
@@ -429,7 +382,7 @@ class STT:
                 f.write(f"📐 Standards: BBC/Netflix/EBU R37 "
                         f"(max {SUBTITLE_STANDARDS['max_chars_per_line']} chars/line)\n")
                 f.write(f"{'='*60}\n")
-                for i, seg in enumerate(segments_list):
                     chars = len(seg['_line1']) + len(seg.get('_line2', ''))
                     f.write(f"[{seg['start']:.2f}–{seg['end']:.2f}] "
                             f"({chars:2d}ch) {seg['text']}\n")

 }
 # Sentence-ending punctuation for smart splitting
+SENTENCE_ENDINGS  = re.compile(r'[.!?؟。！？]+$')
 # Clause-level punctuation at the end of a word: Latin comma/semicolon/colon
 # plus the Arabic comma (،) and Arabic semicolon (؛). The original class
 # listed the Arabic comma twice, so the Arabic semicolon was never matched.
 CLAUSE_BOUNDARIES = re.compile(r'[,،;:؛]+$')
     @staticmethod
     def count_chars(text: str) -> int:
         return len(text.strip())
     @staticmethod
     @staticmethod
     def calc_min_duration(text: str) -> float:
         """
         Return the shortest duration a subtitle containing ``text`` may have.

         The value is the stripped character count divided by
         ``SUBTITLE_STANDARDS["reading_speed_cps"]``, never lower than
         ``SUBTITLE_STANDARDS["min_duration_sec"]``.
         """
         reading_time = (
             SubtitleSegmenter.count_chars(text)
             / SUBTITLE_STANDARDS["reading_speed_cps"]
         )
         floor = SUBTITLE_STANDARDS["min_duration_sec"]
         return max(reading_time, floor)
         Splits a flat list of word dicts into subtitle blocks following
         international standards. Each block has:
           { text, start, end, words, line1, line2 }
         """
         if not words:
             return []
         MAX_WORDS = SUBTITLE_STANDARDS["max_words_per_block"]
         PAUSE_GAP = SUBTITLE_STANDARDS["sentence_pause_gap"]
+        blocks        = []
         current_words = []
         current_chars = 0
         def flush_block(word_list):
             if not word_list:
                 return None
             full_text = " ".join(w["text"] for w in word_list)
             lines     = SubtitleSegmenter._split_into_lines(full_text, MAX_CHARS)
             return {
                 "text":  full_text,
                 "start": word_list[0]["start"],
             word_chars = len(word_text)
             is_last    = (i == len(words) - 1)
             next_pause = 0.0
             if not is_last:
                 next_pause = words[i + 1]["start"] - word["end"]
+            new_total  = current_chars + (1 if current_words else 0) + word_chars
             word_count = len(current_words) + 1
             should_flush = (
                 (current_words and new_total > MAX_BLOCK) or
                 (current_words and word_count > MAX_WORDS) or
                 (current_words and next_pause >= PAUSE_GAP and
                  SubtitleSegmenter.is_sentence_end(word_text)) or
                 (current_words and next_pause > 1.0)
             )
             if should_flush and current_words:
                 if SubtitleSegmenter.is_sentence_end(word_text) and new_total <= MAX_BLOCK:
                     current_words.append(word)
                     current_chars = new_total
                 current_words = []
                 current_chars = 0
                 if SubtitleSegmenter.is_sentence_end(word_text) and word in current_words:
                     continue
             if (current_words and
                     current_chars > MAX_CHARS and
                     SubtitleSegmenter.is_clause_boundary(word_text)):
             current_words.append(word)
             current_chars += (1 if len(current_words) > 1 else 0) + word_chars
         if current_words:
             block = flush_block(current_words)
             if block:
                 blocks.append(block)
         blocks = SubtitleSegmenter._enforce_duration_standards(blocks)
         return blocks
     @staticmethod
     def _split_into_lines(text: str, max_chars: int) -> list:
         """
         Splits text into max 2 lines at a natural word boundary near the midpoint.
         """
         if len(text) <= max_chars:
             return [text]
         words = text.split()
         if len(words) <= 1:
+            return [text]
         best_split   = len(words) // 2
         best_balance = float('inf')
             line1 = " ".join(words[:split_idx])
             line2 = " ".join(words[split_idx:])
             if len(line1) > max_chars or len(line2) > max_chars:
                 continue
+            punctuation_bonus = 5  if CLAUSE_BOUNDARIES.search(words[split_idx - 1]) else 0
+            sentence_bonus    = 10 if SENTENCE_ENDINGS.search(words[split_idx - 1])   else 0
             balance = abs(len(line1) - len(line2)) - punctuation_bonus - sentence_bonus
             if balance < best_balance:
         line1 = " ".join(words[:best_split])
         line2 = " ".join(words[best_split:])
         if len(line2) > max_chars:
             line2 = line2[:max_chars - 1] + "…"
     @staticmethod
     def _enforce_duration_standards(blocks: list) -> list:
         """
+        Post-processes blocks to enforce min/max duration and minimum gap.
         """
         if not blocks:
             return blocks
         MIN_GAP = SUBTITLE_STANDARDS["min_gap_between"]
         processed = []
+        for block in blocks:
             duration = block["end"] - block["start"]
             if duration < MIN_DUR:
                 block = {**block, "end": block["start"] + MIN_DUR}
             if duration > MAX_DUR:
                 block = {**block, "end": block["start"] + MAX_DUR}
             processed.append(block)
         for i in range(1, len(processed)):
+            prev_end   = processed[i - 1]["end"]
             curr_start = processed[i]["start"]
             if curr_start - prev_end < MIN_GAP:
                 processed[i] = {**processed[i], "start": prev_end + MIN_GAP}
         return processed
 # ─────────────────────────────────────────────────────────────────────────────
 class STT:
+    def __init__(self, model_size="base"):
         """
+        large-v3 is the recommended model size (the default argument here is "base"):
+          - Significantly better word-level timestamps (critical for highlight_word)
+          - Better sentence segmentation boundaries
+          - Improved Arabic/multilingual accuracy
+          Note: base model timing is ±200ms off; large-v3 is ±50ms.
         """
         self.duration   = 0
         self.model_size = model_size
         """
         Transcribes video and returns subtitle-standard-compliant segments.
+        ✅ All segments post-processed through SubtitleSegmenter:
+           - Max 42 chars per line (BBC/Netflix)
+           - Max 2 lines per block
+           - Natural sentence/clause boundary splitting
+           - EBU R37 reading speed enforcement
+           - 40ms minimum gap between subtitles
+           - _line1 / _line2 pre-computed for renderers
         """
         print(f"🎙️ Transcribing: {video_path} (Language: {language or 'Auto'}, "
               f"Mode: {timestamp_mode}, VAD: {vad_filter})")
         log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                 "logs", "transcript.log")
         actual_stt_lang = None
         if language:
             lang_val = language.value if hasattr(language, 'value') else str(language)
             print(f"⚠️ Cache setup error: {e}")
         # ── Whisper transcription ────────────────────────────────────────────
         print(f"🔍 Starting Whisper transcription (model={self.model_size}, "
               f"word_timestamps=True)…")
         segments_iter, info = self.model.transcribe(
             video_path,
+            beam_size=5,
+            word_timestamps=True,           # Always needed for standards & highlight_word
             language=actual_stt_lang,
             vad_filter=vad_filter,
             vad_parameters=dict(min_silence_duration_ms=500) if vad_filter else None,
+            condition_on_previous_text=True,
         )
         detected_lang = info.language
         print(f"🔍 Detected language: {detected_lang}")
         # ── Collect all words with timing ────────────────────────────────────
+        all_words    = []
+        raw_segments = list(segments_iter)
         for seg in raw_segments:
             if seg.words:
                         all_words.append({
                             "text":         text,
                             "start":        round(w.start, 3),
+                            "end":          round(w.end,   3),
                             "is_highlight": False,
                         })
             else:
                 seg_words = seg.text.strip().split()
                 if seg_words:
                     avg = (seg.end - seg.start) / len(seg_words)
                     for i, wt in enumerate(seg_words):
                         all_words.append({
                             "text":         wt,
+                            "start":        round(seg.start + i * avg,       3),
                             "end":          round(seg.start + (i + 1) * avg, 3),
                             "is_highlight": False,
                         })
         print(f"✅ Generated {len(subtitle_blocks)} subtitle blocks "
               f"(was {len(raw_segments)} raw segments)")
+        # ── Build segments_list ───────────────────────────────────────────────
         segments_list = []
         full_text     = ""
                 "start": block["start"],
                 "end":   block["end"],
                 "words": block["words"],
                 "_line1": block.get("line1", block["text"]),
                 "_line2": block.get("line2", ""),
             })
                 f.write(f"📐 Standards: BBC/Netflix/EBU R37 "
                         f"(max {SUBTITLE_STANDARDS['max_chars_per_line']} chars/line)\n")
                 f.write(f"{'='*60}\n")
+                for seg in segments_list:
                     chars = len(seg['_line1']) + len(seg.get('_line2', ''))
                     f.write(f"[{seg['start']:.2f}–{seg['end']:.2f}] "
                             f"({chars:2d}ch) {seg['text']}\n")

core/subtitle_manager.py CHANGED Viewed

@@ -7,35 +7,67 @@ Styles tuned for 2024-2025 Shorts/Reels/TikTok viral aesthetics.
    - active_word_index (int) replaces unreliable id() comparison
    - RTL detection covers Arabic, Persian, Urdu, Hebrew (not just Arabic)
    - Hebrew uses bidi-only (no Arabic reshaping)
    - CJK/Thai/Devanagari not uppercased
    - ensure_font() uses Config.detect_language_from_text() + Config.get_font_for_language()
-   - NotoSans fallback chain before system Arial
    - BBC/Netflix standards: max 42 chars/line, 2 lines max
-   - Pre-computed _line1/_line2 from STT used when available
 """
 import os
 import numpy as np
 import urllib.request
 from PIL import Image, ImageDraw, ImageFont
 import moviepy.editor as mpe
-from arabic_reshaper import reshape
 from bidi.algorithm import get_display
 from .config import Config
 from .logger import Logger
 logger = Logger.get_logger(__name__)
 # ─────────────────────────────────────────────────────────────────────────────
-# Style Registry
 # ─────────────────────────────────────────────────────────────────────────────
 #
-# Extra keys consumed by highlight_word mode:
-#   highlight_color      → text color for the active word
-#   highlight_bg         → RGBA fill of the box behind active word
-#   highlight_bg_radius  → corner radius of that box
-#   shadow_layers        → list of (off_x, off_y, blur_steps, RGBA)
 #
 STYLES = {
     # ── 1. CLASSIC ────────────────────────────────────────────────────────────
@@ -92,7 +124,6 @@ STYLES = {
     },
     # ── 4. TIKTOK NEON ────────────────────────────────────────────────────────
-    # ✅ Changed font from Oswald-Bold (Latin-only) to Montserrat-Bold (multilingual)
     "tiktok_neon": {
         "fontsize":            80,
         "color":               (255, 255, 255, 230),
@@ -145,6 +176,67 @@ STYLES = {
             (0, 9, 0, (  0, 0, 0, 130)),
         ],
     },
 }
@@ -170,7 +262,6 @@ _NO_UPPER_RANGES = [
 # ─────────────────────────────────────────────────────────────────────────────
 def _rgba(c):
-    """Normalise any colour spec to an (R, G, B, A) tuple."""
     if c is None:
         return None
     if isinstance(c, (tuple, list)):
@@ -180,42 +271,62 @@ def _rgba(c):
 def _should_uppercase(text: str) -> bool:
-    """Returns False for scripts where uppercasing is not applicable."""
     for start, end in _NO_UPPER_RANGES:
         if any(start <= c <= end for c in text):
             return False
     return True
 def _prepare_display_text(raw: str, is_rtl: bool, language: str = None) -> str:
     """
-    Prepares text for rendering:
-      - Arabic / Persian / Urdu → arabic_reshaper + bidi
-      - Hebrew                  → bidi only (no Arabic reshaping)
-      - CJK / Thai / Devanagari → as-is (no uppercase)
-      - Latin / Cyrillic        → uppercase
     """
     if not is_rtl:
         return raw.upper() if _should_uppercase(raw) else raw
-    # Hebrew: bidi only
-    is_hebrew = (language == "he" or
-                 any("\u0590" <= c <= "\u05FF" for c in raw))
-    if is_hebrew:
-        return get_display(raw)
-    # Arabic script (ar, fa, ur): reshape + bidi
     try:
-        return get_display(reshape(raw))
     except Exception:
         return raw
 def _is_rtl_text(language: str, text: str) -> bool:
-    """
-    Returns True if language or text content requires RTL rendering.
-    Covers: Arabic (ar), Persian (fa), Urdu (ur), Hebrew (he).
-    """
     if language and Config.is_rtl(language):
         return True
     if text:
@@ -226,14 +337,6 @@ def _is_rtl_text(language: str, text: str) -> bool:
 def _draw_shadow_layers(draw, box, layers, base_radius):
-    """
-    ✅ FIXED: Was called with `pass` in original — now fully operational.
-    Paints shadow / glow layers behind a rounded-rect.
-    layers: [(off_x, off_y, blur_steps, rgba)]
-      blur_steps == 0  → single hard-offset rectangle
-      blur_steps  > 0  → concentric rects with fading alpha (soft glow)
-    """
     x1, y1, x2, y2 = box
     for (ox, oy, blur, color) in layers:
         rgba = _rgba(color)
@@ -262,35 +365,18 @@ def _draw_shadow_layers(draw, box, layers, base_radius):
 # ─────────────────────────────────────────────────────────────────────────────
 class SubtitleManager:
-    # ── Font management ───────────────────────────────────────────────────────
     @staticmethod
     def ensure_font(language: str = None, style_name: str = None,
                     style_font: str = None, text_content: str = None) -> str:
-        """
-        Returns the absolute path to the best available font for the given
-        language and caption style.
-        Resolution order:
-          1. Explicit language  → Config.get_font_for_language(language, style_name)
-          2. Auto-detect script from text → Config.detect_language_from_text()
-          3. Style font (Latin) → style_font param
-          4. Default            → Config.LANGUAGE_FONT_MAP["default"]
-        Non-Latin scripts (Arabic, Hebrew, CJK, Thai, Devanagari, Cyrillic)
-        always override the style font preference.
-        """
-        # ── 1. Resolve language code ──────────────────────────────────────────
         detected_lang = None
         if language:
             lang_val      = language.value if hasattr(language, 'value') else str(language)
             detected_lang = None if lang_val == 'auto' else lang_val
-        # Auto-detect from text content if no explicit language given
         if not detected_lang and text_content:
             detected_lang = Config.detect_language_from_text(text_content)
-        # ── 2. Select font name ───────────────────────────────────────────────
         if detected_lang:
             font_name = Config.get_font_for_language(detected_lang, style_name)
         elif style_font:
@@ -300,7 +386,6 @@ class SubtitleManager:
         logger.debug(f"🔤 Font resolved: lang={detected_lang} style={style_name} → {font_name}")
-        # ── 3. Resolve path & download if missing ─────────────────────────────
         font_path = os.path.join(Config.BASE_DIR, font_name)
         if not os.path.exists(font_path):
@@ -318,9 +403,11 @@ class SubtitleManager:
                 except Exception as exc:
                     logger.error(f"❌ Font download failed for {font_name}: {exc}")
-                    # Fallback 1: NotoSans (covers virtually all Unicode)
-                    fallback_name = "NotoSans-Bold.ttf"
-                    fallback_path = os.path.join(Config.BASE_DIR, fallback_name)
                     if not os.path.exists(fallback_path):
                         fallback_url = Config.FONTS.get(fallback_name)
                         if fallback_url:
@@ -330,10 +417,9 @@ class SubtitleManager:
                                 pass
                     if os.path.exists(fallback_path):
-                        logger.warning(f"⚠️ Using NotoSans fallback instead of {font_name}")
                         return fallback_path
-                    # Fallback 2: system Arial (Latin only)
                     logger.error("❌ All font downloads failed, falling back to system Arial")
                     return "Arial"
             else:
@@ -341,10 +427,8 @@ class SubtitleManager:
         return font_path
-    # ── Text wrapping (pixel-accurate) ───────────────────────────────────────
     @staticmethod
     def wrap_text(text: str, font, max_width: int) -> list:
-        """Splits text into lines that fit within max_width pixels."""
         lines        = []
         words        = text.split()
         if not words:
@@ -360,7 +444,6 @@ class SubtitleManager:
             if width > max_width:
                 if len(current_line) == 1:
-                    # Single word is already too wide — force it on its own line
                     lines.append(current_line.pop())
                 else:
                     last = current_line.pop()
@@ -372,16 +455,11 @@ class SubtitleManager:
         return lines
-    # ── Single-text PIL clip (sentence / word modes) ──────────────────────────
     @staticmethod
     def create_pil_text_clip(text: str, fontsize: int, color, font_path: str,
                               stroke_color=(0, 0, 0, 200), stroke_width: int = 2,
                               bg_color=None, padding: int = 12, bg_radius: int = 18,
                               max_width: int = None):
-        """
-        Renders a single subtitle text block (sentence or word mode).
-        Supports multi-line wrapping and optional background pill.
-        """
         try:
             try:
                 font = ImageFont.truetype(font_path, fontsize)
@@ -392,13 +470,11 @@ class SubtitleManager:
             dummy = Image.new("RGBA", (1, 1))
             d     = ImageDraw.Draw(dummy)
-            # Wrap if width limit given
             lines = [text]
             if max_width:
                 avail = max_width - padding * 4
                 lines = SubtitleManager.wrap_text(text, font, avail)
-            # Measure all lines
             line_metrics = []
             max_w        = 0
             total_h      = 0
@@ -429,7 +505,6 @@ class SubtitleManager:
             current_y = margin
             for m in line_metrics:
-                # Centre each line horizontally
                 lx = (iw - m["w"]) / 2 - m["bbox"][0]
                 ly = current_y - m["bbox"][1]
                 draw.text(
@@ -446,54 +521,40 @@ class SubtitleManager:
             logger.error(f"⚠️ create_pil_text_clip error: {exc}")
             return None
-    # ── Highlight-word composite renderer ─────────────────────────────────────
     @staticmethod
     def create_sentence_highlight_clip(
         sentence_words: list,
-        active_word_index: int,          # ✅ int index, not id()
         font,
         fontsize: int,
         font_path: str,
         style_config: dict,
-        is_rtl: bool,                    # ✅ renamed from is_arabic — covers he/fa/ur too
-        language: str = None,            # ✅ needed for Hebrew vs Arabic reshaping
         padding: int = 14,
         bg_radius: int = 20,
         max_width: int = None,
     ):
-        """
-        Renders a sentence with one highlighted (active) word.
-        ✅ Fixes vs original:
-          1. active_word_index (int) — reliable, replaces id()-based comparison
-          2. _draw_shadow_layers() actually called (was `pass` in original)
-          3. _prepare_display_text() handles Hebrew, Persian, CJK correctly
-          4. RTL word order reversed for Arabic/Hebrew/Persian/Urdu
-          5. Multi-line wrapping with pixel-accurate measurement
-        """
         try:
             dummy   = Image.new("RGBA", (1, 1))
             d       = ImageDraw.Draw(dummy)
             space_w = d.textbbox((0, 0), " ", font=font)[2]
-            # ── 1. Prepare & measure words ────────────────────────────────────
             words_data = []
-            # RTL languages: reverse word order for correct visual flow
-            ordered = list(reversed(sentence_words)) if is_rtl else sentence_words
             for idx, w in enumerate(ordered):
                 raw     = w.get("text", "")
                 display = _prepare_display_text(raw, is_rtl, language)
                 bbox    = d.textbbox((0, 0), display, font=font)
                 words_data.append({
-                    "index": idx,              # ✅ index in the ORDERED list
                     "text":  display,
                     "w":     bbox[2] - bbox[0],
                     "h":     bbox[3] - bbox[1],
                     "bbox":  bbox,
                 })
-            # For RTL: the active word index must be mirrored
             n = len(sentence_words)
             effective_active_index = (
                 (n - 1 - active_word_index)
@@ -501,7 +562,6 @@ class SubtitleManager:
                 else active_word_index
             )
-            # ── 2. Wrap words into lines ──────────────────────────────────────
             lines        = []
             current_line = []
             current_w    = 0
@@ -522,7 +582,6 @@ class SubtitleManager:
             if current_line:
                 lines.append(current_line)
-            # ── 3. Calculate canvas dimensions ───────────────────────────────
             line_spacing = int(fontsize * 0.2)
             stroke_w     = style_config.get("stroke_width", 2)
             margin       = int(stroke_w * 2) + padding
@@ -546,13 +605,11 @@ class SubtitleManager:
             img  = Image.new("RGBA", (int(iw), int(ih)), (0, 0, 0, 0))
             draw = ImageDraw.Draw(img)
-            # ── 4. Draw shadows & highlight box for active word ───────────────
             hl_bg     = style_config.get("highlight_bg")
             hl_radius = style_config.get("highlight_bg_radius", bg_radius)
             shadows   = style_config.get("shadow_layers", [])
             for i, line in enumerate(lines):
-                # Centre line horizontally
                 lx = margin + (canvas_w - line_infos[i]["w"]) // 2
                 ly = margin + bleed // 2 + line_infos[i]["y"]
                 cx = lx
@@ -565,11 +622,9 @@ class SubtitleManager:
                         by2 = ly + wd["h"] + padding // 2
                         box = (bx1, by1, bx2, by2)
-                        # ✅ FIXED: shadow layers are now actually rendered
                         if shadows:
                             _draw_shadow_layers(draw, box, shadows, hl_radius)
-                        # Highlight pill on top of shadows
                         draw.rounded_rectangle(
                             [(bx1, by1), (bx2, by2)],
                             radius=hl_radius,
@@ -578,7 +633,6 @@ class SubtitleManager:
                     cx += wd["w"] + space_w
-            # ── 5. Draw all word text ─────────────────────────────────────────
             rest_c = _rgba(style_config.get("color",           (255, 255, 255, 255)))
             hl_c   = _rgba(style_config.get("highlight_color", rest_c))
             stk_c  = _rgba(style_config.get("stroke_color",    (0, 0, 0, 255)))
@@ -604,13 +658,10 @@ class SubtitleManager:
             logger.error(f"⚠️ create_sentence_highlight_clip error: {exc}")
             return None
-    # ── Public style accessor ──────────────────────────────────────────────────
     @staticmethod
     def get_style_config(style_name: str) -> dict:
-        """Returns the style dict for the given name (falls back to 'classic')."""
         return STYLES.get(style_name, STYLES["classic"])
-    # ── Main generator ─────────────────────────────────────────────────────────
     @staticmethod
     def create_caption_clips(
         transcript_data,
@@ -622,15 +673,14 @@ class SubtitleManager:
         """
         Generates all caption ImageClips ready for compositing.
-        caption_mode:
-          "sentence"       → shows 4-word chunks (legacy)
-          "word"           → shows 1 word at a time (legacy)
-          "highlight_word" → full sentence visible, active word highlighted
         """
         all_clips = []
         style_cfg = SubtitleManager.get_style_config(caption_style)
-        # ── Parse transcript ──────────────────────────────────────────────────
         segments    = []
         sample_text = ""
@@ -647,7 +697,6 @@ class SubtitleManager:
                 sample_text = s["text"]
                 break
-        # Resolve font — pass style_name for correct STYLE_FONT_MAP lookup
         font_path = SubtitleManager.ensure_font(
             language    = language,
             style_name  = caption_style,
@@ -662,15 +711,6 @@ class SubtitleManager:
         # MODE: highlight_word
         # ══════════════════════════════════════════════════════════════════════
         if caption_mode == "highlight_word":
-            all_words = []
-            for seg in segments:
-                if "words" in seg and seg["words"]:
-                    all_words.extend(seg["words"])
-            if not all_words:
-                logger.warning("⚠️ highlight_word mode requires word-level timestamps — none found.")
-                return []
             fontsize = style_cfg.get("fontsize", 75)
             try:
                 font = ImageFont.truetype(font_path, fontsize)
@@ -678,36 +718,33 @@ class SubtitleManager:
                 logger.warning("⚠️ TrueType load failed — using default font.")
                 font = ImageFont.load_default()
-            # ── Group words into sentences (gap > 0.7s = new sentence) ────────
-            sentences, cur = [], []
-            for i, word in enumerate(all_words):
-                if not word.get("text", "").strip():
                     continue
-                cur.append(word)
-                is_last    = (i == len(all_words) - 1)
-                next_pause = (all_words[i + 1]["start"] - word["end"]) if not is_last else 1.0
-                if next_pause > 0.7 or is_last:
-                    sentences.append(cur)
-                    cur = []
-            for sw in sentences:
-                sent_text  = " ".join(w["text"] for w in sw)
-                sent_start = sw[0]["start"]
-                sent_end   = sw[-1]["end"]
-                # ✅ RTL detection covers Arabic, Persian, Urdu, Hebrew
-                is_rtl = _is_rtl_text(language, sent_text)
-                # ── One clip per active word (highlight moves) ─────────────────
-                for active_idx, active in enumerate(sw):
                     clip = SubtitleManager.create_sentence_highlight_clip(
                         sentence_words    = sw,
-                        active_word_index = active_idx,      # ✅ index-based
                         font              = font,
                         fontsize          = fontsize,
                         font_path         = font_path,
                         style_config      = style_cfg,
-                        is_rtl            = is_rtl,          # ✅ correct param name
                         language          = language,
                         padding           = style_cfg.get("padding", 14),
                         bg_radius         = style_cfg.get("highlight_bg_radius", 20),
@@ -715,14 +752,14 @@ class SubtitleManager:
                     )
                     if clip:
                         all_clips.append(
-                            clip.set_start(active["start"])
-                                .set_end(active["end"])
                                 .set_position(pos)
                         )
-                # ── Fill inter-word gaps with plain sentence ───────────────────
                 covered = [(w["start"], w["end"]) for w in sw]
                 gaps    = []
                 if sent_start < covered[0][0]:
                     gaps.append((sent_start, covered[0][0]))
                 for j in range(len(covered) - 1):
@@ -731,11 +768,13 @@ class SubtitleManager:
                 if covered[-1][1] < sent_end:
                     gaps.append((covered[-1][1], sent_end))
                 for gs, ge in gaps:
-                    plain_cfg = {**style_cfg, "highlight_bg": None, "shadow_layers": []}
                     gc = SubtitleManager.create_sentence_highlight_clip(
                         sentence_words    = sw,
-                        active_word_index = -1,              # -1 = no highlight
                         font              = font,
                         fontsize          = fontsize,
                         font_path         = font_path,
@@ -768,16 +807,13 @@ class SubtitleManager:
                 else:
                     continue
-            # ✅ Use pre-computed line splits from STT (standards-compliant)
             line1 = seg.get("_line1", "")
             line2 = seg.get("_line2", "")
             if line1:
-                # STT already applied BBC/Netflix standards — render as single block
                 display_text = f"{line1}\n{line2}".strip() if line2 else line1
                 chunks = [{"text": display_text, "start": start_t, "end": end_t}]
             else:
-                # Fallback: original chunking behaviour
                 chunk_size = 1 if caption_mode == "word" else 4
                 chunks     = []
                 stt_words  = seg.get("words")
@@ -805,7 +841,7 @@ class SubtitleManager:
             for chunk in chunks:
                 disp   = chunk["text"]
-                is_rtl = _is_rtl_text(language, disp)   # ✅ covers he/fa/ur/ar
                 disp   = _prepare_display_text(disp, is_rtl, language)
                 clip = SubtitleManager.create_pil_text_clip(
@@ -828,7 +864,6 @@ class SubtitleManager:
         return all_clips
-    # ── Convenience compositor ─────────────────────────────────────────────────
     @staticmethod
     def create_captions(
         video_clip,
@@ -838,7 +873,6 @@ class SubtitleManager:
         caption_mode: str = "sentence",
         caption_style: str = "classic",
     ):
-        """Composites all caption clips onto video_clip and returns the result."""
         clips = SubtitleManager.create_caption_clips(
             transcript_data,
             size          = size,

    - active_word_index (int) replaces unreliable id() comparison
    - RTL detection covers Arabic, Persian, Urdu, Hebrew (not just Arabic)
    - Hebrew uses bidi-only (no Arabic reshaping)
+   - Arabic / Persian / Urdu → ArabicReshaper (configured) + bidi
+     ✅ arabic_reshaper RESTORED — Pillow does NOT do Arabic glyph shaping
+     internally. Without reshaper every Arabic letter renders in its isolated
+     form (disconnected). reshaper converts to presentation forms BEFORE
+     Pillow draws, which is the only correct approach for PIL.ImageDraw.
+     Config: support_ligatures=True, delete_harakat=False (preserves tashkeel),
+     delete_tatweel=True (removes kashida for accurate width measurement).
    - CJK/Thai/Devanagari not uppercased
    - ensure_font() uses Config.detect_language_from_text() + Config.get_font_for_language()
+   - Arabic-specific font fallback: NotoSansArabic before NotoSans
    - BBC/Netflix standards: max 42 chars/line, 2 lines max
+   - highlight_word mode uses pre-segmented SubtitleSegmenter blocks directly
+   - 3 new Arabic-optimised styles: cairo_bold, tajawal_bold, noto_arabic
 """
 import os
 import numpy as np
 import urllib.request
 from PIL import Image, ImageDraw, ImageFont
 import moviepy.editor as mpe
+from arabic_reshaper import ArabicReshaper   # ✅ REQUIRED for Pillow Arabic rendering
 from bidi.algorithm import get_display
 from .config import Config
 from .logger import Logger
 logger = Logger.get_logger(__name__)
 # ─────────────────────────────────────────────────────────────────────────────
+# Arabic Reshaper — configured once at module level (thread-safe, reusable)
 # ─────────────────────────────────────────────────────────────────────────────
 #
+# WHY reshaper is required:
+#   Pillow/FreeType renders each Unicode codepoint as its ISOLATED form.
+#   arabic_reshaper converts codepoints to contextual presentation forms
+#   (initial / medial / final / isolated) and joins ligatures.
+#   Then bidi reorders for right-to-left display.
+#   Without reshaper → every letter is disconnected (the bug in the screenshot).
+#
+# Config:
+#   support_ligatures = True   → joins لا → ﻻ and other common ligatures
+#   delete_harakat    = False  → preserves tashkeel so bidi positions them correctly
+#   delete_tatweel    = True   → removes kashida (ـ) for accurate pixel measurement
 #
+_ARABIC_RESHAPER = ArabicReshaper(configuration={
+    "support_ligatures": True,
+    "delete_harakat":    False,
+    "delete_tatweel":    True,
+})
+# Arabic script Unicode ranges
+_ARABIC_RANGES = [
+    ("\u0600", "\u06FF"),   # Arabic
+    ("\u0750", "\u077F"),   # Arabic Supplement
+    ("\u08A0", "\u08FF"),   # Arabic Extended-A
+    ("\uFB50", "\uFDFF"),   # Arabic Presentation Forms-A
+    ("\uFE70", "\uFEFF"),   # Arabic Presentation Forms-B
+]
+# ─────────────────────────────────────────────────────────────────────────────
+# Style Registry
+# ─────────────────────────────────────────────────────────────────────────────
 STYLES = {
     # ── 1. CLASSIC ────────────────────────────────────────────────────────────
     },
     # ── 4. TIKTOK NEON ────────────────────────────────────────────────────────
     "tiktok_neon": {
         "fontsize":            80,
         "color":               (255, 255, 255, 230),
             (0, 9, 0, (  0, 0, 0, 130)),
         ],
     },
+    # ── 7. CAIRO BOLD (Arabic-optimised) ─────────────────────────────────────
+    # Cairo: contemporary Arabic sans-serif, clean lines, harmonious Latin+Arabic
+    # mix, named best Arabic display font by Granshan 2016.
+    # Best for: Egyptian/Gulf social media, TikTok Arabic content.
+    "cairo_bold": {
+        "fontsize":            80,
+        "color":               (255, 255, 255, 255),
+        "stroke_color":        (0, 0, 0, 220),
+        "stroke_width":        4,
+        "font":                "Cairo-Bold.ttf",
+        "bg_color":            None,
+        "position":            ("center", 0.82),
+        "highlight_color":     (10, 10, 10, 255),
+        "highlight_bg":        (255, 210, 0, 255),
+        "highlight_bg_radius": 14,
+        "shadow_layers": [
+            (3, 5, 0, (0, 0, 0, 210)),
+            (6, 9, 0, (0, 0, 0,  80)),
+        ],
+    },
+    # ── 8. TAJAWAL BOLD (Arabic-optimised) ────────────────────────────────────
+    # Tajawal: modern geometric Arabic sans-serif, optimised for small screens
+    # and video subtitles, excellent readability, covers Latin too.
+    # Best for: YouTube Arabic captions, mixed Arabic/English content.
+    "tajawal_bold": {
+        "fontsize":            82,
+        "color":               (255, 255, 255, 255),
+        "stroke_color":        (0, 0, 0, 230),
+        "stroke_width":        4,
+        "font":                "Tajawal-Bold.ttf",
+        "bg_color":            (0, 0, 0, 150),
+        "position":            ("center", 0.80),
+        "highlight_color":     (255, 255, 255, 255),
+        "highlight_bg":        (220, 50, 50, 245),
+        "highlight_bg_radius": 18,
+        "shadow_layers": [
+            (0, 4, 12, (180, 0, 0, 140)),
+        ],
+    },
+    # ── 9. NOTO ARABIC (Universal Arabic) ─────────────────────────────────────
+    # NotoSansArabic: Google's reference Arabic font, covers all Arabic script
+    # variants (Arabic, Persian/Farsi, Urdu, Kurdish), 1642 glyphs.
+    # Best for: multilingual content, Persian/Urdu subtitles.
+    "noto_arabic": {
+        "fontsize":            76,
+        "color":               (240, 240, 240, 230),
+        "stroke_color":        (0, 0, 0, 180),
+        "stroke_width":        3,
+        "font":                "NotoSansArabic-Bold.ttf",
+        "bg_color":            (0, 0, 0, 155),
+        "position":            ("center", 0.78),
+        "highlight_color":     (20, 20, 20, 255),
+        "highlight_bg":        (255, 200, 40, 248),
+        "highlight_bg_radius": 16,
+        "shadow_layers": [
+            (0, 4, 10, (180, 130, 0, 150)),
+        ],
+    },
 }
 # ─────────────────────────────────────────────────────────────────────────────
 def _rgba(c):
     if c is None:
         return None
     if isinstance(c, (tuple, list)):
 def _should_uppercase(text: str) -> bool:
     """Tell whether ``text`` may be upper-cased for display.
     Returns False when any character of ``text`` lies inside one of the
     inclusive (start, end) ranges listed in ``_NO_UPPER_RANGES``; returns
     True otherwise, including for empty text.
     """
     return not any(
         low <= ch <= high
         for low, high in _NO_UPPER_RANGES
         for ch in text
     )
+def _is_arabic_script(text: str) -> bool:
+    """Returns True if text contains Arabic script characters."""
+    for start, end in _ARABIC_RANGES:
+        if any(start <= c <= end for c in text):
+            return True
+    return False
 def _prepare_display_text(raw: str, is_rtl: bool, language: str = None) -> str:
     """
+    Prepares text for correct rendering in Pillow (PIL.ImageDraw).
+    Pipeline for Arabic/Persian/Urdu:
+      1. ArabicReshaper.reshape() — converts Unicode isolated codepoints to
+         contextual presentation forms + joins ligatures.
+         This is MANDATORY for Pillow because FreeType does NOT do this.
+      2. bidi.get_display()       — reorders characters right-to-left.
+    Pipeline for Hebrew:
+      bidi.get_display() only — Hebrew has no contextual shaping requirement.
+    Pipeline for Latin/Cyrillic:
+      uppercase only.
+    Pipeline for CJK/Thai/Devanagari:
+      as-is (no uppercase, no bidi needed at the Pillow level).
     """
     if not is_rtl:
         return raw.upper() if _should_uppercase(raw) else raw
+    # ── Arabic script (ar, fa, ur, ckb …) ────────────────────────────────────
+    if _is_arabic_script(raw):
+        try:
+            reshaped = _ARABIC_RESHAPER.reshape(raw)
+            return get_display(reshaped)
+        except Exception as exc:
+            logger.warning(f"⚠️ Arabic reshape error for '{raw[:20]}…': {exc}")
+            try:
+                return get_display(raw)   # fallback: bidi only (still broken but RTL)
+            except Exception:
+                return raw
+    # ── Hebrew and other RTL (bidi only) ──────────────────────────────────────
     try:
+        return get_display(raw)
     except Exception:
         return raw
 def _is_rtl_text(language: str, text: str) -> bool:
     if language and Config.is_rtl(language):
         return True
     if text:
 def _draw_shadow_layers(draw, box, layers, base_radius):
     x1, y1, x2, y2 = box
     for (ox, oy, blur, color) in layers:
         rgba = _rgba(color)
 # ─────────────────────────────────────────────────────────────────────────────
 class SubtitleManager:
     @staticmethod
     def ensure_font(language: str = None, style_name: str = None,
                     style_font: str = None, text_content: str = None) -> str:
         detected_lang = None
         if language:
             lang_val      = language.value if hasattr(language, 'value') else str(language)
             detected_lang = None if lang_val == 'auto' else lang_val
         if not detected_lang and text_content:
             detected_lang = Config.detect_language_from_text(text_content)
         if detected_lang:
             font_name = Config.get_font_for_language(detected_lang, style_name)
         elif style_font:
         logger.debug(f"🔤 Font resolved: lang={detected_lang} style={style_name} → {font_name}")
         font_path = os.path.join(Config.BASE_DIR, font_name)
         if not os.path.exists(font_path):
                 except Exception as exc:
                     logger.error(f"❌ Font download failed for {font_name}: {exc}")
+                    # Arabic-specific fallback chain
+                    is_arabic_lang = detected_lang in ("ar", "fa", "ur", "ckb")
+                    fallback_name  = "NotoSansArabic-Bold.ttf" if is_arabic_lang else "NotoSans-Bold.ttf"
+                    fallback_path  = os.path.join(Config.BASE_DIR, fallback_name)
                     if not os.path.exists(fallback_path):
                         fallback_url = Config.FONTS.get(fallback_name)
                         if fallback_url:
                                 pass
                     if os.path.exists(fallback_path):
+                        logger.warning(f"⚠️ Using {fallback_name} fallback instead of {font_name}")
                         return fallback_path
                     logger.error("❌ All font downloads failed, falling back to system Arial")
                     return "Arial"
             else:
         return font_path
     @staticmethod
     def wrap_text(text: str, font, max_width: int) -> list:
         lines        = []
         words        = text.split()
         if not words:
             if width > max_width:
                 if len(current_line) == 1:
                     lines.append(current_line.pop())
                 else:
                     last = current_line.pop()
         return lines
     @staticmethod
     def create_pil_text_clip(text: str, fontsize: int, color, font_path: str,
                               stroke_color=(0, 0, 0, 200), stroke_width: int = 2,
                               bg_color=None, padding: int = 12, bg_radius: int = 18,
                               max_width: int = None):
         try:
             try:
                 font = ImageFont.truetype(font_path, fontsize)
             dummy = Image.new("RGBA", (1, 1))
             d     = ImageDraw.Draw(dummy)
             lines = [text]
             if max_width:
                 avail = max_width - padding * 4
                 lines = SubtitleManager.wrap_text(text, font, avail)
             line_metrics = []
             max_w        = 0
             total_h      = 0
             current_y = margin
             for m in line_metrics:
                 lx = (iw - m["w"]) / 2 - m["bbox"][0]
                 ly = current_y - m["bbox"][1]
                 draw.text(
             logger.error(f"⚠️ create_pil_text_clip error: {exc}")
             return None
     @staticmethod
     def create_sentence_highlight_clip(
         sentence_words: list,
+        active_word_index: int,
         font,
         fontsize: int,
         font_path: str,
         style_config: dict,
+        is_rtl: bool,
+        language: str = None,
         padding: int = 14,
         bg_radius: int = 20,
         max_width: int = None,
     ):
         try:
             dummy   = Image.new("RGBA", (1, 1))
             d       = ImageDraw.Draw(dummy)
             space_w = d.textbbox((0, 0), " ", font=font)[2]
             words_data = []
+            ordered    = list(reversed(sentence_words)) if is_rtl else sentence_words
             for idx, w in enumerate(ordered):
                 raw     = w.get("text", "")
                 display = _prepare_display_text(raw, is_rtl, language)
                 bbox    = d.textbbox((0, 0), display, font=font)
                 words_data.append({
+                    "index": idx,
                     "text":  display,
                     "w":     bbox[2] - bbox[0],
                     "h":     bbox[3] - bbox[1],
                     "bbox":  bbox,
                 })
             n = len(sentence_words)
             effective_active_index = (
                 (n - 1 - active_word_index)
                 else active_word_index
             )
             lines        = []
             current_line = []
             current_w    = 0
             if current_line:
                 lines.append(current_line)
             line_spacing = int(fontsize * 0.2)
             stroke_w     = style_config.get("stroke_width", 2)
             margin       = int(stroke_w * 2) + padding
             img  = Image.new("RGBA", (int(iw), int(ih)), (0, 0, 0, 0))
             draw = ImageDraw.Draw(img)
             hl_bg     = style_config.get("highlight_bg")
             hl_radius = style_config.get("highlight_bg_radius", bg_radius)
             shadows   = style_config.get("shadow_layers", [])
             for i, line in enumerate(lines):
                 lx = margin + (canvas_w - line_infos[i]["w"]) // 2
                 ly = margin + bleed // 2 + line_infos[i]["y"]
                 cx = lx
                         by2 = ly + wd["h"] + padding // 2
                         box = (bx1, by1, bx2, by2)
                         if shadows:
                             _draw_shadow_layers(draw, box, shadows, hl_radius)
                         draw.rounded_rectangle(
                             [(bx1, by1), (bx2, by2)],
                             radius=hl_radius,
                     cx += wd["w"] + space_w
             rest_c = _rgba(style_config.get("color",           (255, 255, 255, 255)))
             hl_c   = _rgba(style_config.get("highlight_color", rest_c))
             stk_c  = _rgba(style_config.get("stroke_color",    (0, 0, 0, 255)))
             logger.error(f"⚠️ create_sentence_highlight_clip error: {exc}")
             return None
     @staticmethod
     def get_style_config(style_name: str) -> dict:
         """Return the ``STYLES`` entry for ``style_name``, or the "classic" entry if the name is unknown."""
         fallback = STYLES["classic"]
         return STYLES.get(style_name, fallback)
     @staticmethod
     def create_caption_clips(
         transcript_data,
         """
         Generates all caption ImageClips ready for compositing.
+        Arabic caption_style recommendations:
+          "cairo_bold"   → best for Egyptian/Gulf social media content
+          "tajawal_bold" → modern geometric, dark background, great readability
+          "noto_arabic"  → universal, covers Arabic/Persian/Urdu/Kurdish
         """
         all_clips = []
         style_cfg = SubtitleManager.get_style_config(caption_style)
         segments    = []
         sample_text = ""
                 sample_text = s["text"]
                 break
         font_path = SubtitleManager.ensure_font(
             language    = language,
             style_name  = caption_style,
         # MODE: highlight_word
         # ══════════════════════════════════════════════════════════════════════
         if caption_mode == "highlight_word":
             fontsize = style_cfg.get("fontsize", 75)
             try:
                 font = ImageFont.truetype(font_path, fontsize)
                 logger.warning("⚠️ TrueType load failed — using default font.")
                 font = ImageFont.load_default()
+            for seg in segments:
+                sw = seg.get("words", [])
+                if not sw:
+                    logger.warning(
+                        f"⚠️ Segment [{seg.get('start', 0):.2f}s] has no word timestamps, skipping."
+                    )
                     continue
+                sent_start = seg.get("start", sw[0]["start"])
+                sent_end   = seg.get("end",   sw[-1]["end"])
+                sent_text  = seg.get("text",  " ".join(w["text"] for w in sw))
+                is_rtl     = _is_rtl_text(language, sent_text)
+                for active_idx, active_word in enumerate(sw):
+                    w_start = active_word.get("start", sent_start)
+                    w_end   = active_word.get("end",   sent_end)
+                    if w_end <= w_start:
+                        w_end = w_start + 0.05
                     clip = SubtitleManager.create_sentence_highlight_clip(
                         sentence_words    = sw,
+                        active_word_index = active_idx,
                         font              = font,
                         fontsize          = fontsize,
                         font_path         = font_path,
                         style_config      = style_cfg,
+                        is_rtl            = is_rtl,
                         language          = language,
                         padding           = style_cfg.get("padding", 14),
                         bg_radius         = style_cfg.get("highlight_bg_radius", 20),
                     )
                     if clip:
                         all_clips.append(
+                            clip.set_start(w_start)
+                                .set_end(w_end)
                                 .set_position(pos)
                         )
                 covered = [(w["start"], w["end"]) for w in sw]
                 gaps    = []
                 if sent_start < covered[0][0]:
                     gaps.append((sent_start, covered[0][0]))
                 for j in range(len(covered) - 1):
                 if covered[-1][1] < sent_end:
                     gaps.append((covered[-1][1], sent_end))
+                plain_cfg = {**style_cfg, "highlight_bg": None, "shadow_layers": []}
                 for gs, ge in gaps:
+                    if ge - gs < 0.02:
+                        continue
                     gc = SubtitleManager.create_sentence_highlight_clip(
                         sentence_words    = sw,
+                        active_word_index = -1,
                         font              = font,
                         fontsize          = fontsize,
                         font_path         = font_path,
                 else:
                     continue
             line1 = seg.get("_line1", "")
             line2 = seg.get("_line2", "")
             if line1:
                 display_text = f"{line1}\n{line2}".strip() if line2 else line1
                 chunks = [{"text": display_text, "start": start_t, "end": end_t}]
             else:
                 chunk_size = 1 if caption_mode == "word" else 4
                 chunks     = []
                 stt_words  = seg.get("words")
             for chunk in chunks:
                 disp   = chunk["text"]
+                is_rtl = _is_rtl_text(language, disp)
                 disp   = _prepare_display_text(disp, is_rtl, language)
                 clip = SubtitleManager.create_pil_text_clip(
         return all_clips
     @staticmethod
     def create_captions(
         video_clip,
         caption_mode: str = "sentence",
         caption_style: str = "classic",
     ):
         clips = SubtitleManager.create_caption_clips(
             transcript_data,
             size          = size,

processor.py CHANGED Viewed

@@ -9,6 +9,10 @@ Fixes applied:
   - style string normalised once
   - get_best_segments wired into process_video
   - detected_lang used correctly for captions
 """
 import os
 import gc
@@ -20,14 +24,58 @@ import json_repair
 import core  # Applies monkey patches
 from core.config import Config
 from core.logger import Logger
-from core.stt import STT
-from core.analyze import analyze_transcript_gemini
 from core.styles import StyleFactory
 from core.subtitle_manager import SubtitleManager
 from core.free_translator import FreeTranslator
 logger = Logger.get_logger(__name__)
 # ─────────────────────────────────────────────────────────────────────────────
 class VideoProcessor:
@@ -60,7 +108,6 @@ class VideoProcessor:
         except Exception as e:
             logger.warning(f"⚠️ json_repair failed, using raw content: {e}")
-        # Last-resort brace balancing
         open_b  = content.count("{")
         close_b = content.count("}")
         if open_b > close_b:
@@ -89,7 +136,6 @@ class VideoProcessor:
                 for key in ("segments", "clips", "moments"):
                     if key in segments_data and isinstance(segments_data[key], list):
                         return segments_data[key]
-                # Fallback: first list value found
                 for v in segments_data.values():
                     if isinstance(v, list):
                         return v
@@ -104,18 +150,17 @@ class VideoProcessor:
     def analyze_impact(self,
                        video_path,
-                       source_language=None,    # ← لغة الفيديو الأصلي → بتيجي لـ Whisper
-                       target_language=None,    # ← لغة الـ output (ترجمة/كابشن)
                        timestamp_mode="segments",
                        progress_callback=None):
         """
         STT + AI viral-moment detection.
-        source_language : بيتبعت لـ Whisper مباشرة.
-                          لو None → Whisper يكتشف تلقائي (أبطأ لكن آمن).
-        target_language : بيتحفظ في data عشان process_clips يستخدمه للترجمة والكابشن.
-        Returns (unique_segments, duration, data)
         """
         if progress_callback:
             progress_callback(5, "Starting speech-to-text...")
@@ -126,7 +171,7 @@ class VideoProcessor:
         full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
             video_path,
-            language=source_language,   # None = Whisper يكتشف تلقائي
             skip_ai=True,
             timestamp_mode=timestamp_mode,
         )
@@ -135,8 +180,8 @@ class VideoProcessor:
         data = {
             "segments":          full_segments,
-            "detected_language": detected_lang,   # اللغة الفعلية اللي Whisper اكتشفها
-            "target_language":   target_language,  # اللغة المطلوبة للـ output
             "duration":          duration,
         }
@@ -174,7 +219,7 @@ class VideoProcessor:
                     f"{min(current_end, max_time)/60:.1f}m …"
                 )
-                ai_res = analyze_transcript_gemini(chunk_transcript)
                 logger.info(f"🤖 AI response type: {type(ai_res)}")
                 try:
@@ -189,7 +234,6 @@ class VideoProcessor:
             if current_end >= max_time:
                 break
-        # Deduplicate by start_time
         seen, unique = set(), []
         for s in all_ai_segs:
             st = s.get("start_time")
@@ -218,14 +262,19 @@ class VideoProcessor:
         """
         Cuts, styles, captions, and exports each viral clip.
-        target_language يييجي من data["target_language"] (اللي حطّه analyze_impact).
-        Translation يحصل مرة واحدة فقط داخل segment_transcript loop.
         """
         logger.info("🎨 Phase 3: Style & Captions …")
         if progress_callback:
             progress_callback(60, "Generating clips …")
-        # ── Video duration ────────────────────────────────────────────────────
         video_duration = data.get("duration") or 0
         if not video_duration:
             try:
@@ -235,17 +284,9 @@ class VideoProcessor:
                 logger.error(f"❌ Could not determine video duration: {e}")
         # ── Language resolution ───────────────────────────────────────────────
-        #
-        #   detected_lang   = اللغة الفعلية للفيديو (من Whisper)
-        #   target_language = اللغة المطلوبة للـ output (من الريكويست)
-        #
-        #   needs_translation = True  → نترجم النص
-        #   caption_lang      = اللغة اللي هيتعمل بيها الكابشن
-        #
         detected_lang   = data.get("detected_language", "en")
-        target_language = data.get("target_language")  # من analyze_impact
-        # normalize
         if hasattr(target_language, "value"):
             target_language = target_language.value
@@ -255,7 +296,6 @@ class VideoProcessor:
             and target_language != detected_lang
         )
-        # الكابشن بيتعمل بلغة الـ output لو فيه ترجمة، وإلا بلغة الفيديو الأصلي
         caption_lang = target_language if needs_translation else detected_lang
         translator = FreeTranslator() if needs_translation else None
@@ -312,46 +352,53 @@ class VideoProcessor:
                 final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", out_name)
                 os.makedirs(os.path.dirname(final_output), exist_ok=True)
-                # ── Cut clip (fresh VideoFileClip per iteration) ───────────────
                 current_video_clip = mpe.VideoFileClip(input_video_path)
                 clip               = current_video_clip.subclip(start, end)
                 # ── Build segment_transcript ──────────────────────────────────
-                # الترجمة بتحصل هنا فقط — مفيش أي مكان تاني بيعدّل على data
                 segment_transcript = {"segments": []}
                 for s in data["segments"]:
                     if s["start"] >= end or s["end"] <= start:
                         continue
-                    new_seg = s.copy()
                     new_seg["start"] = max(0, s["start"] - start)
                     new_seg["end"]   = min(end - start, s["end"] - start)
                     if needs_translation and translator:
-                        # ترجمة النص مع توزيع timestamps على الكلمات الجديدة
                         try:
                             translated_text, _ = translator.translate_text(
-                                s["text"], target_language
                             )
                         except Exception as te:
                             logger.warning(f"⚠️ Translation error: {te}")
                             translated_text = s["text"]
                         new_seg["text"] = translated_text
-                        words    = translated_text.split()
-                        seg_dur  = new_seg["end"] - new_seg["start"]
-                        word_dur = seg_dur / len(words) if words else seg_dur
-                        new_seg["words"] = [
-                            {
-                                "text":  w,
-                                "start": new_seg["start"] + idx * word_dur,
-                                "end":   new_seg["start"] + (idx + 1) * word_dur,
-                            }
-                            for idx, w in enumerate(words)
-                        ]
                     else:
-                        # تعديل timestamps الكلمات الموجودة بدون ترجمة
                         if "words" in s:
                             new_seg["words"] = [
                                 {
@@ -362,6 +409,8 @@ class VideoProcessor:
                                 for w in s["words"]
                                 if w["start"] < end and w["end"] > start
                             ]
                     segment_transcript["segments"].append(new_seg)
@@ -418,20 +467,20 @@ def process_video(video_path, style="cinematic_blur", model_size="base", **kwarg
     """
     End-to-end pipeline: STT → AI analysis → clip export.
-    kwargs المهمة:
-        source_language : لغة الفيديو الأصلي → بتتبعت لـ Whisper
-                          لو مش محدد → Whisper يكتشف تلقائي
-        language        : لغة الـ output المطلوبة (ترجمة + كابشن)
-                          لو نفس لغة الفيديو → مش هيترجم
         caption_mode    : sentence | word | highlight_word
-        caption_style   : classic | modern_glow | tiktok_bold | ...
     """
     try:
         processor = VideoProcessor(model_size=model_size)
         caption_mode = kwargs.get("caption_mode", "sentence")
-        # highlight_word و word كلاهما يحتاج word-level timestamps من Whisper
         timestamp_mode = (
             "words"
             if caption_mode in ("word", "highlight_word")
@@ -441,8 +490,8 @@ def process_video(video_path, style="cinematic_blur", model_size="base", **kwarg
         # Phase 1 + 2: STT + AI analysis
         viral_segments, duration, stt_data = processor.analyze_impact(
             video_path,
-            source_language = kwargs.get("source_language"),  # لـ Whisper
-            target_language = kwargs.get("language"),          # للترجمة والكابشن
             timestamp_mode  = timestamp_mode,
         )
@@ -450,7 +499,6 @@ def process_video(video_path, style="cinematic_blur", model_size="base", **kwarg
             logger.warning("⚠️ No viral segments found.")
             return []
-        # Sort by viral score
         best_clips = processor.get_best_segments(viral_segments, duration)
         # Phase 3: render

   - style string normalised once
   - get_best_segments wired into process_video
   - detected_lang used correctly for captions
+  - ✅ FIX: after translation, _line1/_line2 re-computed from translated text
+    using SubtitleSegmenter._split_into_lines so line splits match translated content
+  - ✅ FIX: translated word timestamps distributed proportional to word length
+    (instead of uniform distribution) for better highlight sync
 """
 import os
 import gc
 import core  # Applies monkey patches
 from core.config import Config
 from core.logger import Logger
+from core.stt import STT, SubtitleSegmenter
+from core.analyze import analyze_transcript
 from core.styles import StyleFactory
 from core.subtitle_manager import SubtitleManager
 from core.free_translator import FreeTranslator
 logger = Logger.get_logger(__name__)
+# Max chars per line — must match SubtitleSegmenter constant
+_MAX_CHARS_PER_LINE = 42
+def _distribute_timestamps_by_length(words: list, seg_start: float, seg_end: float) -> list:
+    """
+    ✅ FIX: Distribute word timestamps proportional to character length instead of
+    uniform distribution. Longer words get more time, giving better sync in
+    highlight_word mode after translation.
+    words: list of str (translated words)
+    Returns: list of { text, start, end }
+    """
+    if not words:
+        return []
+    total_chars = sum(len(w) for w in words)
+    seg_dur     = seg_end - seg_start
+    result  = []
+    cursor  = seg_start
+    for i, w in enumerate(words):
+        if total_chars > 0:
+            fraction = len(w) / total_chars
+        else:
+            fraction = 1.0 / len(words)
+        w_dur = seg_dur * fraction
+        w_end = cursor + w_dur
+        # Clamp last word to seg_end to avoid float drift
+        if i == len(words) - 1:
+            w_end = seg_end
+        result.append({
+            "text":  w,
+            "start": round(cursor, 3),
+            "end":   round(w_end,  3),
+        })
+        cursor = w_end
+    return result
 # ─────────────────────────────────────────────────────────────────────────────
 class VideoProcessor:
         except Exception as e:
             logger.warning(f"⚠️ json_repair failed, using raw content: {e}")
         open_b  = content.count("{")
         close_b = content.count("}")
         if open_b > close_b:
                 for key in ("segments", "clips", "moments"):
                     if key in segments_data and isinstance(segments_data[key], list):
                         return segments_data[key]
                 for v in segments_data.values():
                     if isinstance(v, list):
                         return v
     def analyze_impact(self,
                        video_path,
+                       source_language=None,
+                       target_language=None,
                        timestamp_mode="segments",
                        progress_callback=None):
         """
         STT + AI viral-moment detection.
+        source_language : passed directly to Whisper.
+                          None → Whisper auto-detects (slower but safe).
+        target_language : stored in data for process_clips to use for
+                          translation and caption rendering.
         """
         if progress_callback:
             progress_callback(5, "Starting speech-to-text...")
         full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
             video_path,
+            language=source_language,
             skip_ai=True,
             timestamp_mode=timestamp_mode,
         )
         data = {
             "segments":          full_segments,
+            "detected_language": detected_lang,
+            "target_language":   target_language,
             "duration":          duration,
         }
                     f"{min(current_end, max_time)/60:.1f}m …"
                 )
+                ai_res = analyze_transcript(chunk_transcript)
                 logger.info(f"🤖 AI response type: {type(ai_res)}")
                 try:
             if current_end >= max_time:
                 break
         seen, unique = set(), []
         for s in all_ai_segs:
             st = s.get("start_time")
         """
         Cuts, styles, captions, and exports each viral clip.
+        ✅ FIX 1: After translation, _line1 and _line2 are re-computed from
+           the translated text using SubtitleSegmenter._split_into_lines.
+           Previously they were left as the original-language splits which
+           caused wrong line breaks in the translated captions.
+        ✅ FIX 2: Word timestamps after translation are distributed proportional
+           to character length (via _distribute_timestamps_by_length) instead of
+           uniform distribution, giving better sync in highlight_word mode.
         """
         logger.info("🎨 Phase 3: Style & Captions …")
         if progress_callback:
             progress_callback(60, "Generating clips …")
         video_duration = data.get("duration") or 0
         if not video_duration:
             try:
                 logger.error(f"❌ Could not determine video duration: {e}")
         # ── Language resolution ───────────────────────────────────────────────
         detected_lang   = data.get("detected_language", "en")
+        target_language = data.get("target_language")
         if hasattr(target_language, "value"):
             target_language = target_language.value
             and target_language != detected_lang
         )
         caption_lang = target_language if needs_translation else detected_lang
         translator = FreeTranslator() if needs_translation else None
                 final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", out_name)
                 os.makedirs(os.path.dirname(final_output), exist_ok=True)
+                # ── Cut clip ──────────────────────────────────────────────────
                 current_video_clip = mpe.VideoFileClip(input_video_path)
                 clip               = current_video_clip.subclip(start, end)
                 # ── Build segment_transcript ──────────────────────────────────
                 segment_transcript = {"segments": []}
                 for s in data["segments"]:
                     if s["start"] >= end or s["end"] <= start:
                         continue
+                    new_seg         = s.copy()
                     new_seg["start"] = max(0, s["start"] - start)
                     new_seg["end"]   = min(end - start, s["end"] - start)
                     if needs_translation and translator:
+                        # ── Translate text ────────────────────────────────────
                         try:
                             translated_text, _ = translator.translate_text(
+                                s["text"], target_language, detected_lang
                             )
                         except Exception as te:
                             logger.warning(f"⚠️ Translation error: {te}")
                             translated_text = s["text"]
                         new_seg["text"] = translated_text
+                        # ✅ FIX 1: Re-compute line splits from TRANSLATED text.
+                        # Original _line1/_line2 are in the source language and
+                        # will have wrong split points after translation.
+                        translated_lines   = SubtitleSegmenter._split_into_lines(
+                            translated_text, _MAX_CHARS_PER_LINE
+                        )
+                        new_seg["_line1"]  = translated_lines[0] if len(translated_lines) > 0 else translated_text
+                        new_seg["_line2"]  = translated_lines[1] if len(translated_lines) > 1 else ""
+                        # ✅ FIX 2: Distribute word timestamps proportional to
+                        # character length for better highlight_word sync.
+                        translated_words = translated_text.split()
+                        new_seg["words"] = _distribute_timestamps_by_length(
+                            translated_words,
+                            new_seg["start"],
+                            new_seg["end"],
+                        )
                     else:
+                        # No translation — adjust existing word timestamps
                         if "words" in s:
                             new_seg["words"] = [
                                 {
                                 for w in s["words"]
                                 if w["start"] < end and w["end"] > start
                             ]
+                        # _line1/_line2 already correct from SubtitleSegmenter
+                        # (already in source lang which IS caption lang here)
                     segment_transcript["segments"].append(new_seg)
     """
     End-to-end pipeline: STT → AI analysis → clip export.
+    Important kwargs:
+        source_language : language of the original video → passed to Whisper.
+                          If not set → Whisper auto-detects.
+        language        : desired output language (translation + captions).
+                          If same as source → no translation.
         caption_mode    : sentence | word | highlight_word
+        caption_style   : classic | modern_glow | tiktok_bold | …
     """
     try:
         processor = VideoProcessor(model_size=model_size)
         caption_mode = kwargs.get("caption_mode", "sentence")
+        # highlight_word and word modes both need word-level timestamps
         timestamp_mode = (
             "words"
             if caption_mode in ("word", "highlight_word")
         # Phase 1 + 2: STT + AI analysis
         viral_segments, duration, stt_data = processor.analyze_impact(
             video_path,
+            source_language = kwargs.get("source_language"),
+            target_language = kwargs.get("language"),
             timestamp_mode  = timestamp_mode,
         )
             logger.warning("⚠️ No viral segments found.")
             return []
         best_clips = processor.get_best_segments(viral_segments, duration)
         # Phase 3: render

requirements.txt CHANGED Viewed

@@ -15,5 +15,5 @@ imageio-ffmpeg==0.4.8
 openai>=1.0.0
 scipy
 json_repair
-cryptography
-firebase-admin

 openai>=1.0.0
 scipy
 json_repair
+tiktoken
+pydantic