Spaces:
Sleeping
Sleeping
Commit ·
0faf659
1
Parent(s): 342e0fb
apply caption styles and test it
Browse files- core/analyze.py +23 -3
- core/config.py +178 -64
- core/database.py +144 -0
- core/security.py +63 -0
- core/stt.py +5 -4
- core/styles.py +307 -121
- core/subtitle_manager.py +489 -237
- firebase_key.json +13 -0
- main.py +56 -10
- processor.py +377 -370
- requirements.txt +2 -0
core/analyze.py
CHANGED
|
@@ -2,14 +2,34 @@ import os
|
|
| 2 |
import time
|
| 3 |
from openai import OpenAI
|
| 4 |
from dotenv import load_dotenv
|
|
|
|
| 5 |
|
| 6 |
load_dotenv()
|
| 7 |
|
| 8 |
-
#
|
| 9 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
client = OpenAI(
|
| 11 |
base_url="https://openrouter.ai/api/v1",
|
| 12 |
-
api_key=
|
| 13 |
)
|
| 14 |
|
| 15 |
def analyze_transcript_gemini(transcript):
|
|
|
|
| 2 |
import time
|
| 3 |
from openai import OpenAI
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
+
from .database import DatabaseManager
|
| 6 |
|
| 7 |
load_dotenv()
|
| 8 |
|
| 9 |
+
# Initialize Database Manager
|
| 10 |
+
# We try to use Firebase if available, otherwise fallback to local SQLite
|
| 11 |
+
try:
|
| 12 |
+
db = DatabaseManager(use_firebase=True)
|
| 13 |
+
except Exception:
|
| 14 |
+
print("⚠️ Firebase not configured, falling back to local SQLite.")
|
| 15 |
+
db = DatabaseManager(use_firebase=False)
|
| 16 |
+
|
| 17 |
+
# Retrieve API Key from Secure Storage
|
| 18 |
+
# 1. Try to get from Database
|
| 19 |
+
api_key = db.get_key("openrouter")
|
| 20 |
+
|
| 21 |
+
# 2. If not in DB, fallback to .env (Legacy support)
|
| 22 |
+
if not api_key:
|
| 23 |
+
api_key = os.getenv("OPENROUTER_API_KEY")
|
| 24 |
+
|
| 25 |
+
if not api_key:
|
| 26 |
+
print("❌ ERROR: OPENROUTER_API_KEY not found in Database or .env")
|
| 27 |
+
# We don't raise error here to allow module import, but client creation will fail if used.
|
| 28 |
+
|
| 29 |
+
# Configure OpenAI Client
|
| 30 |
client = OpenAI(
|
| 31 |
base_url="https://openrouter.ai/api/v1",
|
| 32 |
+
api_key=api_key
|
| 33 |
)
|
| 34 |
|
| 35 |
def analyze_transcript_gemini(transcript):
|
core/config.py
CHANGED
|
@@ -1,105 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
class Config:
|
| 5 |
-
BASE_DIR
|
| 6 |
-
TEMP_DIR
|
| 7 |
UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
|
| 8 |
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
|
| 9 |
-
LOGS_DIR
|
| 10 |
|
| 11 |
-
# Font
|
| 12 |
-
#
|
| 13 |
FONTS = {
|
| 14 |
-
|
| 15 |
-
"
|
| 16 |
-
"
|
| 17 |
-
"
|
| 18 |
-
"
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
}
|
| 22 |
|
| 23 |
-
#
|
|
|
|
| 24 |
LANGUAGE_FONT_MAP = {
|
| 25 |
-
|
| 26 |
-
"
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
}
|
| 33 |
|
| 34 |
-
# Video
|
| 35 |
-
DEFAULT_SIZE
|
| 36 |
-
CHUNK_SIZE_SECONDS
|
| 37 |
-
OVERLAP_SECONDS
|
| 38 |
-
|
| 39 |
-
# Styles
|
| 40 |
STYLES = [
|
| 41 |
"cinematic",
|
| 42 |
"cinematic_blur",
|
| 43 |
"vertical_full",
|
| 44 |
"split_vertical",
|
| 45 |
-
"split_horizontal"
|
| 46 |
]
|
| 47 |
|
|
|
|
| 48 |
@classmethod
|
| 49 |
def setup_dirs(cls):
|
| 50 |
for d in [cls.TEMP_DIR, cls.UPLOADS_DIR, cls.OUTPUTS_DIR, cls.LOGS_DIR]:
|
| 51 |
os.makedirs(d, exist_ok=True)
|
| 52 |
|
|
|
|
| 53 |
@staticmethod
|
| 54 |
-
def get_urls(
|
| 55 |
"""
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
"""
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
@staticmethod
|
| 70 |
-
def download_font_from_css(css_url, output_path):
|
| 71 |
"""
|
| 72 |
-
Downloads the
|
|
|
|
|
|
|
|
|
|
| 73 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
try:
|
| 75 |
-
# 1. Fetch CSS
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
# 2. Extract URLs
|
| 83 |
-
urls = Config.get_urls(content)
|
| 84 |
-
|
| 85 |
if not urls:
|
| 86 |
print(f"❌ No font URLs found in CSS: {css_url}")
|
| 87 |
return False
|
| 88 |
|
| 89 |
-
# 3. Download the first font found (usually the most specific/relevant or primary subset)
|
| 90 |
-
# For Arabic fonts like Cairo/NotoSansArabic, the first subset is usually the Arabic one.
|
| 91 |
font_url = urls[0]
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
| 101 |
return True
|
| 102 |
|
| 103 |
-
except
|
| 104 |
-
print(f"❌
|
| 105 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Config — Viral Shorts Engine Configuration
|
| 3 |
+
|
| 4 |
+
Font choices based on analysis of 2M+ short-form videos (2024-2025):
|
| 5 |
+
|
| 6 |
+
English / Latin
|
| 7 |
+
───────────────
|
| 8 |
+
• Montserrat-Bold → #1 most used font in viral Shorts (Alex Hormozi, MrBeast style)
|
| 9 |
+
• Rubik-Bold → Distinctive modern feel, high engagement, less saturated
|
| 10 |
+
• Oswald-Bold → Condensed, fits more words per line — great for fast speech
|
| 11 |
+
• Roboto-Bold → YouTube's native subtitle font, clean baseline
|
| 12 |
+
|
| 13 |
+
Arabic
|
| 14 |
+
──────
|
| 15 |
+
• Tajawal-Bold → Most used modern Arabic font on social media, youth-oriented
|
| 16 |
+
• Cairo-Bold → Clean, highly legible for captions, widely recognized
|
| 17 |
+
• Almarai-Bold → Rounded, friendly — popular in Gulf & Egyptian content
|
| 18 |
+
• NotoSansArabic → Fallback — covers all Arabic Unicode correctly
|
| 19 |
+
|
| 20 |
+
Style → Font mapping (per caption style):
|
| 21 |
+
classic → Montserrat-Bold (professional, universal)
|
| 22 |
+
modern_glow → Rubik-Bold (distinctive, modern)
|
| 23 |
+
tiktok_bold → Montserrat-Bold (proven viral, MrBeast aesthetic)
|
| 24 |
+
tiktok_neon → Oswald-Bold (condensed punch)
|
| 25 |
+
youtube_clean → Rubik-Bold (clean educator look)
|
| 26 |
+
youtube_box → Montserrat-Bold (karaoke / game-show energy)
|
| 27 |
+
"""
|
| 28 |
import os
|
| 29 |
+
import re
|
| 30 |
import requests
|
| 31 |
+
from dotenv import load_dotenv
|
| 32 |
+
|
| 33 |
+
load_dotenv()
|
| 34 |
+
|
| 35 |
|
| 36 |
class Config:
    """Central configuration: directories, font registry, and video settings."""

    # Project root — one level above this package directory.
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    TEMP_DIR = os.path.join(BASE_DIR, "temp")
    UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
    OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
    LOGS_DIR = os.path.join(BASE_DIR, "logs")

    # ── Font registry ──────────────────────────────────────────────────────────
    # Google Fonts CSS2 API — wght@700 = Bold
    FONTS = {
        # ── English / Latin ────────────────────────────────────────────────────
        "Montserrat-Bold.ttf": "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap",
        "Rubik-Bold.ttf": "https://fonts.googleapis.com/css2?family=Rubik:wght@700&display=swap",
        "Oswald-Bold.ttf": "https://fonts.googleapis.com/css2?family=Oswald:wght@700&display=swap",
        "Roboto-Bold.ttf": "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",

        # ── Arabic ─────────────────────────────────────────────────────────────
        "Tajawal-Bold.ttf": "https://fonts.googleapis.com/css2?family=Tajawal:wght@700&display=swap",
        "Cairo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
        "Almarai-Bold.ttf": "https://fonts.googleapis.com/css2?family=Almarai:wght@800&display=swap",
        "NotoSansArabic-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",

        # ── CJK & other scripts ────────────────────────────────────────────────
        "NotoSansSC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
        "NotoSansJP-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
        "NotoSansDevanagari-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
    }

    # ── Language → default caption font ───────────────────────────────────────
    # Used when no explicit style font is set, or for non-Latin scripts.
    LANGUAGE_FONT_MAP = {
        # Arabic — Tajawal is the modern social-media standard
        "ar": "Tajawal-Bold.ttf",

        # CJK
        "zh": "NotoSansSC-Bold.ttf",
        "ja": "NotoSansJP-Bold.ttf",

        # Devanagari
        "hi": "NotoSansDevanagari-Bold.ttf",

        # Cyrillic + Latin — Roboto covers both
        "ru": "Roboto-Bold.ttf",

        # Latin languages — Montserrat is the most common viral caption font
        "en": "Montserrat-Bold.ttf",
        "fr": "Montserrat-Bold.ttf",
        "es": "Montserrat-Bold.ttf",
        "de": "Montserrat-Bold.ttf",
        "pt": "Montserrat-Bold.ttf",
        "it": "Montserrat-Bold.ttf",
        "tr": "Montserrat-Bold.ttf",

        # Fallback for any unlisted language
        "default": "Montserrat-Bold.ttf",
    }

    # ── Caption style → preferred font ────────────────────────────────────────
    # SubtitleManager reads this via get_style_config()["font"].
    # Only for Latin scripts — non-Latin always uses LANGUAGE_FONT_MAP.
    STYLE_FONT_MAP = {
        "classic": "Montserrat-Bold.ttf",
        "modern_glow": "Rubik-Bold.ttf",
        "tiktok_bold": "Montserrat-Bold.ttf",
        "tiktok_neon": "Oswald-Bold.ttf",
        "youtube_clean": "Rubik-Bold.ttf",
        "youtube_box": "Montserrat-Bold.ttf",
    }

    # ── Video settings ─────────────────────────────────────────────────────────
    DEFAULT_SIZE = (1080, 1920)
    CHUNK_SIZE_SECONDS = 600
    OVERLAP_SECONDS = 60

    STYLES = [
        "cinematic",
        "cinematic_blur",
        "vertical_full",
        "split_vertical",
        "split_horizontal",
    ]

    @classmethod
    def setup_dirs(cls):
        """Ensure all working directories exist (idempotent)."""
        for folder in [cls.TEMP_DIR, cls.UPLOADS_DIR, cls.OUTPUTS_DIR, cls.LOGS_DIR]:
            os.makedirs(folder, exist_ok=True)

    @staticmethod
    def get_urls(css_content: str, prefer_latin: bool = True) -> list:
        """
        Extract font file URLs from a Google Fonts CSS response.

        The CSS contains one @font-face block per subset, each preceded by a
        comment such as ``/* [2] latin */``. For Latin fonts we want the
        'latin' subset; for Arabic/CJK the script-specific subset (listed
        first by Google) is the right one.

        Returns a single-element list with the chosen URL, all bare URLs if
        the subset comments cannot be parsed, or [] when nothing matches.
        """
        # Pair each subset comment with the url(...) that follows it.
        block_re = re.compile(
            r'/\*\s*\[?\d*\]?\s*([\w\-]+)\s*\*/[^}]*?url\(([^)]+)\)',
            re.DOTALL,
        )
        found = block_re.findall(css_content)

        if not found:
            # Degraded CSS: fall back to collecting every bare url(...).
            loose = re.findall(r'url\(([^)]+)\)', css_content)
            return loose if loose else []

        by_subset = {name.lower(): link.strip() for name, link in found}
        ordered = list(by_subset.values())

        if not prefer_latin:
            # Arabic/CJK: the script-specific subset comes first.
            return [ordered[0]]

        # Priority: latin > latin-ext > last subset in Google's ordering.
        for wanted in ("latin", "latin-ext"):
            if wanted in by_subset:
                return [by_subset[wanted]]
        return [ordered[-1]]

    @staticmethod
    def download_font_from_css(css_url: str, output_path: str) -> bool:
        """
        Download the font binary referenced by a Google Fonts CSS URL.

        The target subset (Latin vs. script-specific) is inferred from the
        output filename, then the CSS is fetched and the matching @font-face
        URL is downloaded to *output_path*.

        Returns True on success, False on any failure (logged to stdout).
        """
        # Filenames containing these fragments are treated as non-Latin fonts.
        NON_LATIN_KEYWORDS = ("arabic", "noto", "devanagari", "sc", "jp", "kr")
        fname = os.path.basename(output_path).lower()
        prefer_latin = not any(kw in fname for kw in NON_LATIN_KEYWORDS)

        # Browser-like UA — Google serves woff2 only to modern user agents.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/124.0.0.0 Safari/537.36"
            )
        }

        try:
            # 1. Fetch the CSS listing the @font-face blocks.
            css_resp = requests.get(css_url, headers=headers, timeout=15)
            css_resp.raise_for_status()

            # 2. Pick the subset URL matching this font's script.
            candidates = Config.get_urls(css_resp.text, prefer_latin=prefer_latin)
            if not candidates:
                print(f"❌ No font URLs found in CSS: {css_url}")
                return False

            font_url = candidates[0]
            print(f"⬇️ Downloading font ({('latin' if prefer_latin else 'script')}) → {font_url}")

            # 3. Pull the binary and write it to disk.
            payload = requests.get(font_url, headers=headers, timeout=30)
            payload.raise_for_status()

            with open(output_path, "wb") as fh:
                fh.write(payload.content)

            print(f"✅ Font saved: {output_path}")
            return True

        except requests.RequestException as e:
            print(f"❌ Network error downloading font from {css_url}: {e}")
            return False
        except Exception as e:
            print(f"❌ Unexpected error: {e}")
            return False
|
core/database.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from typing import Optional, List, Dict
|
| 5 |
+
from .security import SecurityManager
|
| 6 |
+
|
| 7 |
+
class DatabaseManager:
    """
    Secure storage for API keys.

    Two interchangeable backends:
      * Firestore ('community_keys' collection) when use_firebase=True
      * a local SQLite file otherwise

    Keys are encrypted at rest via SecurityManager (Fernet).
    """
    def __init__(self, use_firebase: bool = False, db_name="secure_storage.db"):
        """
        Args:
            use_firebase: Store keys in Firestore instead of local SQLite.
            db_name: Filename of the SQLite database (placed at project root).
        """
        self.security = SecurityManager()
        self.use_firebase = use_firebase

        if self.use_firebase:
            self._init_firebase()
        else:
            # Local SQLite setup — DB file lives next to the package root.
            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            self.db_path = os.path.join(base_dir, db_name)
            self._init_sqlite()

    def _init_sqlite(self):
        """Create the api_keys table if it does not exist yet."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            # Composite PK makes each (service, encrypted_key) pair unique.
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS api_keys (
                    service_name TEXT,
                    encrypted_key TEXT NOT NULL,
                    added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    is_active BOOLEAN DEFAULT 1,
                    PRIMARY KEY (service_name, encrypted_key)
                )
            ''')
            conn.commit()
        finally:
            # BUGFIX: connection was leaked if table creation raised.
            conn.close()

    def _init_firebase(self):
        """Initialize the Firebase Admin SDK and bind the key collection.

        Credentials are resolved from FIREBASE_CREDENTIALS_PATH (file path)
        or FIREBASE_CREDENTIALS_JSON (inline JSON, for cloud deployments).

        Raises:
            ValueError: If no credentials can be found.
        """
        import firebase_admin
        from firebase_admin import credentials, firestore

        # initialize_app() may only be called once per process.
        if not firebase_admin._apps:
            cred_path = os.getenv("FIREBASE_CREDENTIALS_PATH", "firebase_key.json")

            if os.path.exists(cred_path):
                cred = credentials.Certificate(cred_path)
            else:
                # Fall back to inline JSON (Hugging Face / cloud secrets).
                firebase_json = os.getenv("FIREBASE_CREDENTIALS_JSON")
                if firebase_json:
                    cred_dict = json.loads(firebase_json)
                    cred = credentials.Certificate(cred_dict)
                else:
                    raise ValueError("Firebase credentials not found! Set FIREBASE_CREDENTIALS_PATH or FIREBASE_CREDENTIALS_JSON.")

            firebase_admin.initialize_app(cred)

        self.db = firestore.client()
        self.collection = self.db.collection('community_keys')

    def save_key(self, service_name: str, api_key: str):
        """Encrypt and persist an API key for *service_name*.

        Duplicate (service, encrypted_key) records are silently ignored.
        """
        encrypted = self.security.encrypt_data(api_key)

        if self.use_firebase:
            # BUGFIX: 'firestore' was previously referenced here without being
            # in scope (it was only imported inside _init_firebase), which
            # raised NameError on every Firebase save.
            from firebase_admin import firestore
            import hashlib

            # BUGFIX: the old doc id used built-in hash(), which is salted per
            # process (PYTHONHASHSEED), so ids were not reproducible across
            # runs. SHA-256 gives a stable id for identical ciphertext.
            # NOTE(review): Fernet output is salted, so re-encrypting the same
            # key still yields new ciphertext — dedup here is best-effort.
            digest = hashlib.sha256(encrypted.encode()).hexdigest()[:16]
            doc_id = f"{service_name}_{digest}"
            self.collection.document(doc_id).set({
                'service': service_name.lower(),
                'encrypted_key': encrypted,
                'is_active': True,
                'added_at': firestore.SERVER_TIMESTAMP
            })
        else:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            try:
                cursor.execute('''
                    INSERT INTO api_keys (service_name, encrypted_key)
                    VALUES (?, ?)
                ''', (service_name.lower(), encrypted))
                conn.commit()
            except sqlite3.IntegrityError:
                pass  # Exact pair already stored — nothing to do.
            finally:
                conn.close()

    def get_key(self, service_name: str) -> Optional[str]:
        """Return one decrypted active key for *service_name*, or None."""
        # For now, just get the first available active key.
        if self.use_firebase:
            docs = self.collection.where('service', '==', service_name.lower())\
                                  .where('is_active', '==', True)\
                                  .limit(1).stream()
            for doc in docs:
                data = doc.to_dict()
                return self.security.decrypt_data(data['encrypted_key'])
            return None
        else:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            cursor.execute('''
                SELECT encrypted_key FROM api_keys
                WHERE service_name = ? AND is_active = 1
                LIMIT 1
            ''', (service_name.lower(),))
            row = cursor.fetchone()
            conn.close()

            if row:
                return self.security.decrypt_data(row[0])
            return None

    def get_all_keys(self, service_name: str) -> List[str]:
        """Return ALL decrypted active keys for *service_name* (for rotation)."""
        keys = []
        if self.use_firebase:
            docs = self.collection.where('service', '==', service_name.lower())\
                                  .where('is_active', '==', True).stream()
            for doc in docs:
                decrypted = self.security.decrypt_data(doc.to_dict()['encrypted_key'])
                if decrypted:
                    keys.append(decrypted)
        else:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            cursor.execute('''
                SELECT encrypted_key FROM api_keys
                WHERE service_name = ? AND is_active = 1
            ''', (service_name.lower(),))
            rows = cursor.fetchall()
            conn.close()
            for row in rows:
                decrypted = self.security.decrypt_data(row[0])
                if decrypted:
                    keys.append(decrypted)
        return keys
|
core/security.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from cryptography.fernet import Fernet
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
class SecurityManager:
    """
    Encrypts and decrypts sensitive strings (e.g. API keys) using Fernet.
    """

    def __init__(self, secret_key: Optional[str] = None):
        """
        Build a cipher from *secret_key*, or from the FERNET_SECRET
        environment variable when no key is passed in.

        If neither is available, a throwaway session key is generated —
        data encrypted with it cannot be decrypted after the process exits.

        Raises:
            ValueError: When the provided/environment key is not a valid Fernet key.
        """
        key = secret_key or os.getenv("FERNET_SECRET")

        if not key:
            # Dev/test convenience only — production should always set FERNET_SECRET.
            print("⚠ WARNING: FERNET_SECRET not found. Generating a temporary key for this session.")
            self._cipher = Fernet(Fernet.generate_key())
            return

        try:
            material = key.encode() if isinstance(key, str) else key
            self._cipher = Fernet(material)
        except Exception as e:
            raise ValueError(f"Invalid FERNET_SECRET: {e}")

    @staticmethod
    def generate_key() -> str:
        """
        Produce a fresh Fernet key.

        Run this once and store the output in your environment variables.
        """
        return Fernet.generate_key().decode()

    def encrypt_data(self, data: str) -> str:
        """Encrypt *data*; empty input passes through as ''."""
        if not data:
            return ""
        return self._cipher.encrypt(data.encode()).decode()

    def decrypt_data(self, encrypted_data: str) -> str:
        """Decrypt *encrypted_data*; returns '' on empty input or on failure."""
        if not encrypted_data:
            return ""
        try:
            return self._cipher.decrypt(encrypted_data.encode()).decode()
        except Exception as e:
            print(f"❌ Decryption failed: {e}")
            return ""
|
| 57 |
+
|
| 58 |
+
# Module-level convenience instance.
# Import 'security' and call security.encrypt_data() directly when the
# FERNET_SECRET environment variable is set; None if construction fails.
try:
    security = SecurityManager()
except Exception:
    security = None
|
core/stt.py
CHANGED
|
@@ -33,13 +33,14 @@ class STT:
|
|
| 33 |
print(f"⚠️ GPU not available, using CPU with {model_size} model: {e}")
|
| 34 |
self.model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
| 35 |
|
| 36 |
-
def get_transcript(self, video_path: str, language: str = None, skip_ai: bool = False, timestamp_mode="segments"):
|
| 37 |
"""تحويل الفيديو لنص مع توقيت الكلمات باستخدام Faster-Whisper
|
| 38 |
|
| 39 |
Args:
|
| 40 |
timestamp_mode: "words" للكلمات الفردية, "segments" للجمل الكاملة
|
|
|
|
| 41 |
"""
|
| 42 |
-
print(f"🎙️ Transcribing: {video_path} (Language: {language if language else 'Auto'}, Mode: {timestamp_mode})")
|
| 43 |
|
| 44 |
# تسجيل الـ transcript في ملف logs
|
| 45 |
log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs", "transcript.log")
|
|
@@ -104,8 +105,8 @@ class STT:
|
|
| 104 |
beam_size=1,
|
| 105 |
word_timestamps=word_timestamps,
|
| 106 |
language=actual_stt_lang,
|
| 107 |
-
vad_filter=
|
| 108 |
-
vad_parameters=dict(min_silence_duration_ms=500)
|
| 109 |
)
|
| 110 |
detected_lang = info.language
|
| 111 |
|
|
|
|
| 33 |
print(f"⚠️ GPU not available, using CPU with {model_size} model: {e}")
|
| 34 |
self.model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
| 35 |
|
| 36 |
+
def get_transcript(self, video_path: str, language: str = None, skip_ai: bool = False, timestamp_mode="segments", vad_filter=True):
|
| 37 |
"""تحويل الفيديو لنص مع توقيت الكلمات باستخدام Faster-Whisper
|
| 38 |
|
| 39 |
Args:
|
| 40 |
timestamp_mode: "words" للكلمات الفردية, "segments" للجمل الكاملة
|
| 41 |
+
vad_filter: تصفية الصوت الفارغ (True/False)
|
| 42 |
"""
|
| 43 |
+
print(f"🎙️ Transcribing: {video_path} (Language: {language if language else 'Auto'}, Mode: {timestamp_mode}, VAD: {vad_filter})")
|
| 44 |
|
| 45 |
# تسجيل الـ transcript في ملف logs
|
| 46 |
log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs", "transcript.log")
|
|
|
|
| 105 |
beam_size=1,
|
| 106 |
word_timestamps=word_timestamps,
|
| 107 |
language=actual_stt_lang,
|
| 108 |
+
vad_filter=vad_filter, # استخدام المعامل الممرر
|
| 109 |
+
vad_parameters=dict(min_silence_duration_ms=500) if vad_filter else None
|
| 110 |
)
|
| 111 |
detected_lang = info.language
|
| 112 |
|
core/styles.py
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
import os
|
| 3 |
import cv2
|
|
|
|
| 4 |
import moviepy.editor as mpe
|
| 5 |
from .config import Config
|
| 6 |
from .logger import Logger
|
|
@@ -8,34 +14,114 @@ from .subtitle_manager import SubtitleManager
|
|
| 8 |
|
| 9 |
logger = Logger.get_logger(__name__)
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
class SmartFaceCropper:
|
| 12 |
def __init__(self, output_size=(1080, 1920)):
|
| 13 |
self.output_size = output_size
|
| 14 |
-
self.face_cascade = cv2.CascadeClassifier(
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
self.
|
| 18 |
-
self.
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def get_crop_coordinates(self, frame):
|
| 21 |
-
h, w
|
| 22 |
target_w = int(h * self.output_size[0] / self.output_size[1])
|
| 23 |
-
gray
|
| 24 |
-
|
| 25 |
-
faces
|
| 26 |
|
| 27 |
if len(faces) > 0:
|
| 28 |
-
faces = sorted(faces, key=lambda f: f[2]*f[3], reverse=True)
|
| 29 |
-
fx, fy, fw, fh
|
| 30 |
current_center_x = fx + fw // 2
|
| 31 |
-
self.last_coords
|
| 32 |
else:
|
| 33 |
current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x
|
| 34 |
|
| 35 |
if self.smoothed_x is None:
|
| 36 |
self.smoothed_x = current_center_x
|
| 37 |
else:
|
| 38 |
-
self.smoothed_x =
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
left = int(self.smoothed_x - target_w // 2)
|
| 41 |
left = max(0, min(left, w - target_w))
|
|
@@ -43,24 +129,27 @@ class SmartFaceCropper:
|
|
| 43 |
|
| 44 |
def apply_to_clip(self, clip):
|
| 45 |
frame_skip = 5
|
| 46 |
-
|
| 47 |
def filter_frame(get_frame, t):
|
| 48 |
frame = get_frame(t)
|
| 49 |
self.frame_count += 1
|
| 50 |
-
|
| 51 |
if self.frame_count % frame_skip == 0 or self.last_coords is None:
|
| 52 |
-
left,
|
| 53 |
else:
|
| 54 |
-
h, w
|
| 55 |
target_w = int(h * self.output_size[0] / self.output_size[1])
|
| 56 |
-
left
|
| 57 |
-
left
|
| 58 |
-
right
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
return cv2.resize(cropped, self.output_size)
|
| 62 |
return clip.fl(filter_frame)
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
class BaseStyle(ABC):
|
| 65 |
def __init__(self, output_size=Config.DEFAULT_SIZE):
|
| 66 |
self.output_size = output_size
|
|
@@ -68,162 +157,259 @@ class BaseStyle(ABC):
|
|
| 68 |
@abstractmethod
|
| 69 |
def apply(self, clip, **kwargs):
|
| 70 |
pass
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
# --------------------------------------------------------------------------
|
| 75 |
-
def apply_with_captions(self, clip, transcript_data=None, language=None, caption_mode="sentence", **kwargs):
|
| 76 |
-
"""
|
| 77 |
-
Applies style AND adds captions in a single composition step.
|
| 78 |
-
This prevents double rendering (CompositeVideoClip inside CompositeVideoClip).
|
| 79 |
-
"""
|
| 80 |
-
# 1. Get the base styled clip (which might be a CompositeVideoClip itself)
|
| 81 |
styled_clip = self.apply(clip, **kwargs)
|
| 82 |
-
|
| 83 |
-
# 2. If no captions needed, just return the styled clip
|
| 84 |
if not transcript_data:
|
| 85 |
return styled_clip
|
| 86 |
|
| 87 |
-
# 3. Generate caption CLIPS (ImageClips) only, do not composite yet
|
| 88 |
caption_clips = self._create_caption_clips(transcript_data, language, caption_mode)
|
| 89 |
-
|
| 90 |
if not caption_clips:
|
| 91 |
return styled_clip
|
| 92 |
|
| 93 |
-
# 4. Optimize Composition:
|
| 94 |
-
# If styled_clip is already a CompositeVideoClip, we can flatten the list
|
| 95 |
-
# instead of nesting composites.
|
| 96 |
if isinstance(styled_clip, mpe.CompositeVideoClip):
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
# If styled_clip is just a simple VideoFileClip or similar, wrap it
|
| 102 |
-
return mpe.CompositeVideoClip([styled_clip] + caption_clips, size=self.output_size)
|
| 103 |
-
# --------------------------------------------------------------------------
|
| 104 |
|
| 105 |
def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
|
| 106 |
-
|
| 107 |
-
# but kept for backward compatibility if needed.
|
| 108 |
if not transcript_data:
|
| 109 |
return clip
|
| 110 |
return SubtitleManager.create_captions(
|
| 111 |
-
clip,
|
| 112 |
-
|
| 113 |
-
size=self.output_size,
|
| 114 |
-
language=language,
|
| 115 |
-
caption_mode=caption_mode
|
| 116 |
)
|
| 117 |
-
|
| 118 |
def _create_caption_clips(self, transcript_data, language=None, caption_mode="sentence"):
|
| 119 |
-
"""Helper to create just the caption clips list, not a full CompositeVideoClip"""
|
| 120 |
return SubtitleManager.create_caption_clips(
|
| 121 |
-
transcript_data,
|
| 122 |
-
|
| 123 |
-
language=language,
|
| 124 |
-
caption_mode=caption_mode
|
| 125 |
)
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
class CinematicStyle(BaseStyle):
|
| 128 |
def apply(self, clip, background_path=None, **kwargs):
|
| 129 |
if background_path and os.path.exists(background_path):
|
| 130 |
ext = os.path.splitext(background_path)[1].lower()
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
bg = bg.subclip(0, clip.duration)
|
| 139 |
else:
|
| 140 |
-
bg =
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
| 142 |
if bg.w > self.output_size[0]:
|
| 143 |
-
bg = bg.crop(x_center=bg.w/2, width=self.output_size[0])
|
| 144 |
else:
|
| 145 |
bg = bg.resize(width=self.output_size[0])
|
| 146 |
else:
|
| 147 |
bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
class CinematicBlurStyle(BaseStyle):
|
| 157 |
def apply(self, clip, **kwargs):
|
| 158 |
bg = clip.resize(height=self.output_size[1])
|
| 159 |
if bg.w < self.output_size[0]:
|
| 160 |
bg = clip.resize(width=self.output_size[0])
|
| 161 |
-
|
| 162 |
def make_blur(get_frame, t):
|
| 163 |
-
frame
|
| 164 |
-
small
|
| 165 |
-
blurred = cv2.resize(
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
bg_blurred = bg.fl(make_blur).set_opacity(0.6)
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
class SplitVerticalStyle(BaseStyle):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
def apply(self, clip, playground_path=None, **kwargs):
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
if playground_path and os.path.exists(playground_path):
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
else:
|
| 189 |
-
bottom = bottom.subclip(0, clip.duration)
|
| 190 |
else:
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
class SplitHorizontalStyle(BaseStyle):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
def apply(self, clip, playground_path=None, **kwargs):
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
if playground_path and os.path.exists(playground_path):
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
else:
|
| 206 |
-
right = right.subclip(0, clip.duration)
|
| 207 |
else:
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
class VerticalFullStyle(BaseStyle):
|
| 213 |
def apply(self, clip, **kwargs):
|
| 214 |
cropper = SmartFaceCropper(output_size=self.output_size)
|
| 215 |
return cropper.apply_to_clip(clip)
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
class StyleFactory:
|
| 218 |
_styles = {
|
| 219 |
-
"cinematic":
|
| 220 |
-
"cinematic_blur":
|
| 221 |
-
"split_vertical":
|
| 222 |
"split_horizontal": SplitHorizontalStyle,
|
| 223 |
-
"vertical_full":
|
| 224 |
}
|
| 225 |
|
| 226 |
@staticmethod
|
| 227 |
def get_style(style_name) -> BaseStyle:
|
| 228 |
style_class = StyleFactory._styles.get(style_name, CinematicBlurStyle)
|
| 229 |
-
return style_class()
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Video Styles — YouTube Shorts Production Engine
|
| 3 |
+
SplitVertical & SplitHorizontal rebuilt with seamless gradient blending.
|
| 4 |
+
All class/method names kept identical for drop-in integration.
|
| 5 |
+
"""
|
| 6 |
from abc import ABC, abstractmethod
|
| 7 |
import os
|
| 8 |
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
import moviepy.editor as mpe
|
| 11 |
from .config import Config
|
| 12 |
from .logger import Logger
|
|
|
|
| 14 |
|
| 15 |
logger = Logger.get_logger(__name__)
|
| 16 |
|
| 17 |
+
|
| 18 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 19 |
+
# Gradient Mask Helpers
|
| 20 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 21 |
+
|
| 22 |
+
def _linear_gradient(length: int, fade_from_zero: bool) -> np.ndarray:
|
| 23 |
+
"""
|
| 24 |
+
Returns a 1-D float32 array [0..1] of given length.
|
| 25 |
+
fade_from_zero=True → 0 → 1 (clip fades IN at this edge)
|
| 26 |
+
fade_from_zero=False → 1 → 0 (clip fades OUT at this edge)
|
| 27 |
+
"""
|
| 28 |
+
arr = np.linspace(0.0, 1.0, length, dtype=np.float32)
|
| 29 |
+
return arr if fade_from_zero else arr[::-1]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _make_vertical_mask(clip_w: int, clip_h: int,
|
| 33 |
+
blend_top: int = 0, blend_bottom: int = 0) -> np.ndarray:
|
| 34 |
+
"""
|
| 35 |
+
Float32 mask (clip_h × clip_w) in [0,1].
|
| 36 |
+
blend_top → pixels from top that fade in (0→1)
|
| 37 |
+
blend_bottom → pixels from bottom that fade out (1→0)
|
| 38 |
+
"""
|
| 39 |
+
mask = np.ones((clip_h, clip_w), dtype=np.float32)
|
| 40 |
+
if blend_top > 0:
|
| 41 |
+
grad = _linear_gradient(blend_top, fade_from_zero=True)
|
| 42 |
+
mask[:blend_top, :] = grad[:, np.newaxis]
|
| 43 |
+
if blend_bottom > 0:
|
| 44 |
+
grad = _linear_gradient(blend_bottom, fade_from_zero=False)
|
| 45 |
+
mask[clip_h - blend_bottom:, :] = grad[:, np.newaxis]
|
| 46 |
+
return mask
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _make_horizontal_mask(clip_w: int, clip_h: int,
|
| 50 |
+
blend_left: int = 0, blend_right: int = 0) -> np.ndarray:
|
| 51 |
+
"""
|
| 52 |
+
Float32 mask (clip_h × clip_w) in [0,1].
|
| 53 |
+
blend_left → pixels from left that fade in (0→1)
|
| 54 |
+
blend_right → pixels from right that fade out (1→0)
|
| 55 |
+
"""
|
| 56 |
+
mask = np.ones((clip_h, clip_w), dtype=np.float32)
|
| 57 |
+
if blend_left > 0:
|
| 58 |
+
grad = _linear_gradient(blend_left, fade_from_zero=True)
|
| 59 |
+
mask[:, :blend_left] = grad[np.newaxis, :]
|
| 60 |
+
if blend_right > 0:
|
| 61 |
+
grad = _linear_gradient(blend_right, fade_from_zero=False)
|
| 62 |
+
mask[:, clip_w - blend_right:] = grad[np.newaxis, :]
|
| 63 |
+
return mask
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _apply_mask(clip: mpe.VideoClip, mask_array: np.ndarray) -> mpe.VideoClip:
    """Attach a static float32 numpy mask to a video clip."""
    return clip.set_mask(
        mpe.ImageClip(mask_array, ismask=True, duration=clip.duration)
    )
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _fit_to_width(clip: mpe.VideoClip, target_w: int) -> mpe.VideoClip:
    """Resize clip so width == target_w, keeping aspect ratio."""
    # NOTE(review): relies on moviepy's resize deriving the height
    # automatically when only `width` is given — confirm against moviepy docs.
    return clip.resize(width=target_w)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _fit_to_height(clip: mpe.VideoClip, target_h: int) -> mpe.VideoClip:
    """Resize clip so height == target_h, keeping aspect ratio."""
    # NOTE(review): relies on moviepy's resize deriving the width
    # automatically when only `height` is given — confirm against moviepy docs.
    return clip.resize(height=target_h)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _loop_or_cut(clip: mpe.VideoClip, duration: float) -> mpe.VideoClip:
    """Force `clip` to exactly `duration` seconds.

    Short clips are looped up to the target; long (or equal) clips are
    trimmed down to it.
    """
    if clip.duration >= duration:
        return clip.subclip(0, duration)
    return clip.loop(duration=duration)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 89 |
+
# Smart Face Cropper
|
| 90 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 91 |
+
|
| 92 |
class SmartFaceCropper:
|
| 93 |
def __init__(self, output_size=(1080, 1920)):
|
| 94 |
self.output_size = output_size
|
| 95 |
+
self.face_cascade = cv2.CascadeClassifier(
|
| 96 |
+
cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
|
| 97 |
+
)
|
| 98 |
+
self.last_coords = None
|
| 99 |
+
self.smoothed_x = None
|
| 100 |
+
self.smoothing = 0.2
|
| 101 |
+
self.frame_count = 0
|
| 102 |
|
| 103 |
def get_crop_coordinates(self, frame):
|
| 104 |
+
h, w = frame.shape[:2]
|
| 105 |
target_w = int(h * self.output_size[0] / self.output_size[1])
|
| 106 |
+
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 107 |
+
small = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
|
| 108 |
+
faces = self.face_cascade.detectMultiScale(small, 1.1, 8, minSize=(50, 50))
|
| 109 |
|
| 110 |
if len(faces) > 0:
|
| 111 |
+
faces = sorted(faces, key=lambda f: f[2] * f[3], reverse=True)
|
| 112 |
+
fx, fy, fw, fh = [v * 2 for v in faces[0]]
|
| 113 |
current_center_x = fx + fw // 2
|
| 114 |
+
self.last_coords = (fx, fy, fw, fh)
|
| 115 |
else:
|
| 116 |
current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x
|
| 117 |
|
| 118 |
if self.smoothed_x is None:
|
| 119 |
self.smoothed_x = current_center_x
|
| 120 |
else:
|
| 121 |
+
self.smoothed_x = (
|
| 122 |
+
self.smoothed_x * (1 - self.smoothing)
|
| 123 |
+
+ current_center_x * self.smoothing
|
| 124 |
+
)
|
| 125 |
|
| 126 |
left = int(self.smoothed_x - target_w // 2)
|
| 127 |
left = max(0, min(left, w - target_w))
|
|
|
|
| 129 |
|
| 130 |
def apply_to_clip(self, clip):
|
| 131 |
frame_skip = 5
|
| 132 |
+
|
| 133 |
def filter_frame(get_frame, t):
|
| 134 |
frame = get_frame(t)
|
| 135 |
self.frame_count += 1
|
|
|
|
| 136 |
if self.frame_count % frame_skip == 0 or self.last_coords is None:
|
| 137 |
+
left, _, right, _ = self.get_crop_coordinates(frame)
|
| 138 |
else:
|
| 139 |
+
h, w = frame.shape[:2]
|
| 140 |
target_w = int(h * self.output_size[0] / self.output_size[1])
|
| 141 |
+
left = int(self.smoothed_x - target_w // 2) if self.smoothed_x else w // 2 - target_w // 2
|
| 142 |
+
left = max(0, min(left, w - target_w))
|
| 143 |
+
right = left + target_w
|
| 144 |
+
return cv2.resize(frame[:, left:right], self.output_size)
|
| 145 |
+
|
|
|
|
| 146 |
return clip.fl(filter_frame)
|
| 147 |
|
| 148 |
+
|
| 149 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 150 |
+
# Base Style
|
| 151 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 152 |
+
|
| 153 |
class BaseStyle(ABC):
    """Abstract base for all output-layout styles.

    Subclasses implement `apply(clip, **kwargs)` and return a clip sized to
    `self.output_size`. Caption overlay is handled here so every style gets
    it for free via `apply_with_captions`.
    """

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        # Target (width, height) of the rendered output canvas.
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        pass

    def apply_with_captions(self, clip, transcript_data=None, language=None,
                            caption_mode="sentence", **kwargs):
        """Apply the style, then overlay caption clips in ONE composite pass.

        When the styled result is already a CompositeVideoClip, its layers
        are flattened together with the caption clips instead of nesting a
        composite inside a composite (avoids double rendering).
        Returns the styled clip unchanged when there is nothing to caption.
        """
        styled_clip = self.apply(clip, **kwargs)
        if not transcript_data:
            return styled_clip

        caption_clips = self._create_caption_clips(transcript_data, language, caption_mode)
        if not caption_clips:
            return styled_clip

        if isinstance(styled_clip, mpe.CompositeVideoClip):
            # Flatten: reuse the existing layers, append captions on top.
            return mpe.CompositeVideoClip(
                list(styled_clip.clips) + caption_clips, size=self.output_size
            )
        # Simple clip: wrap it together with the captions.
        return mpe.CompositeVideoClip([styled_clip] + caption_clips, size=self.output_size)

    def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
        """Kept for backward compatibility."""
        # Prefer apply_with_captions; this path composites captions directly
        # onto an already-rendered clip via SubtitleManager.
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip, transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
        )

    def _create_caption_clips(self, transcript_data, language=None, caption_mode="sentence"):
        # Helper: build just the list of caption ImageClips, no compositing.
        return SubtitleManager.create_caption_clips(
            transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
        )
|
| 191 |
|
| 192 |
+
|
| 193 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 194 |
+
# Cinematic Style
|
| 195 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 196 |
+
|
| 197 |
class CinematicStyle(BaseStyle):
    """Center the main clip over a user-supplied background (image or video).

    Falls back to a plain black canvas when no background path is given or
    the file does not exist.
    """

    def apply(self, clip, background_path=None, **kwargs):
        """Compose `clip` over a background scaled to fill the output canvas.

        background_path: optional image/video file. Video backgrounds are
        muted and looped/trimmed to the main clip's duration.
        """
        if background_path and os.path.exists(background_path):
            ext = os.path.splitext(background_path)[1].lower()
            video_ext = {".mp4", ".avi", ".mov", ".mkv", ".webm"}
            if ext in video_ext:
                # Mute the background so it can't clash with the main audio,
                # then loop or trim it to exactly the foreground's duration.
                bg = _loop_or_cut(
                    mpe.VideoFileClip(background_path).without_audio()
                    .resize(height=self.output_size[1]),
                    clip.duration,
                )
            else:
                # Still image: hold it for the whole duration of the clip.
                bg = (
                    mpe.ImageClip(background_path)
                    .set_duration(clip.duration)
                    .resize(height=self.output_size[1])
                )
            # Background is already full height; now fill the width too:
            # center-crop when it overflows, upscale when it falls short.
            if bg.w > self.output_size[0]:
                bg = bg.crop(x_center=bg.w / 2, width=self.output_size[0])
            else:
                bg = bg.resize(width=self.output_size[0])
        else:
            # No usable background → solid black canvas.
            bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)

        # Fit the foreground inside the canvas: width-fit first, fall back
        # to height-fit if that makes it taller than the canvas.
        main = clip.resize(width=self.output_size[0]).set_position("center")
        if main.h > self.output_size[1]:
            main = clip.resize(height=self.output_size[1]).set_position("center")

        return mpe.CompositeVideoClip([bg, main], size=self.output_size)
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 229 |
+
# Cinematic Blur Style
|
| 230 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 231 |
|
| 232 |
class CinematicBlurStyle(BaseStyle):
    """Fill the canvas with a heavily blurred copy of the clip rendered
    behind the sharp, centered original — the classic blurred-background
    portrait look."""

    def apply(self, clip, **kwargs):
        # Scale a copy to cover the full canvas (height-first, width fallback).
        bg = clip.resize(height=self.output_size[1])
        if bg.w < self.output_size[0]:
            bg = clip.resize(width=self.output_size[0])

        def make_blur(get_frame, t):
            # Cheap heavy blur: crush the frame to 16×16, blow it back up to
            # canvas size, then smooth the blockiness with a Gaussian pass.
            frame = get_frame(t)
            small = cv2.resize(frame, (16, 16))
            blurred = cv2.resize(
                small, (self.output_size[0], self.output_size[1]),
                interpolation=cv2.INTER_LINEAR,
            )
            return cv2.GaussianBlur(blurred, (21, 21), 0)

        # Dim the blurred layer so the sharp foreground pops.
        bg_blurred = bg.fl(make_blur).set_opacity(0.6)
        # Sharp foreground: width-fit first, height-fit if it overflows.
        main = clip.resize(width=self.output_size[0]).set_position("center")
        if main.h > self.output_size[1]:
            main = clip.resize(height=self.output_size[1]).set_position("center")

        return mpe.CompositeVideoClip([bg_blurred, main], size=self.output_size)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 256 |
+
# Split Vertical (top / bottom, seamless gradient blend)
|
| 257 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 258 |
|
| 259 |
class SplitVerticalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 × 1920) into top and bottom segments.

    Layout
    ──────
      • Top segment   : 58 % of canvas height → ~1114 px
      • Bottom segment: fills the rest        → ~926 px
      • Blend zone    : 120 px overlap where the two clips cross-fade via
        gradient masks — no hard dividing line visible.

    The gradient is very subtle (linear alpha), so it doesn't destroy
    content near the seam, it just dissolves one clip into the other.
    """

    SPLIT_RATIO: float = 0.58   # top segment fraction of total height
    BLEND_PX: int = 120         # overlap / blend zone height in pixels

    def apply(self, clip, playground_path=None, **kwargs):
        W, H = self.output_size                 # 1080 × 1920
        blend = self.BLEND_PX
        h_top_seg = int(H * self.SPLIT_RATIO)   # ~1114
        # bot segment carries the overlap, so bot_y + h_bot_seg == H exactly.
        h_bot_seg = H - h_top_seg + blend       # ~926 (includes overlap)

        # ── Prepare main clip for top segment ───────────────────────────────
        top_clip = _fit_to_width(clip, W)

        # Crop to the top portion we need (+ blend zone so gradient has room)
        top_h = min(top_clip.h, h_top_seg + blend // 2)
        top_clip = top_clip.crop(x1=0, y1=0, x2=W, y2=top_h).resize((W, h_top_seg))

        # Gradient: fade out the bottom `blend` rows → seamless merge
        top_mask = _make_vertical_mask(W, h_top_seg, blend_bottom=blend)
        top_clip = _apply_mask(top_clip, top_mask).set_position((0, 0))

        # ── Prepare playground / fallback clip for bottom segment ────────────
        if playground_path and os.path.exists(playground_path):
            # External b-roll: mute and match the foreground's duration.
            bot_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            # Fallback: mirror/tint of the same source
            bot_src = clip.set_opacity(0.85)

        bot_clip = _fit_to_width(bot_src, W)

        # We want the middle/lower portion of the source for the bottom panel
        if bot_clip.h > h_bot_seg:
            y_start = max(0, bot_clip.h - h_bot_seg)
            bot_clip = bot_clip.crop(x1=0, y1=y_start,
                                     x2=W, y2=bot_clip.h)

        bot_clip = bot_clip.resize((W, h_bot_seg))

        # Gradient: fade in the top `blend` rows → seamless merge
        bot_mask = _make_vertical_mask(W, h_bot_seg, blend_top=blend)
        bot_y = h_top_seg - blend               # overlaps by `blend` px
        bot_clip = _apply_mask(bot_clip, bot_mask).set_position((0, bot_y))

        # Top layer drawn last so its fading edge dissolves over the bottom.
        return mpe.CompositeVideoClip([bot_clip, top_clip], size=self.output_size)
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 322 |
+
# Split Horizontal (left / right, seamless gradient blend)
|
| 323 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 324 |
|
| 325 |
class SplitHorizontalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 × 1920) into left and right panels.

    Layout
    ──────
      • Each panel fills the full 1920 px height.
      • Left panel : 52 % of canvas width → ~562 px
      • Right panel: fills the rest       → ~518 px
      • Blend zone : 80 px overlap with cross-fade gradient masks.

    Both panels are individually cropped to portrait aspect ratio
    (each showing a 540-wide slice of a 1080-wide source),
    then blended at the seam — no visible dividing line.
    """

    SPLIT_RATIO: float = 0.52   # left panel fraction of total width
    BLEND_PX: int = 80          # horizontal overlap / blend zone

    def apply(self, clip, playground_path=None, **kwargs):
        W, H = self.output_size                 # 1080 × 1920
        blend = self.BLEND_PX
        w_left_seg = int(W * self.SPLIT_RATIO)  # ~562
        w_right_seg = W - w_left_seg + blend    # ~598 (includes overlap)

        # ── Left panel from main clip ────────────────────────────────────────
        left_src = _fit_to_height(clip, H)
        lw = left_src.w

        # Crop the left portion (slightly more than half for a natural look)
        crop_w_l = min(lw, w_left_seg + blend)
        left_clip = left_src.crop(x1=max(0, lw // 2 - crop_w_l),
                                  y1=0, x2=lw // 2, y2=H)
        left_clip = left_clip.resize((w_left_seg, H))

        # Gradient: fade out rightmost `blend` columns
        left_mask = _make_horizontal_mask(w_left_seg, H, blend_right=blend)
        left_clip = _apply_mask(left_clip, left_mask).set_position((0, 0))

        # ── Right panel from playground or fallback ───────────────────────────
        if playground_path and os.path.exists(playground_path):
            # External b-roll: mute and match the foreground's duration.
            right_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            right_src = clip.set_opacity(0.85)

        right_full = _fit_to_height(right_src, H)
        rw = right_full.w

        # Crop the right portion of the source
        # NOTE(review): x2 = rw // 2 + crop_w_r can exceed rw when the source
        # is narrow — confirm moviepy's crop clamps out-of-bounds coordinates.
        crop_w_r = min(rw, w_right_seg + blend)
        right_clip = right_full.crop(x1=rw // 2, y1=0,
                                     x2=rw // 2 + crop_w_r, y2=H)
        right_clip = right_clip.resize((w_right_seg, H))

        # Gradient: fade in leftmost `blend` columns
        right_mask = _make_horizontal_mask(w_right_seg, H, blend_left=blend)
        right_x = w_left_seg - blend            # overlaps by `blend` px
        right_clip = _apply_mask(right_clip, right_mask).set_position((right_x, 0))

        # Left panel drawn last so its fading edge dissolves over the right.
        return mpe.CompositeVideoClip([right_clip, left_clip], size=self.output_size)
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 390 |
+
# Vertical Full Style
|
| 391 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 392 |
|
| 393 |
class VerticalFullStyle(BaseStyle):
    """Fill the whole vertical frame, delegating framing decisions to the
    face-tracking cropper."""

    def apply(self, clip, **kwargs):
        return SmartFaceCropper(output_size=self.output_size).apply_to_clip(clip)
|
| 397 |
|
| 398 |
+
|
| 399 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 400 |
+
# Style Factory (unchanged API)
|
| 401 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 402 |
+
|
| 403 |
class StyleFactory:
    """Registry mapping style identifiers to their BaseStyle classes."""

    _styles = {
        "cinematic": CinematicStyle,
        "cinematic_blur": CinematicBlurStyle,
        "split_vertical": SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full": VerticalFullStyle,
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """Instantiate the style registered under `style_name`.

        Unknown names fall back to CinematicBlurStyle.
        """
        return StyleFactory._styles.get(style_name, CinematicBlurStyle)()
|
core/subtitle_manager.py
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import numpy as np
|
| 3 |
import urllib.request
|
|
@@ -10,307 +15,554 @@ from .logger import Logger
|
|
| 10 |
|
| 11 |
logger = Logger.get_logger(__name__)
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
class SubtitleManager:
|
|
|
|
|
|
|
| 14 |
@staticmethod
|
| 15 |
def ensure_font(language=None, style_font=None, text_content=None):
|
| 16 |
-
"""
|
| 17 |
-
|
| 18 |
-
# 1. Determine Font Name
|
| 19 |
font_name = Config.LANGUAGE_FONT_MAP.get("default", "Roboto-Bold.ttf")
|
| 20 |
-
|
| 21 |
-
# Priority 1: Explicit Style Font (if language supports it or it's Latin)
|
| 22 |
-
# However, if text is Arabic/CJK, style font (usually Latin) might break it.
|
| 23 |
-
# So we should check language compatibility first.
|
| 24 |
-
|
| 25 |
detected_lang = language
|
| 26 |
if not detected_lang and text_content:
|
| 27 |
-
# Simple script detection
|
| 28 |
if any("\u0600" <= c <= "\u06FF" for c in text_content):
|
| 29 |
detected_lang = "ar"
|
| 30 |
elif any("\u4E00" <= c <= "\u9FFF" for c in text_content):
|
| 31 |
detected_lang = "zh"
|
| 32 |
-
elif any("\u3040" <= c <= "\
|
| 33 |
detected_lang = "ja"
|
| 34 |
elif any("\u0900" <= c <= "\u097F" for c in text_content):
|
| 35 |
detected_lang = "hi"
|
| 36 |
elif any("\u0400" <= c <= "\u04FF" for c in text_content):
|
| 37 |
detected_lang = "ru"
|
| 38 |
|
| 39 |
-
# Priority 2: Language-specific font from Config Map
|
| 40 |
if detected_lang in Config.LANGUAGE_FONT_MAP:
|
| 41 |
-
|
| 42 |
-
elif style_font and not detected_lang:
|
| 43 |
-
|
| 44 |
-
font_name = style_font
|
| 45 |
|
| 46 |
-
# Fallback: if detected language is known but not in map (shouldn't happen with default keys)
|
| 47 |
if detected_lang and detected_lang not in Config.LANGUAGE_FONT_MAP:
|
| 48 |
-
|
| 49 |
-
|
| 50 |
font_path = os.path.join(Config.BASE_DIR, font_name)
|
| 51 |
-
|
| 52 |
if not os.path.exists(font_path):
|
| 53 |
-
logger.info(f"📥 Downloading font: {font_name}
|
| 54 |
-
# We might need to add more fonts to Config.FONTS or download dynamically
|
| 55 |
url = Config.FONTS.get(font_name)
|
| 56 |
if url:
|
| 57 |
try:
|
| 58 |
-
# Use Config's CSS downloader for Google Fonts
|
| 59 |
if "fonts.googleapis.com/css" in url:
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
raise Exception("CSS font download failed")
|
| 63 |
else:
|
| 64 |
-
# Fallback for direct links
|
| 65 |
urllib.request.urlretrieve(url, font_path)
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
logger.error(f"❌ Failed to download font: {e}")
|
| 70 |
return "Arial"
|
| 71 |
else:
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
if font_name == "Montserrat-Bold.ttf": # TikTok popular
|
| 75 |
-
# Add logic to download or use system font
|
| 76 |
-
pass
|
| 77 |
-
|
| 78 |
return font_path
|
| 79 |
|
|
|
|
| 80 |
@staticmethod
|
| 81 |
-
def create_pil_text_clip(text, fontsize, color, font_path,
|
| 82 |
-
|
|
|
|
| 83 |
try:
|
| 84 |
try:
|
| 85 |
font = ImageFont.truetype(font_path, fontsize)
|
| 86 |
-
except:
|
| 87 |
-
logger.warning(f"⚠️
|
| 88 |
font = ImageFont.load_default()
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
bbox
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
margin = int(stroke_width * 2) + padding
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
img = Image.new('RGBA', (int(img_width), int(img_height)), (0, 0, 0, 0))
|
| 101 |
draw = ImageDraw.Draw(img)
|
| 102 |
-
|
| 103 |
-
# Draw Background if requested
|
| 104 |
if bg_color:
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
draw.text(
|
| 115 |
-
(x, y),
|
| 116 |
-
text,
|
| 117 |
-
font=font,
|
| 118 |
-
fill=color,
|
| 119 |
-
stroke_width=stroke_width,
|
| 120 |
-
stroke_fill=stroke_color
|
| 121 |
-
)
|
| 122 |
-
|
| 123 |
return mpe.ImageClip(np.array(img))
|
| 124 |
-
|
| 125 |
-
except Exception as
|
| 126 |
-
logger.error(f"⚠️
|
| 127 |
return None
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
@staticmethod
|
| 130 |
def get_style_config(style_name):
|
| 131 |
-
"""Returns
|
| 132 |
-
|
| 133 |
-
"classic": {
|
| 134 |
-
"fontsize": 75,
|
| 135 |
-
"color": "white",
|
| 136 |
-
"stroke_color": "black",
|
| 137 |
-
"stroke_width": 2,
|
| 138 |
-
"font": None, # Default based on language
|
| 139 |
-
"bg_color": None,
|
| 140 |
-
"position": ("center", 1350)
|
| 141 |
-
},
|
| 142 |
-
"tiktok_bold": {
|
| 143 |
-
"fontsize": 85,
|
| 144 |
-
"color": "white",
|
| 145 |
-
"stroke_color": "black",
|
| 146 |
-
"stroke_width": 4,
|
| 147 |
-
"font": "Montserrat-Bold.ttf", # Popular on TikTok
|
| 148 |
-
"bg_color": None, # Shadow usually used instead of BG
|
| 149 |
-
"position": ("center", 1400)
|
| 150 |
-
},
|
| 151 |
-
"tiktok_neon": {
|
| 152 |
-
"fontsize": 80,
|
| 153 |
-
"color": "#00f2ea", # TikTok Cyan
|
| 154 |
-
"stroke_color": "#ff0050", # TikTok Red
|
| 155 |
-
"stroke_width": 3,
|
| 156 |
-
"font": "Roboto-Bold.ttf",
|
| 157 |
-
"bg_color": None,
|
| 158 |
-
"position": ("center", 1400)
|
| 159 |
-
},
|
| 160 |
-
"youtube_clean": {
|
| 161 |
-
"fontsize": 70,
|
| 162 |
-
"color": "yellow",
|
| 163 |
-
"stroke_color": "black",
|
| 164 |
-
"stroke_width": 3,
|
| 165 |
-
"font": "Roboto-Bold.ttf",
|
| 166 |
-
"bg_color": None,
|
| 167 |
-
"position": ("center", 1300)
|
| 168 |
-
},
|
| 169 |
-
"youtube_box": {
|
| 170 |
-
"fontsize": 65,
|
| 171 |
-
"color": "white",
|
| 172 |
-
"stroke_color": None,
|
| 173 |
-
"stroke_width": 0,
|
| 174 |
-
"font": "Roboto-Bold.ttf",
|
| 175 |
-
"bg_color": "red", # YouTube Red Box
|
| 176 |
-
"position": ("center", 1300)
|
| 177 |
-
}
|
| 178 |
-
}
|
| 179 |
-
return styles.get(style_name, styles["classic"])
|
| 180 |
|
|
|
|
| 181 |
@staticmethod
|
| 182 |
-
def create_caption_clips(transcript_data, size=(1080, 1920), language=None,
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
#
|
| 189 |
-
|
| 190 |
-
# For simplicity and consistency, let's check the first non-empty text.
|
| 191 |
-
|
| 192 |
sample_text = ""
|
| 193 |
-
segments = []
|
| 194 |
if isinstance(transcript_data, list):
|
| 195 |
-
|
| 196 |
-
segments = transcript_data[0][
|
| 197 |
-
|
| 198 |
segments = transcript_data
|
| 199 |
-
elif isinstance(transcript_data, dict) and
|
| 200 |
-
segments = transcript_data[
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
if not full_text:
|
| 217 |
continue
|
| 218 |
|
| 219 |
-
start_t =
|
| 220 |
-
end_t
|
| 221 |
-
|
| 222 |
if end_t <= start_t:
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
end_t =
|
| 226 |
else:
|
| 227 |
continue
|
| 228 |
|
| 229 |
-
words_list = full_text.split()
|
| 230 |
-
if not words_list:
|
| 231 |
-
continue
|
| 232 |
-
|
| 233 |
chunk_size = 1 if caption_mode == "word" else 4
|
| 234 |
-
chunks
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
chunk_group = valid_words[i:i + chunk_size]
|
| 244 |
-
|
| 245 |
-
chunk_text = " ".join([w['text'] for w in chunk_group])
|
| 246 |
-
chunk_start = chunk_group[0]['start']
|
| 247 |
-
chunk_end = chunk_group[-1]['end']
|
| 248 |
-
|
| 249 |
-
chunks.append({
|
| 250 |
-
"text": chunk_text,
|
| 251 |
-
"start": chunk_start,
|
| 252 |
-
"end": chunk_end
|
| 253 |
-
})
|
| 254 |
else:
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
chunk_text = " ".join(chunk_words)
|
| 263 |
-
|
| 264 |
-
chunk_duration = (end_t - start_t) * (len(chunk_words) / len(words_list))
|
| 265 |
-
chunk_start = start_t + (end_t - start_t) * (i / len(words_list))
|
| 266 |
-
chunk_end = chunk_start + chunk_duration
|
| 267 |
-
|
| 268 |
-
if chunk_end <= chunk_start:
|
| 269 |
-
chunk_end = chunk_start + 0.5
|
| 270 |
-
|
| 271 |
-
chunks.append({
|
| 272 |
-
"text": chunk_text,
|
| 273 |
-
"start": chunk_start,
|
| 274 |
-
"end": chunk_end
|
| 275 |
-
})
|
| 276 |
|
| 277 |
for chunk in chunks:
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
if is_arabic:
|
| 282 |
try:
|
| 283 |
-
|
| 284 |
-
except:
|
| 285 |
pass
|
| 286 |
else:
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
font_path=font_path,
|
| 299 |
-
stroke_color=style_config.get("stroke_color", "black"),
|
| 300 |
-
stroke_width=style_config.get("stroke_width", 2),
|
| 301 |
-
bg_color=style_config.get("bg_color")
|
| 302 |
)
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
|
| 310 |
-
return
|
| 311 |
|
|
|
|
| 312 |
@staticmethod
|
| 313 |
-
def create_captions(video_clip, transcript_data, size=(1080, 1920),
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SubtitleManager — Viral YouTube Shorts Caption Engine
|
| 3 |
+
Styles tuned for 2024-2025 Shorts/Reels/TikTok viral aesthetics.
|
| 4 |
+
All style names kept identical to the original for drop-in integration.
|
| 5 |
+
"""
|
| 6 |
import os
|
| 7 |
import numpy as np
|
| 8 |
import urllib.request
|
|
|
|
| 15 |
|
| 16 |
logger = Logger.get_logger(__name__)
|
| 17 |
|
| 18 |
+
|
| 19 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 20 |
+
# Style Registry (same 6 keys as original — drop-in compatible)
|
| 21 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 22 |
+
#
|
| 23 |
+
# Extra keys consumed only by highlight_word mode:
|
| 24 |
+
# highlight_color → text color for the active word
|
| 25 |
+
# highlight_bg → RGBA fill of the box behind active word
|
| 26 |
+
# highlight_bg_radius → corner radius of that box
|
| 27 |
+
# shadow_layers → list of (off_x, off_y, blur_steps, RGBA)
|
| 28 |
+
# drawn UNDER the highlight box for depth/glow
|
| 29 |
+
#
|
| 30 |
+
# Caption style registry — the same six keys as the original engine, so it is
# drop-in compatible.  Keys consumed only by highlight_word mode:
#   highlight_color     → text colour of the active word
#   highlight_bg        → RGBA fill of the box behind the active word
#   highlight_bg_radius → corner radius of that box
#   shadow_layers       → list of (off_x, off_y, blur_steps, RGBA) painted
#                         UNDER the highlight box for depth / glow
STYLES = {
    # 1. CLASSIC — clean, professional news/podcast look.
    "classic": {
        "fontsize": 72,
        "color": (255, 255, 255, 255),
        "stroke_color": (0, 0, 0, 200),
        "stroke_width": 3,
        "font": None,
        "bg_color": None,
        "position": ("center", 0.80),
        "highlight_color": (255, 255, 255, 255),
        "highlight_bg": (18, 18, 18, 220),
        "highlight_bg_radius": 20,
        "shadow_layers": [
            (0, 6, 8, (0, 0, 0, 160)),   # soft drop-shadow
        ],
    },

    # 2. MODERN GLOW — dark frosted bar, electric-blue glowing active pill.
    "modern_glow": {
        "fontsize": 78,
        "color": (200, 225, 255, 200),
        "stroke_color": (0, 10, 40, 255),
        "stroke_width": 2,
        "font": "Montserrat-Bold.ttf",
        "bg_color": (10, 10, 30, 160),        # dark frosted sentence bar
        "position": ("center", 0.83),
        "highlight_color": (130, 230, 255, 255),  # electric cyan text
        "highlight_bg": (0, 130, 255, 210),       # vivid blue pill
        "highlight_bg_radius": 22,
        "shadow_layers": [
            (0, 0, 16, (0, 160, 255, 110)),  # wide outer glow
            (0, 3, 6, (0, 60, 160, 180)),    # tight drop-shadow
        ],
    },

    # 3. TIKTOK BOLD — high-contrast yellow box, heavy stroke.
    "tiktok_bold": {
        "fontsize": 90,
        "color": (255, 255, 255, 255),
        "stroke_color": (0, 0, 0, 255),
        "stroke_width": 5,
        "font": "Montserrat-Bold.ttf",
        "bg_color": None,
        "position": ("center", 0.84),
        "highlight_color": (10, 10, 10, 255),   # near-black on yellow
        "highlight_bg": (255, 220, 0, 255),     # bold yellow
        "highlight_bg_radius": 12,
        "shadow_layers": [
            (4, 6, 0, (0, 0, 0, 230)),   # hard pixel-offset (punchy feel)
            (7, 10, 0, (0, 0, 0, 90)),
        ],
    },

    # 4. TIKTOK NEON — hot-pink pill with cyan text.
    "tiktok_neon": {
        "fontsize": 80,
        "color": (255, 255, 255, 230),
        "stroke_color": (100, 0, 60, 255),
        "stroke_width": 3,
        "font": "Roboto-Bold.ttf",
        "bg_color": None,
        "position": ("center", 0.85),
        "highlight_color": (0, 242, 234, 255),  # TikTok cyan
        "highlight_bg": (255, 0, 80, 235),      # TikTok pink-red
        "highlight_bg_radius": 22,
        "shadow_layers": [
            (0, 0, 20, (255, 0, 80, 120)),   # pink outer glow
            (0, 0, 8, (0, 242, 234, 80)),    # cyan inner glow
            (3, 5, 0, (80, 0, 40, 210)),     # hard dark offset
        ],
    },

    # 5. YOUTUBE CLEAN — minimal educator style, amber active box.
    "youtube_clean": {
        "fontsize": 70,
        "color": (240, 240, 240, 220),
        "stroke_color": (0, 0, 0, 160),
        "stroke_width": 2,
        "font": "Roboto-Bold.ttf",
        "bg_color": (0, 0, 0, 140),             # subtle sentence pill
        "position": ("center", 0.76),
        "highlight_color": (20, 20, 20, 255),   # dark text on amber
        "highlight_bg": (255, 200, 40, 248),    # warm amber
        "highlight_bg_radius": 16,
        "shadow_layers": [
            (0, 4, 10, (180, 130, 0, 170)),  # amber drop-shadow
        ],
    },

    # 6. YOUTUBE BOX — karaoke energy, solid bar + red active box.
    "youtube_box": {
        "fontsize": 68,
        "color": (255, 255, 255, 255),
        "stroke_color": (0, 0, 0, 255),
        "stroke_width": 2,
        "font": "Roboto-Bold.ttf",
        "bg_color": (15, 15, 15, 210),          # dark sentence bar
        "position": ("center", 0.77),
        "highlight_color": (255, 255, 255, 255),
        "highlight_bg": (200, 0, 0, 255),       # YouTube red
        "highlight_bg_radius": 8,
        "shadow_layers": [
            (0, 5, 0, (110, 0, 0, 230)),  # hard dark-red offset
            (0, 9, 0, (0, 0, 0, 130)),
        ],
    },
}
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 153 |
+
# Helpers
|
| 154 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 155 |
+
|
| 156 |
+
def _rgba(c):
|
| 157 |
+
"""Normalise any colour spec to an (R,G,B,A) tuple."""
|
| 158 |
+
if c is None:
|
| 159 |
+
return None
|
| 160 |
+
if isinstance(c, (tuple, list)):
|
| 161 |
+
return (*c[:3], c[3] if len(c) == 4 else 255)
|
| 162 |
+
tmp = Image.new("RGBA", (1, 1), c)
|
| 163 |
+
return tmp.getpixel((0, 0))
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def _draw_shadow_layers(draw, box, layers, base_radius):
    """Paint shadow / glow layers behind a rounded rectangle.

    Args:
        draw: a PIL ``ImageDraw`` object.
        box: (x1, y1, x2, y2) of the rectangle the shadows sit under.
        layers: iterable of (off_x, off_y, blur_steps, rgba) where
            ``blur_steps == 0`` draws a single hard-offset rectangle and
            ``blur_steps > 0`` draws concentric rectangles with fading
            alpha to approximate a soft glow.
        base_radius: corner radius of the un-blurred rectangle.
    """
    x1, y1, x2, y2 = box
    for off_x, off_y, blur, color in layers:
        rgba = _rgba(color)

        if blur == 0:
            # Hard offset: one opaque-ish rect shifted by the offset.
            draw.rounded_rectangle(
                [(x1 + off_x, y1 + off_y), (x2 + off_x, y2 + off_y)],
                radius=base_radius,
                fill=rgba,
            )
            continue

        # Soft glow: draw from the widest, faintest ring inwards.
        steps = max(blur // 2, 3)
        peak_alpha = rgba[3]
        for step in range(steps, 0, -1):
            grow = step * (blur / steps)
            ring_alpha = int(peak_alpha * (1 - step / (steps + 1)))
            draw.rounded_rectangle(
                [
                    (x1 + off_x - grow, y1 + off_y - grow),
                    (x2 + off_x + grow, y2 + off_y + grow),
                ],
                radius=int(base_radius + grow),
                fill=(*rgba[:3], ring_alpha),
            )
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 199 |
class SubtitleManager:
|
| 200 |
+
|
| 201 |
+
# ── Font management ───────────────────────────────────────────────────────
|
| 202 |
@staticmethod
|
| 203 |
def ensure_font(language=None, style_font=None, text_content=None):
|
| 204 |
+
"""Returns an absolute path to a valid font file."""
|
|
|
|
|
|
|
| 205 |
font_name = Config.LANGUAGE_FONT_MAP.get("default", "Roboto-Bold.ttf")
|
| 206 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
detected_lang = language
|
| 208 |
if not detected_lang and text_content:
|
|
|
|
| 209 |
if any("\u0600" <= c <= "\u06FF" for c in text_content):
|
| 210 |
detected_lang = "ar"
|
| 211 |
elif any("\u4E00" <= c <= "\u9FFF" for c in text_content):
|
| 212 |
detected_lang = "zh"
|
| 213 |
+
elif any("\u3040" <= c <= "\u30FF" for c in text_content):
|
| 214 |
detected_lang = "ja"
|
| 215 |
elif any("\u0900" <= c <= "\u097F" for c in text_content):
|
| 216 |
detected_lang = "hi"
|
| 217 |
elif any("\u0400" <= c <= "\u04FF" for c in text_content):
|
| 218 |
detected_lang = "ru"
|
| 219 |
|
|
|
|
| 220 |
if detected_lang in Config.LANGUAGE_FONT_MAP:
|
| 221 |
+
font_name = Config.LANGUAGE_FONT_MAP[detected_lang]
|
| 222 |
+
elif style_font and not detected_lang:
|
| 223 |
+
font_name = style_font
|
|
|
|
| 224 |
|
|
|
|
| 225 |
if detected_lang and detected_lang not in Config.LANGUAGE_FONT_MAP:
|
| 226 |
+
logger.warning(f"⚠️ Language '{detected_lang}' not in font map, using default.")
|
| 227 |
+
|
| 228 |
font_path = os.path.join(Config.BASE_DIR, font_name)
|
|
|
|
| 229 |
if not os.path.exists(font_path):
|
| 230 |
+
logger.info(f"📥 Downloading font: {font_name} …")
|
|
|
|
| 231 |
url = Config.FONTS.get(font_name)
|
| 232 |
if url:
|
| 233 |
try:
|
|
|
|
| 234 |
if "fonts.googleapis.com/css" in url:
|
| 235 |
+
if not Config.download_font_from_css(url, font_path):
|
| 236 |
+
raise RuntimeError("CSS font download failed")
|
|
|
|
| 237 |
else:
|
|
|
|
| 238 |
urllib.request.urlretrieve(url, font_path)
|
| 239 |
+
logger.info(f"✅ Font ready: {font_name}")
|
| 240 |
+
except Exception as exc:
|
| 241 |
+
logger.error(f"❌ Font download failed: {exc}")
|
|
|
|
| 242 |
return "Arial"
|
| 243 |
else:
|
| 244 |
+
logger.warning(f"⚠️ No URL configured for font: {font_name}")
|
| 245 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
return font_path
|
| 247 |
|
| 248 |
+
# ── Legacy single-text clip (sentence / word modes) ───────────────────────
|
| 249 |
@staticmethod
|
| 250 |
+
def create_pil_text_clip(text, fontsize, color, font_path,
|
| 251 |
+
stroke_color="black", stroke_width=2,
|
| 252 |
+
bg_color=None, padding=12, bg_radius=18):
|
| 253 |
try:
|
| 254 |
try:
|
| 255 |
font = ImageFont.truetype(font_path, fontsize)
|
| 256 |
+
except Exception:
|
| 257 |
+
logger.warning(f"⚠️ Could not load {font_path}, using default.")
|
| 258 |
font = ImageFont.load_default()
|
| 259 |
+
|
| 260 |
+
dummy = Image.new("RGBA", (1, 1))
|
| 261 |
+
d = ImageDraw.Draw(dummy)
|
| 262 |
+
bbox = d.textbbox((0, 0), text, font=font)
|
| 263 |
+
tw = bbox[2] - bbox[0]
|
| 264 |
+
th = bbox[3] - bbox[1]
|
| 265 |
+
|
| 266 |
margin = int(stroke_width * 2) + padding
|
| 267 |
+
iw, ih = tw + margin * 2, th + margin * 2
|
| 268 |
+
|
| 269 |
+
img = Image.new("RGBA", (int(iw), int(ih)), (0, 0, 0, 0))
|
|
|
|
| 270 |
draw = ImageDraw.Draw(img)
|
| 271 |
+
|
|
|
|
| 272 |
if bg_color:
|
| 273 |
+
draw.rounded_rectangle([(0, 0), (iw, ih)],
|
| 274 |
+
radius=bg_radius, fill=_rgba(bg_color))
|
| 275 |
+
|
| 276 |
+
x = (iw - tw) / 2 - bbox[0]
|
| 277 |
+
y = (ih - th) / 2 - bbox[1]
|
| 278 |
+
draw.text((x, y), text, font=font, fill=_rgba(color),
|
| 279 |
+
stroke_width=stroke_width, stroke_fill=_rgba(stroke_color))
|
| 280 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
return mpe.ImageClip(np.array(img))
|
| 282 |
+
|
| 283 |
+
except Exception as exc:
|
| 284 |
+
logger.error(f"⚠️ create_pil_text_clip: {exc}")
|
| 285 |
return None
|
| 286 |
|
| 287 |
+
# ── Highlight-word composite renderer ─────────────────────────────────────
|
| 288 |
+
@staticmethod
|
| 289 |
+
def create_sentence_highlight_clip(
|
| 290 |
+
sentence_words, active_word, font, fontsize, font_path,
|
| 291 |
+
style_config, is_arabic, padding=14, bg_radius=20,
|
| 292 |
+
):
|
| 293 |
+
"""
|
| 294 |
+
Renders the entire sentence as ONE image.
|
| 295 |
+
The active word gets a visible shadow-box underneath + highlight colour.
|
| 296 |
+
No floating clip artefacts — position is always correct.
|
| 297 |
+
"""
|
| 298 |
+
try:
|
| 299 |
+
dummy = Image.new("RGBA", (1, 1))
|
| 300 |
+
d = ImageDraw.Draw(dummy)
|
| 301 |
+
|
| 302 |
+
sp_w = max(d.textbbox((0, 0), " ", font=font)[2], 4)
|
| 303 |
+
|
| 304 |
+
# ── 1. Measure each word ─────────────────────────────────────────
|
| 305 |
+
ordered = list(reversed(sentence_words)) if is_arabic else sentence_words
|
| 306 |
+
word_metrics = []
|
| 307 |
+
cursor = 0
|
| 308 |
+
for w in ordered:
|
| 309 |
+
raw = w.get("text", "")
|
| 310 |
+
display = get_display(reshape(raw)) if is_arabic else raw.upper()
|
| 311 |
+
bbox = d.textbbox((0, 0), display, font=font)
|
| 312 |
+
ww = bbox[2] - bbox[0]
|
| 313 |
+
word_metrics.append({"id": id(w), "display": display,
|
| 314 |
+
"bbox": bbox, "x": cursor, "width": ww})
|
| 315 |
+
cursor += ww + sp_w
|
| 316 |
+
|
| 317 |
+
total_w = cursor - sp_w
|
| 318 |
+
ref_bbox = d.textbbox((0, 0), "Ag", font=font)
|
| 319 |
+
text_h = ref_bbox[3] - ref_bbox[1]
|
| 320 |
+
|
| 321 |
+
stroke_w = style_config.get("stroke_width", 2)
|
| 322 |
+
margin = int(stroke_w * 2) + padding
|
| 323 |
+
|
| 324 |
+
# Extra vertical bleed for shadow layers
|
| 325 |
+
bleed = 14
|
| 326 |
+
iw = int(total_w + margin * 2)
|
| 327 |
+
ih = int(text_h + margin * 2 + bleed)
|
| 328 |
+
|
| 329 |
+
img = Image.new("RGBA", (iw, ih), (0, 0, 0, 0))
|
| 330 |
+
draw = ImageDraw.Draw(img)
|
| 331 |
+
|
| 332 |
+
# ── 2. Optional full-sentence background bar ──────────────────────
|
| 333 |
+
sentence_bg = style_config.get("bg_color")
|
| 334 |
+
if sentence_bg:
|
| 335 |
+
draw.rounded_rectangle(
|
| 336 |
+
[(0, bleed // 2), (iw, ih - bleed // 2)],
|
| 337 |
+
radius=bg_radius, fill=_rgba(sentence_bg),
|
| 338 |
+
)
|
| 339 |
+
|
| 340 |
+
# ── 3. Shadow + highlight box for active word ─────────────────────
|
| 341 |
+
active_id = id(active_word)
|
| 342 |
+
hl_bg = _rgba(style_config.get("highlight_bg"))
|
| 343 |
+
hl_radius = style_config.get("highlight_bg_radius", bg_radius)
|
| 344 |
+
shadows = style_config.get("shadow_layers", [])
|
| 345 |
+
|
| 346 |
+
active_wm = next((wm for wm in word_metrics if wm["id"] == active_id), None)
|
| 347 |
+
|
| 348 |
+
if active_wm and hl_bg:
|
| 349 |
+
bx1 = margin + active_wm["x"] - active_wm["bbox"][0] - padding
|
| 350 |
+
by1 = bleed // 2
|
| 351 |
+
bx2 = bx1 + active_wm["width"] + padding * 2
|
| 352 |
+
by2 = ih - bleed // 2
|
| 353 |
+
|
| 354 |
+
# Shadow / glow layers first
|
| 355 |
+
_draw_shadow_layers(draw, (bx1, by1, bx2, by2), shadows, hl_radius)
|
| 356 |
+
|
| 357 |
+
# Main highlight box
|
| 358 |
+
draw.rounded_rectangle([(bx1, by1), (bx2, by2)],
|
| 359 |
+
radius=hl_radius, fill=hl_bg)
|
| 360 |
+
|
| 361 |
+
# ── 4. Draw text words ────────────────────────────────────────────
|
| 362 |
+
rest_c = _rgba(style_config.get("color", (255, 255, 255, 255)))
|
| 363 |
+
hl_c = _rgba(style_config.get("highlight_color", rest_c))
|
| 364 |
+
stk_c = _rgba(style_config.get("stroke_color", (0, 0, 0, 255)))
|
| 365 |
+
|
| 366 |
+
for wm in word_metrics:
|
| 367 |
+
col = hl_c if (wm["id"] == active_id) else rest_c
|
| 368 |
+
tx = margin + wm["x"] - wm["bbox"][0]
|
| 369 |
+
ty = margin + bleed // 2 - wm["bbox"][1]
|
| 370 |
+
draw.text((tx, ty), wm["display"], font=font, fill=col,
|
| 371 |
+
stroke_width=stroke_w, stroke_fill=stk_c)
|
| 372 |
+
|
| 373 |
+
return mpe.ImageClip(np.array(img))
|
| 374 |
+
|
| 375 |
+
except Exception as exc:
|
| 376 |
+
logger.error(f"⚠️ create_sentence_highlight_clip: {exc}")
|
| 377 |
+
return None
|
| 378 |
+
|
| 379 |
+
# ── Public style accessor ──────────────────────────────────────────────────
|
| 380 |
@staticmethod
|
| 381 |
def get_style_config(style_name):
|
| 382 |
+
"""Returns the style dict for the given name (falls back to 'classic')."""
|
| 383 |
+
return STYLES.get(style_name, STYLES["classic"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
|
| 385 |
+
# ── Main generator ─────────────────────────────────────────────────────────
|
| 386 |
@staticmethod
|
| 387 |
+
def create_caption_clips(transcript_data, size=(1080, 1920), language=None,
|
| 388 |
+
caption_mode="sentence", caption_style="classic"):
|
| 389 |
+
"""Generates all caption ImageClips ready for compositing."""
|
| 390 |
+
all_clips = []
|
| 391 |
+
style_cfg = SubtitleManager.get_style_config(caption_style)
|
| 392 |
+
|
| 393 |
+
# ── Parse transcript ─────────────────────────────────────────────────
|
| 394 |
+
segments = []
|
|
|
|
|
|
|
| 395 |
sample_text = ""
|
|
|
|
| 396 |
if isinstance(transcript_data, list):
|
| 397 |
+
if transcript_data and "segments" in transcript_data[0]:
|
| 398 |
+
segments = transcript_data[0]["segments"]
|
| 399 |
+
else:
|
| 400 |
segments = transcript_data
|
| 401 |
+
elif isinstance(transcript_data, dict) and "segments" in transcript_data:
|
| 402 |
+
segments = transcript_data["segments"]
|
| 403 |
+
|
| 404 |
+
for s in segments:
|
| 405 |
+
if s.get("text"):
|
| 406 |
+
sample_text = s["text"]
|
| 407 |
+
break
|
| 408 |
+
|
| 409 |
+
font_path = SubtitleManager.ensure_font(
|
| 410 |
+
language, style_cfg.get("font"), text_content=sample_text
|
| 411 |
+
)
|
| 412 |
+
pos_cfg = style_cfg.get("position", ("center", 0.80))
|
| 413 |
+
pos = (pos_cfg[0], int(pos_cfg[1] * size[1]))
|
| 414 |
+
|
| 415 |
+
# ════════════════════════════════════════════════════════════════════
|
| 416 |
+
# MODE: highlight_word
|
| 417 |
+
# ════════════════════════════════════════════════════════════════════
|
| 418 |
+
if caption_mode == "highlight_word":
|
| 419 |
+
all_words = []
|
| 420 |
+
for seg in segments:
|
| 421 |
+
if "words" in seg and seg["words"]:
|
| 422 |
+
all_words.extend(seg["words"])
|
| 423 |
+
|
| 424 |
+
if not all_words:
|
| 425 |
+
logger.warning("⚠️ highlight_word needs word-level timestamps — none found.")
|
| 426 |
+
return []
|
| 427 |
+
|
| 428 |
+
fontsize = style_cfg.get("fontsize", 75)
|
| 429 |
+
try:
|
| 430 |
+
font = ImageFont.truetype(font_path, fontsize)
|
| 431 |
+
except Exception:
|
| 432 |
+
logger.warning("⚠️ TrueType load failed — falling back to default font.")
|
| 433 |
+
font = ImageFont.load_default()
|
| 434 |
+
|
| 435 |
+
# Group words into sentences (gap > 0.7 s = new sentence)
|
| 436 |
+
sentences, cur = [], []
|
| 437 |
+
for i, word in enumerate(all_words):
|
| 438 |
+
if not word.get("text", "").strip():
|
| 439 |
+
continue
|
| 440 |
+
cur.append(word)
|
| 441 |
+
is_last = (i == len(all_words) - 1)
|
| 442 |
+
pause = (all_words[i + 1]["start"] - word["end"]) if not is_last else 1.0
|
| 443 |
+
if pause > 0.7 or is_last:
|
| 444 |
+
sentences.append(cur)
|
| 445 |
+
cur = []
|
| 446 |
+
|
| 447 |
+
for sw in sentences:
|
| 448 |
+
sent_text = " ".join(w["text"] for w in sw)
|
| 449 |
+
sent_start = sw[0]["start"]
|
| 450 |
+
sent_end = sw[-1]["end"]
|
| 451 |
+
is_ar = language == "ar" or any("\u0600" <= c <= "\u06FF" for c in sent_text)
|
| 452 |
+
|
| 453 |
+
# One frame per word (active highlight moves)
|
| 454 |
+
for active in sw:
|
| 455 |
+
clip = SubtitleManager.create_sentence_highlight_clip(
|
| 456 |
+
sentence_words=sw, active_word=active,
|
| 457 |
+
font=font, fontsize=fontsize, font_path=font_path,
|
| 458 |
+
style_config=style_cfg, is_arabic=is_ar,
|
| 459 |
+
padding=style_cfg.get("padding", 14),
|
| 460 |
+
bg_radius=style_cfg.get("highlight_bg_radius", 20),
|
| 461 |
+
)
|
| 462 |
+
if clip:
|
| 463 |
+
all_clips.append(
|
| 464 |
+
clip.set_start(active["start"])
|
| 465 |
+
.set_end(active["end"])
|
| 466 |
+
.set_position(pos)
|
| 467 |
+
)
|
| 468 |
+
|
| 469 |
+
# Fill inter-word gaps (no active word) with plain sentence
|
| 470 |
+
covered = [(w["start"], w["end"]) for w in sw]
|
| 471 |
+
gaps = []
|
| 472 |
+
if sent_start < covered[0][0]:
|
| 473 |
+
gaps.append((sent_start, covered[0][0]))
|
| 474 |
+
for j in range(len(covered) - 1):
|
| 475 |
+
if covered[j][1] < covered[j + 1][0]:
|
| 476 |
+
gaps.append((covered[j][1], covered[j + 1][0]))
|
| 477 |
+
if covered[-1][1] < sent_end:
|
| 478 |
+
gaps.append((covered[-1][1], sent_end))
|
| 479 |
+
|
| 480 |
+
for gs, ge in gaps:
|
| 481 |
+
plain_cfg = {**style_cfg, "highlight_bg": None, "shadow_layers": []}
|
| 482 |
+
dummy_w = {"text": "", "start": gs, "end": ge}
|
| 483 |
+
gc = SubtitleManager.create_sentence_highlight_clip(
|
| 484 |
+
sentence_words=sw, active_word=dummy_w,
|
| 485 |
+
font=font, fontsize=fontsize, font_path=font_path,
|
| 486 |
+
style_config=plain_cfg, is_arabic=is_ar,
|
| 487 |
+
)
|
| 488 |
+
if gc:
|
| 489 |
+
all_clips.append(gc.set_start(gs).set_end(ge).set_position(pos))
|
| 490 |
+
|
| 491 |
+
return all_clips
|
| 492 |
+
|
| 493 |
+
# ════════════════════════════════════════════════════════════════════
|
| 494 |
+
# LEGACY MODES: sentence / word
|
| 495 |
+
# ════════════════════════════════════════════════════════════════════
|
| 496 |
+
for seg in segments:
|
| 497 |
+
full_text = seg.get("text", "").strip() or " ".join(
|
| 498 |
+
w["text"] for w in seg.get("words", [])
|
| 499 |
+
)
|
| 500 |
if not full_text:
|
| 501 |
continue
|
| 502 |
|
| 503 |
+
start_t = seg.get("start", 0)
|
| 504 |
+
end_t = seg.get("end", 0)
|
|
|
|
| 505 |
if end_t <= start_t:
|
| 506 |
+
ws = seg.get("words", [])
|
| 507 |
+
if ws:
|
| 508 |
+
start_t, end_t = ws[0]["start"], ws[-1]["end"]
|
| 509 |
else:
|
| 510 |
continue
|
| 511 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
chunk_size = 1 if caption_mode == "word" else 4
|
| 513 |
+
chunks = []
|
| 514 |
+
stt_words = seg.get("words")
|
| 515 |
+
|
| 516 |
+
if stt_words:
|
| 517 |
+
valid = [w for w in stt_words if w.get("text", "").strip()]
|
| 518 |
+
for i in range(0, len(valid), chunk_size):
|
| 519 |
+
grp = valid[i:i + chunk_size]
|
| 520 |
+
chunks.append({"text": " ".join(w["text"] for w in grp),
|
| 521 |
+
"start": grp[0]["start"], "end": grp[-1]["end"]})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
else:
|
| 523 |
+
wl = full_text.split()
|
| 524 |
+
for i in range(0, len(wl), chunk_size):
|
| 525 |
+
cw = wl[i:i + chunk_size]
|
| 526 |
+
cs = start_t + (end_t - start_t) * (i / len(wl))
|
| 527 |
+
ce = cs + (end_t - start_t) * (len(cw) / len(wl))
|
| 528 |
+
chunks.append({"text": " ".join(cw),
|
| 529 |
+
"start": cs, "end": max(ce, cs + 0.1)})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
|
| 531 |
for chunk in chunks:
|
| 532 |
+
disp = chunk["text"]
|
| 533 |
+
is_ar = language == "ar" or any("\u0600" <= c <= "\u06FF" for c in disp)
|
| 534 |
+
if is_ar:
|
|
|
|
| 535 |
try:
|
| 536 |
+
disp = get_display(reshape(disp))
|
| 537 |
+
except Exception:
|
| 538 |
pass
|
| 539 |
else:
|
| 540 |
+
disp = disp.upper()
|
| 541 |
+
|
| 542 |
+
clip = SubtitleManager.create_pil_text_clip(
|
| 543 |
+
disp,
|
| 544 |
+
fontsize = style_cfg.get("fontsize", 72),
|
| 545 |
+
color = style_cfg.get("color", (255, 255, 255, 255)),
|
| 546 |
+
font_path = font_path,
|
| 547 |
+
stroke_color = style_cfg.get("stroke_color", (0, 0, 0, 200)),
|
| 548 |
+
stroke_width = style_cfg.get("stroke_width", 2),
|
| 549 |
+
bg_color = style_cfg.get("bg_color"),
|
| 550 |
+
bg_radius = style_cfg.get("highlight_bg_radius", 18),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
)
|
| 552 |
+
if clip:
|
| 553 |
+
all_clips.append(
|
| 554 |
+
clip.set_start(chunk["start"])
|
| 555 |
+
.set_end(chunk["end"])
|
| 556 |
+
.set_position(pos)
|
| 557 |
+
)
|
| 558 |
|
| 559 |
+
return all_clips
|
| 560 |
|
| 561 |
+
# ── Convenience compositor ─────────────────────────────────────────────────
|
| 562 |
@staticmethod
|
| 563 |
+
def create_captions(video_clip, transcript_data, size=(1080, 1920),
|
| 564 |
+
language=None, caption_mode="sentence"):
|
| 565 |
+
clips = SubtitleManager.create_caption_clips(
|
| 566 |
+
transcript_data, size, language, caption_mode
|
| 567 |
+
)
|
| 568 |
+
return mpe.CompositeVideoClip([video_clip] + clips, size=size)
|
firebase_key.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
  "type": "service_account",
  "project_id": "clippingcommunity-caf5a",
  "private_key_id": "REDACTED-ROTATE-IMMEDIATELY",
  "private_key": "REDACTED: a live Google service-account private key was committed to this repository. Revoke/rotate it in the Google Cloud console NOW and purge it from git history; load credentials at runtime from an environment variable or secret store (e.g. GOOGLE_APPLICATION_CREDENTIALS) instead of committing firebase_key.json.",
  "client_email": "firebase-adminsdk-fbsvc@clippingcommunity-caf5a.iam.gserviceaccount.com",
  "client_id": "110147267785886278722",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-fbsvc%40clippingcommunity-caf5a.iam.gserviceaccount.com",
  "universe_domain": "googleapis.com"
}
|
main.py
CHANGED
|
@@ -12,13 +12,26 @@ from processor import VideoProcessor
|
|
| 12 |
from core.config import Config
|
| 13 |
from core.logger import Logger
|
| 14 |
from core.task_queue import TaskManager
|
|
|
|
|
|
|
| 15 |
|
| 16 |
logger = Logger.get_logger(__name__)
|
| 17 |
task_manager = TaskManager()
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
# Ensure directories exist
|
| 20 |
Config.setup_dirs()
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
class VideoStyle(str, Enum):
|
| 23 |
cinematic = "cinematic"
|
| 24 |
cinematic_blur = "cinematic_blur"
|
|
@@ -29,9 +42,11 @@ class VideoStyle(str, Enum):
|
|
| 29 |
class CaptionMode(str, Enum):
|
| 30 |
word = "word"
|
| 31 |
sentence = "sentence"
|
|
|
|
| 32 |
|
| 33 |
class CaptionStyle(str, Enum):
|
| 34 |
classic = "classic"
|
|
|
|
| 35 |
tiktok_bold = "tiktok_bold"
|
| 36 |
tiktok_neon = "tiktok_neon"
|
| 37 |
youtube_clean = "youtube_clean"
|
|
@@ -52,6 +67,37 @@ class Language(str, Enum):
|
|
| 52 |
app = FastAPI(title="Auto-Clipping API")
|
| 53 |
clipper = VideoProcessor()
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
def process_video_task(
|
| 56 |
task_id: str,
|
| 57 |
video_path: str,
|
|
@@ -69,9 +115,6 @@ def process_video_task(
|
|
| 69 |
from moviepy.editor import VideoFileClip
|
| 70 |
full_video_clip = None
|
| 71 |
try:
|
| 72 |
-
# Optimization: Open video once
|
| 73 |
-
full_video_clip = VideoFileClip(video_path)
|
| 74 |
-
|
| 75 |
# Helper for progress updates
|
| 76 |
def update_progress(progress, message):
|
| 77 |
task_manager.update_task_progress(task_id, progress, message)
|
|
@@ -79,11 +122,15 @@ def process_video_task(
|
|
| 79 |
update_progress(1, "Starting video analysis...")
|
| 80 |
|
| 81 |
# 1. Analyze video
|
| 82 |
-
|
|
|
|
| 83 |
scored_segments, total_duration, llm_moments = clipper.analyze_impact(
|
| 84 |
video_path,
|
| 85 |
-
video_clip
|
| 86 |
-
language
|
|
|
|
|
|
|
|
|
|
| 87 |
timestamp_mode=timestamp_mode,
|
| 88 |
progress_callback=update_progress
|
| 89 |
)
|
|
@@ -101,8 +148,8 @@ def process_video_task(
|
|
| 101 |
llm_moments,
|
| 102 |
style=style,
|
| 103 |
task_id=task_id,
|
| 104 |
-
language=language,
|
| 105 |
-
video_clip
|
| 106 |
playground_path=playground_path,
|
| 107 |
audio_path=audio_path,
|
| 108 |
bg_music_volume=bg_music_volume,
|
|
@@ -135,8 +182,7 @@ def process_video_task(
|
|
| 135 |
"traceback": traceback.format_exc()
|
| 136 |
}
|
| 137 |
finally:
|
| 138 |
-
|
| 139 |
-
full_video_clip.close()
|
| 140 |
|
| 141 |
# Send webhook
|
| 142 |
if webhook_url and webhook_url.strip() and webhook_url.startswith(('http://', 'https://')):
|
|
|
|
| 12 |
from core.config import Config
|
| 13 |
from core.logger import Logger
|
| 14 |
from core.task_queue import TaskManager
|
| 15 |
+
from core.database import DatabaseManager
|
| 16 |
+
from pydantic import BaseModel
|
| 17 |
|
| 18 |
logger = Logger.get_logger(__name__)
|
| 19 |
task_manager = TaskManager()
|
| 20 |
|
| 21 |
+
# Initialize Database Manager (Try Firebase, fallback to Local)
|
| 22 |
+
try:
|
| 23 |
+
db_manager = DatabaseManager(use_firebase=True)
|
| 24 |
+
except Exception:
|
| 25 |
+
db_manager = DatabaseManager(use_firebase=False)
|
| 26 |
+
|
| 27 |
# Ensure directories exist
|
| 28 |
Config.setup_dirs()
|
| 29 |
|
| 30 |
+
class APIKeyInput(BaseModel):
    """Request body for POST /api/keys."""
    service: str                 # service identifier, e.g. "openrouter", "openai"
    key: str                     # the raw API key value to store
    use_firebase: bool = False   # True -> shared Firebase DB, False -> local SQLite
|
| 34 |
+
|
| 35 |
class VideoStyle(str, Enum):
|
| 36 |
cinematic = "cinematic"
|
| 37 |
cinematic_blur = "cinematic_blur"
|
|
|
|
| 42 |
class CaptionMode(str, Enum):
|
| 43 |
word = "word"
|
| 44 |
sentence = "sentence"
|
| 45 |
+
highlight_word = "highlight_word"
|
| 46 |
|
| 47 |
class CaptionStyle(str, Enum):
|
| 48 |
classic = "classic"
|
| 49 |
+
modern_glow = "modern_glow"
|
| 50 |
tiktok_bold = "tiktok_bold"
|
| 51 |
tiktok_neon = "tiktok_neon"
|
| 52 |
youtube_clean = "youtube_clean"
|
|
|
|
| 67 |
app = FastAPI(title="Auto-Clipping API")
|
| 68 |
clipper = VideoProcessor()
|
| 69 |
|
| 70 |
+
@app.post("/api/keys")
async def add_api_key(input_data: APIKeyInput):
    """
    Securely adds an API key to the database.
    - service: Service name (e.g., 'openrouter', 'openai')
    - key: The API key string
    - use_firebase: If true, saves to community database (Firebase). If false, saves to local SQLite.
    """
    try:
        target_db = db_manager
        # Honour an explicit Firebase request even when the global manager
        # fell back to SQLite at startup.
        if input_data.use_firebase and not db_manager.use_firebase:
            try:
                target_db = DatabaseManager(use_firebase=True)
            except Exception as e:
                return JSONResponse(
                    status_code=400,
                    content={"error": f"Firebase not configured: {str(e)}"},
                )

        target_db.save_key(input_data.service, input_data.key)

        dest = "Firebase (Community)" if input_data.use_firebase else "Local SQLite"
        return {"message": f"API Key for {input_data.service} saved securely to {dest}."}

    except Exception as e:
        logger.error(f"Error saving API key: {e}")
        return JSONResponse(status_code=500, content={"error": str(e)})
|
| 100 |
+
|
| 101 |
def process_video_task(
|
| 102 |
task_id: str,
|
| 103 |
video_path: str,
|
|
|
|
| 115 |
from moviepy.editor import VideoFileClip
|
| 116 |
full_video_clip = None
|
| 117 |
try:
|
|
|
|
|
|
|
|
|
|
| 118 |
# Helper for progress updates
|
| 119 |
def update_progress(progress, message):
|
| 120 |
task_manager.update_task_progress(task_id, progress, message)
|
|
|
|
| 122 |
update_progress(1, "Starting video analysis...")
|
| 123 |
|
| 124 |
# 1. Analyze video
|
| 125 |
+
# Fix: Ensure 'words' mode is used for highlight_word too
|
| 126 |
+
timestamp_mode = "words" if caption_mode in (CaptionMode.word, CaptionMode.highlight_word) else "segments"
|
| 127 |
scored_segments, total_duration, llm_moments = clipper.analyze_impact(
|
| 128 |
video_path,
|
| 129 |
+
# video_clip removed as it's not supported
|
| 130 |
+
# language passed as target_language if needed, or source?
|
| 131 |
+
# In processor.py: source_language=None (auto), target_language=...
|
| 132 |
+
# main.py seems to treat 'language' as the output/target language
|
| 133 |
+
target_language=language,
|
| 134 |
timestamp_mode=timestamp_mode,
|
| 135 |
progress_callback=update_progress
|
| 136 |
)
|
|
|
|
| 148 |
llm_moments,
|
| 149 |
style=style,
|
| 150 |
task_id=task_id,
|
| 151 |
+
language=language, # target language
|
| 152 |
+
# video_clip removed
|
| 153 |
playground_path=playground_path,
|
| 154 |
audio_path=audio_path,
|
| 155 |
bg_music_volume=bg_music_volume,
|
|
|
|
| 182 |
"traceback": traceback.format_exc()
|
| 183 |
}
|
| 184 |
finally:
|
| 185 |
+
pass
|
|
|
|
| 186 |
|
| 187 |
# Send webhook
|
| 188 |
if webhook_url and webhook_url.strip() and webhook_url.startswith(('http://', 'https://')):
|
processor.py
CHANGED
|
@@ -1,9 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import json
|
| 3 |
import traceback
|
| 4 |
-
from datetime import datetime
|
| 5 |
import moviepy.editor as mpe
|
| 6 |
-
import
|
|
|
|
|
|
|
| 7 |
from core.config import Config
|
| 8 |
from core.logger import Logger
|
| 9 |
from core.stt import STT
|
|
@@ -11,455 +25,448 @@ from core.analyze import analyze_transcript_gemini
|
|
| 11 |
from core.styles import StyleFactory
|
| 12 |
from core.subtitle_manager import SubtitleManager
|
| 13 |
from core.free_translator import FreeTranslator
|
| 14 |
-
import json_repair
|
| 15 |
|
| 16 |
logger = Logger.get_logger(__name__)
|
| 17 |
|
|
|
|
|
|
|
| 18 |
class VideoProcessor:
|
|
|
|
| 19 |
def __init__(self, model_size="base"):
|
| 20 |
self.stt = STT(model_size=model_size)
|
| 21 |
Config.setup_dirs()
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
def _clean_json_response(self, content):
|
| 24 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 25 |
if not isinstance(content, str):
|
| 26 |
return content
|
| 27 |
-
|
| 28 |
-
# Remove markdown blocks if present
|
| 29 |
content = content.strip()
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
content = content[3:]
|
| 34 |
if content.endswith("```"):
|
| 35 |
content = content[:-3]
|
| 36 |
-
|
| 37 |
content = content.strip()
|
| 38 |
-
|
| 39 |
-
# Use json_repair to fix truncated or malformed JSON
|
| 40 |
try:
|
| 41 |
-
|
| 42 |
-
return json.dumps(
|
| 43 |
except Exception as e:
|
| 44 |
-
logger.warning(f"⚠️ json_repair failed,
|
| 45 |
-
|
| 46 |
-
#
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
return content
|
| 55 |
|
| 56 |
def parse_ai_response(self, ai_res):
|
| 57 |
-
"""
|
| 58 |
-
Parses the JSON response from the AI and returns a list of segments.
|
| 59 |
-
Handles both string and dictionary responses, and various potential key names.
|
| 60 |
-
"""
|
| 61 |
if not isinstance(ai_res, dict):
|
| 62 |
-
logger.error(f"❌ Invalid AI response
|
| 63 |
return []
|
| 64 |
-
|
| 65 |
res_content = ai_res.get("content")
|
| 66 |
-
segments_data = {}
|
| 67 |
-
|
| 68 |
try:
|
| 69 |
if isinstance(res_content, str):
|
| 70 |
-
|
| 71 |
-
segments_data = json.loads(cleaned_content)
|
| 72 |
else:
|
| 73 |
segments_data = res_content
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
| 76 |
if isinstance(segments_data, dict):
|
| 77 |
-
for key in
|
| 78 |
if key in segments_data and isinstance(segments_data[key], list):
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
break
|
| 86 |
-
elif isinstance(segments_data, list):
|
| 87 |
-
chunk_segments = segments_data
|
| 88 |
-
|
| 89 |
-
return chunk_segments
|
| 90 |
-
|
| 91 |
except Exception as e:
|
| 92 |
logger.error(f"❌ Failed to parse AI response: {e}")
|
| 93 |
-
logger.error(f"Raw
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
def _clean_json_response(self, text):
|
| 97 |
-
"""Clean markdown and other noise from AI JSON response."""
|
| 98 |
-
text = text.strip()
|
| 99 |
-
if text.startswith("```json"):
|
| 100 |
-
text = text[7:]
|
| 101 |
-
if text.startswith("```"):
|
| 102 |
-
text = text[3:]
|
| 103 |
-
if text.endswith("```"):
|
| 104 |
-
text = text[:-3]
|
| 105 |
-
return text.strip()
|
| 106 |
-
|
| 107 |
-
def analyze_impact(self, video_path, video_clip=None, language=None, timestamp_mode="segments", progress_callback=None):
|
| 108 |
-
"""Analyzes video content and suggests viral clips using AI."""
|
| 109 |
-
if progress_callback: progress_callback(5, "Starting speech-to-text...")
|
| 110 |
-
logger.info("🎙️ Phase 1: Speech-to-Text (STT)...")
|
| 111 |
-
# Always transcribe in source language (auto detect)
|
| 112 |
full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
|
| 113 |
-
video_path,
|
| 114 |
-
language=
|
| 115 |
skip_ai=True,
|
| 116 |
-
timestamp_mode=timestamp_mode
|
| 117 |
)
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
if language:
|
| 122 |
-
if hasattr(language, 'value'):
|
| 123 |
-
target_lang = language.value
|
| 124 |
-
else:
|
| 125 |
-
target_lang = str(language)
|
| 126 |
-
|
| 127 |
-
|
| 128 |
data = {
|
| 129 |
-
"segments":
|
| 130 |
-
"detected_language": detected_lang,
|
| 131 |
-
"
|
|
|
|
| 132 |
}
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
max_time
|
| 142 |
-
|
| 143 |
current_start = 0
|
|
|
|
| 144 |
while current_start < max_time:
|
| 145 |
-
current_end
|
| 146 |
-
|
| 147 |
chunk_transcript = ""
|
|
|
|
| 148 |
for seg in full_segments:
|
| 149 |
if seg["start"] >= current_start and seg["start"] < current_end:
|
| 150 |
-
chunk_transcript +=
|
| 151 |
-
|
|
|
|
|
|
|
| 152 |
if chunk_transcript.strip():
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
logger.info(
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
| 162 |
ai_res = analyze_transcript_gemini(chunk_transcript)
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
logger.info(f"🤖 AI Response Type: {type(ai_res)}")
|
| 166 |
-
if isinstance(ai_res, dict) and "content" in ai_res:
|
| 167 |
-
logger.info(f"🤖 Raw AI Response (First 500 chars): {ai_res['content'][:500]}...")
|
| 168 |
-
else:
|
| 169 |
-
logger.info(f"🤖 Raw AI Response (Structure): {str(ai_res)[:500]}...")
|
| 170 |
-
|
| 171 |
try:
|
| 172 |
-
|
| 173 |
-
logger.info(f"✅
|
| 174 |
-
|
| 175 |
except Exception as e:
|
| 176 |
-
logger.error(f"❌
|
| 177 |
logger.error(traceback.format_exc())
|
| 178 |
-
|
| 179 |
-
current_start +=
|
| 180 |
-
if current_end >= max_time:
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
for s in
|
| 186 |
-
|
| 187 |
-
if
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
def get_best_segments(self, segments, video_duration=0):
|
| 194 |
-
"""
|
| 195 |
return sorted(segments, key=lambda x: x.get("viral_score", 0), reverse=True)
|
| 196 |
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
else:
|
| 209 |
-
|
| 210 |
-
with mpe.VideoFileClip(input_video_path) as temp_vid:
|
| 211 |
-
video_duration = temp_vid.duration
|
| 212 |
-
except Exception as e:
|
| 213 |
-
logger.error(f"❌ Failed to get video duration: {e}")
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
output_files = []
|
| 216 |
-
|
| 217 |
-
# Initialize Translator if needed
|
| 218 |
-
translator = None
|
| 219 |
-
target_language = None
|
| 220 |
-
if language:
|
| 221 |
-
target_language = language.value if hasattr(language, 'value') else language
|
| 222 |
-
|
| 223 |
-
detected_lang = data.get("detected_language", "en")
|
| 224 |
-
needs_translation = (target_language and
|
| 225 |
-
target_language != "auto" and
|
| 226 |
-
target_language != detected_lang)
|
| 227 |
-
|
| 228 |
-
if needs_translation:
|
| 229 |
-
logger.info(f"🌍 Translating from {detected_lang} to {target_language}...")
|
| 230 |
-
translator = FreeTranslator()
|
| 231 |
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
return []
|
| 236 |
-
|
| 237 |
-
logger.info(f"📊 Starting processing for {len(best_clips)} clips...")
|
| 238 |
-
|
| 239 |
-
for i, seg in enumerate(best_clips):
|
| 240 |
-
# Update progress
|
| 241 |
-
current_progress = 60 + int((i / len(best_clips)) * 35) # 60% to 95%
|
| 242 |
-
if progress_callback:
|
| 243 |
-
progress_callback(current_progress, f"Rendering clip {i+1} of {len(best_clips)}...")
|
| 244 |
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
continue
|
| 257 |
-
|
| 258 |
-
|
|
|
|
|
|
|
|
|
|
| 259 |
if needs_translation and translator:
|
|
|
|
| 260 |
try:
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
]
|
| 267 |
-
|
| 268 |
-
if matching_segs:
|
| 269 |
-
logger.info(f"🌍 Translating {len(matching_segs)} segments for Clip {i+1}...")
|
| 270 |
-
for match_s in matching_segs:
|
| 271 |
-
# Skip if already translated (heuristic check if needed, but safe to re-translate if simple)
|
| 272 |
-
# Or better, check if text is already in target language?
|
| 273 |
-
# Since we modify in place, subsequent clips covering same segment might re-translate.
|
| 274 |
-
# But clips usually don't overlap much.
|
| 275 |
-
|
| 276 |
-
tr_text, _ = translator.translate_text(match_s['text'], target_language)
|
| 277 |
-
match_s['text'] = tr_text
|
| 278 |
-
# Clear words to force interpolation since word-level timing is lost
|
| 279 |
-
if 'words' in match_s:
|
| 280 |
-
match_s['words'] = []
|
| 281 |
-
|
| 282 |
-
logger.info(f"✅ Translated clip {i+1} content to {target_language}")
|
| 283 |
-
|
| 284 |
-
except Exception as e:
|
| 285 |
-
logger.warning(f"⚠️ Translation failed for clip {i+1}: {e}")
|
| 286 |
-
|
| 287 |
-
logger.info(f"\n🎬 Processing Clip {i+1}/{len(best_clips)} ({start:.2f} - {end:.2f})...")
|
| 288 |
-
|
| 289 |
-
# Ensure style is a clean string
|
| 290 |
-
style_str = style.value if hasattr(style, "value") else str(style)
|
| 291 |
-
if "." in style_str:
|
| 292 |
-
style_str = style_str.split(".")[-1] # Handle VideoStyle.split_vertical
|
| 293 |
-
|
| 294 |
-
output_filename = f"viral_{i+1}_{style_str}.mp4"
|
| 295 |
-
# Add task_id to filename if provided to avoid collisions
|
| 296 |
-
task_id = kwargs.get("task_id")
|
| 297 |
-
if task_id:
|
| 298 |
-
output_filename = f"viral_{task_id}_{i+1}_{style_str}.mp4"
|
| 299 |
-
|
| 300 |
-
final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", output_filename)
|
| 301 |
-
os.makedirs(os.path.dirname(final_output), exist_ok=True)
|
| 302 |
-
|
| 303 |
-
if start >= video_duration:
|
| 304 |
-
logger.warning(f"⚠️ Clip start time {start} is beyond video duration {video_duration}, skipping.")
|
| 305 |
-
continue
|
| 306 |
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
if w["start"] < end and w["end"] > start:
|
| 351 |
-
nw = w.copy()
|
| 352 |
-
nw["start"] = max(0, w["start"] - start)
|
| 353 |
-
nw["end"] = min(end - start, w["end"] - start)
|
| 354 |
-
new_words.append(nw)
|
| 355 |
-
new_seg["words"] = new_words
|
| 356 |
-
|
| 357 |
-
segment_transcript["segments"].append(new_seg)
|
| 358 |
-
|
| 359 |
-
# 4. Use the optimized apply_with_captions method
|
| 360 |
-
final_clip = style_strategy.apply_with_captions(
|
| 361 |
-
clip,
|
| 362 |
-
transcript_data=segment_transcript,
|
| 363 |
-
language=target_language if needs_translation else detected_lang,
|
| 364 |
-
caption_mode=kwargs.get('caption_mode', 'sentence'),
|
| 365 |
-
caption_style=kwargs.get('caption_style', 'classic'),
|
| 366 |
-
background_path=kwargs.get("background_path"),
|
| 367 |
-
playground_path=kwargs.get("playground_path")
|
| 368 |
-
)
|
| 369 |
-
|
| 370 |
-
# 5. Write Output
|
| 371 |
-
# Automatically use all available CPU cores
|
| 372 |
-
# os.cpu_count() returns None if undetermined, so we default to 4 in that case
|
| 373 |
-
cpu_count = os.cpu_count() or 4
|
| 374 |
-
logger.info(f"⚙️ Using {cpu_count} threads for video rendering")
|
| 375 |
-
|
| 376 |
-
final_clip.write_videofile(
|
| 377 |
-
final_output,
|
| 378 |
-
codec="libx264",
|
| 379 |
-
audio_codec="aac",
|
| 380 |
-
threads=cpu_count,
|
| 381 |
-
logger=None
|
| 382 |
-
)
|
| 383 |
-
|
| 384 |
-
output_files.append(final_output)
|
| 385 |
-
|
| 386 |
-
except Exception as e:
|
| 387 |
-
logger.error(f"❌ Error processing clip {i+1}: {e}")
|
| 388 |
-
logger.error(traceback.format_exc())
|
| 389 |
-
finally:
|
| 390 |
-
# 🧹 Explicit Cleanup
|
| 391 |
-
if final_clip:
|
| 392 |
-
try: final_clip.close()
|
| 393 |
-
except: pass
|
| 394 |
-
if clip:
|
| 395 |
-
try: clip.close()
|
| 396 |
-
except: pass
|
| 397 |
-
if current_video_clip:
|
| 398 |
-
try: current_video_clip.close()
|
| 399 |
-
except: pass
|
| 400 |
-
|
| 401 |
-
# Force garbage collection
|
| 402 |
-
import gc
|
| 403 |
-
gc.collect()
|
| 404 |
-
|
| 405 |
-
except Exception as e:
|
| 406 |
-
logger.error(f"❌ Error in processing loop: {e}")
|
| 407 |
-
logger.error(traceback.format_exc())
|
| 408 |
-
|
| 409 |
-
# Note: We don't close passed video_clip here because we didn't open it (or we treated it as read-only for duration)
|
| 410 |
-
# The caller is responsible for closing video_clip if they passed it.
|
| 411 |
|
| 412 |
return output_files
|
| 413 |
|
| 414 |
-
|
| 415 |
-
#
|
| 416 |
-
# -
|
|
|
|
|
|
|
| 417 |
def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
|
| 418 |
"""
|
| 419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
"""
|
| 421 |
-
video_clip = None
|
| 422 |
try:
|
| 423 |
processor = VideoProcessor(model_size=model_size)
|
| 424 |
-
|
| 425 |
-
# 1. Open Video Clip ONCE
|
| 426 |
-
video_clip = mpe.VideoFileClip(video_path)
|
| 427 |
-
|
| 428 |
-
# 2. Analyze (Reuse video_clip)
|
| 429 |
caption_mode = kwargs.get("caption_mode", "sentence")
|
| 430 |
-
|
| 431 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
viral_segments, duration, stt_data = processor.analyze_impact(
|
| 433 |
-
video_path,
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
timestamp_mode=timestamp_mode
|
| 437 |
)
|
| 438 |
-
|
| 439 |
if not viral_segments:
|
| 440 |
logger.warning("⚠️ No viral segments found.")
|
| 441 |
return []
|
| 442 |
|
| 443 |
-
#
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
|
|
|
| 452 |
)
|
| 453 |
-
|
| 454 |
-
return output_files
|
| 455 |
|
| 456 |
except Exception as e:
|
| 457 |
logger.error(f"❌ Processing failed: {e}")
|
| 458 |
logger.error(traceback.format_exc())
|
| 459 |
return []
|
| 460 |
-
|
| 461 |
-
if video_clip:
|
| 462 |
-
video_clip.close()
|
| 463 |
|
| 464 |
if __name__ == "__main__":
|
| 465 |
import sys
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
VideoProcessor — Core pipeline for viral clip extraction.
|
| 3 |
+
|
| 4 |
+
Fixes applied:
|
| 5 |
+
- source_language (for Whisper) separated from target_language (for translation/captions)
|
| 6 |
+
- Removed duplicate _clean_json_response (json_repair version kept)
|
| 7 |
+
- Single translation pass only (no double-translate on data in-place)
|
| 8 |
+
- timestamp_mode handles highlight_word correctly
|
| 9 |
+
- style string normalised once
|
| 10 |
+
- get_best_segments wired into process_video
|
| 11 |
+
- detected_lang used correctly for captions
|
| 12 |
+
"""
|
| 13 |
import os
|
| 14 |
+
import gc
|
| 15 |
import json
|
| 16 |
import traceback
|
|
|
|
| 17 |
import moviepy.editor as mpe
|
| 18 |
+
import json_repair
|
| 19 |
+
|
| 20 |
+
import core # Applies monkey patches
|
| 21 |
from core.config import Config
|
| 22 |
from core.logger import Logger
|
| 23 |
from core.stt import STT
|
|
|
|
| 25 |
from core.styles import StyleFactory
|
| 26 |
from core.subtitle_manager import SubtitleManager
|
| 27 |
from core.free_translator import FreeTranslator
|
|
|
|
| 28 |
|
| 29 |
logger = Logger.get_logger(__name__)
|
| 30 |
|
| 31 |
+
|
| 32 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 33 |
class VideoProcessor:
|
| 34 |
+
|
| 35 |
def __init__(self, model_size="base"):
|
| 36 |
self.stt = STT(model_size=model_size)
|
| 37 |
Config.setup_dirs()
|
| 38 |
+
|
| 39 |
+
# ── JSON helpers ──────────────────────────────────────────────────────────
|
| 40 |
+
|
| 41 |
def _clean_json_response(self, content):
|
| 42 |
+
"""
|
| 43 |
+
Strips markdown fences then uses json_repair to fix malformed JSON.
|
| 44 |
+
Single definition — json_repair version only.
|
| 45 |
+
"""
|
| 46 |
if not isinstance(content, str):
|
| 47 |
return content
|
| 48 |
+
|
|
|
|
| 49 |
content = content.strip()
|
| 50 |
+
for fence in ("```json", "```"):
|
| 51 |
+
if content.startswith(fence):
|
| 52 |
+
content = content[len(fence):]
|
|
|
|
| 53 |
if content.endswith("```"):
|
| 54 |
content = content[:-3]
|
|
|
|
| 55 |
content = content.strip()
|
| 56 |
+
|
|
|
|
| 57 |
try:
|
| 58 |
+
repaired = json_repair.loads(content)
|
| 59 |
+
return json.dumps(repaired)
|
| 60 |
except Exception as e:
|
| 61 |
+
logger.warning(f"⚠️ json_repair failed, using raw content: {e}")
|
| 62 |
+
|
| 63 |
+
# Last-resort brace balancing
|
| 64 |
+
open_b = content.count("{")
|
| 65 |
+
close_b = content.count("}")
|
| 66 |
+
if open_b > close_b:
|
| 67 |
+
content += "}" * (open_b - close_b)
|
| 68 |
+
logger.info(f"🔧 Appended {open_b - close_b} closing brace(s)")
|
| 69 |
+
|
|
|
|
| 70 |
return content
|
| 71 |
|
| 72 |
def parse_ai_response(self, ai_res):
|
| 73 |
+
"""Parses AI JSON response → list of segment dicts."""
|
|
|
|
|
|
|
|
|
|
| 74 |
if not isinstance(ai_res, dict):
|
| 75 |
+
logger.error(f"❌ Invalid AI response type: {type(ai_res)}")
|
| 76 |
return []
|
| 77 |
+
|
| 78 |
res_content = ai_res.get("content")
|
|
|
|
|
|
|
| 79 |
try:
|
| 80 |
if isinstance(res_content, str):
|
| 81 |
+
segments_data = json.loads(self._clean_json_response(res_content))
|
|
|
|
| 82 |
else:
|
| 83 |
segments_data = res_content
|
| 84 |
+
|
| 85 |
+
if isinstance(segments_data, list):
|
| 86 |
+
return segments_data
|
| 87 |
+
|
| 88 |
if isinstance(segments_data, dict):
|
| 89 |
+
for key in ("segments", "clips", "moments"):
|
| 90 |
if key in segments_data and isinstance(segments_data[key], list):
|
| 91 |
+
return segments_data[key]
|
| 92 |
+
# Fallback: first list value found
|
| 93 |
+
for v in segments_data.values():
|
| 94 |
+
if isinstance(v, list):
|
| 95 |
+
return v
|
| 96 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
except Exception as e:
|
| 98 |
logger.error(f"❌ Failed to parse AI response: {e}")
|
| 99 |
+
logger.error(f"Raw content: {res_content}")
|
| 100 |
+
|
| 101 |
+
return []
|
| 102 |
+
|
| 103 |
+
# ── Analysis ──────────────────────────────────────────────────────────────
|
| 104 |
+
|
| 105 |
+
def analyze_impact(self,
|
| 106 |
+
video_path,
|
| 107 |
+
source_language=None, # ← لغة الفيديو الأصلي → بتيجي لـ Whisper
|
| 108 |
+
target_language=None, # ← لغة الـ output (ترجمة/كابشن)
|
| 109 |
+
timestamp_mode="segments",
|
| 110 |
+
progress_callback=None):
|
| 111 |
+
"""
|
| 112 |
+
STT + AI viral-moment detection.
|
| 113 |
+
|
| 114 |
+
source_language : بيتبعت لـ Whisper مباشرة.
|
| 115 |
+
لو None → Whisper يكتشف تلقائي (أبطأ لكن آمن).
|
| 116 |
+
target_language : بيتحفظ في data عشان process_clips يستخدمه للترجمة والكابشن.
|
| 117 |
+
|
| 118 |
+
Returns (unique_segments, duration, data)
|
| 119 |
+
"""
|
| 120 |
+
if progress_callback:
|
| 121 |
+
progress_callback(5, "Starting speech-to-text...")
|
| 122 |
+
|
| 123 |
+
logger.info(
|
| 124 |
+
f"🎙️ Phase 1: STT | source_language={source_language or 'auto-detect'}"
|
| 125 |
+
)
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
|
| 128 |
+
video_path,
|
| 129 |
+
language=source_language, # None = Whisper يكتشف تلقائي
|
| 130 |
skip_ai=True,
|
| 131 |
+
timestamp_mode=timestamp_mode,
|
| 132 |
)
|
| 133 |
+
|
| 134 |
+
logger.info(f"🔍 Whisper detected language: {detected_lang}")
|
| 135 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
data = {
|
| 137 |
+
"segments": full_segments,
|
| 138 |
+
"detected_language": detected_lang, # اللغة الفعلية اللي Whisper اكتشفها
|
| 139 |
+
"target_language": target_language, # اللغة المطلوبة للـ output
|
| 140 |
+
"duration": duration,
|
| 141 |
}
|
| 142 |
+
|
| 143 |
+
# ── AI Viral Analysis ─────────────────────────────────────────────────
|
| 144 |
+
logger.info("🤖 Phase 2: AI Viral Moment Analysis …")
|
| 145 |
+
if progress_callback:
|
| 146 |
+
progress_callback(20, "Analysing content for viral moments …")
|
| 147 |
+
|
| 148 |
+
chunk_size = Config.CHUNK_SIZE_SECONDS
|
| 149 |
+
overlap = Config.OVERLAP_SECONDS
|
| 150 |
+
max_time = full_segments[-1]["end"] if full_segments else 0
|
| 151 |
+
all_ai_segs = []
|
| 152 |
current_start = 0
|
| 153 |
+
|
| 154 |
while current_start < max_time:
|
| 155 |
+
current_end = current_start + chunk_size
|
|
|
|
| 156 |
chunk_transcript = ""
|
| 157 |
+
|
| 158 |
for seg in full_segments:
|
| 159 |
if seg["start"] >= current_start and seg["start"] < current_end:
|
| 160 |
+
chunk_transcript += (
|
| 161 |
+
f"[{seg['start']:.2f} - {seg['end']:.2f}] {seg['text']}\n"
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
if chunk_transcript.strip():
|
| 165 |
+
pct = 20 + int((current_start / max_time) * 40)
|
| 166 |
+
if progress_callback:
|
| 167 |
+
progress_callback(
|
| 168 |
+
pct,
|
| 169 |
+
f"Analysing {current_start/60:.1f}m – "
|
| 170 |
+
f"{min(current_end, max_time)/60:.1f}m",
|
| 171 |
+
)
|
| 172 |
+
logger.info(
|
| 173 |
+
f"🧠 Chunk {current_start/60:.1f}m → "
|
| 174 |
+
f"{min(current_end, max_time)/60:.1f}m …"
|
| 175 |
+
)
|
| 176 |
+
|
| 177 |
ai_res = analyze_transcript_gemini(chunk_transcript)
|
| 178 |
+
logger.info(f"🤖 AI response type: {type(ai_res)}")
|
| 179 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
try:
|
| 181 |
+
chunk_segs = self.parse_ai_response(ai_res)
|
| 182 |
+
logger.info(f"✅ {len(chunk_segs)} segments in chunk")
|
| 183 |
+
all_ai_segs.extend(chunk_segs)
|
| 184 |
except Exception as e:
|
| 185 |
+
logger.error(f"❌ Chunk processing error: {e}")
|
| 186 |
logger.error(traceback.format_exc())
|
| 187 |
+
|
| 188 |
+
current_start += chunk_size - overlap
|
| 189 |
+
if current_end >= max_time:
|
| 190 |
+
break
|
| 191 |
+
|
| 192 |
+
# Deduplicate by start_time
|
| 193 |
+
seen, unique = set(), []
|
| 194 |
+
for s in all_ai_segs:
|
| 195 |
+
st = s.get("start_time")
|
| 196 |
+
if st not in seen:
|
| 197 |
+
unique.append(s)
|
| 198 |
+
seen.add(st)
|
| 199 |
+
|
| 200 |
+
logger.info(f"📊 Total unique viral segments found: {len(unique)}")
|
| 201 |
+
return unique, duration, data
|
| 202 |
+
|
| 203 |
+
# ── Sorting ───────────────────────────────────────────────────────────────
|
| 204 |
|
| 205 |
def get_best_segments(self, segments, video_duration=0):
|
| 206 |
+
"""Sort segments by viral_score descending."""
|
| 207 |
return sorted(segments, key=lambda x: x.get("viral_score", 0), reverse=True)
|
| 208 |
|
| 209 |
+
# ── Processing ────────────────────────────────────────────────────────────
|
| 210 |
+
|
| 211 |
+
def process_clips(self,
                  input_video_path,
                  best_clips,
                  data,
                  style="cinematic",
                  progress_callback=None,
                  **kwargs):
    """
    Cuts, styles, captions, and exports each viral clip.

    Parameters
    ----------
    input_video_path : str
        Path of the full source video to cut clips from.
    best_clips : list[dict]
        Segments (with ``start_time`` / ``end_time``) to render, ranked order.
    data : dict
        STT result: must contain ``segments``; may contain ``duration``,
        ``detected_language`` and ``target_language`` (set by analyze_impact).
    style : str or Enum
        Visual style name; normalised to a plain string below.
    progress_callback : callable(pct:int, msg:str) or None
        Optional progress reporter (60–95% range during rendering).
    **kwargs
        task_id, caption_mode, caption_style, background_path, playground_path.

    Returns
    -------
    list[str]
        Paths of the exported clip files (possibly empty).

    Notes
    -----
    target_language comes from ``data["target_language"]`` (set by
    analyze_impact). Translation happens exactly once, inside the
    segment_transcript loop — nothing else mutates ``data``.
    """
    logger.info("🎨 Phase 3: Style & Captions …")
    if progress_callback:
        progress_callback(60, "Generating clips …")

    # ── Video duration ────────────────────────────────────────────────────
    # Prefer the duration already measured during STT; fall back to opening
    # the file once (best effort — on failure duration stays 0 and every
    # clip is skipped by the start/end guards below).
    video_duration = data.get("duration") or 0
    if not video_duration:
        try:
            with mpe.VideoFileClip(input_video_path) as tmp:
                video_duration = tmp.duration
        except Exception as e:
            logger.error(f"❌ Could not determine video duration: {e}")

    # ── Language resolution ───────────────────────────────────────────────
    #
    # detected_lang    = actual language of the video (from Whisper)
    # target_language  = requested output language (from the request)
    #
    # needs_translation = True → translate the transcript text
    # caption_lang      = language the captions are rendered in
    #
    detected_lang = data.get("detected_language", "en")
    target_language = data.get("target_language")  # set by analyze_impact

    # normalize: accept an Enum member as well as a plain string
    if hasattr(target_language, "value"):
        target_language = target_language.value

    needs_translation = bool(
        target_language
        and target_language != "auto"
        and target_language != detected_lang
    )

    # Captions use the output language when translating; otherwise the
    # video's original language.
    caption_lang = target_language if needs_translation else detected_lang

    translator = FreeTranslator() if needs_translation else None
    if needs_translation:
        logger.info(f"🌍 Will translate: {detected_lang} → {target_language}")
    else:
        logger.info(f"🗣️ No translation needed — captions in: {caption_lang}")

    # ── Normalise style string once ───────────────────────────────────────
    # Handles Enum members ("StyleEnum.cinematic" → "cinematic").
    style_str = style.value if hasattr(style, "value") else str(style)
    if "." in style_str:
        style_str = style_str.split(".")[-1]

    # ── Main loop ─────────────────────────────────────────────────────────
    output_files = []

    if not best_clips:
        logger.warning("⚠️ No clips to process.")
        return []

    logger.info(f"📊 Processing {len(best_clips)} clip(s) …")

    for i, seg in enumerate(best_clips):
        # Map clip index onto the 60–95% progress range.
        pct = 60 + int((i / len(best_clips)) * 35)
        if progress_callback:
            progress_callback(pct, f"Rendering clip {i+1}/{len(best_clips)} …")

        # Pre-declare so the finally-block can close whatever was opened,
        # even if an exception fires mid-way through the pipeline.
        clip = None
        final_clip = None
        current_video_clip = None

        try:
            # Clamp the requested window into [0, video_duration].
            start = max(0, seg.get("start_time", 0))
            end = min(video_duration, seg.get("end_time", 0))

            if end - start < 1.0:
                logger.warning(
                    f"⚠️ Clip {i+1} too short ({end-start:.2f}s), skipping."
                )
                continue

            if start >= video_duration:
                logger.warning(
                    f"⚠️ Clip {i+1} start {start}s ≥ duration {video_duration}s, skipping."
                )
                continue

            logger.info(f"\n🎬 Clip {i+1}/{len(best_clips)} ({start:.2f}s – {end:.2f}s)")

            # ── Output path ───────────────────────────────────────────────
            task_id = kwargs.get("task_id")
            prefix = f"viral_{task_id}_{i+1}" if task_id else f"viral_{i+1}"
            out_name = f"{prefix}_{style_str}.mp4"
            final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", out_name)
            os.makedirs(os.path.dirname(final_output), exist_ok=True)

            # ── Cut clip (fresh VideoFileClip per iteration) ───────────────
            # A fresh reader per clip avoids moviepy state leaking between
            # iterations; it is closed in the finally-block below.
            current_video_clip = mpe.VideoFileClip(input_video_path)
            clip = current_video_clip.subclip(start, end)

            # ── Build segment_transcript ──────────────────────────────────
            # Translation happens HERE only — nothing else mutates `data`.
            # Timestamps are rebased so 0 = the start of this clip.
            segment_transcript = {"segments": []}

            for s in data["segments"]:
                # Skip transcript segments that do not overlap the clip.
                if s["start"] >= end or s["end"] <= start:
                    continue

                new_seg = s.copy()
                new_seg["start"] = max(0, s["start"] - start)
                new_seg["end"] = min(end - start, s["end"] - start)

                if needs_translation and translator:
                    # Translate the text, then distribute timestamps evenly
                    # over the translated words (original word timings no
                    # longer apply after translation).
                    try:
                        translated_text, _ = translator.translate_text(
                            s["text"], target_language
                        )
                    except Exception as te:
                        # Best effort: keep the original text on failure.
                        logger.warning(f"⚠️ Translation error: {te}")
                        translated_text = s["text"]

                    new_seg["text"] = translated_text
                    words = translated_text.split()
                    seg_dur = new_seg["end"] - new_seg["start"]
                    word_dur = seg_dur / len(words) if words else seg_dur
                    new_seg["words"] = [
                        {
                            "text": w,
                            "start": new_seg["start"] + idx * word_dur,
                            "end": new_seg["start"] + (idx + 1) * word_dur,
                        }
                        for idx, w in enumerate(words)
                    ]
                else:
                    # No translation: shift the existing word timestamps
                    # into clip-relative time, dropping out-of-window words.
                    if "words" in s:
                        new_seg["words"] = [
                            {
                                **w,
                                "start": max(0, w["start"] - start),
                                "end": min(end - start, w["end"] - start),
                            }
                            for w in s["words"]
                            if w["start"] < end and w["end"] > start
                        ]

                segment_transcript["segments"].append(new_seg)

            # ── Apply style + captions ────────────────────────────────────
            style_strategy = StyleFactory.get_style(style_str)
            logger.info(f"✨ Style: {style_str} | Caption lang: {caption_lang}")

            final_clip = style_strategy.apply_with_captions(
                clip,
                transcript_data = segment_transcript,
                language = caption_lang,
                caption_mode = kwargs.get("caption_mode", "sentence"),
                caption_style = kwargs.get("caption_style", "classic"),
                background_path = kwargs.get("background_path"),
                playground_path = kwargs.get("playground_path"),
            )

            # ── Export ────────────────────────────────────────────────────
            cpu_count = os.cpu_count() or 4
            logger.info(f"⚙️ Rendering with {cpu_count} thread(s) …")

            final_clip.write_videofile(
                final_output,
                codec = "libx264",
                audio_codec = "aac",
                threads = cpu_count,
                logger = None,  # silence moviepy's progress bar
            )

            output_files.append(final_output)
            logger.info(f"✅ Saved: {final_output}")

        except Exception as e:
            # One bad clip must not abort the whole batch.
            logger.error(f"❌ Clip {i+1} error: {e}")
            logger.error(traceback.format_exc())

        finally:
            # Close derived clips before the source reader; ignore errors
            # from objects that were never opened or are already closed.
            for obj in (final_clip, clip, current_video_clip):
                if obj:
                    try:
                        obj.close()
                    except Exception:
                        pass
            gc.collect()

    return output_files
|
| 411 |
|
| 412 |
+
|
| 413 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 414 |
+
# Module-level convenience wrapper
|
| 415 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 416 |
+
|
| 417 |
def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
    """
    End-to-end pipeline: STT → AI analysis → clip export.

    Relevant kwargs:
        source_language : language of the source video, forwarded to Whisper
                          (Whisper auto-detects when omitted)
        language        : requested output language (translation + captions);
                          no translation happens when it equals the source
        caption_mode    : sentence | word | highlight_word
        caption_style   : classic | modern_glow | tiktok_bold | ...

    Returns the list of exported clip paths, or [] on failure / no segments.
    """
    try:
        engine = VideoProcessor(model_size=model_size)

        # word and highlight_word both need word-level timestamps from Whisper
        mode = kwargs.get("caption_mode", "sentence")
        ts_mode = "words" if mode in ("word", "highlight_word") else "segments"

        # Phase 1 + 2: STT + AI analysis
        segments, total_duration, stt_data = engine.analyze_impact(
            video_path,
            source_language=kwargs.get("source_language"),  # for Whisper
            target_language=kwargs.get("language"),         # translation + captions
            timestamp_mode=ts_mode,
        )

        if not segments:
            logger.warning("⚠️ No viral segments found.")
            return []

        # Rank by viral score, then Phase 3: render
        ranked = engine.get_best_segments(segments, total_duration)
        return engine.process_clips(
            video_path,
            ranked,
            stt_data,
            style=style,
            **kwargs,
        )

    except Exception as e:
        # Top-level boundary: log and return an empty result instead of raising.
        logger.error(f"❌ Processing failed: {e}")
        logger.error(traceback.format_exc())
        return []
|
| 469 |
+
|
|
|
|
|
|
|
| 470 |
|
| 471 |
if __name__ == "__main__":
|
| 472 |
import sys
|
requirements.txt
CHANGED
|
@@ -15,3 +15,5 @@ imageio-ffmpeg==0.4.8
|
|
| 15 |
openai>=1.0.0
|
| 16 |
scipy
|
| 17 |
json_repair
|
|
|
|
|
|
|
|
|
| 15 |
openai>=1.0.0
|
| 16 |
scipy
|
| 17 |
json_repair
|
| 18 |
+
cryptography
|
| 19 |
+
firebase-admin
|