aliSaac510 commited on
Commit
342e0fb
·
0 Parent(s):

Update: Auto-thread support and Task Queue

Browse files
.dockerignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .Python
6
+ env
7
+ venv
8
+ .env
9
+ .git
10
+ .gitignore
11
+ uploads/
12
+ outputs/
13
+ temp/
14
+ logs/
15
+ .DS_Store
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.ttf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ venv/
6
+ .env
7
+ .venv/
8
+
9
+ # Project Specific
10
+ uploads/*
11
+ outputs/*
12
+ tmp/*
13
+ temp/*
14
+ logs/*
15
+ fonts/*
16
+ !uploads/.gitkeep
17
+ !outputs/.gitkeep
18
+ !tmp/.gitkeep
19
+ !temp/.gitkeep
20
+ !logs/.gitkeep
21
+ *.mp4
22
+ *.mp3
23
+ *.wav
24
+ *.ttf
25
+
26
+ # Test files
27
+ test_*.py
28
+ test.py
29
+ my_movie.mp4
30
+ My Recording_1.mp4
31
+
32
+ # OS
33
+ .DS_Store
34
+ Thumbs.db
35
+ .idea/
36
+ .vscode/
Dockerfile ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set environment variables
# PYTHONDONTWRITEBYTECODE: no .pyc files; PYTHONUNBUFFERED: flush logs immediately.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PORT=7860

# Install system dependencies
# ffmpeg/imagemagick for video and caption rendering; libgl1/libglib2.0-0 and
# the X libs are runtime requirements of OpenCV (cv2) in a headless container.
RUN apt-get update && apt-get install -y \
    ffmpeg \
    imagemagick \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Fix ImageMagick policy to allow processing (checking both version 6 and 7)
# The default Debian policy denies read/write on "@*" path patterns, which
# blocks MoviePy's TextClip rendering.
RUN if [ -f /etc/ImageMagick-6/policy.xml ]; then \
    sed -i 's/domain="path" rights="none" pattern="@\*"/domain="path" rights="read|write" pattern="@\*"/g' /etc/ImageMagick-6/policy.xml; \
    fi; \
    if [ -f /etc/ImageMagick-7/policy.xml ]; then \
    sed -i 's/domain="path" rights="none" pattern="@\*"/domain="path" rights="read|write" pattern="@\*"/g' /etc/ImageMagick-7/policy.xml; \
    fi

# Create a non-root user
# NOTE(review): UID 1000 looks like the Hugging Face Spaces convention — confirm.
RUN useradd -m -u 1000 user

# Set working directory and ownership
WORKDIR /app
RUN chown -R user:user /app

# Switch to non-root user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

# Copy requirements and install
# Copied separately from the app code so the pip layer caches across code edits.
COPY --chown=user:user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the rest of the application
COPY --chown=user:user . /app

# Create necessary directories with correct permissions
RUN mkdir -p uploads outputs/viral_clips temp logs fonts && \
    chmod -R 755 uploads outputs/viral_clips temp logs fonts

# Expose the port
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Auto Clipper
3
+ emoji: 🎬
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 7860
9
+ ---
10
+
11
+ # Auto Clipper AI 🚀
12
+
13
+ An automated AI tool to extract viral clips from long videos using Faster-Whisper and Llama-3.
14
+
15
+ ## Features
16
+ - 🎙️ Automatic Speech Recognition (STT)
17
+ - 🤖 AI-powered viral segment analysis
18
+ - ✨ Multiple video styles (Cinematic Blur, Split Screen, Smart Crop)
19
+ - 📝 Automatic TikTok-style captions
20
+
21
+ ## Local Setup
22
+ 1. Clone the repository
23
+ 2. Install dependencies: `pip install -r requirements.txt`
24
+ 3. Set up `.env` with your `GROQ_API_KEY`
25
+ 4. Run: `uvicorn main:app --port 8000`
26
+
27
+ ## API Endpoints
28
+ - `POST /auto-clip`: Upload a video and get viral clips.
core/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import moviepy.editor as mpe

# Compatibility shim: MoviePy 2.x renamed several clip methods
# (set_audio -> with_audio, resize -> resized, subclip -> subclipped).
# This project calls the 2.x names, so when running under MoviePy 1.x we
# alias each new name to the old implementation.  Each alias is guarded so
# that a 2.x install (where the new names already exist) is left untouched.
#
# Bug fixed: the original `if not hasattr(mpe, 'AudioClip'): mpe.AudioClip =
# mpe.AudioClip` raised AttributeError exactly when the attribute was missing
# (and was a no-op otherwise), and the unconditional assignments below it
# crashed on MoviePy 2.x where `set_audio`/`resize`/`subclip` no longer exist.
if not hasattr(mpe.VideoFileClip, 'with_audio'):
    mpe.VideoFileClip.with_audio = mpe.VideoFileClip.set_audio
if not hasattr(mpe.VideoFileClip, 'resized'):
    mpe.VideoFileClip.resized = mpe.VideoFileClip.resize
if not hasattr(mpe.VideoFileClip, 'subclipped'):
    mpe.VideoFileClip.subclipped = mpe.VideoFileClip.subclip
if not hasattr(mpe.AudioFileClip, 'subclipped'):
    mpe.AudioFileClip.subclipped = mpe.AudioFileClip.subclip
core/analyze.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

# OpenRouter API setup.
# The key is loaded from the .env file (make sure OPENROUTER_API_KEY is set).
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY")
)
14
+
15
def analyze_transcript_gemini(transcript):
    """Analyze a timestamped transcript and return candidate viral segments.

    Despite its historical name (kept for backward compatibility with existing
    callers), the request is sent to DeepSeek via OpenRouter, not Gemini.

    Args:
        transcript: Transcript text, one "[start - end] text" entry per line.

    Returns:
        dict: {"content": <raw model output string>}.  After all retries fail
        the content is the literal JSON string '{"segments": []}' so that
        downstream JSON parsing still succeeds.
    """
    prompt = f"""
    You are an expert video editor and viral content strategist. Your task is to identify the most engaging segments from the provided transcript that are suitable for short-form video platforms like TikTok, Reels, and YouTube Shorts.

    STRICT JSON OUTPUT FORMAT REQUIRED:
    You must output ONLY valid JSON. Do not include any markdown formatting (like ```json ... ```), explanations, or additional text outside the JSON object.

    The JSON structure must be exactly as follows:
    {{
        "segments": [
            {{
                "start_time": <float, start time in seconds>,
                "end_time": <float, end time in seconds>,
                "duration": <float, duration in seconds>,
                "description": "<string, brief summary of the clip content 10 words max>",
                "viral_score": <float, score from 0-10 indicating viral potential>,
                "reason": "<string, explanation of why this segment is engaging>"
            }}
        ]
    }}

    SELECTION CRITERIA:
    1. **Standalone Quality**: Each clip must make sense on its own without prior context. Avoid starting with conjunctions like "And", "But", "So" unless they are part of a complete thought.
    2. **Engagement**: Look for strong hooks, emotional moments, humor, surprising facts, or actionable advice.
    3. **Duration**: Prioritize clips between 30 and 180 seconds.
    4. **Completeness**: Ensure the clip has a clear beginning and end. Do not cut off sentences.

    IMPORTANT:
    - Return valid JSON only.
    - If no suitable segments are found, return {{ "segments": [] }}.
    - Ensure all strings are properly escaped.

    Transcript to Analyze:
    {transcript}
    """

    max_retries = 3
    base_delay = 5  # seconds; doubled on each retry (exponential backoff)

    for attempt in range(max_retries):
        try:
            # DeepSeek-V3 via OpenRouter.
            model_name = "deepseek/deepseek-v3.2"

            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                extra_headers={
                    "HTTP-Referer": "https://github.com/Start-To-End-AI",  # Optional. Site URL for rankings on openrouter.ai.
                    "X-Title": "Video Clipper AI",  # Optional. Site title for rankings on openrouter.ai.
                },
                extra_body={
                    "reasoning": {"enabled": True}
                },
                temperature=0.7,
            )

            return {"content": response.choices[0].message.content}

        except Exception as e:
            print(f"❌ Error in OpenRouter analysis: {e}")

            # Back off and retry unless this was the final attempt.
            if attempt < max_retries - 1:
                wait_time = base_delay * (2 ** attempt)
                print(f"⚠️ Retrying in {wait_time}s... (Attempt {attempt + 1}/{max_retries})")
                time.sleep(wait_time)

    # Fallback so callers that json.loads() the content never crash.
    print("❌ All retry attempts failed.")
    return {"content": '{"segments": []}'}
96
+
97
+
98
+
99
# Manual smoke test: run this module directly to exercise the analysis call.
if __name__ == "__main__":
    # Minimal sample transcript in the "[start - end] text" format.
    test_transcript = "[0.0 - 5.0] This is amazing content about viral videos!"
    result = analyze_transcript_gemini(test_transcript)
    print("Gemini Analysis Result:", result)
core/config.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+
4
class Config:
    """Project-wide configuration: directory layout, downloadable fonts,
    language-to-font mapping and video rendering settings."""

    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    TEMP_DIR = os.path.join(BASE_DIR, "temp")
    UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
    OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
    LOGS_DIR = os.path.join(BASE_DIR, "logs")

    # Font URLs - Google Fonts CSS API
    # We use the CSS API to get the correct WOFF2/TTF file
    FONTS = {
        "Roboto-Bold.ttf": "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",
        "NotoSansArabic-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
        "NotoSansSC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
        "NotoSansJP-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
        "NotoSansDevanagari-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
        "Cairo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
        "Montserrat-Bold.ttf": "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap"
    }

    # Dynamic Language to Font Mapping
    LANGUAGE_FONT_MAP = {
        "ar": "NotoSansArabic-Bold.ttf",        # Arabic
        "zh": "NotoSansSC-Bold.ttf",            # Chinese
        "ja": "NotoSansJP-Bold.ttf",            # Japanese
        "hi": "NotoSansDevanagari-Bold.ttf",    # Hindi
        "ru": "Roboto-Bold.ttf",                # Russian (Supported by Roboto)
        "en": "Roboto-Bold.ttf",                # English
        "default": "Roboto-Bold.ttf"
    }

    # Video Settings
    DEFAULT_SIZE = (1080, 1920)
    CHUNK_SIZE_SECONDS = 600
    OVERLAP_SECONDS = 60

    # Styles
    STYLES = [
        "cinematic",
        "cinematic_blur",
        "vertical_full",
        "split_vertical",
        "split_horizontal"
    ]

    @classmethod
    def setup_dirs(cls):
        """Create all working directories if they do not already exist."""
        for d in [cls.TEMP_DIR, cls.UPLOADS_DIR, cls.OUTPUTS_DIR, cls.LOGS_DIR]:
            os.makedirs(d, exist_ok=True)

    @staticmethod
    def get_urls(content):
        """Extract every url(...) value from a CSS document.

        Fixed vs. the original scanner: requires the full ``url(`` marker
        (a bare ``url`` substring inside other text no longer triggers a
        match) and ignores an unterminated ``url(`` at the end of a
        truncated document instead of raising IndexError.
        """
        urls = []
        pos = 0
        while True:
            start = content.find('url(', pos)
            if start == -1:
                break
            end = content.find(')', start + 4)
            if end == -1:
                # Unterminated url( — truncated CSS; ignore the tail.
                break
            urls.append(content[start + 4:end])
            pos = end + 1
        return urls

    @staticmethod
    def download_font_from_css(css_url, output_path):
        """Download the first font file referenced by a Google Fonts CSS URL.

        Args:
            css_url: Google Fonts CSS2 API URL.
            output_path: Where to write the downloaded font file.

        Returns:
            True on success, False on any failure (logged, never raised).
        """
        try:
            # 1. Fetch CSS content
            # Add User-Agent to avoid getting minimal CSS or being blocked
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
            # Timeout added: without one a hung connection blocks forever.
            response = requests.get(css_url, headers=headers, timeout=30)
            response.raise_for_status()
            content = response.text

            # 2. Extract URLs
            urls = Config.get_urls(content)

            if not urls:
                print(f"❌ No font URLs found in CSS: {css_url}")
                return False

            # 3. Download the first font found (usually the most specific/relevant or primary subset)
            # For Arabic fonts like Cairo/NotoSansArabic, the first subset is usually the Arabic one.
            font_url = urls[0]

            print(f"⬇️ Downloading font from: {font_url}")
            font_response = requests.get(font_url, headers=headers, timeout=60)
            font_response.raise_for_status()

            with open(output_path, 'wb') as f:
                f.write(font_response.content)

            print(f"✅ Font saved to: {output_path}")
            return True

        except Exception as e:
            print(f"❌ Failed to download font from CSS {css_url}: {e}")
            return False
core/free_translator.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import urllib.request
4
+ import urllib.parse
5
+
6
class FreeTranslator:
    """Free text translation via the public MyMemory API, using only urllib
    (no httpx/requests dependency)."""

    def __init__(self):
        pass

    def translate_text(self, text, target_language_code):
        """Translate *text* into the target language.

        NOTE(review): the langpair hard-codes English ("en") as the source
        language — confirm callers only feed English text here.

        Args:
            text: Source text (assumed English).
            target_language_code: Target language code, e.g. "ar", "fr".

        Returns:
            (translated_text, highlight_words): the translation plus a short
            list of words worth visually highlighting in captions.  On any
            failure the original text is returned with no highlights.
        """
        if not text.strip():
            return "", []

        # The original identity lang_map ({"ar": "ar", ...} with an identity
        # fallback) was dead code and has been removed — the target code is
        # passed straight through, exactly as before.
        try:
            # Build the request with urllib to avoid the httpx issue.
            url = "https://api.mymemory.translated.net/get"
            params = {
                'q': text,
                'langpair': f'en|{target_language_code}'
            }
            full_url = url + '?' + urllib.parse.urlencode(params)

            req = urllib.request.Request(full_url)
            req.add_header('User-Agent', 'Mozilla/5.0')

            with urllib.request.urlopen(req, timeout=10) as response:
                if response.status == 200:
                    data = json.loads(response.read().decode())
                    if data.get('responseStatus') == 200:
                        translated_text = data['responseData']['translatedText']
                        return translated_text, self._pick_highlights(translated_text)

            # Fallback: return the original, untranslated text.
            return text, []

        except Exception as e:
            print(f"⚠️ Error in free translation: {e}")
            return text, []

    @staticmethod
    def _pick_highlights(translated_text):
        """Choose caption-highlight words: known 'exciting' words first;
        otherwise fall back to the two longest words."""
        words = translated_text.split()

        # Common high-energy words (English + Arabic).
        exciting_words = [
            "amazing", "incredible", "awesome", "fantastic", "perfect", "best", "ultimate",
            "رائع", "مذهل", "أفضل", "مثالي", "خرافي", "لا يصدق", "عجيب"
        ]

        highlight_words = [w for w in words if w.lower().strip(".,!?") in exciting_words]

        # No exciting words found: highlight the two longest words instead.
        if not highlight_words and len(words) >= 2:
            highlight_words = sorted(words, key=len, reverse=True)[:2]

        return highlight_words
core/logger.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from datetime import datetime
4
+
5
class Logger:
    """Lazy, one-time configuration of the application's logging handlers."""

    _instance = None

    @staticmethod
    def get_logger(name="AppLogger"):
        """Return a named logger, configuring the root handlers on first use."""
        if not getattr(Logger, "_is_configured", False):
            Logger._setup_handlers()
            Logger._is_configured = True
        return logging.getLogger(name)

    @staticmethod
    def _setup_handlers():
        """Attach a console handler and a dated file handler to the root logger."""
        root = logging.getLogger()
        root.setLevel(logging.INFO)

        # Drop any pre-existing handlers so repeated setup never duplicates output.
        root.handlers.clear()

        # Console output.
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        console.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        root.addHandler(console)

        # File output (best-effort: failure falls back to console-only logging).
        try:
            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            log_dir = os.path.join(base_dir, "logs")
            os.makedirs(log_dir, exist_ok=True)
            log_path = os.path.join(log_dir, f"{datetime.now().strftime('%Y-%m-%d')}.log")
            file_handler = logging.FileHandler(log_path, encoding='utf-8')
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
            root.addHandler(file_handler)
        except Exception as e:
            print(f"Failed to setup file logging: {e}")
45
+
core/stt.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import moviepy.editor as mpe
3
+ import os
4
+ import sys
5
+ import json
6
+ from datetime import datetime
7
+
8
+ # إضافة المسار الجذري للمشروع لضمان استيراد الموديولات بشكل صحيح
9
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
10
+
11
# Use Faster-Whisper (chosen to work around the httpx dependency issue).
# Degrade gracefully if the package is missing so the module still imports;
# STT.__init__ raises ImportError in that case.
try:
    from faster_whisper import WhisperModel
    faster_whisper_available = True
except ImportError:
    print("⚠️ Faster-Whisper not available, please install: pip install faster-whisper")
    faster_whisper_available = False
18
+
19
class STT:
    """Speech-to-text via Faster-Whisper with on-disk transcript caching.

    The Whisper model is loaded once at construction (GPU with float16 when
    available, otherwise int8 CPU).  get_transcript() returns
    (segments, full_text, duration_seconds, detected_language).
    """

    def __init__(self, model_size="base"):
        # NOTE(review): self.duration is never updated inside this class —
        # the duration is returned from get_transcript() instead; presumably legacy.
        self.duration = 0
        self.model_size = model_size
        if not faster_whisper_available:
            raise ImportError("Faster-Whisper is not available")

        # Load the model once when the class is initialised, with GPU support for speed.
        print(f"🚀 Loading Faster-Whisper Model ({model_size})...")
        try:
            # Try GPU first for a large speedup.
            self.model = WhisperModel(model_size, device="cuda", compute_type="float16")
            print("✅ Using GPU for faster processing")
        except Exception as e:
            # Fall back to quantised CPU inference.
            print(f"⚠️ GPU not available, using CPU with {model_size} model: {e}")
            self.model = WhisperModel(model_size, device="cpu", compute_type="int8")

    def get_transcript(self, video_path: str, language: str = None, skip_ai: bool = False, timestamp_mode="segments"):
        """Transcribe a video to timed text using Faster-Whisper.

        Args:
            video_path: Path to the media file to transcribe.
            language: Language code, a Language enum value, or 'auto'/None for
                automatic detection.
            skip_ai: Accepted for interface compatibility; not used in the
                active code path here.
            timestamp_mode: "words" for per-word timings, "segments" for
                full-sentence chunks.

        Returns:
            (segments_list, full_text, duration_seconds, detected_language)
        """
        print(f"🎙️ Transcribing: {video_path} (Language: {language if language else 'Auto'}, Mode: {timestamp_mode})")

        # Transcript session log (logs/transcript.log under the project root).
        log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs", "transcript.log")

        # Normalise the language argument to a plain Whisper language code.
        actual_stt_lang = None
        if language:
            # Accept enum objects (e.g. Language.ar) as well as plain strings.
            if hasattr(language, 'value'):
                lang_val = language.value
            else:
                lang_val = str(language)

            # 'auto' means let Whisper detect the language itself.
            if lang_val != 'auto':
                actual_stt_lang = lang_val
            else:
                actual_stt_lang = None  # Whisper will auto-detect

        print(f"🔍 STT Debug - Language param: {language} -> actual_stt_lang: {actual_stt_lang}")

        # -------------------------------------------------------------------------
        # ⚡ PERFORMANCE CACHING START
        # -------------------------------------------------------------------------
        import hashlib

        # Create a unique cache key based on file properties and parameters
        try:
            file_stat = os.stat(video_path)
            unique_str = f"{video_path}_{file_stat.st_size}_{file_stat.st_mtime}_{actual_stt_lang}_{timestamp_mode}"
            file_hash = hashlib.md5(unique_str.encode()).hexdigest()

            cache_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "temp", "stt_cache")
            os.makedirs(cache_dir, exist_ok=True)
            cache_path = os.path.join(cache_dir, f"{file_hash}.json")

            if os.path.exists(cache_path):
                print(f"🚀 PERFORMANCE: Loading cached transcript from {cache_path}")
                try:
                    with open(cache_path, "r", encoding="utf-8") as f:
                        cached_data = json.load(f)
                    print(f"✅ Cache Hit! Skipping Whisper processing.")
                    return cached_data["segments"], cached_data["text"], cached_data["duration"], cached_data["language"]
                except Exception as e:
                    # Corrupted cache entry: fall through and re-transcribe.
                    print(f"⚠️ Cache file corrupted, re-processing: {e}")
        except Exception as e:
            print(f"⚠️ Could not setup caching: {e}")
        # -------------------------------------------------------------------------
        # ⚡ PERFORMANCE CACHING END
        # -------------------------------------------------------------------------

        # Whether to ask Whisper for per-word timestamps.
        word_timestamps = timestamp_mode == "words"

        print(f"🔍 STT Debug - Video: {os.path.basename(video_path)}")
        print(f"🔍 STT Debug - Model size: {self.model_size}")
        print(f"🔍 STT Debug - Beam size: 1, Word timestamps: {word_timestamps}")
        print(f"🔍 STT Debug - Starting transcription...")

        segments, info = self.model.transcribe(
            video_path,
            beam_size=1,
            word_timestamps=word_timestamps,
            language=actual_stt_lang,
            vad_filter=True,  # drop silent stretches before decoding
            vad_parameters=dict(min_silence_duration_ms=500)
        )
        detected_lang = info.language

        print(f"🔍 STT Debug - Detected language: {detected_lang}")
        print(f"🔍 STT Debug - Processing segments...")

        segments_list = []
        full_text = ""

        # Write a session header to the transcript log (best-effort).
        try:
            with open(log_file, "a", encoding="utf-8") as f:
                f.write(f"\n{'='*60}\n")
                f.write(f"🎙️ TRANSCRIPT SESSION\n")
                f.write(f"📅 Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"📹 Video: {os.path.basename(video_path)}\n")
                f.write(f"🌍 Language: {actual_stt_lang if actual_stt_lang else 'Auto-detected'}\n")
                f.write(f"🎯 Mode: {timestamp_mode}\n")
                f.write(f"{'='*60}\n\n")
        except Exception as e:
            print(f"⚠️ Could not open log file: {e}")

        # Import the free translation helper only (optional within the project).
        try:
            from core.free_translator import FreeTranslator
            translator = FreeTranslator()
            use_free_translator = True
        except ImportError:
            print("⚠️ Free translator not available, using original text")
            use_free_translator = False

        # Whether AI post-processing (translation / highlighting) is needed.
        # The auto-detected language is used, not the requested one.
        # Translation happens in a separate stage in processor.py, so this
        # inline path is permanently disabled here.
        needs_ai_processing = False  # disabled — translation is done in processor.py

        # Make sure the language is a plain string and not an Enum object.
        if hasattr(language, 'value'):
            lang_str = language.value
        else:
            lang_str = str(language) if language else detected_lang

        # Process the transcription results segment by segment.
        for segment in segments:
            segment_text = segment.text.strip()

            # Log the raw segment (best-effort).
            try:
                with open(log_file, "a", encoding="utf-8") as f:
                    f.write(f"[{segment.start:.2f} - {segment.end:.2f}] {segment_text}\n")
            except Exception as e:
                print(f"⚠️ Could not write to log file: {e}")

            if needs_ai_processing and use_free_translator:
                # (Dead path while needs_ai_processing is False above — kept
                # for potential re-enabling of inline translation.)
                print(f"🧠 AI Processing ({detected_lang} -> {lang_str}): {segment_text[:50]}...")
                processed_text, highlight_words = translator.translate_text(segment_text, lang_str)

                if timestamp_mode == "words" and segment.words:
                    # Word mode: reuse the original word timings for the
                    # translated words, paired by position.
                    target_words = processed_text.split()
                    words_list = []

                    for i, word_info in enumerate(segment.words):
                        if i < len(target_words):
                            is_highlight = any(h in target_words[i] for h in highlight_words)
                            words_list.append({
                                "text": target_words[i],
                                "start": word_info.start,
                                "end": word_info.end,
                                "is_highlight": is_highlight
                            })

                    segments_list.append({
                        "text": processed_text,
                        "start": segment.start,
                        "end": segment.end,
                        "words": words_list
                    })

                else:
                    # Sentence mode: smart splitting, as before.
                    target_words = processed_text.split()

                    # --- Professional sub-segmenting (Max 5 words or 3 seconds) ---
                    MAX_WORDS_PER_SEGMENT = 5
                    MAX_DURATION_PER_SEGMENT = 3.0

                    current_sub_words = []
                    segment_duration = segment.end - segment.start
                    # Word timings are spread evenly across the segment.
                    avg_word_duration = segment_duration / max(len(target_words), 1)

                    for i, w in enumerate(target_words):
                        is_highlight = any(h in w for h in highlight_words)
                        word_data = {
                            "text": w,
                            "start": segment.start + (i * avg_word_duration),
                            "end": segment.start + ((i + 1) * avg_word_duration),
                            "is_highlight": is_highlight
                        }
                        current_sub_words.append(word_data)

                        # Flush on word-count or duration limits, or at the end.
                        current_duration = current_sub_words[-1]["end"] - current_sub_words[0]["start"]

                        if len(current_sub_words) >= MAX_WORDS_PER_SEGMENT or current_duration >= MAX_DURATION_PER_SEGMENT or i == len(target_words) - 1:
                            sub_segment_text = " ".join([sw["text"] for sw in current_sub_words])
                            segments_list.append({
                                "text": sub_segment_text,
                                "start": current_sub_words[0]["start"],
                                "end": current_sub_words[-1]["end"],
                                "words": current_sub_words.copy()
                            })
                            full_text += sub_segment_text + " "
                            current_sub_words = []
                    # ----------------------------------------------------------
            else:
                # No AI processing (this is the active path).
                if timestamp_mode == "words" and segment.words:
                    # Word mode without translation.
                    words_list = []
                    for word_info in segment.words:
                        words_list.append({
                            "text": word_info.word.strip(),
                            "start": word_info.start,
                            "end": word_info.end,
                            "is_highlight": False
                        })

                    segments_list.append({
                        "text": segment_text,
                        "start": segment.start,
                        "end": segment.end,
                        "words": words_list
                    })
                else:
                    # Sentence mode without translation.
                    words = []
                    if segment.words:
                        for word in segment.words:
                            words.append({
                                "text": word.word.strip(),
                                "start": word.start,
                                "end": word.end,
                                "is_highlight": False
                            })

                    segments_list.append({
                        "text": segment_text,
                        "start": segment.start,
                        "end": segment.end,
                        "words": words
                    })

                full_text += segment_text + " "

        # Write the session summary to the log (best-effort).
        try:
            with open(log_file, "a", encoding="utf-8") as f:
                f.write(f"\n{'='*60}\n")
                f.write(f"📊 SUMMARY:\n")
                f.write(f"📝 Total Segments: {len(segments_list)}\n")
                f.write(f"⏱️ Total Duration: {info.duration:.2f} seconds\n")
                f.write(f"🌍 Detected Language: {detected_lang}\n")
                f.write(f"📄 Full Text Length: {len(full_text)} characters\n")
                f.write(f"🎯 Processing Mode: {timestamp_mode}\n")
                f.write(f"{'='*60}\n\n")
        except Exception as e:
            print(f"⚠️ Could not complete log file: {e}")

        print(f"✅ STT Completed: {len(segments_list)} segments, language: {detected_lang}")

        # -------------------------------------------------------------------------
        # ⚡ PERFORMANCE CACHING SAVE
        # -------------------------------------------------------------------------
        # NOTE(review): cache_path may be unbound here if the caching setup
        # above failed before assigning it; the resulting NameError is
        # swallowed by this try/except, so the effect is a silent cache skip.
        try:
            with open(cache_path, "w", encoding="utf-8") as f:
                json.dump({
                    "segments": segments_list,
                    "text": full_text,
                    "duration": info.duration,
                    "language": detected_lang
                }, f, ensure_ascii=False)
            print(f"💾 Transcript cached to {cache_path}")
        except Exception as e:
            print(f"⚠️ Failed to save cache: {e}")
        # -------------------------------------------------------------------------

        return segments_list, full_text, info.duration, detected_lang

    def __call_whisper__(self, audio_path, language=None, skip_ai=False):
        """Compatibility wrapper matching the direct-call shape used in processor.py."""
        segments_list, full_text, duration, detected_lang = self.get_transcript(audio_path, language=language, skip_ai=skip_ai)
        return {"segments": segments_list, "detected_language": detected_lang, "duration": duration}
core/styles.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ import os
3
+ import cv2
4
+ import moviepy.editor as mpe
5
+ from .config import Config
6
+ from .logger import Logger
7
+ from .subtitle_manager import SubtitleManager
8
+
9
+ logger = Logger.get_logger(__name__)
10
+
11
class SmartFaceCropper:
    """Crops a landscape frame to a vertical window centred on the largest
    detected face, with exponential smoothing of the horizontal position
    so the crop does not jitter between frames."""

    def __init__(self, output_size=(1080, 1920)):
        self.output_size = output_size
        # Haar cascade face detector shipped with OpenCV.
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        # Last detected face rect (fx, fy, fw, fh), full-resolution coords.
        self.last_coords = None
        # Exponentially-smoothed crop-centre x position.
        self.smoothed_x = None
        # EMA factor: 0.2 of the new position per update (lower = steadier).
        self.smoothing = 0.2
        self.frame_count = 0

    def get_crop_coordinates(self, frame):
        """Return (left, top, right, bottom) of the vertical crop window for *frame*.

        Mutates self.smoothed_x / self.last_coords as a side effect.
        """
        h, w = frame.shape[:2]
        # Crop width that preserves the target aspect ratio at full frame height.
        target_w = int(h * self.output_size[0] / self.output_size[1])
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Detect on a half-size frame for speed; coordinates are scaled back below.
        small_gray = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
        faces = self.face_cascade.detectMultiScale(small_gray, 1.1, 8, minSize=(50, 50))

        if len(faces) > 0:
            # Track the largest face (by area).
            faces = sorted(faces, key=lambda f: f[2]*f[3], reverse=True)
            # Scale half-resolution detection back to full-resolution coords.
            fx, fy, fw, fh = [v * 2 for v in faces[0]]
            current_center_x = fx + fw // 2
            self.last_coords = (fx, fy, fw, fh)
        else:
            # No face: hold the previous smoothed position (or frame centre).
            current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x

        # Exponential moving average of the crop centre.
        if self.smoothed_x is None:
            self.smoothed_x = current_center_x
        else:
            self.smoothed_x = self.smoothed_x * (1 - self.smoothing) + current_center_x * self.smoothing

        # Clamp the window inside the frame.
        left = int(self.smoothed_x - target_w // 2)
        left = max(0, min(left, w - target_w))
        return left, 0, left + target_w, h

    def apply_to_clip(self, clip):
        """Return *clip* with the smart vertical crop applied per frame.

        Face detection only runs every `frame_skip` frames; in between, the
        cached smoothed position is reused to save CPU.
        """
        frame_skip = 5

        def filter_frame(get_frame, t):
            frame = get_frame(t)
            self.frame_count += 1

            if self.frame_count % frame_skip == 0 or self.last_coords is None:
                # Full detection pass (updates smoothing state).
                left, top, right, bottom = self.get_crop_coordinates(frame)
            else:
                # Cheap pass: recompute the window from the cached centre only.
                h, w = frame.shape[:2]
                target_w = int(h * self.output_size[0] / self.output_size[1])
                left = int(self.smoothed_x - target_w // 2) if self.smoothed_x else w // 2 - target_w // 2
                left = max(0, min(left, w - target_w))
                right = left + target_w

            cropped = frame[:, left:right]
            return cv2.resize(cropped, self.output_size)
        return clip.fl(filter_frame)
63
+
64
class BaseStyle(ABC):
    """Abstract base class for video styles; also handles caption overlay."""

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        """Apply the concrete style to *clip* and return the styled clip."""
        pass

    # --------------------------------------------------------------------------
    # Combines Style + Captions in ONE CompositeVideoClip.
    # --------------------------------------------------------------------------
    def apply_with_captions(self, clip, transcript_data=None, language=None, caption_mode="sentence", **kwargs):
        """Style *clip* and overlay captions in a single composition pass.

        Flattening the caption layers into the style's own composite avoids
        nesting one CompositeVideoClip inside another (double rendering).
        """
        base = self.apply(clip, **kwargs)

        # Nothing to overlay -> return the styled clip untouched.
        if not transcript_data:
            return base

        overlays = self._create_caption_clips(transcript_data, language, caption_mode)
        if not overlays:
            return base

        if isinstance(base, mpe.CompositeVideoClip):
            # Flatten: reuse the style's own layers plus the caption layers.
            # The layer list is copied so the style's composite is not mutated.
            layers = list(base.clips) + overlays
        else:
            # Plain clip: it becomes the single background layer.
            layers = [base] + overlays
        return mpe.CompositeVideoClip(layers, size=self.output_size)
    # --------------------------------------------------------------------------

    def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
        """Deprecated: prefer apply_with_captions(); kept for backward compatibility."""
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip,
            transcript_data,
            size=self.output_size,
            language=language,
            caption_mode=caption_mode
        )

    def _create_caption_clips(self, transcript_data, language=None, caption_mode="sentence"):
        """Build just the caption clips list (no compositing)."""
        return SubtitleManager.create_caption_clips(
            transcript_data,
            size=self.output_size,
            language=language,
            caption_mode=caption_mode
        )
126
+
127
class CinematicStyle(BaseStyle):
    """Letterbox the clip over a user-supplied background (image or video),
    falling back to a solid black canvas when none is given."""

    _VIDEO_EXTS = ('.mp4', '.avi', '.mov', '.mkv', '.webm')

    def apply(self, clip, background_path=None, **kwargs):
        backdrop = self._build_background(clip, background_path)

        # Fit the foreground to the canvas width first, then shrink by
        # height if that made it taller than the canvas.
        foreground = clip.resize(width=self.output_size[0]).set_position("center")
        if foreground.h > self.output_size[1]:
            foreground = clip.resize(height=self.output_size[1]).set_position("center")

        return mpe.CompositeVideoClip([backdrop, foreground], size=self.output_size)

    def _build_background(self, clip, background_path):
        """Return a background layer that fills the output canvas for the
        whole duration of ``clip``."""
        if not (background_path and os.path.exists(background_path)):
            return mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)

        extension = os.path.splitext(background_path)[1].lower()
        if extension in self._VIDEO_EXTS:
            layer = mpe.VideoFileClip(background_path).without_audio().resize(height=self.output_size[1])
            # Loop short backgrounds, trim long ones, to match the clip.
            if layer.duration < clip.duration:
                layer = layer.loop(duration=clip.duration)
            else:
                layer = layer.subclip(0, clip.duration)
        else:
            layer = mpe.ImageClip(background_path).set_duration(clip.duration).resize(height=self.output_size[1])

        # Span the full canvas width: center-crop when too wide, stretch when
        # too narrow.
        if layer.w > self.output_size[0]:
            return layer.crop(x_center=layer.w / 2, width=self.output_size[0])
        return layer.resize(width=self.output_size[0])
155
+
156
class CinematicBlurStyle(BaseStyle):
    """Classic vertical-video look: a heavily blurred, dimmed copy of the
    clip fills the frame behind a centered full-quality copy."""

    def apply(self, clip, **kwargs):
        out_w, out_h = self.output_size

        # Scale a copy of the clip so it covers the whole canvas.
        backdrop = clip.resize(height=out_h)
        if backdrop.w < out_w:
            backdrop = clip.resize(width=out_w)

        def blur_frame(get_frame, t):
            # Cheap strong blur: crush to 16x16, upscale to the canvas,
            # then smooth the upscaling blockiness with a Gaussian pass.
            original = get_frame(t)
            tiny = cv2.resize(original, (16, 16))
            stretched = cv2.resize(tiny, (out_w, out_h), interpolation=cv2.INTER_LINEAR)
            return cv2.GaussianBlur(stretched, (21, 21), 0)

        backdrop = backdrop.fl(blur_frame).set_opacity(0.6)

        # Foreground: fit to width, then shrink by height if still too tall.
        foreground = clip.resize(width=out_w).set_position("center")
        if foreground.h > out_h:
            foreground = clip.resize(height=out_h).set_position("center")

        return mpe.CompositeVideoClip([backdrop, foreground], size=self.output_size)
177
+
178
class SplitVerticalStyle(BaseStyle):
    """Stack the main clip over a secondary 'playground' clip; without a
    secondary clip, a half-transparent copy of the main clip fills the
    bottom half."""

    def apply(self, clip, playground_path=None, **kwargs):
        half_height = self.output_size[1] // 2
        top_half = clip.resize(height=half_height).set_position(('center', 'top'))

        if playground_path and os.path.exists(playground_path):
            bottom_half = mpe.VideoFileClip(playground_path).without_audio().resize(height=half_height).set_position(('center', 'bottom'))
            # Loop or trim the secondary clip to the main clip's duration.
            if bottom_half.duration < clip.duration:
                bottom_half = bottom_half.loop(duration=clip.duration)
            else:
                bottom_half = bottom_half.subclip(0, clip.duration)
        else:
            bottom_half = clip.resize(height=half_height).set_position(('center', 'bottom')).set_opacity(0.5)

        return mpe.CompositeVideoClip([top_half, bottom_half], size=self.output_size)
194
+
195
class SplitHorizontalStyle(BaseStyle):
    """Place the main clip beside a secondary 'playground' clip; without a
    secondary clip, a half-transparent copy of the main clip fills the
    right half."""

    def apply(self, clip, playground_path=None, **kwargs):
        half_width = self.output_size[0] // 2
        left_half = clip.resize(width=half_width).set_position(('left', 'center'))

        if playground_path and os.path.exists(playground_path):
            right_half = mpe.VideoFileClip(playground_path).without_audio().resize(width=half_width).set_position(('right', 'center'))
            # Loop or trim the secondary clip to the main clip's duration.
            if right_half.duration < clip.duration:
                right_half = right_half.loop(duration=clip.duration)
            else:
                right_half = right_half.subclip(0, clip.duration)
        else:
            right_half = clip.resize(width=half_width).set_position(('right', 'center')).set_opacity(0.5)

        return mpe.CompositeVideoClip([left_half, right_half], size=self.output_size)
211
+
212
class VerticalFullStyle(BaseStyle):
    """Fill the entire vertical frame by smart-cropping around detected
    faces via SmartFaceCropper."""

    def apply(self, clip, **kwargs):
        return SmartFaceCropper(output_size=self.output_size).apply_to_clip(clip)
216
+
217
class StyleFactory:
    """Registry mapping style identifiers to BaseStyle implementations."""

    _styles = {
        "cinematic": CinematicStyle,
        "cinematic_blur": CinematicBlurStyle,
        "split_vertical": SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full": VerticalFullStyle
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """Instantiate the style registered under ``style_name``.

        Unknown (or None) names fall back to CinematicBlurStyle.
        """
        return StyleFactory._styles.get(style_name, CinematicBlurStyle)()
core/subtitle_manager.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import urllib.request
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ import moviepy.editor as mpe
6
+ from arabic_reshaper import reshape
7
+ from bidi.algorithm import get_display
8
+ from .config import Config
9
+ from .logger import Logger
10
+
11
+ logger = Logger.get_logger(__name__)
12
+
13
class SubtitleManager:
    """Builds burned-in caption overlays (PIL-rendered ImageClips) from STT
    transcript segments, with per-language font selection and RTL shaping
    for Arabic text."""

    @staticmethod
    def ensure_font(language=None, style_font=None, text_content=None):
        """Ensures a valid font exists dynamically based on language or content.

        Returns a filesystem path to a usable .ttf, downloading it on demand;
        falls back to the literal font name "Arial" if a download fails.
        """

        # 1. Determine Font Name
        font_name = Config.LANGUAGE_FONT_MAP.get("default", "Roboto-Bold.ttf")

        # Priority 1: Explicit Style Font (if language supports it or it's Latin)
        # However, if text is Arabic/CJK, style font (usually Latin) might break it.
        # So we should check language compatibility first.

        detected_lang = language
        if not detected_lang and text_content:
            # Simple script detection by Unicode block membership.
            if any("\u0600" <= c <= "\u06FF" for c in text_content):
                detected_lang = "ar"
            elif any("\u4E00" <= c <= "\u9FFF" for c in text_content):
                detected_lang = "zh"
            elif any("\u3040" <= c <= "\u309F" for c in text_content) or any("\u30A0" <= c <= "\u30FF" for c in text_content):
                detected_lang = "ja"
            elif any("\u0900" <= c <= "\u097F" for c in text_content):
                detected_lang = "hi"
            elif any("\u0400" <= c <= "\u04FF" for c in text_content):
                detected_lang = "ru"

        # Priority 2: Language-specific font from Config Map
        if detected_lang in Config.LANGUAGE_FONT_MAP:
            font_name = Config.LANGUAGE_FONT_MAP[detected_lang]
        elif style_font and not detected_lang:
            # Only use style font if no specific non-Latin language detected
            font_name = style_font

        # Fallback: if detected language is known but not in map (shouldn't happen with default keys)
        if detected_lang and detected_lang not in Config.LANGUAGE_FONT_MAP:
            logger.warning(f"⚠️ Language {detected_lang} not in font map, using default.")

        font_path = os.path.join(Config.BASE_DIR, font_name)

        if not os.path.exists(font_path):
            logger.info(f"📥 Downloading font: {font_name}...")
            # We might need to add more fonts to Config.FONTS or download dynamically
            url = Config.FONTS.get(font_name)
            if url:
                try:
                    # Use Config's CSS downloader for Google Fonts
                    if "fonts.googleapis.com/css" in url:
                        success = Config.download_font_from_css(url, font_path)
                        if not success:
                            raise Exception("CSS font download failed")
                    else:
                        # Fallback for direct links
                        urllib.request.urlretrieve(url, font_path)

                    logger.info(f"✅ Font downloaded: {font_name}")
                except Exception as e:
                    logger.error(f"❌ Failed to download font: {e}")
                    return "Arial"
            else:
                logger.warning(f"⚠️ No URL found for font: {font_name}")
                # Fallback for now if not in config
                if font_name == "Montserrat-Bold.ttf":  # TikTok popular
                    # Add logic to download or use system font
                    pass

        return font_path

    @staticmethod
    def create_pil_text_clip(text, fontsize, color, font_path, stroke_color='black', stroke_width=2, bg_color=None, padding=10):
        """Creates a text clip using PIL.

        Renders the text (with optional stroke and rounded background box)
        onto a transparent RGBA image and wraps it as a moviepy ImageClip.
        Returns None on any rendering error.
        """
        try:
            try:
                font = ImageFont.truetype(font_path, fontsize)
            except:
                logger.warning(f"⚠️ Failed to load font {font_path}, using default.")
                font = ImageFont.load_default()

            # Measure the text on a throwaway 1x1 canvas first.
            dummy_img = Image.new('RGBA', (1, 1))
            draw = ImageDraw.Draw(dummy_img)
            bbox = draw.textbbox((0, 0), text, font=font)
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]

            # Leave room for the stroke plus configurable padding.
            margin = int(stroke_width * 2) + padding
            img_width = text_width + margin * 2
            img_height = text_height + margin * 2

            img = Image.new('RGBA', (int(img_width), int(img_height)), (0, 0, 0, 0))
            draw = ImageDraw.Draw(img)

            # Draw Background if requested
            if bg_color:
                draw.rounded_rectangle(
                    [(0, 0), (img_width, img_height)],
                    radius=15,
                    fill=bg_color
                )

            # Center the text; subtract the bbox origin so glyph overhang is
            # accounted for.
            x = (img_width - text_width) / 2 - bbox[0]
            y = (img_height - text_height) / 2 - bbox[1]

            draw.text(
                (x, y),
                text,
                font=font,
                fill=color,
                stroke_width=stroke_width,
                stroke_fill=stroke_color
            )

            return mpe.ImageClip(np.array(img))

        except Exception as e:
            logger.error(f"⚠️ PIL Text Error: {e}")
            return None

    @staticmethod
    def get_style_config(style_name):
        """Returns configuration for different caption styles.

        Unknown names fall back to the "classic" preset.
        """
        styles = {
            "classic": {
                "fontsize": 75,
                "color": "white",
                "stroke_color": "black",
                "stroke_width": 2,
                "font": None,  # Default based on language
                "bg_color": None,
                "position": ("center", 1350)
            },
            "tiktok_bold": {
                "fontsize": 85,
                "color": "white",
                "stroke_color": "black",
                "stroke_width": 4,
                "font": "Montserrat-Bold.ttf",  # Popular on TikTok
                "bg_color": None,  # Shadow usually used instead of BG
                "position": ("center", 1400)
            },
            "tiktok_neon": {
                "fontsize": 80,
                "color": "#00f2ea",  # TikTok Cyan
                "stroke_color": "#ff0050",  # TikTok Red
                "stroke_width": 3,
                "font": "Roboto-Bold.ttf",
                "bg_color": None,
                "position": ("center", 1400)
            },
            "youtube_clean": {
                "fontsize": 70,
                "color": "yellow",
                "stroke_color": "black",
                "stroke_width": 3,
                "font": "Roboto-Bold.ttf",
                "bg_color": None,
                "position": ("center", 1300)
            },
            "youtube_box": {
                "fontsize": 65,
                "color": "white",
                "stroke_color": None,
                "stroke_width": 0,
                "font": "Roboto-Bold.ttf",
                "bg_color": "red",  # YouTube Red Box
                "position": ("center", 1300)
            }
        }
        return styles.get(style_name, styles["classic"])

    @staticmethod
    def create_caption_clips(transcript_data, size=(1080, 1920), language=None, caption_mode="sentence", caption_style="classic"):
        """Generates a list of caption ImageClips for the video, without composing them.

        transcript_data may be a dict with 'segments', a list of segment
        dicts, or a list whose first element holds 'segments'. caption_mode
        "word" renders one word per caption; "sentence" groups 4 words.
        """
        all_text_clips = []

        style_config = SubtitleManager.get_style_config(caption_style)

        # We need to peek at the first segment to determine language if not provided
        # Or better, check each chunk dynamically?
        # For simplicity and consistency, let's check the first non-empty text.

        sample_text = ""
        segments = []
        if isinstance(transcript_data, list):
            if len(transcript_data) > 0 and 'segments' in transcript_data[0]:
                segments = transcript_data[0]['segments']
            else:
                segments = transcript_data
        elif isinstance(transcript_data, dict) and 'segments' in transcript_data:
            segments = transcript_data['segments']

        if segments:
            for s in segments:
                if s.get('text'):
                    sample_text = s['text']
                    break

        font_path = SubtitleManager.ensure_font(language, style_config.get("font"), text_content=sample_text)

        for segment in segments:
            full_text = segment.get('text', '').strip()
            if not full_text:
                # Reconstruct the text from word-level entries if needed.
                words = segment.get('words', [])
                full_text = " ".join([w['text'] for w in words])

            if not full_text:
                continue

            start_t = segment.get('start', 0)
            end_t = segment.get('end', 0)

            # Repair degenerate segment timings from the word timestamps.
            if end_t <= start_t:
                if segment.get('words'):
                    start_t = segment['words'][0]['start']
                    end_t = segment['words'][-1]['end']
                else:
                    continue

            words_list = full_text.split()
            if not words_list:
                continue

            chunk_size = 1 if caption_mode == "word" else 4
            chunks = []

            # Use Word Timestamps if available (More Accurate)
            stt_words = segment.get('words')
            if stt_words and len(stt_words) > 0:
                valid_words = [w for w in stt_words if w.get('text', '').strip()]

                if valid_words:
                    for i in range(0, len(valid_words), chunk_size):
                        chunk_group = valid_words[i:i + chunk_size]

                        chunk_text = " ".join([w['text'] for w in chunk_group])
                        chunk_start = chunk_group[0]['start']
                        chunk_end = chunk_group[-1]['end']

                        chunks.append({
                            "text": chunk_text,
                            "start": chunk_start,
                            "end": chunk_end
                        })
            else:
                # Fallback to linear interpolation (Less Accurate)
                words_list = full_text.split()
                if not words_list:
                    continue

                for i in range(0, len(words_list), chunk_size):
                    chunk_words = words_list[i:i + chunk_size]
                    chunk_text = " ".join(chunk_words)

                    # Spread the segment duration proportionally over chunks.
                    chunk_duration = (end_t - start_t) * (len(chunk_words) / len(words_list))
                    chunk_start = start_t + (end_t - start_t) * (i / len(words_list))
                    chunk_end = chunk_start + chunk_duration

                    if chunk_end <= chunk_start:
                        chunk_end = chunk_start + 0.5

                    chunks.append({
                        "text": chunk_text,
                        "start": chunk_start,
                        "end": chunk_end
                    })

            for chunk in chunks:
                display_text = chunk["text"]
                is_arabic = language == "ar" or any("\u0600" <= c <= "\u06FF" for c in display_text)

                if is_arabic:
                    # Reshape + reorder for correct RTL rendering in PIL.
                    try:
                        display_text = get_display(reshape(display_text))
                    except:
                        pass
                else:
                    display_text = display_text.upper()

                # Override size if provided in style, else use dynamic size based on mode
                f_size = style_config.get("fontsize", 75)
                if caption_mode == "word":
                    f_size = int(f_size * 1.4)  # Make word mode larger

                img_clip = SubtitleManager.create_pil_text_clip(
                    display_text,
                    fontsize=f_size,
                    color=style_config.get("color", "white"),
                    font_path=font_path,
                    stroke_color=style_config.get("stroke_color", "black"),
                    stroke_width=style_config.get("stroke_width", 2),
                    bg_color=style_config.get("bg_color")
                )

                if img_clip:
                    # Center horizontally, and place near bottom
                    pos = style_config.get("position", ('center', 1350))
                    txt_clip = img_clip.set_start(chunk["start"]).set_end(chunk["end"]).set_position(pos)
                    all_text_clips.append(txt_clip)

        return all_text_clips

    @staticmethod
    def create_captions(video_clip, transcript_data, size=(1080, 1920), language=None, caption_mode="sentence"):
        """Generates caption clips and composites them onto the video."""
        text_clips = SubtitleManager.create_caption_clips(transcript_data, size, language, caption_mode)
        return mpe.CompositeVideoClip([video_clip] + text_clips, size=size)
core/task_queue.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ import queue
3
+ import uuid
4
+ import time
5
+ from enum import Enum
6
+ from typing import Dict, Any, Optional, Callable
7
+ from core.logger import Logger
8
+
9
+ logger = Logger.get_logger(__name__)
10
+
11
class TaskStatus(str, Enum):
    """Lifecycle states of a queued task; str-valued so they serialize
    directly in JSON status responses."""
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
16
+
17
class TaskManager:
    """Process-wide singleton that runs submitted tasks sequentially on a
    single daemon worker thread and tracks per-task state in memory.

    Task state dicts (see ``add_task``) are exposed verbatim through
    ``get_task_status`` and the /status endpoint.
    """

    _instance = None

    def __new__(cls):
        # Classic singleton: every TaskManager() call yields the same object.
        if cls._instance is None:
            cls._instance = super(TaskManager, cls).__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # __init__ runs on every TaskManager() call; only build the queue
        # and start the worker thread once.
        if self._initialized:
            return

        self.task_queue = queue.Queue()
        self.tasks: Dict[str, Dict[str, Any]] = {}
        self.worker_thread = threading.Thread(target=self._worker, daemon=True)
        self.worker_thread.start()
        self._initialized = True
        logger.info("🚀 Task Manager initialized with background worker")

    def add_task(self, task_func: Callable, *args, **kwargs) -> str:
        """
        Add a task to the processing queue.
        Returns the task_id.
        """
        # Extract task_id if provided, otherwise generate one
        task_id = kwargs.get('task_id')
        if not task_id:
            task_id = uuid.uuid4().hex[:8]
        kwargs['task_id'] = task_id

        self.tasks[task_id] = {
            "id": task_id,
            "status": TaskStatus.PENDING,
            "submitted_at": time.time(),
            # FIX: initialize progress fields up-front so status responses
            # have a stable schema even before the first progress update.
            "progress": 0,
            "message": "",
            "result": None,
            "error": None
        }

        # Add to queue
        self.task_queue.put((task_id, task_func, args, kwargs))
        logger.info(f"📥 Task {task_id} added to queue (Position: {self.task_queue.qsize()})")
        return task_id

    def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get the current status and result of a task (None if unknown)."""
        return self.tasks.get(task_id)

    def update_task_progress(self, task_id: str, progress: int, message: str = ""):
        """Update the progress percentage and status message of a task."""
        if task_id in self.tasks:
            self.tasks[task_id]["progress"] = progress
            self.tasks[task_id]["message"] = message
            logger.info(f"📈 Task {task_id} progress: {progress}% - {message}")

    def _worker(self):
        """Background worker that processes tasks sequentially."""
        logger.info("👷 Task Worker loop started")
        while True:
            try:
                # Block until a task is available
                task_id, func, args, kwargs = self.task_queue.get()

                logger.info(f"🔄 Processing Task {task_id}...")
                self.tasks[task_id]["status"] = TaskStatus.PROCESSING
                self.tasks[task_id]["started_at"] = time.time()

                try:
                    # Execute the task
                    result = func(*args, **kwargs)

                    self.tasks[task_id]["status"] = TaskStatus.COMPLETED
                    self.tasks[task_id]["completed_at"] = time.time()
                    self.tasks[task_id]["result"] = result

                    # Application-level failures are returned as
                    # {"status": "error", ...} rather than raised; surface
                    # them as FAILED tasks.
                    if isinstance(result, dict) and result.get("status") == "error":
                        self.tasks[task_id]["status"] = TaskStatus.FAILED
                        self.tasks[task_id]["error"] = result.get("error")
                        # FIX: previously logged "completed successfully"
                        # even when the result marked the task as failed.
                        logger.error(f"❌ Task {task_id} finished with an application error")
                    else:
                        logger.info(f"✅ Task {task_id} completed successfully")

                except Exception as e:
                    import traceback
                    error_trace = traceback.format_exc()
                    logger.error(f"❌ Task {task_id} failed with exception: {e}")
                    logger.error(error_trace)

                    self.tasks[task_id]["status"] = TaskStatus.FAILED
                    self.tasks[task_id]["error"] = str(e)
                    self.tasks[task_id]["traceback"] = error_trace
                    self.tasks[task_id]["completed_at"] = time.time()

                finally:
                    self.task_queue.task_done()

            except Exception as e:
                logger.error(f"💀 Critical Worker Error: {e}")
                time.sleep(1)  # Prevent tight loop if queue is broken
dependencies_scan.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai
2
+ python-dotenv
3
+ numpy
4
+ pillow
5
+ moviepy
6
+ arabic-reshaper
7
+ python-bidi
8
+ requests
9
+ json_repair
10
+ fastapi
11
+ uvicorn[standard]
12
+ python-multipart
13
+ opencv-python-headless
14
+ faster-whisper
main.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
2
+ from fastapi.responses import JSONResponse, FileResponse
3
+ from typing import Optional, Union, Any
4
+ from enum import Enum
5
+ import os
6
+ import uuid
7
+ import shutil
8
+ import glob
9
+ import requests
10
+ import json
11
+ from processor import VideoProcessor
12
+ from core.config import Config
13
+ from core.logger import Logger
14
+ from core.task_queue import TaskManager
15
+
16
# Module-level logger and the process-wide task-queue singleton.
logger = Logger.get_logger(__name__)
task_manager = TaskManager()

# Ensure directories exist
Config.setup_dirs()
21
+
22
class VideoStyle(str, Enum):
    """Output layout presets accepted by the /auto-clip form."""
    cinematic = "cinematic"
    cinematic_blur = "cinematic_blur"
    vertical_full = "vertical_full"
    split_vertical = "split_vertical"
    split_horizontal = "split_horizontal"
28
+
29
class CaptionMode(str, Enum):
    """Caption granularity: one word at a time, or sentence-style groups."""
    word = "word"
    sentence = "sentence"
32
+
33
class CaptionStyle(str, Enum):
    """Visual caption presets (see SubtitleManager.get_style_config)."""
    classic = "classic"
    tiktok_bold = "tiktok_bold"
    tiktok_neon = "tiktok_neon"
    youtube_clean = "youtube_clean"
    youtube_box = "youtube_box"
39
+
40
class Language(str, Enum):
    """Supported transcription/caption languages; 'auto' defers detection."""
    auto = "auto"
    ar = "ar"
    en = "en"
    hi = "hi"
    zh = "zh"
    es = "es"
    fr = "fr"
    de = "de"
    ru = "ru"
    ja = "ja"
51
+
52
# FastAPI application and the single shared processing pipeline instance.
app = FastAPI(title="Auto-Clipping API")
clipper = VideoProcessor()
54
+
55
def process_video_task(
    task_id: str,
    video_path: str,
    playground_path: Optional[str],
    audio_path: Optional[str],
    bg_image_path: Optional[str],
    style: VideoStyle,
    bg_music_volume: float,
    secondary_video_volume: float,
    webhook_url: Optional[str],
    language: Language = Language.auto,
    caption_mode: CaptionMode = CaptionMode.sentence,
    caption_style: CaptionStyle = CaptionStyle.classic
):
    """Run the full auto-clipping pipeline for one queued task.

    Executed on the TaskManager worker thread: analyzes the video, selects
    the best segments, renders styled/captioned clips, reports progress via
    the task manager, optionally POSTs the result to ``webhook_url``, and
    returns a result dict with "status" of "success" or "error".
    """
    from moviepy.editor import VideoFileClip
    full_video_clip = None
    try:
        # Optimization: Open video once and share the handle across stages.
        full_video_clip = VideoFileClip(video_path)

        # Helper for progress updates
        def update_progress(progress, message):
            task_manager.update_task_progress(task_id, progress, message)

        update_progress(1, "Starting video analysis...")

        # 1. Analyze video (transcription + impact scoring)
        timestamp_mode = "words" if caption_mode == CaptionMode.word else "segments"
        scored_segments, total_duration, llm_moments = clipper.analyze_impact(
            video_path,
            video_clip=full_video_clip,
            language=language,
            timestamp_mode=timestamp_mode,
            progress_callback=update_progress
        )

        # 2. Select best clips
        best_clips = clipper.get_best_segments(
            scored_segments,
            video_duration=total_duration
        )

        # 3. Final processing (styling, captions, export)
        output_files = clipper.process_clips(
            video_path,
            best_clips,
            llm_moments,
            style=style,
            task_id=task_id,
            language=language,
            video_clip=full_video_clip,
            playground_path=playground_path,
            audio_path=audio_path,
            bg_music_volume=bg_music_volume,
            secondary_video_volume=secondary_video_volume,
            background_path=bg_image_path,
            caption_mode=caption_mode,
            caption_style=caption_style,
            progress_callback=update_progress
        )

        result = {
            "status": "success",
            "task_id": task_id,
            "clips_found": len(best_clips),
            "output_files": [os.path.basename(f) for f in output_files],
            "best_segments_info": best_clips
        }

        task_manager.update_task_progress(task_id, 100, "Completed successfully")

    except Exception as e:
        import traceback
        error_msg = f"❌ Error during processing: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        # The error result is returned (and webhooked) instead of re-raised;
        # TaskManager marks the task FAILED based on the "status" key.
        result = {
            "status": "error",
            "task_id": task_id,
            "error": str(e),
            "traceback": traceback.format_exc()
        }
    finally:
        if full_video_clip:
            full_video_clip.close()

    # Send webhook
    if webhook_url and webhook_url.strip() and webhook_url.startswith(('http://', 'https://')):
        try:
            logger.info(f"📡 Sending results to webhook: {webhook_url}")
            json_payload = json.dumps(result)
            headers = {'Content-Type': 'application/json'}

            response = requests.post(webhook_url, data=json_payload, headers=headers, timeout=30)

            logger.info(f"✅ Webhook sent. Status Code: {response.status_code}")
            if response.status_code >= 400:
                logger.warning(f"⚠️ Webhook Response Error: {response.text}")
        except Exception as webhook_err:
            # Webhook delivery is best-effort; never fail the task over it.
            logger.error(f"⚠️ Failed to send webhook: {webhook_err}")
    else:
        logger.info("ℹ️ No webhook URL provided, skipping webhook notification")

    return result
159
+
160
# FIX: the route must declare the {filename} path parameter that the handler
# receives (the previous literal path could never match a real file request).
@app.get("/download/{filename}")
async def download_video(filename: str):
    """Download a rendered video from the outputs folder.

    Looks in outputs/viral_clips first, then in the outputs root.
    Returns 404 JSON when the file does not exist.
    """
    # Sanitize: filenames come from the client; never let them escape the
    # outputs directory.
    safe_name = os.path.basename(filename)
    file_path = os.path.join(Config.OUTPUTS_DIR, "viral_clips", safe_name)
    # Check if file exists in the specific viral_clips folder or root outputs
    if not os.path.exists(file_path):
        file_path = os.path.join(Config.OUTPUTS_DIR, safe_name)

    if os.path.exists(file_path):
        return FileResponse(file_path, media_type='video/mp4', filename=safe_name)
    return JSONResponse(status_code=404, content={"error": "File not found"})
171
+
172
@app.get("/status/{task_id}")
async def get_task_status(task_id: str):
    """Check the status of a specific task"""
    info = task_manager.get_task_status(task_id)
    if info:
        return info
    return JSONResponse(status_code=404, content={"error": "Task not found"})
180
+
181
@app.get("/files")
async def list_files():
    """List all .mp4 files in the outputs folder (root and viral_clips),
    with sizes and per-file download URLs."""
    try:
        files = []
        # Search in viral_clips subdirectory as well
        search_dirs = [Config.OUTPUTS_DIR, os.path.join(Config.OUTPUTS_DIR, "viral_clips")]

        for d in search_dirs:
            if os.path.exists(d):
                for filename in os.listdir(d):
                    file_path = os.path.join(d, filename)
                    if os.path.isfile(file_path) and filename.endswith('.mp4'):
                        file_size = os.path.getsize(file_path)
                        files.append({
                            "filename": filename,
                            "size": file_size,
                            "size_mb": round(file_size / (1024 * 1024), 2),
                            # FIX: interpolate the actual filename so the URL
                            # matches the /download/{filename} route.
                            "download_url": f"/download/{filename}"
                        })

        return {
            "status": "success",
            "total_files": len(files),
            "files": files
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
209
+
210
@app.post("/clear")
async def clear_files():
    """Clear all files in upload, output and temp directories"""
    try:
        removed = 0
        for root_dir in (Config.UPLOADS_DIR, Config.OUTPUTS_DIR, Config.TEMP_DIR):
            if not os.path.exists(root_dir):
                continue
            # Walk every entry recursively; only plain files are deleted so
            # the directory tree itself stays intact.
            for entry in glob.glob(os.path.join(root_dir, "**", "*"), recursive=True):
                try:
                    if os.path.isfile(entry):
                        os.remove(entry)
                        removed += 1
                except Exception as e:
                    # Best effort: log and keep clearing the rest.
                    logger.error(f"Error deleting {entry}: {e}")
        return {"status": "success", "message": f"Cleared {removed} files"}
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
232
+
233
def _save_upload(upload: Optional[UploadFile], task_id: str) -> Optional[str]:
    """Persist one uploaded file into UPLOADS_DIR, prefixed with the task id.

    Returns the saved path, or None when no file was provided. The stored
    name is basename-sanitized so a crafted filename cannot escape the
    uploads directory (path traversal).
    """
    if not upload or not upload.filename:
        return None
    safe_name = os.path.basename(upload.filename)
    dest_path = os.path.join(Config.UPLOADS_DIR, f"{task_id}_{safe_name}")
    with open(dest_path, "wb") as f:
        shutil.copyfileobj(upload.file, f)
    return dest_path


@app.post("/auto-clip")
async def create_auto_clip(
    video: UploadFile = File(...),
    playground_video: Optional[UploadFile] = File(None),
    audio: Optional[UploadFile] = File(None),
    background_image: Optional[UploadFile] = File(None),
    style: VideoStyle = Form(VideoStyle.cinematic_blur),
    caption_mode: CaptionMode = Form(CaptionMode.sentence),
    caption_style: CaptionStyle = Form(CaptionStyle.classic),
    webhook_url: Optional[str] = Form(None),
    language: Language = Form(Language.auto),
    bg_music_volume: float = Form(0.1),
    secondary_video_volume: float = Form(0.2)
):
    """Accept the uploads, persist them, and queue the processing task.

    Returns immediately with a task_id; progress and results are polled via
    /status/{task_id} or delivered to webhook_url.
    """
    task_id = uuid.uuid4().hex[:8]

    # 1. Save main video (required by the File(...) declaration).
    video_path = _save_upload(video, task_id)

    # 2. Save secondary video — only meaningful for the split layouts.
    playground_path = None
    if style in [VideoStyle.split_vertical, VideoStyle.split_horizontal]:
        playground_path = _save_upload(playground_video, task_id)

    # 3. Save background image
    bg_image_path = _save_upload(background_image, task_id)

    # 4. Save audio file
    audio_path = _save_upload(audio, task_id)

    # Add task to queue
    task_manager.add_task(
        process_video_task,
        task_id=task_id,
        video_path=video_path,
        playground_path=playground_path,
        audio_path=audio_path,
        bg_image_path=bg_image_path,
        style=style,
        bg_music_volume=bg_music_volume,
        secondary_video_volume=secondary_video_volume,
        webhook_url=webhook_url,
        language=language,
        caption_mode=caption_mode,
        caption_style=caption_style
    )

    return {
        "status": "queued",
        "task_id": task_id,
        "message": "Task added to queue. Check status at /status/{task_id}"
    }
297
+
298
# Development entry point: run the API directly with uvicorn on port 7860.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
processor.py ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import traceback
4
+ from datetime import datetime
5
+ import moviepy.editor as mpe
6
+ import core # Applies monkey patches
7
+ from core.config import Config
8
+ from core.logger import Logger
9
+ from core.stt import STT
10
+ from core.analyze import analyze_transcript_gemini
11
+ from core.styles import StyleFactory
12
+ from core.subtitle_manager import SubtitleManager
13
+ from core.free_translator import FreeTranslator
14
+ import json_repair
15
+
16
+ logger = Logger.get_logger(__name__)
17
+
18
class VideoProcessor:
    """End-to-end pipeline: transcribe a video, find viral moments with AI,
    and render styled, captioned clips."""

    def __init__(self, model_size="base"):
        """Initialize the processor.

        Args:
            model_size: Whisper model size forwarded to the STT backend
                (e.g. "base", "small").
        """
        # Load the speech-to-text backend first, then make sure the
        # working directories (uploads/outputs/...) exist.
        self.stt = STT(model_size=model_size)
        Config.setup_dirs()
22
+
23
    def _clean_json_response(self, content):
        """Cleans AI JSON response using json_repair.

        NOTE(review): this method is SHADOWED by a second
        ``_clean_json_response`` defined later in this class, so the
        json_repair logic below is effectively dead code at runtime.
        The duplicate definitions should be merged.

        Args:
            content: Raw AI response text (possibly fenced in markdown).
                Non-string input is returned unchanged.

        Returns:
            A JSON string on successful repair, otherwise the stripped
            (and brace-balanced, if truncated) raw text.
        """
        if not isinstance(content, str):
            return content

        # Remove markdown blocks if present
        content = content.strip()
        if content.startswith("```json"):
            content = content[7:]
        if content.startswith("```"):
            content = content[3:]
        if content.endswith("```"):
            content = content[:-3]

        content = content.strip()

        # Use json_repair to fix truncated or malformed JSON
        try:
            repaired_json = json_repair.loads(content)
            return json.dumps(repaired_json)
        except Exception as e:
            logger.warning(f"⚠️ json_repair failed, falling back to manual fix: {e}")

        # Fallback manual fix (though json_repair is usually sufficient):
        # balance unclosed braces on truncated model output.
        if content and not content.endswith('}'):
            open_braces = content.count('{')
            close_braces = content.count('}')
            if open_braces > close_braces:
                content += '}' * (open_braces - close_braces)
                logger.info(f"🔧 Fixed truncated JSON with {open_braces - close_braces} closing braces")

        return content
55
+
56
+ def parse_ai_response(self, ai_res):
57
+ """
58
+ Parses the JSON response from the AI and returns a list of segments.
59
+ Handles both string and dictionary responses, and various potential key names.
60
+ """
61
+ if not isinstance(ai_res, dict):
62
+ logger.error(f"❌ Invalid AI response format: expected dict, got {type(ai_res)}")
63
+ return []
64
+
65
+ res_content = ai_res.get("content")
66
+ segments_data = {}
67
+
68
+ try:
69
+ if isinstance(res_content, str):
70
+ cleaned_content = self._clean_json_response(res_content)
71
+ segments_data = json.loads(cleaned_content)
72
+ else:
73
+ segments_data = res_content
74
+
75
+ chunk_segments = []
76
+ if isinstance(segments_data, dict):
77
+ for key in ["segments", "clips", "moments"]:
78
+ if key in segments_data and isinstance(segments_data[key], list):
79
+ chunk_segments = segments_data[key]
80
+ break
81
+ if not chunk_segments and any(isinstance(v, list) for v in segments_data.values()):
82
+ for v in segments_data.values():
83
+ if isinstance(v, list):
84
+ chunk_segments = v
85
+ break
86
+ elif isinstance(segments_data, list):
87
+ chunk_segments = segments_data
88
+
89
+ return chunk_segments
90
+
91
+ except Exception as e:
92
+ logger.error(f"❌ Failed to parse AI response: {e}")
93
+ logger.error(f"Raw Content: {res_content}")
94
+ return []
95
+
96
+ def _clean_json_response(self, text):
97
+ """Clean markdown and other noise from AI JSON response."""
98
+ text = text.strip()
99
+ if text.startswith("```json"):
100
+ text = text[7:]
101
+ if text.startswith("```"):
102
+ text = text[3:]
103
+ if text.endswith("```"):
104
+ text = text[:-3]
105
+ return text.strip()
106
+
107
+ def analyze_impact(self, video_path, video_clip=None, language=None, timestamp_mode="segments", progress_callback=None):
108
+ """Analyzes video content and suggests viral clips using AI."""
109
+ if progress_callback: progress_callback(5, "Starting speech-to-text...")
110
+ logger.info("🎙️ Phase 1: Speech-to-Text (STT)...")
111
+ # Always transcribe in source language (auto detect)
112
+ full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
113
+ video_path,
114
+ language=None, # Always auto-detect source language
115
+ skip_ai=True,
116
+ timestamp_mode=timestamp_mode
117
+ )
118
+
119
+ # Check if translation is needed
120
+ target_lang = None
121
+ if language:
122
+ if hasattr(language, 'value'):
123
+ target_lang = language.value
124
+ else:
125
+ target_lang = str(language)
126
+
127
+
128
+ data = {
129
+ "segments": full_segments,
130
+ "detected_language": detected_lang,
131
+ "duration": duration
132
+ }
133
+
134
+ logger.info("🤖 Phase 2: AI Viral Moment Analysis...")
135
+ if progress_callback: progress_callback(20, "Analyzing content for viral moments...")
136
+
137
+ chunk_size = Config.CHUNK_SIZE_SECONDS
138
+ overlap = Config.OVERLAP_SECONDS
139
+
140
+ all_ai_segments = []
141
+ max_time = full_segments[-1]["end"] if full_segments else 0
142
+
143
+ current_start = 0
144
+ while current_start < max_time:
145
+ current_end = current_start + chunk_size
146
+
147
+ chunk_transcript = ""
148
+ for seg in full_segments:
149
+ if seg["start"] >= current_start and seg["start"] < current_end:
150
+ chunk_transcript += f"[{seg['start']:.2f} - {seg['end']:.2f}] {seg['text']}\n"
151
+
152
+ if chunk_transcript.strip():
153
+ transcript_len = len(chunk_transcript)
154
+
155
+ # Calculate progress
156
+ current_progress = 20 + int((current_start / max_time) * 40) # 20% to 60%
157
+ if progress_callback:
158
+ progress_callback(current_progress, f"Analyzing chunk {current_start/60:.1f}m - {min(current_end, max_time)/60:.1f}m")
159
+
160
+ logger.info(f"🧠 Analyzing chunk: {current_start/60:.1f}m to {min(current_end, max_time)/60:.1f}m (Length: {transcript_len} chars)...")
161
+
162
+ ai_res = analyze_transcript_gemini(chunk_transcript)
163
+
164
+ # Log debug info
165
+ logger.info(f"🤖 AI Response Type: {type(ai_res)}")
166
+ if isinstance(ai_res, dict) and "content" in ai_res:
167
+ logger.info(f"🤖 Raw AI Response (First 500 chars): {ai_res['content'][:500]}...")
168
+ else:
169
+ logger.info(f"🤖 Raw AI Response (Structure): {str(ai_res)[:500]}...")
170
+
171
+ try:
172
+ chunk_segments = self.parse_ai_response(ai_res)
173
+ logger.info(f"✅ Found {len(chunk_segments)} segments in chunk")
174
+ all_ai_segments.extend(chunk_segments)
175
+ except Exception as e:
176
+ logger.error(f"❌ Error processing chunk: {e}")
177
+ logger.error(traceback.format_exc())
178
+
179
+ current_start += (chunk_size - overlap)
180
+ if current_end >= max_time: break
181
+
182
+ # Deduplicate
183
+ unique_segments = []
184
+ seen_starts = set()
185
+ for s in all_ai_segments:
186
+ start_t = s.get("start_time")
187
+ if start_t not in seen_starts:
188
+ unique_segments.append(s)
189
+ seen_starts.add(start_t)
190
+
191
+ return unique_segments, duration, data
192
+
193
+ def get_best_segments(self, segments, video_duration=0):
194
+ """Sorts segments by viral score."""
195
+ return sorted(segments, key=lambda x: x.get("viral_score", 0), reverse=True)
196
+
197
    def process_clips(self, input_video_path, best_clips, data, style="cinematic", language=None, video_clip=None, progress_callback=None, **kwargs):
        """Processes the selected viral clips with styles and captions.

        Args:
            input_video_path: Path to the full source video on disk.
            best_clips: Segment dicts with "start_time"/"end_time" seconds.
            data: Transcript payload from analyze_impact() — keys
                "segments", "detected_language", "duration".
            style: Style name or enum applied to every rendered clip.
            language: Target caption language (enum or string); translation
                runs only when it differs from the detected language.
            video_clip: Optional open clip, used only to read the duration;
                the caller stays responsible for closing it.
            progress_callback: Optional fn(percent, message) progress hook.
            **kwargs: task_id, caption_mode, caption_style, background_path,
                playground_path.

        Returns:
            List of output file paths for the clips that rendered OK.
        """
        logger.info("🎨 Phase 3: Style & Captions...")
        if progress_callback: progress_callback(60, "Generating clips...")

        # Determine video duration safely: prefer the precomputed value,
        # then the open clip, then (last resort) probe the file ourselves.
        video_duration = 0
        if "duration" in data and data["duration"]:
            video_duration = data["duration"]
        elif video_clip:
            video_duration = video_clip.duration
        else:
            try:
                with mpe.VideoFileClip(input_video_path) as temp_vid:
                    video_duration = temp_vid.duration
            except Exception as e:
                logger.error(f"❌ Failed to get video duration: {e}")

        output_files = []

        # Initialize Translator if needed
        translator = None
        target_language = None
        if language:
            target_language = language.value if hasattr(language, 'value') else language

        detected_lang = data.get("detected_language", "en")
        needs_translation = (target_language and
                             target_language != "auto" and
                             target_language != detected_lang)

        if needs_translation:
            logger.info(f"🌍 Translating from {detected_lang} to {target_language}...")
            translator = FreeTranslator()

        try:
            if not best_clips:
                logger.warning("⚠️ No best clips provided to process_clips!")
                return []

            logger.info(f"📊 Starting processing for {len(best_clips)} clips...")

            for i, seg in enumerate(best_clips):
                # Update progress: rendering spans the 60%-95% window.
                current_progress = 60 + int((i / len(best_clips)) * 35)  # 60% to 95%
                if progress_callback:
                    progress_callback(current_progress, f"Rendering clip {i+1} of {len(best_clips)}...")

                clip = None
                final_clip = None
                current_video_clip = None  # Local handle for this iteration

                try:
                    # Clamp requested times to the actual video bounds.
                    start = max(0, seg.get("start_time", 0))
                    end = min(video_duration, seg.get("end_time", 0))

                    # Ensure valid duration
                    if end - start < 1.0:
                        logger.warning(f"⚠️ Clip {i+1} too short ({end-start:.2f}s), skipping.")
                        continue

                    # TRANSLATION STEP: Translate only the current segment if needed
                    # NOTE(review): this step mutates data['segments'] in place,
                    # and the caption-preparation loop further below translates
                    # matching segments AGAIN via translator.translate_text —
                    # already-translated text gets re-translated (usually a
                    # no-op but wasted API calls). Translation should probably
                    # live in only one of the two places.
                    if needs_translation and translator:
                        try:
                            # Find matching transcript segments for this clip
                            # Note: segments in 'data' use 'start' and 'end' keys
                            matching_segs = [
                                s for s in data.get('segments', [])
                                if s['start'] >= start and s['end'] <= end
                            ]

                            if matching_segs:
                                logger.info(f"🌍 Translating {len(matching_segs)} segments for Clip {i+1}...")
                                for match_s in matching_segs:
                                    # Skip if already translated (heuristic check if needed, but safe to re-translate if simple)
                                    # Or better, check if text is already in target language?
                                    # Since we modify in place, subsequent clips covering same segment might re-translate.
                                    # But clips usually don't overlap much.

                                    tr_text, _ = translator.translate_text(match_s['text'], target_language)
                                    match_s['text'] = tr_text
                                    # Clear words to force interpolation since word-level timing is lost
                                    if 'words' in match_s:
                                        match_s['words'] = []

                                logger.info(f"✅ Translated clip {i+1} content to {target_language}")

                        except Exception as e:
                            # Best-effort: a failed translation should not kill the render.
                            logger.warning(f"⚠️ Translation failed for clip {i+1}: {e}")

                    logger.info(f"\n🎬 Processing Clip {i+1}/{len(best_clips)} ({start:.2f} - {end:.2f})...")

                    # Ensure style is a clean string
                    style_str = style.value if hasattr(style, "value") else str(style)
                    if "." in style_str:
                        style_str = style_str.split(".")[-1]  # Handle VideoStyle.split_vertical

                    output_filename = f"viral_{i+1}_{style_str}.mp4"
                    # Add task_id to filename if provided to avoid collisions
                    task_id = kwargs.get("task_id")
                    if task_id:
                        output_filename = f"viral_{task_id}_{i+1}_{style_str}.mp4"

                    final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", output_filename)
                    os.makedirs(os.path.dirname(final_output), exist_ok=True)

                    if start >= video_duration:
                        logger.warning(f"⚠️ Clip start time {start} is beyond video duration {video_duration}, skipping.")
                        continue

                    # 1. Cut the clip
                    # ALWAYS open a fresh VideoFileClip for each iteration to avoid shared reader issues
                    # and allow full cleanup (closing reader) after each clip.
                    current_video_clip = mpe.VideoFileClip(input_video_path)
                    clip = current_video_clip.subclip(start, end)

                    # 2. Get the style strategy
                    style_strategy = StyleFactory.get_style(style)
                    logger.info(f"✨ Applying style: {style}")

                    # 3. Handle Translation and Captions PREPARATION
                    segment_transcript = {"segments": []}

                    # Filter relevant segments for this clip (any overlap counts).
                    for s in data["segments"]:
                        if s["start"] < end and s["end"] > start:
                            # Clone the segment to avoid modifying original data
                            new_seg = s.copy()

                            # Adjust timestamps relative to clip start
                            new_seg["start"] = max(0, s["start"] - start)
                            new_seg["end"] = min(end - start, s["end"] - start)

                            if needs_translation and translator:
                                # NOTE(review): see double-translation note above —
                                # this may re-translate text already translated in place.
                                logger.info(f"🌍 Translating segment: {s['text'][:30]}...")
                                translated_text, _ = translator.translate_text(s['text'], target_language)
                                new_seg["text"] = translated_text
                                # Simple word distribution for translated text:
                                # spread words evenly since real timings are lost.
                                words = translated_text.split()
                                seg_duration = new_seg["end"] - new_seg["start"]
                                word_duration = seg_duration / len(words) if words else seg_duration
                                new_seg["words"] = []
                                for idx, w in enumerate(words):
                                    new_seg["words"].append({
                                        "text": w,
                                        "start": new_seg["start"] + (idx * word_duration),
                                        "end": new_seg["start"] + ((idx + 1) * word_duration)
                                    })
                            else:
                                # Adjust word timestamps if they exist
                                if "words" in s:
                                    new_words = []
                                    for w in s["words"]:
                                        if w["start"] < end and w["end"] > start:
                                            nw = w.copy()
                                            nw["start"] = max(0, w["start"] - start)
                                            nw["end"] = min(end - start, w["end"] - start)
                                            new_words.append(nw)
                                    new_seg["words"] = new_words

                            segment_transcript["segments"].append(new_seg)

                    # 4. Use the optimized apply_with_captions method
                    final_clip = style_strategy.apply_with_captions(
                        clip,
                        transcript_data=segment_transcript,
                        language=target_language if needs_translation else detected_lang,
                        caption_mode=kwargs.get('caption_mode', 'sentence'),
                        caption_style=kwargs.get('caption_style', 'classic'),
                        background_path=kwargs.get("background_path"),
                        playground_path=kwargs.get("playground_path")
                    )

                    # 5. Write Output
                    # Automatically use all available CPU cores
                    # os.cpu_count() returns None if undetermined, so we default to 4 in that case
                    cpu_count = os.cpu_count() or 4
                    logger.info(f"⚙️ Using {cpu_count} threads for video rendering")

                    final_clip.write_videofile(
                        final_output,
                        codec="libx264",
                        audio_codec="aac",
                        threads=cpu_count,
                        logger=None
                    )

                    output_files.append(final_output)

                except Exception as e:
                    # A failed clip is logged and skipped; remaining clips still render.
                    logger.error(f"❌ Error processing clip {i+1}: {e}")
                    logger.error(traceback.format_exc())
                finally:
                    # 🧹 Explicit Cleanup — best-effort close of every moviepy
                    # handle opened this iteration; errors (e.g. already closed)
                    # are deliberately ignored.
                    if final_clip:
                        try: final_clip.close()
                        except: pass
                    if clip:
                        try: clip.close()
                        except: pass
                    if current_video_clip:
                        try: current_video_clip.close()
                        except: pass

                    # Force garbage collection to release ffmpeg readers promptly.
                    import gc
                    gc.collect()

        except Exception as e:
            logger.error(f"❌ Error in processing loop: {e}")
            logger.error(traceback.format_exc())

        # Note: We don't close passed video_clip here because we didn't open it (or we treated it as read-only for duration)
        # The caller is responsible for closing video_clip if they passed it.

        return output_files
413
+
414
# -----------------------------------------------------------------------------
# Module Level Function to wrap the class usage
# -----------------------------------------------------------------------------
def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
    """Main entry point to process a video end-to-end.

    Args:
        video_path: Path to the source video file.
        style: Visual style applied to the rendered clips.
        model_size: Whisper model size for transcription.
        **kwargs: Optional language, caption_mode, caption_style, task_id,
            background_path, playground_path, etc., forwarded downstream.

    Returns:
        List of rendered clip file paths; empty list on failure or when no
        viral segments are found.
    """
    video_clip = None
    try:
        processor = VideoProcessor(model_size=model_size)

        # 1. Open Video Clip ONCE
        video_clip = mpe.VideoFileClip(video_path)

        # 2. Analyze (Reuse video_clip)
        caption_mode = kwargs.get("caption_mode", "sentence")
        timestamp_mode = "words" if caption_mode == "word" else "segments"

        # BUG FIX: 'language' must be *popped* out of kwargs here. It was
        # previously passed explicitly AND left inside **kwargs when calling
        # process_clips(), which raises
        # "TypeError: got multiple values for keyword argument 'language'"
        # whenever a language is supplied.
        language = kwargs.pop("language", None)

        viral_segments, duration, stt_data = processor.analyze_impact(
            video_path,
            video_clip=video_clip,
            language=language,
            timestamp_mode=timestamp_mode
        )

        if not viral_segments:
            logger.warning("⚠️ No viral segments found.")
            return []

        # 3. Process Clips (Reuse video_clip and STT data)
        output_files = processor.process_clips(
            video_path,
            viral_segments,
            stt_data,
            style=style,
            language=language,
            video_clip=video_clip,
            **kwargs
        )

        return output_files

    except Exception as e:
        logger.error(f"❌ Processing failed: {e}")
        logger.error(traceback.format_exc())
        return []
    finally:
        # Always release the shared ffmpeg reader, even on failure.
        if video_clip:
            video_clip.close()
463
+
464
if __name__ == "__main__":
    import sys

    # Minimal CLI entry point: python processor.py <video_path>
    cli_args = sys.argv[1:]
    if cli_args:
        process_video(cli_args[0])
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ moviepy==1.0.3
5
+ faster-whisper
6
+ python-dotenv
7
+ opencv-python-headless
8
+ numpy
9
+ pillow
10
+ requests
11
+ arabic-reshaper
12
+ python-bidi
13
+ imageio==2.25.1
14
+ imageio-ffmpeg==0.4.8
15
+ openai>=1.0.0
16
+ scipy
17
+ json_repair