aliSaac510 commited on
Commit
342e0fb
·
0 Parent(s):

Update: Auto-thread support and Task Queue

Browse files
.dockerignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .Python
6
+ env
7
+ venv
8
+ .env
9
+ .git
10
+ .gitignore
11
+ uploads/
12
+ outputs/
13
+ temp/
14
+ logs/
15
+ .DS_Store
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.ttf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ venv/
6
+ .env
7
+ .venv/
8
+
9
+ # Project Specific
10
+ uploads/*
11
+ outputs/*
12
+ tmp/*
13
+ temp/*
14
+ logs/*
15
+ fonts/*
16
+ !uploads/.gitkeep
17
+ !outputs/.gitkeep
18
+ !tmp/.gitkeep
19
+ !temp/.gitkeep
20
+ !logs/.gitkeep
21
+ *.mp4
22
+ *.mp3
23
+ *.wav
24
+ *.ttf
25
+
26
+ # Test files
27
+ test_*.py
28
+ test.py
29
+ my_movie.mp4
30
+ My Recording_1.mp4
31
+
32
+ # OS
33
+ .DS_Store
34
+ Thumbs.db
35
+ .idea/
36
+ .vscode/
Dockerfile ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set environment variables
# PYTHONDONTWRITEBYTECODE: no .pyc files; PYTHONUNBUFFERED: flush logs immediately.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PORT=7860

# Install system dependencies
# ffmpeg/imagemagick for video and caption rendering; libgl1/libglib2.0-0 and
# the X libs are runtime requirements of OpenCV (cv2) in a headless container.
RUN apt-get update && apt-get install -y \
    ffmpeg \
    imagemagick \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Fix ImageMagick policy to allow processing (checking both version 6 and 7)
# The default Debian policy denies read/write on "@*" path patterns, which
# blocks MoviePy's TextClip rendering.
RUN if [ -f /etc/ImageMagick-6/policy.xml ]; then \
    sed -i 's/domain="path" rights="none" pattern="@\*"/domain="path" rights="read|write" pattern="@\*"/g' /etc/ImageMagick-6/policy.xml; \
    fi; \
    if [ -f /etc/ImageMagick-7/policy.xml ]; then \
    sed -i 's/domain="path" rights="none" pattern="@\*"/domain="path" rights="read|write" pattern="@\*"/g' /etc/ImageMagick-7/policy.xml; \
    fi

# Create a non-root user
# NOTE(review): UID 1000 looks like the Hugging Face Spaces convention — confirm.
RUN useradd -m -u 1000 user

# Set working directory and ownership
WORKDIR /app
RUN chown -R user:user /app

# Switch to non-root user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

# Copy requirements and install
# Copied separately from the app code so the pip layer caches across code edits.
COPY --chown=user:user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the rest of the application
COPY --chown=user:user . /app

# Create necessary directories with correct permissions
RUN mkdir -p uploads outputs/viral_clips temp logs fonts && \
    chmod -R 755 uploads outputs/viral_clips temp logs fonts

# Expose the port
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Auto Clipper
3
+ emoji: 🎬
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 7860
9
+ ---
10
+
11
+ # Auto Clipper AI 🚀
12
+
13
+ An automated AI tool to extract viral clips from long videos using Faster-Whisper and Llama-3.
14
+
15
+ ## Features
16
+ - 🎙️ Automatic Speech Recognition (STT)
17
+ - 🤖 AI-powered viral segment analysis
18
+ - ✨ Multiple video styles (Cinematic Blur, Split Screen, Smart Crop)
19
+ - 📝 Automatic TikTok-style captions
20
+
21
+ ## Local Setup
22
+ 1. Clone the repository
23
+ 2. Install dependencies: `pip install -r requirements.txt`
24
+ 3. Set up `.env` with your `GROQ_API_KEY`
25
+ 4. Run: `uvicorn main:app --port 8000`
26
+
27
+ ## API Endpoints
28
+ - `POST /auto-clip`: Upload a video and get viral clips.
core/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import moviepy.editor as mpe

# Compatibility shim: MoviePy 2.x renamed several clip methods
# (set_audio -> with_audio, resize -> resized, subclip -> subclipped).
# This project calls the 2.x names, so when running under MoviePy 1.x we
# alias each new name to the old implementation.  Each alias is guarded so
# that a 2.x install (where the new names already exist) is left untouched.
#
# Bug fixed: the original `if not hasattr(mpe, 'AudioClip'): mpe.AudioClip =
# mpe.AudioClip` raised AttributeError exactly when the attribute was missing
# (and was a no-op otherwise), and the unconditional assignments below it
# crashed on MoviePy 2.x where `set_audio`/`resize`/`subclip` no longer exist.
if not hasattr(mpe.VideoFileClip, 'with_audio'):
    mpe.VideoFileClip.with_audio = mpe.VideoFileClip.set_audio
if not hasattr(mpe.VideoFileClip, 'resized'):
    mpe.VideoFileClip.resized = mpe.VideoFileClip.resize
if not hasattr(mpe.VideoFileClip, 'subclipped'):
    mpe.VideoFileClip.subclipped = mpe.VideoFileClip.subclip
if not hasattr(mpe.AudioFileClip, 'subclipped'):
    mpe.AudioFileClip.subclipped = mpe.AudioFileClip.subclip
core/analyze.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

# OpenRouter API setup.
# The key is loaded from the .env file (make sure OPENROUTER_API_KEY is set).
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY")
)
14
+
15
def analyze_transcript_gemini(transcript):
    """Analyze a timestamped transcript and return candidate viral segments.

    Despite its historical name (kept for backward compatibility with existing
    callers), the request is sent to DeepSeek via OpenRouter, not Gemini.

    Args:
        transcript: Transcript text, one "[start - end] text" entry per line.

    Returns:
        dict: {"content": <raw model output string>}.  After all retries fail
        the content is the literal JSON string '{"segments": []}' so that
        downstream JSON parsing still succeeds.
    """
    prompt = f"""
    You are an expert video editor and viral content strategist. Your task is to identify the most engaging segments from the provided transcript that are suitable for short-form video platforms like TikTok, Reels, and YouTube Shorts.

    STRICT JSON OUTPUT FORMAT REQUIRED:
    You must output ONLY valid JSON. Do not include any markdown formatting (like ```json ... ```), explanations, or additional text outside the JSON object.

    The JSON structure must be exactly as follows:
    {{
        "segments": [
            {{
                "start_time": <float, start time in seconds>,
                "end_time": <float, end time in seconds>,
                "duration": <float, duration in seconds>,
                "description": "<string, brief summary of the clip content 10 words max>",
                "viral_score": <float, score from 0-10 indicating viral potential>,
                "reason": "<string, explanation of why this segment is engaging>"
            }}
        ]
    }}

    SELECTION CRITERIA:
    1. **Standalone Quality**: Each clip must make sense on its own without prior context. Avoid starting with conjunctions like "And", "But", "So" unless they are part of a complete thought.
    2. **Engagement**: Look for strong hooks, emotional moments, humor, surprising facts, or actionable advice.
    3. **Duration**: Prioritize clips between 30 and 180 seconds.
    4. **Completeness**: Ensure the clip has a clear beginning and end. Do not cut off sentences.

    IMPORTANT:
    - Return valid JSON only.
    - If no suitable segments are found, return {{ "segments": [] }}.
    - Ensure all strings are properly escaped.

    Transcript to Analyze:
    {transcript}
    """

    max_retries = 3
    base_delay = 5  # seconds; doubled on each retry (exponential backoff)

    for attempt in range(max_retries):
        try:
            # DeepSeek-V3 via OpenRouter.
            model_name = "deepseek/deepseek-v3.2"

            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                extra_headers={
                    "HTTP-Referer": "https://github.com/Start-To-End-AI",  # Optional. Site URL for rankings on openrouter.ai.
                    "X-Title": "Video Clipper AI",  # Optional. Site title for rankings on openrouter.ai.
                },
                extra_body={
                    "reasoning": {"enabled": True}
                },
                temperature=0.7,
            )

            return {"content": response.choices[0].message.content}

        except Exception as e:
            print(f"❌ Error in OpenRouter analysis: {e}")

            # Back off and retry unless this was the final attempt.
            if attempt < max_retries - 1:
                wait_time = base_delay * (2 ** attempt)
                print(f"⚠️ Retrying in {wait_time}s... (Attempt {attempt + 1}/{max_retries})")
                time.sleep(wait_time)

    # Fallback so callers that json.loads() the content never crash.
    print("❌ All retry attempts failed.")
    return {"content": '{"segments": []}'}
96
+
97
+
98
+
99
# Manual smoke test: run this module directly to exercise the analysis call.
if __name__ == "__main__":
    # Minimal sample transcript in the "[start - end] text" format.
    test_transcript = "[0.0 - 5.0] This is amazing content about viral videos!"
    result = analyze_transcript_gemini(test_transcript)
    print("Gemini Analysis Result:", result)
core/config.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+
4
class Config:
    """Project-wide configuration: directory layout, downloadable fonts,
    language-to-font mapping and video rendering settings."""

    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    TEMP_DIR = os.path.join(BASE_DIR, "temp")
    UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
    OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
    LOGS_DIR = os.path.join(BASE_DIR, "logs")

    # Font URLs - Google Fonts CSS API
    # We use the CSS API to get the correct WOFF2/TTF file
    FONTS = {
        "Roboto-Bold.ttf": "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",
        "NotoSansArabic-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",
        "NotoSansSC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
        "NotoSansJP-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
        "NotoSansDevanagari-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
        "Cairo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
        "Montserrat-Bold.ttf": "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap"
    }

    # Dynamic Language to Font Mapping
    LANGUAGE_FONT_MAP = {
        "ar": "NotoSansArabic-Bold.ttf",        # Arabic
        "zh": "NotoSansSC-Bold.ttf",            # Chinese
        "ja": "NotoSansJP-Bold.ttf",            # Japanese
        "hi": "NotoSansDevanagari-Bold.ttf",    # Hindi
        "ru": "Roboto-Bold.ttf",                # Russian (Supported by Roboto)
        "en": "Roboto-Bold.ttf",                # English
        "default": "Roboto-Bold.ttf"
    }

    # Video Settings
    DEFAULT_SIZE = (1080, 1920)
    CHUNK_SIZE_SECONDS = 600
    OVERLAP_SECONDS = 60

    # Styles
    STYLES = [
        "cinematic",
        "cinematic_blur",
        "vertical_full",
        "split_vertical",
        "split_horizontal"
    ]

    @classmethod
    def setup_dirs(cls):
        """Create all working directories if they do not already exist."""
        for d in [cls.TEMP_DIR, cls.UPLOADS_DIR, cls.OUTPUTS_DIR, cls.LOGS_DIR]:
            os.makedirs(d, exist_ok=True)

    @staticmethod
    def get_urls(content):
        """Extract every url(...) value from a CSS document.

        Fixed vs. the original scanner: requires the full ``url(`` marker
        (a bare ``url`` substring inside other text no longer triggers a
        match) and ignores an unterminated ``url(`` at the end of a
        truncated document instead of raising IndexError.
        """
        urls = []
        pos = 0
        while True:
            start = content.find('url(', pos)
            if start == -1:
                break
            end = content.find(')', start + 4)
            if end == -1:
                # Unterminated url( — truncated CSS; ignore the tail.
                break
            urls.append(content[start + 4:end])
            pos = end + 1
        return urls

    @staticmethod
    def download_font_from_css(css_url, output_path):
        """Download the first font file referenced by a Google Fonts CSS URL.

        Args:
            css_url: Google Fonts CSS2 API URL.
            output_path: Where to write the downloaded font file.

        Returns:
            True on success, False on any failure (logged, never raised).
        """
        try:
            # 1. Fetch CSS content
            # Add User-Agent to avoid getting minimal CSS or being blocked
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
            # Timeout added: without one a hung connection blocks forever.
            response = requests.get(css_url, headers=headers, timeout=30)
            response.raise_for_status()
            content = response.text

            # 2. Extract URLs
            urls = Config.get_urls(content)

            if not urls:
                print(f"❌ No font URLs found in CSS: {css_url}")
                return False

            # 3. Download the first font found (usually the most specific/relevant or primary subset)
            # For Arabic fonts like Cairo/NotoSansArabic, the first subset is usually the Arabic one.
            font_url = urls[0]

            print(f"⬇️ Downloading font from: {font_url}")
            font_response = requests.get(font_url, headers=headers, timeout=60)
            font_response.raise_for_status()

            with open(output_path, 'wb') as f:
                f.write(font_response.content)

            print(f"✅ Font saved to: {output_path}")
            return True

        except Exception as e:
            print(f"❌ Failed to download font from CSS {css_url}: {e}")
            return False
core/free_translator.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import urllib.request
4
+ import urllib.parse
5
+
6
class FreeTranslator:
    """Free text translation via the public MyMemory API, using only urllib
    (no httpx/requests dependency)."""

    def __init__(self):
        pass

    def translate_text(self, text, target_language_code):
        """Translate *text* into the target language.

        NOTE(review): the langpair hard-codes English ("en") as the source
        language — confirm callers only feed English text here.

        Args:
            text: Source text (assumed English).
            target_language_code: Target language code, e.g. "ar", "fr".

        Returns:
            (translated_text, highlight_words): the translation plus a short
            list of words worth visually highlighting in captions.  On any
            failure the original text is returned with no highlights.
        """
        if not text.strip():
            return "", []

        # The original identity lang_map ({"ar": "ar", ...} with an identity
        # fallback) was dead code and has been removed — the target code is
        # passed straight through, exactly as before.
        try:
            # Build the request with urllib to avoid the httpx issue.
            url = "https://api.mymemory.translated.net/get"
            params = {
                'q': text,
                'langpair': f'en|{target_language_code}'
            }
            full_url = url + '?' + urllib.parse.urlencode(params)

            req = urllib.request.Request(full_url)
            req.add_header('User-Agent', 'Mozilla/5.0')

            with urllib.request.urlopen(req, timeout=10) as response:
                if response.status == 200:
                    data = json.loads(response.read().decode())
                    if data.get('responseStatus') == 200:
                        translated_text = data['responseData']['translatedText']
                        return translated_text, self._pick_highlights(translated_text)

            # Fallback: return the original, untranslated text.
            return text, []

        except Exception as e:
            print(f"⚠️ Error in free translation: {e}")
            return text, []

    @staticmethod
    def _pick_highlights(translated_text):
        """Choose caption-highlight words: known 'exciting' words first;
        otherwise fall back to the two longest words."""
        words = translated_text.split()

        # Common high-energy words (English + Arabic).
        exciting_words = [
            "amazing", "incredible", "awesome", "fantastic", "perfect", "best", "ultimate",
            "رائع", "مذهل", "أفضل", "مثالي", "خرافي", "لا يصدق", "عجيب"
        ]

        highlight_words = [w for w in words if w.lower().strip(".,!?") in exciting_words]

        # No exciting words found: highlight the two longest words instead.
        if not highlight_words and len(words) >= 2:
            highlight_words = sorted(words, key=len, reverse=True)[:2]

        return highlight_words
core/logger.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from datetime import datetime
4
+
5
class Logger:
    """Lazy, one-time configuration of the application's logging handlers."""

    _instance = None

    @staticmethod
    def get_logger(name="AppLogger"):
        """Return a named logger, configuring the root handlers on first use."""
        if not getattr(Logger, "_is_configured", False):
            Logger._setup_handlers()
            Logger._is_configured = True
        return logging.getLogger(name)

    @staticmethod
    def _setup_handlers():
        """Attach a console handler and a dated file handler to the root logger."""
        root = logging.getLogger()
        root.setLevel(logging.INFO)

        # Drop any pre-existing handlers so repeated setup never duplicates output.
        root.handlers.clear()

        # Console output.
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        console.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        root.addHandler(console)

        # File output (best-effort: failure falls back to console-only logging).
        try:
            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            log_dir = os.path.join(base_dir, "logs")
            os.makedirs(log_dir, exist_ok=True)
            log_path = os.path.join(log_dir, f"{datetime.now().strftime('%Y-%m-%d')}.log")
            file_handler = logging.FileHandler(log_path, encoding='utf-8')
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
            root.addHandler(file_handler)
        except Exception as e:
            print(f"Failed to setup file logging: {e}")
45
+
core/stt.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import moviepy.editor as mpe
3
+ import os
4
+ import sys
5
+ import json
6
+ from datetime import datetime
7
+
8
+ # إضافة المسار الجذري للمشروع لضمان استيراد الموديولات بشكل صحيح
9
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
10
+
11
# Use Faster-Whisper (chosen to work around the httpx dependency issue).
# Degrade gracefully if the package is missing so the module still imports;
# STT.__init__ raises ImportError in that case.
try:
    from faster_whisper import WhisperModel
    faster_whisper_available = True
except ImportError:
    print("⚠️ Faster-Whisper not available, please install: pip install faster-whisper")
    faster_whisper_available = False
18
+
19
class STT:
    """Speech-to-text via Faster-Whisper with on-disk transcript caching.

    The Whisper model is loaded once at construction (GPU with float16 when
    available, otherwise int8 CPU).  get_transcript() returns
    (segments, full_text, duration_seconds, detected_language).
    """

    def __init__(self, model_size="base"):
        # NOTE(review): self.duration is never updated inside this class —
        # the duration is returned from get_transcript() instead; presumably legacy.
        self.duration = 0
        self.model_size = model_size
        if not faster_whisper_available:
            raise ImportError("Faster-Whisper is not available")

        # Load the model once when the class is initialised, with GPU support for speed.
        print(f"🚀 Loading Faster-Whisper Model ({model_size})...")
        try:
            # Try GPU first for a large speedup.
            self.model = WhisperModel(model_size, device="cuda", compute_type="float16")
            print("✅ Using GPU for faster processing")
        except Exception as e:
            # Fall back to quantised CPU inference.
            print(f"⚠️ GPU not available, using CPU with {model_size} model: {e}")
            self.model = WhisperModel(model_size, device="cpu", compute_type="int8")

    def get_transcript(self, video_path: str, language: str = None, skip_ai: bool = False, timestamp_mode="segments"):
        """Transcribe a video to timed text using Faster-Whisper.

        Args:
            video_path: Path to the media file to transcribe.
            language: Language code, a Language enum value, or 'auto'/None for
                automatic detection.
            skip_ai: Accepted for interface compatibility; not used in the
                active code path here.
            timestamp_mode: "words" for per-word timings, "segments" for
                full-sentence chunks.

        Returns:
            (segments_list, full_text, duration_seconds, detected_language)
        """
        print(f"🎙️ Transcribing: {video_path} (Language: {language if language else 'Auto'}, Mode: {timestamp_mode})")

        # Transcript session log (logs/transcript.log under the project root).
        log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs", "transcript.log")

        # Normalise the language argument to a plain Whisper language code.
        actual_stt_lang = None
        if language:
            # Accept enum objects (e.g. Language.ar) as well as plain strings.
            if hasattr(language, 'value'):
                lang_val = language.value
            else:
                lang_val = str(language)

            # 'auto' means let Whisper detect the language itself.
            if lang_val != 'auto':
                actual_stt_lang = lang_val
            else:
                actual_stt_lang = None  # Whisper will auto-detect

        print(f"🔍 STT Debug - Language param: {language} -> actual_stt_lang: {actual_stt_lang}")

        # -------------------------------------------------------------------------
        # ⚡ PERFORMANCE CACHING START
        # -------------------------------------------------------------------------
        import hashlib

        # Create a unique cache key based on file properties and parameters
        try:
            file_stat = os.stat(video_path)
            unique_str = f"{video_path}_{file_stat.st_size}_{file_stat.st_mtime}_{actual_stt_lang}_{timestamp_mode}"
            file_hash = hashlib.md5(unique_str.encode()).hexdigest()

            cache_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "temp", "stt_cache")
            os.makedirs(cache_dir, exist_ok=True)
            cache_path = os.path.join(cache_dir, f"{file_hash}.json")

            if os.path.exists(cache_path):
                print(f"🚀 PERFORMANCE: Loading cached transcript from {cache_path}")
                try:
                    with open(cache_path, "r", encoding="utf-8") as f:
                        cached_data = json.load(f)
                    print(f"✅ Cache Hit! Skipping Whisper processing.")
                    return cached_data["segments"], cached_data["text"], cached_data["duration"], cached_data["language"]
                except Exception as e:
                    # Corrupted cache entry: fall through and re-transcribe.
                    print(f"⚠️ Cache file corrupted, re-processing: {e}")
        except Exception as e:
            print(f"⚠️ Could not setup caching: {e}")
        # -------------------------------------------------------------------------
        # ⚡ PERFORMANCE CACHING END
        # -------------------------------------------------------------------------

        # Whether to ask Whisper for per-word timestamps.
        word_timestamps = timestamp_mode == "words"

        print(f"🔍 STT Debug - Video: {os.path.basename(video_path)}")
        print(f"🔍 STT Debug - Model size: {self.model_size}")
        print(f"🔍 STT Debug - Beam size: 1, Word timestamps: {word_timestamps}")
        print(f"🔍 STT Debug - Starting transcription...")

        segments, info = self.model.transcribe(
            video_path,
            beam_size=1,
            word_timestamps=word_timestamps,
            language=actual_stt_lang,
            vad_filter=True,  # drop silent stretches before decoding
            vad_parameters=dict(min_silence_duration_ms=500)
        )
        detected_lang = info.language

        print(f"🔍 STT Debug - Detected language: {detected_lang}")
        print(f"🔍 STT Debug - Processing segments...")

        segments_list = []
        full_text = ""

        # Write a session header to the transcript log (best-effort).
        try:
            with open(log_file, "a", encoding="utf-8") as f:
                f.write(f"\n{'='*60}\n")
                f.write(f"🎙️ TRANSCRIPT SESSION\n")
                f.write(f"📅 Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"📹 Video: {os.path.basename(video_path)}\n")
                f.write(f"🌍 Language: {actual_stt_lang if actual_stt_lang else 'Auto-detected'}\n")
                f.write(f"🎯 Mode: {timestamp_mode}\n")
                f.write(f"{'='*60}\n\n")
        except Exception as e:
            print(f"⚠️ Could not open log file: {e}")

        # Import the free translation helper only (optional within the project).
        try:
            from core.free_translator import FreeTranslator
            translator = FreeTranslator()
            use_free_translator = True
        except ImportError:
            print("⚠️ Free translator not available, using original text")
            use_free_translator = False

        # Whether AI post-processing (translation / highlighting) is needed.
        # The auto-detected language is used, not the requested one.
        # Translation happens in a separate stage in processor.py, so this
        # inline path is permanently disabled here.
        needs_ai_processing = False  # disabled — translation is done in processor.py

        # Make sure the language is a plain string and not an Enum object.
        if hasattr(language, 'value'):
            lang_str = language.value
        else:
            lang_str = str(language) if language else detected_lang

        # Process the transcription results segment by segment.
        for segment in segments:
            segment_text = segment.text.strip()

            # Log the raw segment (best-effort).
            try:
                with open(log_file, "a", encoding="utf-8") as f:
                    f.write(f"[{segment.start:.2f} - {segment.end:.2f}] {segment_text}\n")
            except Exception as e:
                print(f"⚠️ Could not write to log file: {e}")

            if needs_ai_processing and use_free_translator:
                # (Dead path while needs_ai_processing is False above — kept
                # for potential re-enabling of inline translation.)
                print(f"🧠 AI Processing ({detected_lang} -> {lang_str}): {segment_text[:50]}...")
                processed_text, highlight_words = translator.translate_text(segment_text, lang_str)

                if timestamp_mode == "words" and segment.words:
                    # Word mode: reuse the original word timings for the
                    # translated words, paired by position.
                    target_words = processed_text.split()
                    words_list = []

                    for i, word_info in enumerate(segment.words):
                        if i < len(target_words):
                            is_highlight = any(h in target_words[i] for h in highlight_words)
                            words_list.append({
                                "text": target_words[i],
                                "start": word_info.start,
                                "end": word_info.end,
                                "is_highlight": is_highlight
                            })

                    segments_list.append({
                        "text": processed_text,
                        "start": segment.start,
                        "end": segment.end,
                        "words": words_list
                    })

                else:
                    # Sentence mode: smart splitting, as before.
                    target_words = processed_text.split()

                    # --- Professional sub-segmenting (Max 5 words or 3 seconds) ---
                    MAX_WORDS_PER_SEGMENT = 5
                    MAX_DURATION_PER_SEGMENT = 3.0

                    current_sub_words = []
                    segment_duration = segment.end - segment.start
                    # Word timings are spread evenly across the segment.
                    avg_word_duration = segment_duration / max(len(target_words), 1)

                    for i, w in enumerate(target_words):
                        is_highlight = any(h in w for h in highlight_words)
                        word_data = {
                            "text": w,
                            "start": segment.start + (i * avg_word_duration),
                            "end": segment.start + ((i + 1) * avg_word_duration),
                            "is_highlight": is_highlight
                        }
                        current_sub_words.append(word_data)

                        # Flush on word-count or duration limits, or at the end.
                        current_duration = current_sub_words[-1]["end"] - current_sub_words[0]["start"]

                        if len(current_sub_words) >= MAX_WORDS_PER_SEGMENT or current_duration >= MAX_DURATION_PER_SEGMENT or i == len(target_words) - 1:
                            sub_segment_text = " ".join([sw["text"] for sw in current_sub_words])
                            segments_list.append({
                                "text": sub_segment_text,
                                "start": current_sub_words[0]["start"],
                                "end": current_sub_words[-1]["end"],
                                "words": current_sub_words.copy()
                            })
                            full_text += sub_segment_text + " "
                            current_sub_words = []
                    # ----------------------------------------------------------
            else:
                # No AI processing (this is the active path).
                if timestamp_mode == "words" and segment.words:
                    # Word mode without translation.
                    words_list = []
                    for word_info in segment.words:
                        words_list.append({
                            "text": word_info.word.strip(),
                            "start": word_info.start,
                            "end": word_info.end,
                            "is_highlight": False
                        })

                    segments_list.append({
                        "text": segment_text,
                        "start": segment.start,
                        "end": segment.end,
                        "words": words_list
                    })
                else:
                    # Sentence mode without translation.
                    words = []
                    if segment.words:
                        for word in segment.words:
                            words.append({
                                "text": word.word.strip(),
                                "start": word.start,
                                "end": word.end,
                                "is_highlight": False
                            })

                    segments_list.append({
                        "text": segment_text,
                        "start": segment.start,
                        "end": segment.end,
                        "words": words
                    })

                full_text += segment_text + " "

        # Write the session summary to the log (best-effort).
        try:
            with open(log_file, "a", encoding="utf-8") as f:
                f.write(f"\n{'='*60}\n")
                f.write(f"📊 SUMMARY:\n")
                f.write(f"📝 Total Segments: {len(segments_list)}\n")
                f.write(f"⏱️ Total Duration: {info.duration:.2f} seconds\n")
                f.write(f"🌍 Detected Language: {detected_lang}\n")
                f.write(f"📄 Full Text Length: {len(full_text)} characters\n")
                f.write(f"🎯 Processing Mode: {timestamp_mode}\n")
                f.write(f"{'='*60}\n\n")
        except Exception as e:
            print(f"⚠️ Could not complete log file: {e}")

        print(f"✅ STT Completed: {len(segments_list)} segments, language: {detected_lang}")

        # -------------------------------------------------------------------------
        # ⚡ PERFORMANCE CACHING SAVE
        # -------------------------------------------------------------------------
        # NOTE(review): cache_path may be unbound here if the caching setup
        # above failed before assigning it; the resulting NameError is
        # swallowed by this try/except, so the effect is a silent cache skip.
        try:
            with open(cache_path, "w", encoding="utf-8") as f:
                json.dump({
                    "segments": segments_list,
                    "text": full_text,
                    "duration": info.duration,
                    "language": detected_lang
                }, f, ensure_ascii=False)
            print(f"💾 Transcript cached to {cache_path}")
        except Exception as e:
            print(f"⚠️ Failed to save cache: {e}")
        # -------------------------------------------------------------------------

        return segments_list, full_text, info.duration, detected_lang

    def __call_whisper__(self, audio_path, language=None, skip_ai=False):
        """Compatibility wrapper matching the direct-call shape used in processor.py."""
        segments_list, full_text, duration, detected_lang = self.get_transcript(audio_path, language=language, skip_ai=skip_ai)
        return {"segments": segments_list, "detected_language": detected_lang, "duration": duration}
core/styles.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ import os
3
+ import cv2
4
+ import moviepy.editor as mpe
5
+ from .config import Config
6
+ from .logger import Logger
7
+ from .subtitle_manager import SubtitleManager
8
+
9
+ logger = Logger.get_logger(__name__)
10
+
11
class SmartFaceCropper:
    """Crops a landscape frame to a vertical window centred on the largest
    detected face, with exponential smoothing of the horizontal position
    so the crop does not jitter between frames."""

    def __init__(self, output_size=(1080, 1920)):
        self.output_size = output_size
        # Haar cascade face detector shipped with OpenCV.
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        # Last detected face rect (fx, fy, fw, fh), full-resolution coords.
        self.last_coords = None
        # Exponentially-smoothed crop-centre x position.
        self.smoothed_x = None
        # EMA factor: 0.2 of the new position per update (lower = steadier).
        self.smoothing = 0.2
        self.frame_count = 0

    def get_crop_coordinates(self, frame):
        """Return (left, top, right, bottom) of the vertical crop window for *frame*.

        Mutates self.smoothed_x / self.last_coords as a side effect.
        """
        h, w = frame.shape[:2]
        # Crop width that preserves the target aspect ratio at full frame height.
        target_w = int(h * self.output_size[0] / self.output_size[1])
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Detect on a half-size frame for speed; coordinates are scaled back below.
        small_gray = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
        faces = self.face_cascade.detectMultiScale(small_gray, 1.1, 8, minSize=(50, 50))

        if len(faces) > 0:
            # Track the largest face (by area).
            faces = sorted(faces, key=lambda f: f[2]*f[3], reverse=True)
            # Scale half-resolution detection back to full-resolution coords.
            fx, fy, fw, fh = [v * 2 for v in faces[0]]
            current_center_x = fx + fw // 2
            self.last_coords = (fx, fy, fw, fh)
        else:
            # No face: hold the previous smoothed position (or frame centre).
            current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x

        # Exponential moving average of the crop centre.
        if self.smoothed_x is None:
            self.smoothed_x = current_center_x
        else:
            self.smoothed_x = self.smoothed_x * (1 - self.smoothing) + current_center_x * self.smoothing

        # Clamp the window inside the frame.
        left = int(self.smoothed_x - target_w // 2)
        left = max(0, min(left, w - target_w))
        return left, 0, left + target_w, h

    def apply_to_clip(self, clip):
        """Return *clip* with the smart vertical crop applied per frame.

        Face detection only runs every `frame_skip` frames; in between, the
        cached smoothed position is reused to save CPU.
        """
        frame_skip = 5

        def filter_frame(get_frame, t):
            frame = get_frame(t)
            self.frame_count += 1

            if self.frame_count % frame_skip == 0 or self.last_coords is None:
                # Full detection pass (updates smoothing state).
                left, top, right, bottom = self.get_crop_coordinates(frame)
            else:
                # Cheap pass: recompute the window from the cached centre only.
                h, w = frame.shape[:2]
                target_w = int(h * self.output_size[0] / self.output_size[1])
                left = int(self.smoothed_x - target_w // 2) if self.smoothed_x else w // 2 - target_w // 2
                left = max(0, min(left, w - target_w))
                right = left + target_w

            cropped = frame[:, left:right]
            return cv2.resize(cropped, self.output_size)
        return clip.fl(filter_frame)
63
+
64
class BaseStyle(ABC):
    """Abstract base class for video styles; also handles caption overlay."""

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        """Apply the concrete style to *clip* and return the styled clip."""
        pass

    # --------------------------------------------------------------------------
    # Combines Style + Captions in ONE CompositeVideoClip.
    # --------------------------------------------------------------------------
    def apply_with_captions(self, clip, transcript_data=None, language=None, caption_mode="sentence", **kwargs):
        """Style *clip* and overlay captions in a single composition pass.

        Flattening the caption layers into the style's own composite avoids
        nesting one CompositeVideoClip inside another (double rendering).
        """
        base = self.apply(clip, **kwargs)

        # Nothing to overlay -> return the styled clip untouched.
        if not transcript_data:
            return base

        overlays = self._create_caption_clips(transcript_data, language, caption_mode)
        if not overlays:
            return base

        if isinstance(base, mpe.CompositeVideoClip):
            # Flatten: reuse the style's own layers plus the caption layers.
            # The layer list is copied so the style's composite is not mutated.
            layers = list(base.clips) + overlays
        else:
            # Plain clip: it becomes the single background layer.
            layers = [base] + overlays
        return mpe.CompositeVideoClip(layers, size=self.output_size)
    # --------------------------------------------------------------------------

    def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
        """Deprecated: prefer apply_with_captions(); kept for backward compatibility."""
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip,
            transcript_data,
            size=self.output_size,
            language=language,
            caption_mode=caption_mode
        )

    def _create_caption_clips(self, transcript_data, language=None, caption_mode="sentence"):
        """Build just the caption clips list (no compositing)."""
        return SubtitleManager.create_caption_clips(
            transcript_data,
            size=self.output_size,
            language=language,
            caption_mode=caption_mode
        )
126
+
127
class CinematicStyle(BaseStyle):
    """Letterbox the clip over a user-supplied background (image or video),
    falling back to a solid black canvas when none is given."""

    _VIDEO_EXTS = ('.mp4', '.avi', '.mov', '.mkv', '.webm')

    def apply(self, clip, background_path=None, **kwargs):
        backdrop = self._build_background(clip, background_path)

        # Fit the foreground to the canvas width first, then shrink by
        # height if that made it taller than the canvas.
        foreground = clip.resize(width=self.output_size[0]).set_position("center")
        if foreground.h > self.output_size[1]:
            foreground = clip.resize(height=self.output_size[1]).set_position("center")

        return mpe.CompositeVideoClip([backdrop, foreground], size=self.output_size)

    def _build_background(self, clip, background_path):
        """Return a background layer that fills the output canvas for the
        whole duration of ``clip``."""
        if not (background_path and os.path.exists(background_path)):
            return mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)

        extension = os.path.splitext(background_path)[1].lower()
        if extension in self._VIDEO_EXTS:
            layer = mpe.VideoFileClip(background_path).without_audio().resize(height=self.output_size[1])
            # Loop short backgrounds, trim long ones, to match the clip.
            if layer.duration < clip.duration:
                layer = layer.loop(duration=clip.duration)
            else:
                layer = layer.subclip(0, clip.duration)
        else:
            layer = mpe.ImageClip(background_path).set_duration(clip.duration).resize(height=self.output_size[1])

        # Span the full canvas width: center-crop when too wide, stretch when
        # too narrow.
        if layer.w > self.output_size[0]:
            return layer.crop(x_center=layer.w / 2, width=self.output_size[0])
        return layer.resize(width=self.output_size[0])
155
+
156
class CinematicBlurStyle(BaseStyle):
    """Classic vertical-video look: a heavily blurred, dimmed copy of the
    clip fills the frame behind a centered full-quality copy."""

    def apply(self, clip, **kwargs):
        out_w, out_h = self.output_size

        # Scale a copy of the clip so it covers the whole canvas.
        backdrop = clip.resize(height=out_h)
        if backdrop.w < out_w:
            backdrop = clip.resize(width=out_w)

        def blur_frame(get_frame, t):
            # Cheap strong blur: crush to 16x16, upscale to the canvas,
            # then smooth the upscaling blockiness with a Gaussian pass.
            original = get_frame(t)
            tiny = cv2.resize(original, (16, 16))
            stretched = cv2.resize(tiny, (out_w, out_h), interpolation=cv2.INTER_LINEAR)
            return cv2.GaussianBlur(stretched, (21, 21), 0)

        backdrop = backdrop.fl(blur_frame).set_opacity(0.6)

        # Foreground: fit to width, then shrink by height if still too tall.
        foreground = clip.resize(width=out_w).set_position("center")
        if foreground.h > out_h:
            foreground = clip.resize(height=out_h).set_position("center")

        return mpe.CompositeVideoClip([backdrop, foreground], size=self.output_size)
177
+
178
class SplitVerticalStyle(BaseStyle):
    """Stack the main clip over a secondary 'playground' clip; without a
    secondary clip, a half-transparent copy of the main clip fills the
    bottom half."""

    def apply(self, clip, playground_path=None, **kwargs):
        half_height = self.output_size[1] // 2
        top_half = clip.resize(height=half_height).set_position(('center', 'top'))

        if playground_path and os.path.exists(playground_path):
            bottom_half = mpe.VideoFileClip(playground_path).without_audio().resize(height=half_height).set_position(('center', 'bottom'))
            # Loop or trim the secondary clip to the main clip's duration.
            if bottom_half.duration < clip.duration:
                bottom_half = bottom_half.loop(duration=clip.duration)
            else:
                bottom_half = bottom_half.subclip(0, clip.duration)
        else:
            bottom_half = clip.resize(height=half_height).set_position(('center', 'bottom')).set_opacity(0.5)

        return mpe.CompositeVideoClip([top_half, bottom_half], size=self.output_size)
194
+
195
class SplitHorizontalStyle(BaseStyle):
    """Place the main clip beside a secondary 'playground' clip; without a
    secondary clip, a half-transparent copy of the main clip fills the
    right half."""

    def apply(self, clip, playground_path=None, **kwargs):
        half_width = self.output_size[0] // 2
        left_half = clip.resize(width=half_width).set_position(('left', 'center'))

        if playground_path and os.path.exists(playground_path):
            right_half = mpe.VideoFileClip(playground_path).without_audio().resize(width=half_width).set_position(('right', 'center'))
            # Loop or trim the secondary clip to the main clip's duration.
            if right_half.duration < clip.duration:
                right_half = right_half.loop(duration=clip.duration)
            else:
                right_half = right_half.subclip(0, clip.duration)
        else:
            right_half = clip.resize(width=half_width).set_position(('right', 'center')).set_opacity(0.5)

        return mpe.CompositeVideoClip([left_half, right_half], size=self.output_size)
211
+
212
class VerticalFullStyle(BaseStyle):
    """Fill the entire vertical frame by smart-cropping around detected
    faces via SmartFaceCropper."""

    def apply(self, clip, **kwargs):
        return SmartFaceCropper(output_size=self.output_size).apply_to_clip(clip)
216
+
217
class StyleFactory:
    """Registry mapping style identifiers to BaseStyle implementations."""

    _styles = {
        "cinematic": CinematicStyle,
        "cinematic_blur": CinematicBlurStyle,
        "split_vertical": SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full": VerticalFullStyle
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """Instantiate the style registered under ``style_name``.

        Unknown (or None) names fall back to CinematicBlurStyle.
        """
        return StyleFactory._styles.get(style_name, CinematicBlurStyle)()
core/subtitle_manager.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import urllib.request
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ import moviepy.editor as mpe
6
+ from arabic_reshaper import reshape
7
+ from bidi.algorithm import get_display
8
+ from .config import Config
9
+ from .logger import Logger
10
+
11
+ logger = Logger.get_logger(__name__)
12
+
13
class SubtitleManager:
    """Builds burned-in caption overlays (PIL-rendered ImageClips) from STT
    transcript segments, with per-language font selection and RTL shaping
    for Arabic text."""

    @staticmethod
    def ensure_font(language=None, style_font=None, text_content=None):
        """Ensures a valid font exists dynamically based on language or content.

        Returns a filesystem path to a usable .ttf, downloading it on demand;
        falls back to the literal font name "Arial" if a download fails.
        """

        # 1. Determine Font Name
        font_name = Config.LANGUAGE_FONT_MAP.get("default", "Roboto-Bold.ttf")

        # Priority 1: Explicit Style Font (if language supports it or it's Latin)
        # However, if text is Arabic/CJK, style font (usually Latin) might break it.
        # So we should check language compatibility first.

        detected_lang = language
        if not detected_lang and text_content:
            # Simple script detection by Unicode block membership.
            if any("\u0600" <= c <= "\u06FF" for c in text_content):
                detected_lang = "ar"
            elif any("\u4E00" <= c <= "\u9FFF" for c in text_content):
                detected_lang = "zh"
            elif any("\u3040" <= c <= "\u309F" for c in text_content) or any("\u30A0" <= c <= "\u30FF" for c in text_content):
                detected_lang = "ja"
            elif any("\u0900" <= c <= "\u097F" for c in text_content):
                detected_lang = "hi"
            elif any("\u0400" <= c <= "\u04FF" for c in text_content):
                detected_lang = "ru"

        # Priority 2: Language-specific font from Config Map
        if detected_lang in Config.LANGUAGE_FONT_MAP:
            font_name = Config.LANGUAGE_FONT_MAP[detected_lang]
        elif style_font and not detected_lang:
            # Only use style font if no specific non-Latin language detected
            font_name = style_font

        # Fallback: if detected language is known but not in map (shouldn't happen with default keys)
        if detected_lang and detected_lang not in Config.LANGUAGE_FONT_MAP:
            logger.warning(f"⚠️ Language {detected_lang} not in font map, using default.")

        font_path = os.path.join(Config.BASE_DIR, font_name)

        if not os.path.exists(font_path):
            logger.info(f"📥 Downloading font: {font_name}...")
            # We might need to add more fonts to Config.FONTS or download dynamically
            url = Config.FONTS.get(font_name)
            if url:
                try:
                    # Use Config's CSS downloader for Google Fonts
                    if "fonts.googleapis.com/css" in url:
                        success = Config.download_font_from_css(url, font_path)
                        if not success:
                            raise Exception("CSS font download failed")
                    else:
                        # Fallback for direct links
                        urllib.request.urlretrieve(url, font_path)

                    logger.info(f"✅ Font downloaded: {font_name}")
                except Exception as e:
                    logger.error(f"❌ Failed to download font: {e}")
                    return "Arial"
            else:
                logger.warning(f"⚠️ No URL found for font: {font_name}")
                # Fallback for now if not in config
                if font_name == "Montserrat-Bold.ttf":  # TikTok popular
                    # Add logic to download or use system font
                    pass

        return font_path

    @staticmethod
    def create_pil_text_clip(text, fontsize, color, font_path, stroke_color='black', stroke_width=2, bg_color=None, padding=10):
        """Creates a text clip using PIL.

        Renders the text (with optional stroke and rounded background box)
        onto a transparent RGBA image and wraps it as a moviepy ImageClip.
        Returns None on any rendering error.
        """
        try:
            try:
                font = ImageFont.truetype(font_path, fontsize)
            except:
                logger.warning(f"⚠️ Failed to load font {font_path}, using default.")
                font = ImageFont.load_default()

            # Measure the text on a throwaway 1x1 canvas first.
            dummy_img = Image.new('RGBA', (1, 1))
            draw = ImageDraw.Draw(dummy_img)
            bbox = draw.textbbox((0, 0), text, font=font)
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]

            # Leave room for the stroke plus configurable padding.
            margin = int(stroke_width * 2) + padding
            img_width = text_width + margin * 2
            img_height = text_height + margin * 2

            img = Image.new('RGBA', (int(img_width), int(img_height)), (0, 0, 0, 0))
            draw = ImageDraw.Draw(img)

            # Draw Background if requested
            if bg_color:
                draw.rounded_rectangle(
                    [(0, 0), (img_width, img_height)],
                    radius=15,
                    fill=bg_color
                )

            # Center the text; subtract the bbox origin so glyph overhang is
            # accounted for.
            x = (img_width - text_width) / 2 - bbox[0]
            y = (img_height - text_height) / 2 - bbox[1]

            draw.text(
                (x, y),
                text,
                font=font,
                fill=color,
                stroke_width=stroke_width,
                stroke_fill=stroke_color
            )

            return mpe.ImageClip(np.array(img))

        except Exception as e:
            logger.error(f"⚠️ PIL Text Error: {e}")
            return None

    @staticmethod
    def get_style_config(style_name):
        """Returns configuration for different caption styles.

        Unknown names fall back to the "classic" preset.
        """
        styles = {
            "classic": {
                "fontsize": 75,
                "color": "white",
                "stroke_color": "black",
                "stroke_width": 2,
                "font": None,  # Default based on language
                "bg_color": None,
                "position": ("center", 1350)
            },
            "tiktok_bold": {
                "fontsize": 85,
                "color": "white",
                "stroke_color": "black",
                "stroke_width": 4,
                "font": "Montserrat-Bold.ttf",  # Popular on TikTok
                "bg_color": None,  # Shadow usually used instead of BG
                "position": ("center", 1400)
            },
            "tiktok_neon": {
                "fontsize": 80,
                "color": "#00f2ea",  # TikTok Cyan
                "stroke_color": "#ff0050",  # TikTok Red
                "stroke_width": 3,
                "font": "Roboto-Bold.ttf",
                "bg_color": None,
                "position": ("center", 1400)
            },
            "youtube_clean": {
                "fontsize": 70,
                "color": "yellow",
                "stroke_color": "black",
                "stroke_width": 3,
                "font": "Roboto-Bold.ttf",
                "bg_color": None,
                "position": ("center", 1300)
            },
            "youtube_box": {
                "fontsize": 65,
                "color": "white",
                "stroke_color": None,
                "stroke_width": 0,
                "font": "Roboto-Bold.ttf",
                "bg_color": "red",  # YouTube Red Box
                "position": ("center", 1300)
            }
        }
        return styles.get(style_name, styles["classic"])

    @staticmethod
    def create_caption_clips(transcript_data, size=(1080, 1920), language=None, caption_mode="sentence", caption_style="classic"):
        """Generates a list of caption ImageClips for the video, without composing them.

        transcript_data may be a dict with 'segments', a list of segment
        dicts, or a list whose first element holds 'segments'. caption_mode
        "word" renders one word per caption; "sentence" groups 4 words.
        """
        all_text_clips = []

        style_config = SubtitleManager.get_style_config(caption_style)

        # We need to peek at the first segment to determine language if not provided
        # Or better, check each chunk dynamically?
        # For simplicity and consistency, let's check the first non-empty text.

        sample_text = ""
        segments = []
        if isinstance(transcript_data, list):
            if len(transcript_data) > 0 and 'segments' in transcript_data[0]:
                segments = transcript_data[0]['segments']
            else:
                segments = transcript_data
        elif isinstance(transcript_data, dict) and 'segments' in transcript_data:
            segments = transcript_data['segments']

        if segments:
            for s in segments:
                if s.get('text'):
                    sample_text = s['text']
                    break

        font_path = SubtitleManager.ensure_font(language, style_config.get("font"), text_content=sample_text)

        for segment in segments:
            full_text = segment.get('text', '').strip()
            if not full_text:
                # Reconstruct the text from word-level entries if needed.
                words = segment.get('words', [])
                full_text = " ".join([w['text'] for w in words])

            if not full_text:
                continue

            start_t = segment.get('start', 0)
            end_t = segment.get('end', 0)

            # Repair degenerate segment timings from the word timestamps.
            if end_t <= start_t:
                if segment.get('words'):
                    start_t = segment['words'][0]['start']
                    end_t = segment['words'][-1]['end']
                else:
                    continue

            words_list = full_text.split()
            if not words_list:
                continue

            chunk_size = 1 if caption_mode == "word" else 4
            chunks = []

            # Use Word Timestamps if available (More Accurate)
            stt_words = segment.get('words')
            if stt_words and len(stt_words) > 0:
                valid_words = [w for w in stt_words if w.get('text', '').strip()]

                if valid_words:
                    for i in range(0, len(valid_words), chunk_size):
                        chunk_group = valid_words[i:i + chunk_size]

                        chunk_text = " ".join([w['text'] for w in chunk_group])
                        chunk_start = chunk_group[0]['start']
                        chunk_end = chunk_group[-1]['end']

                        chunks.append({
                            "text": chunk_text,
                            "start": chunk_start,
                            "end": chunk_end
                        })
            else:
                # Fallback to linear interpolation (Less Accurate)
                words_list = full_text.split()
                if not words_list:
                    continue

                for i in range(0, len(words_list), chunk_size):
                    chunk_words = words_list[i:i + chunk_size]
                    chunk_text = " ".join(chunk_words)

                    # Spread the segment duration proportionally over chunks.
                    chunk_duration = (end_t - start_t) * (len(chunk_words) / len(words_list))
                    chunk_start = start_t + (end_t - start_t) * (i / len(words_list))
                    chunk_end = chunk_start + chunk_duration

                    if chunk_end <= chunk_start:
                        chunk_end = chunk_start + 0.5

                    chunks.append({
                        "text": chunk_text,
                        "start": chunk_start,
                        "end": chunk_end
                    })

            for chunk in chunks:
                display_text = chunk["text"]
                is_arabic = language == "ar" or any("\u0600" <= c <= "\u06FF" for c in display_text)

                if is_arabic:
                    # Reshape + reorder for correct RTL rendering in PIL.
                    try:
                        display_text = get_display(reshape(display_text))
                    except:
                        pass
                else:
                    display_text = display_text.upper()

                # Override size if provided in style, else use dynamic size based on mode
                f_size = style_config.get("fontsize", 75)
                if caption_mode == "word":
                    f_size = int(f_size * 1.4)  # Make word mode larger

                img_clip = SubtitleManager.create_pil_text_clip(
                    display_text,
                    fontsize=f_size,
                    color=style_config.get("color", "white"),
                    font_path=font_path,
                    stroke_color=style_config.get("stroke_color", "black"),
                    stroke_width=style_config.get("stroke_width", 2),
                    bg_color=style_config.get("bg_color")
                )

                if img_clip:
                    # Center horizontally, and place near bottom
                    pos = style_config.get("position", ('center', 1350))
                    txt_clip = img_clip.set_start(chunk["start"]).set_end(chunk["end"]).set_position(pos)
                    all_text_clips.append(txt_clip)

        return all_text_clips

    @staticmethod
    def create_captions(video_clip, transcript_data, size=(1080, 1920), language=None, caption_mode="sentence"):
        """Generates caption clips and composites them onto the video."""
        text_clips = SubtitleManager.create_caption_clips(transcript_data, size, language, caption_mode)
        return mpe.CompositeVideoClip([video_clip] + text_clips, size=size)
core/task_queue.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ import queue
3
+ import uuid
4
+ import time
5
+ from enum import Enum
6
+ from typing import Dict, Any, Optional, Callable
7
+ from core.logger import Logger
8
+
9
+ logger = Logger.get_logger(__name__)
10
+
11
class TaskStatus(str, Enum):
    """Lifecycle states of a queued task; str-valued so they serialize
    directly in JSON status responses."""
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
16
+
17
class TaskManager:
    """Process-wide singleton that runs submitted tasks sequentially on a
    single daemon worker thread and tracks per-task state in memory.

    Task state dicts (see ``add_task``) are exposed verbatim through
    ``get_task_status`` and the /status endpoint.
    """

    _instance = None

    def __new__(cls):
        # Classic singleton: every TaskManager() call yields the same object.
        if cls._instance is None:
            cls._instance = super(TaskManager, cls).__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # __init__ runs on every TaskManager() call; only build the queue
        # and start the worker thread once.
        if self._initialized:
            return

        self.task_queue = queue.Queue()
        self.tasks: Dict[str, Dict[str, Any]] = {}
        self.worker_thread = threading.Thread(target=self._worker, daemon=True)
        self.worker_thread.start()
        self._initialized = True
        logger.info("🚀 Task Manager initialized with background worker")

    def add_task(self, task_func: Callable, *args, **kwargs) -> str:
        """
        Add a task to the processing queue.
        Returns the task_id.
        """
        # Extract task_id if provided, otherwise generate one
        task_id = kwargs.get('task_id')
        if not task_id:
            task_id = uuid.uuid4().hex[:8]
        kwargs['task_id'] = task_id

        self.tasks[task_id] = {
            "id": task_id,
            "status": TaskStatus.PENDING,
            "submitted_at": time.time(),
            # FIX: initialize progress fields up-front so status responses
            # have a stable schema even before the first progress update.
            "progress": 0,
            "message": "",
            "result": None,
            "error": None
        }

        # Add to queue
        self.task_queue.put((task_id, task_func, args, kwargs))
        logger.info(f"📥 Task {task_id} added to queue (Position: {self.task_queue.qsize()})")
        return task_id

    def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get the current status and result of a task (None if unknown)."""
        return self.tasks.get(task_id)

    def update_task_progress(self, task_id: str, progress: int, message: str = ""):
        """Update the progress percentage and status message of a task."""
        if task_id in self.tasks:
            self.tasks[task_id]["progress"] = progress
            self.tasks[task_id]["message"] = message
            logger.info(f"📈 Task {task_id} progress: {progress}% - {message}")

    def _worker(self):
        """Background worker that processes tasks sequentially."""
        logger.info("👷 Task Worker loop started")
        while True:
            try:
                # Block until a task is available
                task_id, func, args, kwargs = self.task_queue.get()

                logger.info(f"🔄 Processing Task {task_id}...")
                self.tasks[task_id]["status"] = TaskStatus.PROCESSING
                self.tasks[task_id]["started_at"] = time.time()

                try:
                    # Execute the task
                    result = func(*args, **kwargs)

                    self.tasks[task_id]["status"] = TaskStatus.COMPLETED
                    self.tasks[task_id]["completed_at"] = time.time()
                    self.tasks[task_id]["result"] = result

                    # Application-level failures are returned as
                    # {"status": "error", ...} rather than raised; surface
                    # them as FAILED tasks.
                    if isinstance(result, dict) and result.get("status") == "error":
                        self.tasks[task_id]["status"] = TaskStatus.FAILED
                        self.tasks[task_id]["error"] = result.get("error")
                        # FIX: previously logged "completed successfully"
                        # even when the result marked the task as failed.
                        logger.error(f"❌ Task {task_id} finished with an application error")
                    else:
                        logger.info(f"✅ Task {task_id} completed successfully")

                except Exception as e:
                    import traceback
                    error_trace = traceback.format_exc()
                    logger.error(f"❌ Task {task_id} failed with exception: {e}")
                    logger.error(error_trace)

                    self.tasks[task_id]["status"] = TaskStatus.FAILED
                    self.tasks[task_id]["error"] = str(e)
                    self.tasks[task_id]["traceback"] = error_trace
                    self.tasks[task_id]["completed_at"] = time.time()

                finally:
                    self.task_queue.task_done()

            except Exception as e:
                logger.error(f"💀 Critical Worker Error: {e}")
                time.sleep(1)  # Prevent tight loop if queue is broken
dependencies_scan.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai
2
+ python-dotenv
3
+ numpy
4
+ pillow
5
+ moviepy
6
+ arabic-reshaper
7
+ python-bidi
8
+ requests
9
+ json_repair
10
+ fastapi
11
+ uvicorn[standard]
12
+ python-multipart
13
+ opencv-python-headless
14
+ faster-whisper
main.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Form, BackgroundTasks
2
+ from fastapi.responses import JSONResponse, FileResponse
3
+ from typing import Optional, Union, Any
4
+ from enum import Enum
5
+ import os
6
+ import uuid
7
+ import shutil
8
+ import glob
9
+ import requests
10
+ import json
11
+ from processor import VideoProcessor
12
+ from core.config import Config
13
+ from core.logger import Logger
14
+ from core.task_queue import TaskManager
15
+
16
# Module-level logger and the process-wide task-queue singleton.
logger = Logger.get_logger(__name__)
task_manager = TaskManager()

# Ensure directories exist
Config.setup_dirs()
21
+
22
class VideoStyle(str, Enum):
    """Output layout presets accepted by the /auto-clip form."""
    cinematic = "cinematic"
    cinematic_blur = "cinematic_blur"
    vertical_full = "vertical_full"
    split_vertical = "split_vertical"
    split_horizontal = "split_horizontal"
28
+
29
class CaptionMode(str, Enum):
    """Caption granularity: one word at a time, or sentence-style groups."""
    word = "word"
    sentence = "sentence"
32
+
33
class CaptionStyle(str, Enum):
    """Visual caption presets (see SubtitleManager.get_style_config)."""
    classic = "classic"
    tiktok_bold = "tiktok_bold"
    tiktok_neon = "tiktok_neon"
    youtube_clean = "youtube_clean"
    youtube_box = "youtube_box"
39
+
40
class Language(str, Enum):
    """Supported transcription/caption languages; 'auto' defers detection."""
    auto = "auto"
    ar = "ar"
    en = "en"
    hi = "hi"
    zh = "zh"
    es = "es"
    fr = "fr"
    de = "de"
    ru = "ru"
    ja = "ja"
51
+
52
# FastAPI application and the single shared processing pipeline instance.
app = FastAPI(title="Auto-Clipping API")
clipper = VideoProcessor()
54
+
55
def process_video_task(
    task_id: str,
    video_path: str,
    playground_path: Optional[str],
    audio_path: Optional[str],
    bg_image_path: Optional[str],
    style: VideoStyle,
    bg_music_volume: float,
    secondary_video_volume: float,
    webhook_url: Optional[str],
    language: Language = Language.auto,
    caption_mode: CaptionMode = CaptionMode.sentence,
    caption_style: CaptionStyle = CaptionStyle.classic
):
    """Run the full auto-clipping pipeline for one queued task.

    Executed on the TaskManager worker thread: analyzes the video, selects
    the best segments, renders styled/captioned clips, reports progress via
    the task manager, optionally POSTs the result to ``webhook_url``, and
    returns a result dict with "status" of "success" or "error".
    """
    from moviepy.editor import VideoFileClip
    full_video_clip = None
    try:
        # Optimization: Open video once and share the handle across stages.
        full_video_clip = VideoFileClip(video_path)

        # Helper for progress updates
        def update_progress(progress, message):
            task_manager.update_task_progress(task_id, progress, message)

        update_progress(1, "Starting video analysis...")

        # 1. Analyze video (transcription + impact scoring)
        timestamp_mode = "words" if caption_mode == CaptionMode.word else "segments"
        scored_segments, total_duration, llm_moments = clipper.analyze_impact(
            video_path,
            video_clip=full_video_clip,
            language=language,
            timestamp_mode=timestamp_mode,
            progress_callback=update_progress
        )

        # 2. Select best clips
        best_clips = clipper.get_best_segments(
            scored_segments,
            video_duration=total_duration
        )

        # 3. Final processing (styling, captions, export)
        output_files = clipper.process_clips(
            video_path,
            best_clips,
            llm_moments,
            style=style,
            task_id=task_id,
            language=language,
            video_clip=full_video_clip,
            playground_path=playground_path,
            audio_path=audio_path,
            bg_music_volume=bg_music_volume,
            secondary_video_volume=secondary_video_volume,
            background_path=bg_image_path,
            caption_mode=caption_mode,
            caption_style=caption_style,
            progress_callback=update_progress
        )

        result = {
            "status": "success",
            "task_id": task_id,
            "clips_found": len(best_clips),
            "output_files": [os.path.basename(f) for f in output_files],
            "best_segments_info": best_clips
        }

        task_manager.update_task_progress(task_id, 100, "Completed successfully")

    except Exception as e:
        import traceback
        error_msg = f"❌ Error during processing: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        # The error result is returned (and webhooked) instead of re-raised;
        # TaskManager marks the task FAILED based on the "status" key.
        result = {
            "status": "error",
            "task_id": task_id,
            "error": str(e),
            "traceback": traceback.format_exc()
        }
    finally:
        if full_video_clip:
            full_video_clip.close()

    # Send webhook
    if webhook_url and webhook_url.strip() and webhook_url.startswith(('http://', 'https://')):
        try:
            logger.info(f"📡 Sending results to webhook: {webhook_url}")
            json_payload = json.dumps(result)
            headers = {'Content-Type': 'application/json'}

            response = requests.post(webhook_url, data=json_payload, headers=headers, timeout=30)

            logger.info(f"✅ Webhook sent. Status Code: {response.status_code}")
            if response.status_code >= 400:
                logger.warning(f"⚠️ Webhook Response Error: {response.text}")
        except Exception as webhook_err:
            # Webhook delivery is best-effort; never fail the task over it.
            logger.error(f"⚠️ Failed to send webhook: {webhook_err}")
    else:
        logger.info("ℹ️ No webhook URL provided, skipping webhook notification")

    return result
159
+
160
# FIX: the route must declare the {filename} path parameter that the handler
# receives (the previous literal path could never match a real file request).
@app.get("/download/{filename}")
async def download_video(filename: str):
    """Download a rendered video from the outputs folder.

    Looks in outputs/viral_clips first, then in the outputs root.
    Returns 404 JSON when the file does not exist.
    """
    # Sanitize: filenames come from the client; never let them escape the
    # outputs directory.
    safe_name = os.path.basename(filename)
    file_path = os.path.join(Config.OUTPUTS_DIR, "viral_clips", safe_name)
    # Check if file exists in the specific viral_clips folder or root outputs
    if not os.path.exists(file_path):
        file_path = os.path.join(Config.OUTPUTS_DIR, safe_name)

    if os.path.exists(file_path):
        return FileResponse(file_path, media_type='video/mp4', filename=safe_name)
    return JSONResponse(status_code=404, content={"error": "File not found"})
171
+
172
@app.get("/status/{task_id}")
async def get_task_status(task_id: str):
    """Check the status of a specific task"""
    info = task_manager.get_task_status(task_id)
    if info:
        return info
    return JSONResponse(status_code=404, content={"error": "Task not found"})
180
+
181
@app.get("/files")
async def list_files():
    """List all .mp4 files in the outputs folder (root and viral_clips),
    with sizes and per-file download URLs."""
    try:
        files = []
        # Search in viral_clips subdirectory as well
        search_dirs = [Config.OUTPUTS_DIR, os.path.join(Config.OUTPUTS_DIR, "viral_clips")]

        for d in search_dirs:
            if os.path.exists(d):
                for filename in os.listdir(d):
                    file_path = os.path.join(d, filename)
                    if os.path.isfile(file_path) and filename.endswith('.mp4'):
                        file_size = os.path.getsize(file_path)
                        files.append({
                            "filename": filename,
                            "size": file_size,
                            "size_mb": round(file_size / (1024 * 1024), 2),
                            # FIX: interpolate the actual filename so the URL
                            # matches the /download/{filename} route.
                            "download_url": f"/download/{filename}"
                        })

        return {
            "status": "success",
            "total_files": len(files),
            "files": files
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
209
+
210
@app.post("/clear")
async def clear_files():
    """Clear all files in upload, output and temp directories"""
    try:
        removed = 0
        for root_dir in (Config.UPLOADS_DIR, Config.OUTPUTS_DIR, Config.TEMP_DIR):
            if not os.path.exists(root_dir):
                continue
            # Walk every entry recursively; only plain files are deleted so
            # the directory tree itself stays intact.
            for entry in glob.glob(os.path.join(root_dir, "**", "*"), recursive=True):
                try:
                    if os.path.isfile(entry):
                        os.remove(entry)
                        removed += 1
                except Exception as e:
                    # Best effort: log and keep clearing the rest.
                    logger.error(f"Error deleting {entry}: {e}")
        return {"status": "success", "message": f"Cleared {removed} files"}
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
232
+
233
def _save_upload(upload: Optional[UploadFile], task_id: str) -> Optional[str]:
    """Persist one uploaded file into UPLOADS_DIR, prefixed with the task id.

    Returns the saved path, or None when no file was provided. The stored
    name is basename-sanitized so a crafted filename cannot escape the
    uploads directory (path traversal).
    """
    if not upload or not upload.filename:
        return None
    safe_name = os.path.basename(upload.filename)
    dest_path = os.path.join(Config.UPLOADS_DIR, f"{task_id}_{safe_name}")
    with open(dest_path, "wb") as f:
        shutil.copyfileobj(upload.file, f)
    return dest_path


@app.post("/auto-clip")
async def create_auto_clip(
    video: UploadFile = File(...),
    playground_video: Optional[UploadFile] = File(None),
    audio: Optional[UploadFile] = File(None),
    background_image: Optional[UploadFile] = File(None),
    style: VideoStyle = Form(VideoStyle.cinematic_blur),
    caption_mode: CaptionMode = Form(CaptionMode.sentence),
    caption_style: CaptionStyle = Form(CaptionStyle.classic),
    webhook_url: Optional[str] = Form(None),
    language: Language = Form(Language.auto),
    bg_music_volume: float = Form(0.1),
    secondary_video_volume: float = Form(0.2)
):
    """Accept the uploads, persist them, and queue the processing task.

    Returns immediately with a task_id; progress and results are polled via
    /status/{task_id} or delivered to webhook_url.
    """
    task_id = uuid.uuid4().hex[:8]

    # 1. Save main video (required by the File(...) declaration).
    video_path = _save_upload(video, task_id)

    # 2. Save secondary video — only meaningful for the split layouts.
    playground_path = None
    if style in [VideoStyle.split_vertical, VideoStyle.split_horizontal]:
        playground_path = _save_upload(playground_video, task_id)

    # 3. Save background image
    bg_image_path = _save_upload(background_image, task_id)

    # 4. Save audio file
    audio_path = _save_upload(audio, task_id)

    # Add task to queue
    task_manager.add_task(
        process_video_task,
        task_id=task_id,
        video_path=video_path,
        playground_path=playground_path,
        audio_path=audio_path,
        bg_image_path=bg_image_path,
        style=style,
        bg_music_volume=bg_music_volume,
        secondary_video_volume=secondary_video_volume,
        webhook_url=webhook_url,
        language=language,
        caption_mode=caption_mode,
        caption_style=caption_style
    )

    return {
        "status": "queued",
        "task_id": task_id,
        "message": "Task added to queue. Check status at /status/{task_id}"
    }
297
+
298
# Development entry point: run the API directly with uvicorn on port 7860.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
processor.py ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import traceback
4
+ from datetime import datetime
5
+ import moviepy.editor as mpe
6
+ import core # Applies monkey patches
7
+ from core.config import Config
8
+ from core.logger import Logger
9
+ from core.stt import STT
10
+ from core.analyze import analyze_transcript_gemini
11
+ from core.styles import StyleFactory
12
+ from core.subtitle_manager import SubtitleManager
13
+ from core.free_translator import FreeTranslator
14
+ import json_repair
15
+
16
+ logger = Logger.get_logger(__name__)
17
+
18
class VideoProcessor:
    """End-to-end pipeline: transcribe a video, find viral moments with AI,
    and render styled, captioned clips."""

    def __init__(self, model_size="base"):
        """Initialize the processor.

        Args:
            model_size: Whisper model size forwarded to the STT backend
                (e.g. "base", "small").
        """
        # Load the speech-to-text backend first, then make sure the
        # working directories (uploads/outputs/...) exist.
        self.stt = STT(model_size=model_size)
        Config.setup_dirs()
22
+
23
    def _clean_json_response(self, content):
        """Cleans AI JSON response using json_repair.

        NOTE(review): this method is SHADOWED by a second
        ``_clean_json_response`` defined later in this class, so the
        json_repair logic below is effectively dead code at runtime.
        The duplicate definitions should be merged.

        Args:
            content: Raw AI response text (possibly fenced in markdown).
                Non-string input is returned unchanged.

        Returns:
            A JSON string on successful repair, otherwise the stripped
            (and brace-balanced, if truncated) raw text.
        """
        if not isinstance(content, str):
            return content

        # Remove markdown blocks if present
        content = content.strip()
        if content.startswith("```json"):
            content = content[7:]
        if content.startswith("```"):
            content = content[3:]
        if content.endswith("```"):
            content = content[:-3]

        content = content.strip()

        # Use json_repair to fix truncated or malformed JSON
        try:
            repaired_json = json_repair.loads(content)
            return json.dumps(repaired_json)
        except Exception as e:
            logger.warning(f"⚠️ json_repair failed, falling back to manual fix: {e}")

        # Fallback manual fix (though json_repair is usually sufficient):
        # balance unclosed braces on truncated model output.
        if content and not content.endswith('}'):
            open_braces = content.count('{')
            close_braces = content.count('}')
            if open_braces > close_braces:
                content += '}' * (open_braces - close_braces)
                logger.info(f"🔧 Fixed truncated JSON with {open_braces - close_braces} closing braces")

        return content
55
+
56
+ def parse_ai_response(self, ai_res):
57
+ """
58
+ Parses the JSON response from the AI and returns a list of segments.
59
+ Handles both string and dictionary responses, and various potential key names.
60
+ """
61
+ if not isinstance(ai_res, dict):
62
+ logger.error(f"❌ Invalid AI response format: expected dict, got {type(ai_res)}")
63
+ return []
64
+
65
+ res_content = ai_res.get("content")
66
+ segments_data = {}
67
+
68
+ try:
69
+ if isinstance(res_content, str):
70
+ cleaned_content = self._clean_json_response(res_content)
71
+ segments_data = json.loads(cleaned_content)
72
+ else:
73
+ segments_data = res_content
74
+
75
+ chunk_segments = []
76
+ if isinstance(segments_data, dict):
77
+ for key in ["segments", "clips", "moments"]:
78
+ if key in segments_data and isinstance(segments_data[key], list):
79
+ chunk_segments = segments_data[key]
80
+ break
81
+ if not chunk_segments and any(isinstance(v, list) for v in segments_data.values()):
82
+ for v in segments_data.values():
83
+ if isinstance(v, list):
84
+ chunk_segments = v
85
+ break
86
+ elif isinstance(segments_data, list):
87
+ chunk_segments = segments_data
88
+
89
+ return chunk_segments
90
+
91
+ except Exception as e:
92
+ logger.error(f"❌ Failed to parse AI response: {e}")
93
+ logger.error(f"Raw Content: {res_content}")
94
+ return []
95
+
96
+ def _clean_json_response(self, text):
97
+ """Clean markdown and other noise from AI JSON response."""
98
+ text = text.strip()
99
+ if text.startswith("```json"):
100
+ text = text[7:]
101
+ if text.startswith("```"):
102
+ text = text[3:]
103
+ if text.endswith("```"):
104
+ text = text[:-3]
105
+ return text.strip()
106
+
107
+ def analyze_impact(self, video_path, video_clip=None, language=None, timestamp_mode="segments", progress_callback=None):
108
+ """Analyzes video content and suggests viral clips using AI."""
109
+ if progress_callback: progress_callback(5, "Starting speech-to-text...")
110
+ logger.info("🎙️ Phase 1: Speech-to-Text (STT)...")
111
+ # Always transcribe in source language (auto detect)
112
+ full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
113
+ video_path,
114
+ language=None, # Always auto-detect source language
115
+ skip_ai=True,
116
+ timestamp_mode=timestamp_mode
117
+ )
118
+
119
+ # Check if translation is needed
120
+ target_lang = None
121
+ if language:
122
+ if hasattr(language, 'value'):
123
+ target_lang = language.value
124
+ else:
125
+ target_lang = str(language)
126
+
127
+
128
+ data = {
129
+ "segments": full_segments,
130
+ "detected_language": detected_lang,
131
+ "duration": duration
132
+ }
133
+
134
+ logger.info("🤖 Phase 2: AI Viral Moment Analysis...")
135
+ if progress_callback: progress_callback(20, "Analyzing content for viral moments...")
136
+
137
+ chunk_size = Config.CHUNK_SIZE_SECONDS
138
+ overlap = Config.OVERLAP_SECONDS
139
+
140
+ all_ai_segments = []
141
+ max_time = full_segments[-1]["end"] if full_segments else 0
142
+
143
+ current_start = 0
144
+ while current_start < max_time:
145
+ current_end = current_start + chunk_size
146
+
147
+ chunk_transcript = ""
148
+ for seg in full_segments:
149
+ if seg["start"] >= current_start and seg["start"] < current_end:
150
+ chunk_transcript += f"[{seg['start']:.2f} - {seg['end']:.2f}] {seg['text']}\n"
151
+
152
+ if chunk_transcript.strip():
153
+ transcript_len = len(chunk_transcript)
154
+
155
+ # Calculate progress
156
+ current_progress = 20 + int((current_start / max_time) * 40) # 20% to 60%
157
+ if progress_callback:
158
+ progress_callback(current_progress, f"Analyzing chunk {current_start/60:.1f}m - {min(current_end, max_time)/60:.1f}m")
159
+
160
+ logger.info(f"🧠 Analyzing chunk: {current_start/60:.1f}m to {min(current_end, max_time)/60:.1f}m (Length: {transcript_len} chars)...")
161
+
162
+ ai_res = analyze_transcript_gemini(chunk_transcript)
163
+
164
+ # Log debug info
165
+ logger.info(f"🤖 AI Response Type: {type(ai_res)}")
166
+ if isinstance(ai_res, dict) and "content" in ai_res:
167
+ logger.info(f"🤖 Raw AI Response (First 500 chars): {ai_res['content'][:500]}...")
168
+ else:
169
+ logger.info(f"🤖 Raw AI Response (Structure): {str(ai_res)[:500]}...")
170
+
171
+ try:
172
+ chunk_segments = self.parse_ai_response(ai_res)
173
+ logger.info(f"✅ Found {len(chunk_segments)} segments in chunk")
174
+ all_ai_segments.extend(chunk_segments)
175
+ except Exception as e:
176
+ logger.error(f"❌ Error processing chunk: {e}")
177
+ logger.error(traceback.format_exc())
178
+
179
+ current_start += (chunk_size - overlap)
180
+ if current_end >= max_time: break
181
+
182
+ # Deduplicate
183
+ unique_segments = []
184
+ seen_starts = set()
185
+ for s in all_ai_segments:
186
+ start_t = s.get("start_time")
187
+ if start_t not in seen_starts:
188
+ unique_segments.append(s)
189
+ seen_starts.add(start_t)
190
+
191
+ return unique_segments, duration, data
192
+
193
+ def get_best_segments(self, segments, video_duration=0):
194
+ """Sorts segments by viral score."""
195
+ return sorted(segments, key=lambda x: x.get("viral_score", 0), reverse=True)
196
+
197
    def process_clips(self, input_video_path, best_clips, data, style="cinematic", language=None, video_clip=None, progress_callback=None, **kwargs):
        """Processes the selected viral clips with styles and captions.

        Args:
            input_video_path: Path to the full source video on disk.
            best_clips: Segment dicts with "start_time"/"end_time" seconds.
            data: Transcript payload from analyze_impact() — keys
                "segments", "detected_language", "duration".
            style: Style name or enum applied to every rendered clip.
            language: Target caption language (enum or string); translation
                runs only when it differs from the detected language.
            video_clip: Optional open clip, used only to read the duration;
                the caller stays responsible for closing it.
            progress_callback: Optional fn(percent, message) progress hook.
            **kwargs: task_id, caption_mode, caption_style, background_path,
                playground_path.

        Returns:
            List of output file paths for the clips that rendered OK.
        """
        logger.info("🎨 Phase 3: Style & Captions...")
        if progress_callback: progress_callback(60, "Generating clips...")

        # Determine video duration safely: prefer the precomputed value,
        # then the open clip, then (last resort) probe the file ourselves.
        video_duration = 0
        if "duration" in data and data["duration"]:
            video_duration = data["duration"]
        elif video_clip:
            video_duration = video_clip.duration
        else:
            try:
                with mpe.VideoFileClip(input_video_path) as temp_vid:
                    video_duration = temp_vid.duration
            except Exception as e:
                logger.error(f"❌ Failed to get video duration: {e}")

        output_files = []

        # Initialize Translator if needed
        translator = None
        target_language = None
        if language:
            target_language = language.value if hasattr(language, 'value') else language

        detected_lang = data.get("detected_language", "en")
        needs_translation = (target_language and
                             target_language != "auto" and
                             target_language != detected_lang)

        if needs_translation:
            logger.info(f"🌍 Translating from {detected_lang} to {target_language}...")
            translator = FreeTranslator()

        try:
            if not best_clips:
                logger.warning("⚠️ No best clips provided to process_clips!")
                return []

            logger.info(f"📊 Starting processing for {len(best_clips)} clips...")

            for i, seg in enumerate(best_clips):
                # Update progress: rendering spans the 60%-95% window.
                current_progress = 60 + int((i / len(best_clips)) * 35)  # 60% to 95%
                if progress_callback:
                    progress_callback(current_progress, f"Rendering clip {i+1} of {len(best_clips)}...")

                clip = None
                final_clip = None
                current_video_clip = None  # Local handle for this iteration

                try:
                    # Clamp requested times to the actual video bounds.
                    start = max(0, seg.get("start_time", 0))
                    end = min(video_duration, seg.get("end_time", 0))

                    # Ensure valid duration
                    if end - start < 1.0:
                        logger.warning(f"⚠️ Clip {i+1} too short ({end-start:.2f}s), skipping.")
                        continue

                    # TRANSLATION STEP: Translate only the current segment if needed
                    # NOTE(review): this step mutates data['segments'] in place,
                    # and the caption-preparation loop further below translates
                    # matching segments AGAIN via translator.translate_text —
                    # already-translated text gets re-translated (usually a
                    # no-op but wasted API calls). Translation should probably
                    # live in only one of the two places.
                    if needs_translation and translator:
                        try:
                            # Find matching transcript segments for this clip
                            # Note: segments in 'data' use 'start' and 'end' keys
                            matching_segs = [
                                s for s in data.get('segments', [])
                                if s['start'] >= start and s['end'] <= end
                            ]

                            if matching_segs:
                                logger.info(f"🌍 Translating {len(matching_segs)} segments for Clip {i+1}...")
                                for match_s in matching_segs:
                                    # Skip if already translated (heuristic check if needed, but safe to re-translate if simple)
                                    # Or better, check if text is already in target language?
                                    # Since we modify in place, subsequent clips covering same segment might re-translate.
                                    # But clips usually don't overlap much.

                                    tr_text, _ = translator.translate_text(match_s['text'], target_language)
                                    match_s['text'] = tr_text
                                    # Clear words to force interpolation since word-level timing is lost
                                    if 'words' in match_s:
                                        match_s['words'] = []

                                logger.info(f"✅ Translated clip {i+1} content to {target_language}")

                        except Exception as e:
                            # Best-effort: a failed translation should not kill the render.
                            logger.warning(f"⚠️ Translation failed for clip {i+1}: {e}")

                    logger.info(f"\n🎬 Processing Clip {i+1}/{len(best_clips)} ({start:.2f} - {end:.2f})...")

                    # Ensure style is a clean string
                    style_str = style.value if hasattr(style, "value") else str(style)
                    if "." in style_str:
                        style_str = style_str.split(".")[-1]  # Handle VideoStyle.split_vertical

                    output_filename = f"viral_{i+1}_{style_str}.mp4"
                    # Add task_id to filename if provided to avoid collisions
                    task_id = kwargs.get("task_id")
                    if task_id:
                        output_filename = f"viral_{task_id}_{i+1}_{style_str}.mp4"

                    final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", output_filename)
                    os.makedirs(os.path.dirname(final_output), exist_ok=True)

                    if start >= video_duration:
                        logger.warning(f"⚠️ Clip start time {start} is beyond video duration {video_duration}, skipping.")
                        continue

                    # 1. Cut the clip
                    # ALWAYS open a fresh VideoFileClip for each iteration to avoid shared reader issues
                    # and allow full cleanup (closing reader) after each clip.
                    current_video_clip = mpe.VideoFileClip(input_video_path)
                    clip = current_video_clip.subclip(start, end)

                    # 2. Get the style strategy
                    style_strategy = StyleFactory.get_style(style)
                    logger.info(f"✨ Applying style: {style}")

                    # 3. Handle Translation and Captions PREPARATION
                    segment_transcript = {"segments": []}

                    # Filter relevant segments for this clip (any overlap counts).
                    for s in data["segments"]:
                        if s["start"] < end and s["end"] > start:
                            # Clone the segment to avoid modifying original data
                            new_seg = s.copy()

                            # Adjust timestamps relative to clip start
                            new_seg["start"] = max(0, s["start"] - start)
                            new_seg["end"] = min(end - start, s["end"] - start)

                            if needs_translation and translator:
                                # NOTE(review): see double-translation note above —
                                # this may re-translate text already translated in place.
                                logger.info(f"🌍 Translating segment: {s['text'][:30]}...")
                                translated_text, _ = translator.translate_text(s['text'], target_language)
                                new_seg["text"] = translated_text
                                # Simple word distribution for translated text:
                                # spread words evenly since real timings are lost.
                                words = translated_text.split()
                                seg_duration = new_seg["end"] - new_seg["start"]
                                word_duration = seg_duration / len(words) if words else seg_duration
                                new_seg["words"] = []
                                for idx, w in enumerate(words):
                                    new_seg["words"].append({
                                        "text": w,
                                        "start": new_seg["start"] + (idx * word_duration),
                                        "end": new_seg["start"] + ((idx + 1) * word_duration)
                                    })
                            else:
                                # Adjust word timestamps if they exist
                                if "words" in s:
                                    new_words = []
                                    for w in s["words"]:
                                        if w["start"] < end and w["end"] > start:
                                            nw = w.copy()
                                            nw["start"] = max(0, w["start"] - start)
                                            nw["end"] = min(end - start, w["end"] - start)
                                            new_words.append(nw)
                                    new_seg["words"] = new_words

                            segment_transcript["segments"].append(new_seg)

                    # 4. Use the optimized apply_with_captions method
                    final_clip = style_strategy.apply_with_captions(
                        clip,
                        transcript_data=segment_transcript,
                        language=target_language if needs_translation else detected_lang,
                        caption_mode=kwargs.get('caption_mode', 'sentence'),
                        caption_style=kwargs.get('caption_style', 'classic'),
                        background_path=kwargs.get("background_path"),
                        playground_path=kwargs.get("playground_path")
                    )

                    # 5. Write Output
                    # Automatically use all available CPU cores
                    # os.cpu_count() returns None if undetermined, so we default to 4 in that case
                    cpu_count = os.cpu_count() or 4
                    logger.info(f"⚙️ Using {cpu_count} threads for video rendering")

                    final_clip.write_videofile(
                        final_output,
                        codec="libx264",
                        audio_codec="aac",
                        threads=cpu_count,
                        logger=None
                    )

                    output_files.append(final_output)

                except Exception as e:
                    # A failed clip is logged and skipped; remaining clips still render.
                    logger.error(f"❌ Error processing clip {i+1}: {e}")
                    logger.error(traceback.format_exc())
                finally:
                    # 🧹 Explicit Cleanup — best-effort close of every moviepy
                    # handle opened this iteration; errors (e.g. already closed)
                    # are deliberately ignored.
                    if final_clip:
                        try: final_clip.close()
                        except: pass
                    if clip:
                        try: clip.close()
                        except: pass
                    if current_video_clip:
                        try: current_video_clip.close()
                        except: pass

                    # Force garbage collection to release ffmpeg readers promptly.
                    import gc
                    gc.collect()

        except Exception as e:
            logger.error(f"❌ Error in processing loop: {e}")
            logger.error(traceback.format_exc())

        # Note: We don't close passed video_clip here because we didn't open it (or we treated it as read-only for duration)
        # The caller is responsible for closing video_clip if they passed it.

        return output_files
413
+
414
# -----------------------------------------------------------------------------
# Module Level Function to wrap the class usage
# -----------------------------------------------------------------------------
def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
    """Main entry point to process a video end-to-end.

    Args:
        video_path: Path to the source video file.
        style: Visual style applied to the rendered clips.
        model_size: Whisper model size for transcription.
        **kwargs: Optional language, caption_mode, caption_style, task_id,
            background_path, playground_path, etc., forwarded downstream.

    Returns:
        List of rendered clip file paths; empty list on failure or when no
        viral segments are found.
    """
    video_clip = None
    try:
        processor = VideoProcessor(model_size=model_size)

        # 1. Open Video Clip ONCE
        video_clip = mpe.VideoFileClip(video_path)

        # 2. Analyze (Reuse video_clip)
        caption_mode = kwargs.get("caption_mode", "sentence")
        timestamp_mode = "words" if caption_mode == "word" else "segments"

        # BUG FIX: 'language' must be *popped* out of kwargs here. It was
        # previously passed explicitly AND left inside **kwargs when calling
        # process_clips(), which raises
        # "TypeError: got multiple values for keyword argument 'language'"
        # whenever a language is supplied.
        language = kwargs.pop("language", None)

        viral_segments, duration, stt_data = processor.analyze_impact(
            video_path,
            video_clip=video_clip,
            language=language,
            timestamp_mode=timestamp_mode
        )

        if not viral_segments:
            logger.warning("⚠️ No viral segments found.")
            return []

        # 3. Process Clips (Reuse video_clip and STT data)
        output_files = processor.process_clips(
            video_path,
            viral_segments,
            stt_data,
            style=style,
            language=language,
            video_clip=video_clip,
            **kwargs
        )

        return output_files

    except Exception as e:
        logger.error(f"❌ Processing failed: {e}")
        logger.error(traceback.format_exc())
        return []
    finally:
        # Always release the shared ffmpeg reader, even on failure.
        if video_clip:
            video_clip.close()
463
+
464
if __name__ == "__main__":
    import sys

    # Minimal CLI entry point: python processor.py <video_path>
    cli_args = sys.argv[1:]
    if cli_args:
        process_video(cli_args[0])
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ moviepy==1.0.3
5
+ faster-whisper
6
+ python-dotenv
7
+ opencv-python-headless
8
+ numpy
9
+ pillow
10
+ requests
11
+ arabic-reshaper
12
+ python-bidi
13
+ imageio==2.25.1
14
+ imageio-ffmpeg==0.4.8
15
+ openai>=1.0.0
16
+ scipy
17
+ json_repair