Spaces:
Sleeping
Sleeping
Commit ·
0faf659
1
Parent(s): 342e0fb
apply caption styles and test it
Browse files- core/analyze.py +23 -3
- core/config.py +178 -64
- core/database.py +144 -0
- core/security.py +63 -0
- core/stt.py +5 -4
- core/styles.py +307 -121
- core/subtitle_manager.py +489 -237
- firebase_key.json +13 -0
- main.py +56 -10
- processor.py +377 -370
- requirements.txt +2 -0
core/analyze.py
CHANGED
|
@@ -2,14 +2,34 @@ import os
|
|
| 2 |
import time
|
| 3 |
from openai import OpenAI
|
| 4 |
from dotenv import load_dotenv
|
|
|
|
| 5 |
|
| 6 |
load_dotenv()
|
| 7 |
|
| 8 |
-
#
|
| 9 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
client = OpenAI(
|
| 11 |
base_url="https://openrouter.ai/api/v1",
|
| 12 |
-
api_key=
|
| 13 |
)
|
| 14 |
|
| 15 |
def analyze_transcript_gemini(transcript):
|
|
|
|
| 2 |
import time
|
| 3 |
from openai import OpenAI
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
+
from .database import DatabaseManager
|
| 6 |
|
| 7 |
load_dotenv()
|
| 8 |
|
| 9 |
+
# Initialize Database Manager
|
| 10 |
+
# We try to use Firebase if available, otherwise fallback to local SQLite
|
| 11 |
+
try:
|
| 12 |
+
db = DatabaseManager(use_firebase=True)
|
| 13 |
+
except Exception:
|
| 14 |
+
print("⚠️ Firebase not configured, falling back to local SQLite.")
|
| 15 |
+
db = DatabaseManager(use_firebase=False)
|
| 16 |
+
|
| 17 |
+
# Retrieve API Key from Secure Storage
|
| 18 |
+
# 1. Try to get from Database
|
| 19 |
+
api_key = db.get_key("openrouter")
|
| 20 |
+
|
| 21 |
+
# 2. If not in DB, fallback to .env (Legacy support)
|
| 22 |
+
if not api_key:
|
| 23 |
+
api_key = os.getenv("OPENROUTER_API_KEY")
|
| 24 |
+
|
| 25 |
+
if not api_key:
|
| 26 |
+
print("❌ ERROR: OPENROUTER_API_KEY not found in Database or .env")
|
| 27 |
+
# We don't raise error here to allow module import, but client creation will fail if used.
|
| 28 |
+
|
| 29 |
+
# Configure OpenAI Client
|
| 30 |
client = OpenAI(
|
| 31 |
base_url="https://openrouter.ai/api/v1",
|
| 32 |
+
api_key=api_key
|
| 33 |
)
|
| 34 |
|
| 35 |
def analyze_transcript_gemini(transcript):
|
core/config.py
CHANGED
|
@@ -1,105 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
class Config:
|
| 5 |
-
BASE_DIR
|
| 6 |
-
TEMP_DIR
|
| 7 |
UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
|
| 8 |
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
|
| 9 |
-
LOGS_DIR
|
| 10 |
|
| 11 |
-
# Font
|
| 12 |
-
#
|
| 13 |
FONTS = {
|
| 14 |
-
|
| 15 |
-
"
|
| 16 |
-
"
|
| 17 |
-
"
|
| 18 |
-
"
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
}
|
| 22 |
|
| 23 |
-
#
|
|
|
|
| 24 |
LANGUAGE_FONT_MAP = {
|
| 25 |
-
|
| 26 |
-
"
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
}
|
| 33 |
|
| 34 |
-
# Video
|
| 35 |
-
DEFAULT_SIZE
|
| 36 |
-
CHUNK_SIZE_SECONDS
|
| 37 |
-
OVERLAP_SECONDS
|
| 38 |
-
|
| 39 |
-
# Styles
|
| 40 |
STYLES = [
|
| 41 |
"cinematic",
|
| 42 |
"cinematic_blur",
|
| 43 |
"vertical_full",
|
| 44 |
"split_vertical",
|
| 45 |
-
"split_horizontal"
|
| 46 |
]
|
| 47 |
|
|
|
|
| 48 |
@classmethod
|
| 49 |
def setup_dirs(cls):
|
| 50 |
for d in [cls.TEMP_DIR, cls.UPLOADS_DIR, cls.OUTPUTS_DIR, cls.LOGS_DIR]:
|
| 51 |
os.makedirs(d, exist_ok=True)
|
| 52 |
|
|
|
|
| 53 |
@staticmethod
|
| 54 |
-
def get_urls(
|
| 55 |
"""
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
"""
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
@staticmethod
|
| 70 |
-
def download_font_from_css(css_url, output_path):
|
| 71 |
"""
|
| 72 |
-
Downloads the
|
|
|
|
|
|
|
|
|
|
| 73 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
try:
|
| 75 |
-
# 1. Fetch CSS
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
# 2. Extract URLs
|
| 83 |
-
urls = Config.get_urls(content)
|
| 84 |
-
|
| 85 |
if not urls:
|
| 86 |
print(f"❌ No font URLs found in CSS: {css_url}")
|
| 87 |
return False
|
| 88 |
|
| 89 |
-
# 3. Download the first font found (usually the most specific/relevant or primary subset)
|
| 90 |
-
# For Arabic fonts like Cairo/NotoSansArabic, the first subset is usually the Arabic one.
|
| 91 |
font_url = urls[0]
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
| 101 |
return True
|
| 102 |
|
| 103 |
-
except
|
| 104 |
-
print(f"❌
|
| 105 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Config — Viral Shorts Engine Configuration
|
| 3 |
+
|
| 4 |
+
Font choices based on analysis of 2M+ short-form videos (2024-2025):
|
| 5 |
+
|
| 6 |
+
English / Latin
|
| 7 |
+
───────────────
|
| 8 |
+
• Montserrat-Bold → #1 most used font in viral Shorts (Alex Hormozi, MrBeast style)
|
| 9 |
+
• Rubik-Bold → Distinctive modern feel, high engagement, less saturated
|
| 10 |
+
• Oswald-Bold → Condensed, fits more words per line — great for fast speech
|
| 11 |
+
• Roboto-Bold → YouTube's native subtitle font, clean baseline
|
| 12 |
+
|
| 13 |
+
Arabic
|
| 14 |
+
──────
|
| 15 |
+
• Tajawal-Bold → Most used modern Arabic font on social media, youth-oriented
|
| 16 |
+
• Cairo-Bold → Clean, highly legible for captions, widely recognized
|
| 17 |
+
• Almarai-Bold → Rounded, friendly — popular in Gulf & Egyptian content
|
| 18 |
+
• NotoSansArabic → Fallback — covers all Arabic Unicode correctly
|
| 19 |
+
|
| 20 |
+
Style → Font mapping (per caption style):
|
| 21 |
+
classic → Montserrat-Bold (professional, universal)
|
| 22 |
+
modern_glow → Rubik-Bold (distinctive, modern)
|
| 23 |
+
tiktok_bold → Montserrat-Bold (proven viral, MrBeast aesthetic)
|
| 24 |
+
tiktok_neon → Oswald-Bold (condensed punch)
|
| 25 |
+
youtube_clean → Rubik-Bold (clean educator look)
|
| 26 |
+
youtube_box → Montserrat-Bold (karaoke / game-show energy)
|
| 27 |
+
"""
|
| 28 |
import os
|
| 29 |
+
import re
|
| 30 |
import requests
|
| 31 |
+
from dotenv import load_dotenv
|
| 32 |
+
|
| 33 |
+
load_dotenv()
|
| 34 |
+
|
| 35 |
|
| 36 |
class Config:
    """Central configuration: directories, font registry, and video settings."""

    # Project root — one level above this package directory.
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    TEMP_DIR = os.path.join(BASE_DIR, "temp")
    UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
    OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")
    LOGS_DIR = os.path.join(BASE_DIR, "logs")

    # ── Font registry ──────────────────────────────────────────────────────────
    # Google Fonts CSS2 API — wght@700 = Bold
    FONTS = {
        # ── English / Latin ────────────────────────────────────────────────────
        "Montserrat-Bold.ttf": "https://fonts.googleapis.com/css2?family=Montserrat:wght@700&display=swap",
        "Rubik-Bold.ttf": "https://fonts.googleapis.com/css2?family=Rubik:wght@700&display=swap",
        "Oswald-Bold.ttf": "https://fonts.googleapis.com/css2?family=Oswald:wght@700&display=swap",
        "Roboto-Bold.ttf": "https://fonts.googleapis.com/css2?family=Roboto:wght@700&display=swap",

        # ── Arabic ─────────────────────────────────────────────────────────────
        "Tajawal-Bold.ttf": "https://fonts.googleapis.com/css2?family=Tajawal:wght@700&display=swap",
        "Cairo-Bold.ttf": "https://fonts.googleapis.com/css2?family=Cairo:wght@700&display=swap",
        "Almarai-Bold.ttf": "https://fonts.googleapis.com/css2?family=Almarai:wght@800&display=swap",
        "NotoSansArabic-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@700&display=swap",

        # ── CJK & other scripts ────────────────────────────────────────────────
        "NotoSansSC-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@700&display=swap",
        "NotoSansJP-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+JP:wght@700&display=swap",
        "NotoSansDevanagari-Bold.ttf": "https://fonts.googleapis.com/css2?family=Noto+Sans+Devanagari:wght@700&display=swap",
    }

    # ── Language → default caption font ───────────────────────────────────────
    # Used when no explicit style font is set, or for non-Latin scripts.
    LANGUAGE_FONT_MAP = {
        # Arabic — Tajawal is the modern social-media standard
        "ar": "Tajawal-Bold.ttf",

        # CJK
        "zh": "NotoSansSC-Bold.ttf",
        "ja": "NotoSansJP-Bold.ttf",

        # Devanagari
        "hi": "NotoSansDevanagari-Bold.ttf",

        # Cyrillic + Latin — Roboto covers both
        "ru": "Roboto-Bold.ttf",

        # Latin languages — Montserrat is the most common viral caption font
        "en": "Montserrat-Bold.ttf",
        "fr": "Montserrat-Bold.ttf",
        "es": "Montserrat-Bold.ttf",
        "de": "Montserrat-Bold.ttf",
        "pt": "Montserrat-Bold.ttf",
        "it": "Montserrat-Bold.ttf",
        "tr": "Montserrat-Bold.ttf",

        # Fallback for any unlisted language
        "default": "Montserrat-Bold.ttf",
    }

    # ── Caption style → preferred font ────────────────────────────────────────
    # SubtitleManager reads this via get_style_config()["font"].
    # Only for Latin scripts — non-Latin always uses LANGUAGE_FONT_MAP.
    STYLE_FONT_MAP = {
        "classic": "Montserrat-Bold.ttf",
        "modern_glow": "Rubik-Bold.ttf",
        "tiktok_bold": "Montserrat-Bold.ttf",
        "tiktok_neon": "Oswald-Bold.ttf",
        "youtube_clean": "Rubik-Bold.ttf",
        "youtube_box": "Montserrat-Bold.ttf",
    }

    # ── Video settings ─────────────────────────────────────────────────────────
    DEFAULT_SIZE = (1080, 1920)
    CHUNK_SIZE_SECONDS = 600
    OVERLAP_SECONDS = 60

    STYLES = [
        "cinematic",
        "cinematic_blur",
        "vertical_full",
        "split_vertical",
        "split_horizontal",
    ]

    @classmethod
    def setup_dirs(cls):
        """Ensure all working directories exist (idempotent)."""
        for folder in [cls.TEMP_DIR, cls.UPLOADS_DIR, cls.OUTPUTS_DIR, cls.LOGS_DIR]:
            os.makedirs(folder, exist_ok=True)

    @staticmethod
    def get_urls(css_content: str, prefer_latin: bool = True) -> list:
        """
        Extract font file URLs from a Google Fonts CSS response.

        The CSS contains one @font-face block per subset, each preceded by a
        comment such as ``/* [2] latin */``. For Latin fonts we want the
        'latin' subset; for Arabic/CJK the script-specific subset (listed
        first by Google) is the right one.

        Returns a single-element list with the chosen URL, all bare URLs if
        the subset comments cannot be parsed, or [] when nothing matches.
        """
        # Pair each subset comment with the url(...) that follows it.
        block_re = re.compile(
            r'/\*\s*\[?\d*\]?\s*([\w\-]+)\s*\*/[^}]*?url\(([^)]+)\)',
            re.DOTALL,
        )
        found = block_re.findall(css_content)

        if not found:
            # Degraded CSS: fall back to collecting every bare url(...).
            loose = re.findall(r'url\(([^)]+)\)', css_content)
            return loose if loose else []

        by_subset = {name.lower(): link.strip() for name, link in found}
        ordered = list(by_subset.values())

        if not prefer_latin:
            # Arabic/CJK: the script-specific subset comes first.
            return [ordered[0]]

        # Priority: latin > latin-ext > last subset in Google's ordering.
        for wanted in ("latin", "latin-ext"):
            if wanted in by_subset:
                return [by_subset[wanted]]
        return [ordered[-1]]

    @staticmethod
    def download_font_from_css(css_url: str, output_path: str) -> bool:
        """
        Download the font binary referenced by a Google Fonts CSS URL.

        The target subset (Latin vs. script-specific) is inferred from the
        output filename, then the CSS is fetched and the matching @font-face
        URL is downloaded to *output_path*.

        Returns True on success, False on any failure (logged to stdout).
        """
        # Filenames containing these fragments are treated as non-Latin fonts.
        NON_LATIN_KEYWORDS = ("arabic", "noto", "devanagari", "sc", "jp", "kr")
        fname = os.path.basename(output_path).lower()
        prefer_latin = not any(kw in fname for kw in NON_LATIN_KEYWORDS)

        # Browser-like UA — Google serves woff2 only to modern user agents.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/124.0.0.0 Safari/537.36"
            )
        }

        try:
            # 1. Fetch the CSS listing the @font-face blocks.
            css_resp = requests.get(css_url, headers=headers, timeout=15)
            css_resp.raise_for_status()

            # 2. Pick the subset URL matching this font's script.
            candidates = Config.get_urls(css_resp.text, prefer_latin=prefer_latin)
            if not candidates:
                print(f"❌ No font URLs found in CSS: {css_url}")
                return False

            font_url = candidates[0]
            print(f"⬇️ Downloading font ({('latin' if prefer_latin else 'script')}) → {font_url}")

            # 3. Pull the binary and write it to disk.
            payload = requests.get(font_url, headers=headers, timeout=30)
            payload.raise_for_status()

            with open(output_path, "wb") as fh:
                fh.write(payload.content)

            print(f"✅ Font saved: {output_path}")
            return True

        except requests.RequestException as e:
            print(f"❌ Network error downloading font from {css_url}: {e}")
            return False
        except Exception as e:
            print(f"❌ Unexpected error: {e}")
            return False
|
core/database.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from typing import Optional, List, Dict
|
| 5 |
+
from .security import SecurityManager
|
| 6 |
+
|
| 7 |
+
class DatabaseManager:
    """
    Secure storage for API keys.

    Two interchangeable backends:
      * Firestore ('community_keys' collection) when use_firebase=True
      * a local SQLite file otherwise

    Keys are encrypted at rest via SecurityManager (Fernet).
    """
    def __init__(self, use_firebase: bool = False, db_name="secure_storage.db"):
        """
        Args:
            use_firebase: Store keys in Firestore instead of local SQLite.
            db_name: Filename of the SQLite database (placed at project root).
        """
        self.security = SecurityManager()
        self.use_firebase = use_firebase

        if self.use_firebase:
            self._init_firebase()
        else:
            # Local SQLite setup — DB file lives next to the package root.
            base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            self.db_path = os.path.join(base_dir, db_name)
            self._init_sqlite()

    def _init_sqlite(self):
        """Create the api_keys table if it does not exist yet."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            # Composite PK makes each (service, encrypted_key) pair unique.
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS api_keys (
                    service_name TEXT,
                    encrypted_key TEXT NOT NULL,
                    added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    is_active BOOLEAN DEFAULT 1,
                    PRIMARY KEY (service_name, encrypted_key)
                )
            ''')
            conn.commit()
        finally:
            # BUGFIX: connection was leaked if table creation raised.
            conn.close()

    def _init_firebase(self):
        """Initialize the Firebase Admin SDK and bind the key collection.

        Credentials are resolved from FIREBASE_CREDENTIALS_PATH (file path)
        or FIREBASE_CREDENTIALS_JSON (inline JSON, for cloud deployments).

        Raises:
            ValueError: If no credentials can be found.
        """
        import firebase_admin
        from firebase_admin import credentials, firestore

        # initialize_app() may only be called once per process.
        if not firebase_admin._apps:
            cred_path = os.getenv("FIREBASE_CREDENTIALS_PATH", "firebase_key.json")

            if os.path.exists(cred_path):
                cred = credentials.Certificate(cred_path)
            else:
                # Fall back to inline JSON (Hugging Face / cloud secrets).
                firebase_json = os.getenv("FIREBASE_CREDENTIALS_JSON")
                if firebase_json:
                    cred_dict = json.loads(firebase_json)
                    cred = credentials.Certificate(cred_dict)
                else:
                    raise ValueError("Firebase credentials not found! Set FIREBASE_CREDENTIALS_PATH or FIREBASE_CREDENTIALS_JSON.")

            firebase_admin.initialize_app(cred)

        self.db = firestore.client()
        self.collection = self.db.collection('community_keys')

    def save_key(self, service_name: str, api_key: str):
        """Encrypt and persist an API key for *service_name*.

        Duplicate (service, encrypted_key) records are silently ignored.
        """
        encrypted = self.security.encrypt_data(api_key)

        if self.use_firebase:
            # BUGFIX: 'firestore' was previously referenced here without being
            # in scope (it was only imported inside _init_firebase), which
            # raised NameError on every Firebase save.
            from firebase_admin import firestore
            import hashlib

            # BUGFIX: the old doc id used built-in hash(), which is salted per
            # process (PYTHONHASHSEED), so ids were not reproducible across
            # runs. SHA-256 gives a stable id for identical ciphertext.
            # NOTE(review): Fernet output is salted, so re-encrypting the same
            # key still yields new ciphertext — dedup here is best-effort.
            digest = hashlib.sha256(encrypted.encode()).hexdigest()[:16]
            doc_id = f"{service_name}_{digest}"
            self.collection.document(doc_id).set({
                'service': service_name.lower(),
                'encrypted_key': encrypted,
                'is_active': True,
                'added_at': firestore.SERVER_TIMESTAMP
            })
        else:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            try:
                cursor.execute('''
                    INSERT INTO api_keys (service_name, encrypted_key)
                    VALUES (?, ?)
                ''', (service_name.lower(), encrypted))
                conn.commit()
            except sqlite3.IntegrityError:
                pass  # Exact pair already stored — nothing to do.
            finally:
                conn.close()

    def get_key(self, service_name: str) -> Optional[str]:
        """Return one decrypted active key for *service_name*, or None."""
        # For now, just get the first available active key.
        if self.use_firebase:
            docs = self.collection.where('service', '==', service_name.lower())\
                                  .where('is_active', '==', True)\
                                  .limit(1).stream()
            for doc in docs:
                data = doc.to_dict()
                return self.security.decrypt_data(data['encrypted_key'])
            return None
        else:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            cursor.execute('''
                SELECT encrypted_key FROM api_keys
                WHERE service_name = ? AND is_active = 1
                LIMIT 1
            ''', (service_name.lower(),))
            row = cursor.fetchone()
            conn.close()

            if row:
                return self.security.decrypt_data(row[0])
            return None

    def get_all_keys(self, service_name: str) -> List[str]:
        """Return ALL decrypted active keys for *service_name* (for rotation)."""
        keys = []
        if self.use_firebase:
            docs = self.collection.where('service', '==', service_name.lower())\
                                  .where('is_active', '==', True).stream()
            for doc in docs:
                decrypted = self.security.decrypt_data(doc.to_dict()['encrypted_key'])
                if decrypted:
                    keys.append(decrypted)
        else:
            conn = sqlite3.connect(self.db_path)
            cursor = conn.cursor()
            cursor.execute('''
                SELECT encrypted_key FROM api_keys
                WHERE service_name = ? AND is_active = 1
            ''', (service_name.lower(),))
            rows = cursor.fetchall()
            conn.close()
            for row in rows:
                decrypted = self.security.decrypt_data(row[0])
                if decrypted:
                    keys.append(decrypted)
        return keys
|
core/security.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from cryptography.fernet import Fernet
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
class SecurityManager:
    """
    Encrypts and decrypts sensitive strings (e.g. API keys) using Fernet.
    """

    def __init__(self, secret_key: Optional[str] = None):
        """
        Build a cipher from *secret_key*, or from the FERNET_SECRET
        environment variable when no key is passed in.

        If neither is available, a throwaway session key is generated —
        data encrypted with it cannot be decrypted after the process exits.

        Raises:
            ValueError: When the provided/environment key is not a valid Fernet key.
        """
        key = secret_key or os.getenv("FERNET_SECRET")

        if not key:
            # Dev/test convenience only — production should always set FERNET_SECRET.
            print("⚠ WARNING: FERNET_SECRET not found. Generating a temporary key for this session.")
            self._cipher = Fernet(Fernet.generate_key())
            return

        try:
            material = key.encode() if isinstance(key, str) else key
            self._cipher = Fernet(material)
        except Exception as e:
            raise ValueError(f"Invalid FERNET_SECRET: {e}")

    @staticmethod
    def generate_key() -> str:
        """
        Produce a fresh Fernet key.

        Run this once and store the output in your environment variables.
        """
        return Fernet.generate_key().decode()

    def encrypt_data(self, data: str) -> str:
        """Encrypt *data*; empty input passes through as ''."""
        if not data:
            return ""
        return self._cipher.encrypt(data.encode()).decode()

    def decrypt_data(self, encrypted_data: str) -> str:
        """Decrypt *encrypted_data*; returns '' on empty input or on failure."""
        if not encrypted_data:
            return ""
        try:
            return self._cipher.decrypt(encrypted_data.encode()).decode()
        except Exception as e:
            print(f"❌ Decryption failed: {e}")
            return ""
|
| 57 |
+
|
| 58 |
+
# Module-level convenience instance.
# Import 'security' and call security.encrypt_data() directly when the
# FERNET_SECRET environment variable is set; None if construction fails.
try:
    security = SecurityManager()
except Exception:
    security = None
|
core/stt.py
CHANGED
|
@@ -33,13 +33,14 @@ class STT:
|
|
| 33 |
print(f"⚠️ GPU not available, using CPU with {model_size} model: {e}")
|
| 34 |
self.model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
| 35 |
|
| 36 |
-
def get_transcript(self, video_path: str, language: str = None, skip_ai: bool = False, timestamp_mode="segments"):
|
| 37 |
"""تحويل الفيديو لنص مع توقيت الكلمات باستخدام Faster-Whisper
|
| 38 |
|
| 39 |
Args:
|
| 40 |
timestamp_mode: "words" للكلمات الفردية, "segments" للجمل الكاملة
|
|
|
|
| 41 |
"""
|
| 42 |
-
print(f"🎙️ Transcribing: {video_path} (Language: {language if language else 'Auto'}, Mode: {timestamp_mode})")
|
| 43 |
|
| 44 |
# تسجيل الـ transcript في ملف logs
|
| 45 |
log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs", "transcript.log")
|
|
@@ -104,8 +105,8 @@ class STT:
|
|
| 104 |
beam_size=1,
|
| 105 |
word_timestamps=word_timestamps,
|
| 106 |
language=actual_stt_lang,
|
| 107 |
-
vad_filter=
|
| 108 |
-
vad_parameters=dict(min_silence_duration_ms=500)
|
| 109 |
)
|
| 110 |
detected_lang = info.language
|
| 111 |
|
|
|
|
| 33 |
print(f"⚠️ GPU not available, using CPU with {model_size} model: {e}")
|
| 34 |
self.model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
| 35 |
|
| 36 |
+
def get_transcript(self, video_path: str, language: str = None, skip_ai: bool = False, timestamp_mode="segments", vad_filter=True):
|
| 37 |
"""تحويل الفيديو لنص مع توقيت الكلمات باستخدام Faster-Whisper
|
| 38 |
|
| 39 |
Args:
|
| 40 |
timestamp_mode: "words" للكلمات الفردية, "segments" للجمل الكاملة
|
| 41 |
+
vad_filter: تصفية الصوت الفارغ (True/False)
|
| 42 |
"""
|
| 43 |
+
print(f"🎙️ Transcribing: {video_path} (Language: {language if language else 'Auto'}, Mode: {timestamp_mode}, VAD: {vad_filter})")
|
| 44 |
|
| 45 |
# تسجيل الـ transcript في ملف logs
|
| 46 |
log_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs", "transcript.log")
|
|
|
|
| 105 |
beam_size=1,
|
| 106 |
word_timestamps=word_timestamps,
|
| 107 |
language=actual_stt_lang,
|
| 108 |
+
vad_filter=vad_filter, # استخدام المعامل الممرر
|
| 109 |
+
vad_parameters=dict(min_silence_duration_ms=500) if vad_filter else None
|
| 110 |
)
|
| 111 |
detected_lang = info.language
|
| 112 |
|
core/styles.py
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
import os
|
| 3 |
import cv2
|
|
|
|
| 4 |
import moviepy.editor as mpe
|
| 5 |
from .config import Config
|
| 6 |
from .logger import Logger
|
|
@@ -8,34 +14,114 @@ from .subtitle_manager import SubtitleManager
|
|
| 8 |
|
| 9 |
logger = Logger.get_logger(__name__)
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
class SmartFaceCropper:
|
| 12 |
def __init__(self, output_size=(1080, 1920)):
|
| 13 |
self.output_size = output_size
|
| 14 |
-
self.face_cascade = cv2.CascadeClassifier(
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
self.
|
| 18 |
-
self.
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def get_crop_coordinates(self, frame):
|
| 21 |
-
h, w
|
| 22 |
target_w = int(h * self.output_size[0] / self.output_size[1])
|
| 23 |
-
gray
|
| 24 |
-
|
| 25 |
-
faces
|
| 26 |
|
| 27 |
if len(faces) > 0:
|
| 28 |
-
faces = sorted(faces, key=lambda f: f[2]*f[3], reverse=True)
|
| 29 |
-
fx, fy, fw, fh
|
| 30 |
current_center_x = fx + fw // 2
|
| 31 |
-
self.last_coords
|
| 32 |
else:
|
| 33 |
current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x
|
| 34 |
|
| 35 |
if self.smoothed_x is None:
|
| 36 |
self.smoothed_x = current_center_x
|
| 37 |
else:
|
| 38 |
-
self.smoothed_x =
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
left = int(self.smoothed_x - target_w // 2)
|
| 41 |
left = max(0, min(left, w - target_w))
|
|
@@ -43,24 +129,27 @@ class SmartFaceCropper:
|
|
| 43 |
|
| 44 |
def apply_to_clip(self, clip):
|
| 45 |
frame_skip = 5
|
| 46 |
-
|
| 47 |
def filter_frame(get_frame, t):
|
| 48 |
frame = get_frame(t)
|
| 49 |
self.frame_count += 1
|
| 50 |
-
|
| 51 |
if self.frame_count % frame_skip == 0 or self.last_coords is None:
|
| 52 |
-
left,
|
| 53 |
else:
|
| 54 |
-
h, w
|
| 55 |
target_w = int(h * self.output_size[0] / self.output_size[1])
|
| 56 |
-
left
|
| 57 |
-
left
|
| 58 |
-
right
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
return cv2.resize(cropped, self.output_size)
|
| 62 |
return clip.fl(filter_frame)
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
class BaseStyle(ABC):
|
| 65 |
def __init__(self, output_size=Config.DEFAULT_SIZE):
|
| 66 |
self.output_size = output_size
|
|
@@ -68,162 +157,259 @@ class BaseStyle(ABC):
|
|
| 68 |
@abstractmethod
|
| 69 |
def apply(self, clip, **kwargs):
|
| 70 |
pass
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
# --------------------------------------------------------------------------
|
| 75 |
-
def apply_with_captions(self, clip, transcript_data=None, language=None, caption_mode="sentence", **kwargs):
|
| 76 |
-
"""
|
| 77 |
-
Applies style AND adds captions in a single composition step.
|
| 78 |
-
This prevents double rendering (CompositeVideoClip inside CompositeVideoClip).
|
| 79 |
-
"""
|
| 80 |
-
# 1. Get the base styled clip (which might be a CompositeVideoClip itself)
|
| 81 |
styled_clip = self.apply(clip, **kwargs)
|
| 82 |
-
|
| 83 |
-
# 2. If no captions needed, just return the styled clip
|
| 84 |
if not transcript_data:
|
| 85 |
return styled_clip
|
| 86 |
|
| 87 |
-
# 3. Generate caption CLIPS (ImageClips) only, do not composite yet
|
| 88 |
caption_clips = self._create_caption_clips(transcript_data, language, caption_mode)
|
| 89 |
-
|
| 90 |
if not caption_clips:
|
| 91 |
return styled_clip
|
| 92 |
|
| 93 |
-
# 4. Optimize Composition:
|
| 94 |
-
# If styled_clip is already a CompositeVideoClip, we can flatten the list
|
| 95 |
-
# instead of nesting composites.
|
| 96 |
if isinstance(styled_clip, mpe.CompositeVideoClip):
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
# If styled_clip is just a simple VideoFileClip or similar, wrap it
|
| 102 |
-
return mpe.CompositeVideoClip([styled_clip] + caption_clips, size=self.output_size)
|
| 103 |
-
# --------------------------------------------------------------------------
|
| 104 |
|
| 105 |
def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
|
| 106 |
-
|
| 107 |
-
# but kept for backward compatibility if needed.
|
| 108 |
if not transcript_data:
|
| 109 |
return clip
|
| 110 |
return SubtitleManager.create_captions(
|
| 111 |
-
clip,
|
| 112 |
-
|
| 113 |
-
size=self.output_size,
|
| 114 |
-
language=language,
|
| 115 |
-
caption_mode=caption_mode
|
| 116 |
)
|
| 117 |
-
|
| 118 |
def _create_caption_clips(self, transcript_data, language=None, caption_mode="sentence"):
|
| 119 |
-
"""Helper to create just the caption clips list, not a full CompositeVideoClip"""
|
| 120 |
return SubtitleManager.create_caption_clips(
|
| 121 |
-
transcript_data,
|
| 122 |
-
|
| 123 |
-
language=language,
|
| 124 |
-
caption_mode=caption_mode
|
| 125 |
)
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
class CinematicStyle(BaseStyle):
|
| 128 |
def apply(self, clip, background_path=None, **kwargs):
|
| 129 |
if background_path and os.path.exists(background_path):
|
| 130 |
ext = os.path.splitext(background_path)[1].lower()
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
bg = bg.subclip(0, clip.duration)
|
| 139 |
else:
|
| 140 |
-
bg =
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
| 142 |
if bg.w > self.output_size[0]:
|
| 143 |
-
bg = bg.crop(x_center=bg.w/2, width=self.output_size[0])
|
| 144 |
else:
|
| 145 |
bg = bg.resize(width=self.output_size[0])
|
| 146 |
else:
|
| 147 |
bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
class CinematicBlurStyle(BaseStyle):
|
| 157 |
def apply(self, clip, **kwargs):
|
| 158 |
bg = clip.resize(height=self.output_size[1])
|
| 159 |
if bg.w < self.output_size[0]:
|
| 160 |
bg = clip.resize(width=self.output_size[0])
|
| 161 |
-
|
| 162 |
def make_blur(get_frame, t):
|
| 163 |
-
frame
|
| 164 |
-
small
|
| 165 |
-
blurred = cv2.resize(
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
| 169 |
bg_blurred = bg.fl(make_blur).set_opacity(0.6)
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
class SplitVerticalStyle(BaseStyle):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
def apply(self, clip, playground_path=None, **kwargs):
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
if playground_path and os.path.exists(playground_path):
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
else:
|
| 189 |
-
bottom = bottom.subclip(0, clip.duration)
|
| 190 |
else:
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
class SplitHorizontalStyle(BaseStyle):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
def apply(self, clip, playground_path=None, **kwargs):
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
if playground_path and os.path.exists(playground_path):
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
else:
|
| 206 |
-
right = right.subclip(0, clip.duration)
|
| 207 |
else:
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
class VerticalFullStyle(BaseStyle):
|
| 213 |
def apply(self, clip, **kwargs):
|
| 214 |
cropper = SmartFaceCropper(output_size=self.output_size)
|
| 215 |
return cropper.apply_to_clip(clip)
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
class StyleFactory:
|
| 218 |
_styles = {
|
| 219 |
-
"cinematic":
|
| 220 |
-
"cinematic_blur":
|
| 221 |
-
"split_vertical":
|
| 222 |
"split_horizontal": SplitHorizontalStyle,
|
| 223 |
-
"vertical_full":
|
| 224 |
}
|
| 225 |
|
| 226 |
@staticmethod
|
| 227 |
def get_style(style_name) -> BaseStyle:
|
| 228 |
style_class = StyleFactory._styles.get(style_name, CinematicBlurStyle)
|
| 229 |
-
return style_class()
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Video Styles — YouTube Shorts Production Engine
|
| 3 |
+
SplitVertical & SplitHorizontal rebuilt with seamless gradient blending.
|
| 4 |
+
All class/method names kept identical for drop-in integration.
|
| 5 |
+
"""
|
| 6 |
from abc import ABC, abstractmethod
|
| 7 |
import os
|
| 8 |
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
import moviepy.editor as mpe
|
| 11 |
from .config import Config
|
| 12 |
from .logger import Logger
|
|
|
|
| 14 |
|
| 15 |
logger = Logger.get_logger(__name__)
|
| 16 |
|
| 17 |
+
|
| 18 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 19 |
+
# Gradient Mask Helpers
|
| 20 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 21 |
+
|
| 22 |
+
def _linear_gradient(length: int, fade_from_zero: bool) -> np.ndarray:
|
| 23 |
+
"""
|
| 24 |
+
Returns a 1-D float32 array [0..1] of given length.
|
| 25 |
+
fade_from_zero=True → 0 → 1 (clip fades IN at this edge)
|
| 26 |
+
fade_from_zero=False → 1 → 0 (clip fades OUT at this edge)
|
| 27 |
+
"""
|
| 28 |
+
arr = np.linspace(0.0, 1.0, length, dtype=np.float32)
|
| 29 |
+
return arr if fade_from_zero else arr[::-1]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _make_vertical_mask(clip_w: int, clip_h: int,
|
| 33 |
+
blend_top: int = 0, blend_bottom: int = 0) -> np.ndarray:
|
| 34 |
+
"""
|
| 35 |
+
Float32 mask (clip_h × clip_w) in [0,1].
|
| 36 |
+
blend_top → pixels from top that fade in (0→1)
|
| 37 |
+
blend_bottom → pixels from bottom that fade out (1→0)
|
| 38 |
+
"""
|
| 39 |
+
mask = np.ones((clip_h, clip_w), dtype=np.float32)
|
| 40 |
+
if blend_top > 0:
|
| 41 |
+
grad = _linear_gradient(blend_top, fade_from_zero=True)
|
| 42 |
+
mask[:blend_top, :] = grad[:, np.newaxis]
|
| 43 |
+
if blend_bottom > 0:
|
| 44 |
+
grad = _linear_gradient(blend_bottom, fade_from_zero=False)
|
| 45 |
+
mask[clip_h - blend_bottom:, :] = grad[:, np.newaxis]
|
| 46 |
+
return mask
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _make_horizontal_mask(clip_w: int, clip_h: int,
|
| 50 |
+
blend_left: int = 0, blend_right: int = 0) -> np.ndarray:
|
| 51 |
+
"""
|
| 52 |
+
Float32 mask (clip_h × clip_w) in [0,1].
|
| 53 |
+
blend_left → pixels from left that fade in (0→1)
|
| 54 |
+
blend_right → pixels from right that fade out (1→0)
|
| 55 |
+
"""
|
| 56 |
+
mask = np.ones((clip_h, clip_w), dtype=np.float32)
|
| 57 |
+
if blend_left > 0:
|
| 58 |
+
grad = _linear_gradient(blend_left, fade_from_zero=True)
|
| 59 |
+
mask[:, :blend_left] = grad[np.newaxis, :]
|
| 60 |
+
if blend_right > 0:
|
| 61 |
+
grad = _linear_gradient(blend_right, fade_from_zero=False)
|
| 62 |
+
mask[:, clip_w - blend_right:] = grad[np.newaxis, :]
|
| 63 |
+
return mask
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _apply_mask(clip: mpe.VideoClip, mask_array: np.ndarray) -> mpe.VideoClip:
    """Attach a static float32 numpy mask to a video clip."""
    return clip.set_mask(
        mpe.ImageClip(mask_array, ismask=True, duration=clip.duration)
    )
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _fit_to_width(clip: mpe.VideoClip, target_w: int) -> mpe.VideoClip:
    """Resize clip so width == target_w, keeping aspect ratio."""
    # NOTE(review): relies on moviepy's resize deriving the height
    # automatically when only `width` is given — confirm against moviepy docs.
    return clip.resize(width=target_w)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _fit_to_height(clip: mpe.VideoClip, target_h: int) -> mpe.VideoClip:
    """Resize clip so height == target_h, keeping aspect ratio."""
    # NOTE(review): relies on moviepy's resize deriving the width
    # automatically when only `height` is given — confirm against moviepy docs.
    return clip.resize(height=target_h)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _loop_or_cut(clip: mpe.VideoClip, duration: float) -> mpe.VideoClip:
    """Force `clip` to exactly `duration` seconds.

    Short clips are looped up to the target; long (or equal) clips are
    trimmed down to it.
    """
    if clip.duration >= duration:
        return clip.subclip(0, duration)
    return clip.loop(duration=duration)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 89 |
+
# Smart Face Cropper
|
| 90 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 91 |
+
|
| 92 |
class SmartFaceCropper:
|
| 93 |
def __init__(self, output_size=(1080, 1920)):
|
| 94 |
self.output_size = output_size
|
| 95 |
+
self.face_cascade = cv2.CascadeClassifier(
|
| 96 |
+
cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
|
| 97 |
+
)
|
| 98 |
+
self.last_coords = None
|
| 99 |
+
self.smoothed_x = None
|
| 100 |
+
self.smoothing = 0.2
|
| 101 |
+
self.frame_count = 0
|
| 102 |
|
| 103 |
def get_crop_coordinates(self, frame):
|
| 104 |
+
h, w = frame.shape[:2]
|
| 105 |
target_w = int(h * self.output_size[0] / self.output_size[1])
|
| 106 |
+
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
| 107 |
+
small = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
|
| 108 |
+
faces = self.face_cascade.detectMultiScale(small, 1.1, 8, minSize=(50, 50))
|
| 109 |
|
| 110 |
if len(faces) > 0:
|
| 111 |
+
faces = sorted(faces, key=lambda f: f[2] * f[3], reverse=True)
|
| 112 |
+
fx, fy, fw, fh = [v * 2 for v in faces[0]]
|
| 113 |
current_center_x = fx + fw // 2
|
| 114 |
+
self.last_coords = (fx, fy, fw, fh)
|
| 115 |
else:
|
| 116 |
current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x
|
| 117 |
|
| 118 |
if self.smoothed_x is None:
|
| 119 |
self.smoothed_x = current_center_x
|
| 120 |
else:
|
| 121 |
+
self.smoothed_x = (
|
| 122 |
+
self.smoothed_x * (1 - self.smoothing)
|
| 123 |
+
+ current_center_x * self.smoothing
|
| 124 |
+
)
|
| 125 |
|
| 126 |
left = int(self.smoothed_x - target_w // 2)
|
| 127 |
left = max(0, min(left, w - target_w))
|
|
|
|
| 129 |
|
| 130 |
def apply_to_clip(self, clip):
|
| 131 |
frame_skip = 5
|
| 132 |
+
|
| 133 |
def filter_frame(get_frame, t):
|
| 134 |
frame = get_frame(t)
|
| 135 |
self.frame_count += 1
|
|
|
|
| 136 |
if self.frame_count % frame_skip == 0 or self.last_coords is None:
|
| 137 |
+
left, _, right, _ = self.get_crop_coordinates(frame)
|
| 138 |
else:
|
| 139 |
+
h, w = frame.shape[:2]
|
| 140 |
target_w = int(h * self.output_size[0] / self.output_size[1])
|
| 141 |
+
left = int(self.smoothed_x - target_w // 2) if self.smoothed_x else w // 2 - target_w // 2
|
| 142 |
+
left = max(0, min(left, w - target_w))
|
| 143 |
+
right = left + target_w
|
| 144 |
+
return cv2.resize(frame[:, left:right], self.output_size)
|
| 145 |
+
|
|
|
|
| 146 |
return clip.fl(filter_frame)
|
| 147 |
|
| 148 |
+
|
| 149 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 150 |
+
# Base Style
|
| 151 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 152 |
+
|
| 153 |
class BaseStyle(ABC):
    """Abstract base for all output-layout styles.

    Subclasses implement `apply(clip, **kwargs)` and return a clip sized to
    `self.output_size`. Caption overlay is handled here so every style gets
    it for free via `apply_with_captions`.
    """

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        # Target (width, height) of the rendered output canvas.
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        pass

    def apply_with_captions(self, clip, transcript_data=None, language=None,
                            caption_mode="sentence", **kwargs):
        """Apply the style, then overlay caption clips in ONE composite pass.

        When the styled result is already a CompositeVideoClip, its layers
        are flattened together with the caption clips instead of nesting a
        composite inside a composite (avoids double rendering).
        Returns the styled clip unchanged when there is nothing to caption.
        """
        styled_clip = self.apply(clip, **kwargs)
        if not transcript_data:
            return styled_clip

        caption_clips = self._create_caption_clips(transcript_data, language, caption_mode)
        if not caption_clips:
            return styled_clip

        if isinstance(styled_clip, mpe.CompositeVideoClip):
            # Flatten: reuse the existing layers, append captions on top.
            return mpe.CompositeVideoClip(
                list(styled_clip.clips) + caption_clips, size=self.output_size
            )
        # Simple clip: wrap it together with the captions.
        return mpe.CompositeVideoClip([styled_clip] + caption_clips, size=self.output_size)

    def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
        """Kept for backward compatibility."""
        # Prefer apply_with_captions; this path composites captions directly
        # onto an already-rendered clip via SubtitleManager.
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip, transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
        )

    def _create_caption_clips(self, transcript_data, language=None, caption_mode="sentence"):
        # Helper: build just the list of caption ImageClips, no compositing.
        return SubtitleManager.create_caption_clips(
            transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
        )
|
| 191 |
|
| 192 |
+
|
| 193 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 194 |
+
# Cinematic Style
|
| 195 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 196 |
+
|
| 197 |
class CinematicStyle(BaseStyle):
    """Center the main clip over a user-supplied background (image or video).

    Falls back to a plain black canvas when no background path is given or
    the file does not exist.
    """

    def apply(self, clip, background_path=None, **kwargs):
        """Compose `clip` over a background scaled to fill the output canvas.

        background_path: optional image/video file. Video backgrounds are
        muted and looped/trimmed to the main clip's duration.
        """
        if background_path and os.path.exists(background_path):
            ext = os.path.splitext(background_path)[1].lower()
            video_ext = {".mp4", ".avi", ".mov", ".mkv", ".webm"}
            if ext in video_ext:
                # Mute the background so it can't clash with the main audio,
                # then loop or trim it to exactly the foreground's duration.
                bg = _loop_or_cut(
                    mpe.VideoFileClip(background_path).without_audio()
                    .resize(height=self.output_size[1]),
                    clip.duration,
                )
            else:
                # Still image: hold it for the whole duration of the clip.
                bg = (
                    mpe.ImageClip(background_path)
                    .set_duration(clip.duration)
                    .resize(height=self.output_size[1])
                )
            # Background is already full height; now fill the width too:
            # center-crop when it overflows, upscale when it falls short.
            if bg.w > self.output_size[0]:
                bg = bg.crop(x_center=bg.w / 2, width=self.output_size[0])
            else:
                bg = bg.resize(width=self.output_size[0])
        else:
            # No usable background → solid black canvas.
            bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)

        # Fit the foreground inside the canvas: width-fit first, fall back
        # to height-fit if that makes it taller than the canvas.
        main = clip.resize(width=self.output_size[0]).set_position("center")
        if main.h > self.output_size[1]:
            main = clip.resize(height=self.output_size[1]).set_position("center")

        return mpe.CompositeVideoClip([bg, main], size=self.output_size)
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 229 |
+
# Cinematic Blur Style
|
| 230 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 231 |
|
| 232 |
class CinematicBlurStyle(BaseStyle):
    """Fill the canvas with a heavily blurred copy of the clip rendered
    behind the sharp, centered original — the classic blurred-background
    portrait look."""

    def apply(self, clip, **kwargs):
        # Scale a copy to cover the full canvas (height-first, width fallback).
        bg = clip.resize(height=self.output_size[1])
        if bg.w < self.output_size[0]:
            bg = clip.resize(width=self.output_size[0])

        def make_blur(get_frame, t):
            # Cheap heavy blur: crush the frame to 16×16, blow it back up to
            # canvas size, then smooth the blockiness with a Gaussian pass.
            frame = get_frame(t)
            small = cv2.resize(frame, (16, 16))
            blurred = cv2.resize(
                small, (self.output_size[0], self.output_size[1]),
                interpolation=cv2.INTER_LINEAR,
            )
            return cv2.GaussianBlur(blurred, (21, 21), 0)

        # Dim the blurred layer so the sharp foreground pops.
        bg_blurred = bg.fl(make_blur).set_opacity(0.6)
        # Sharp foreground: width-fit first, height-fit if it overflows.
        main = clip.resize(width=self.output_size[0]).set_position("center")
        if main.h > self.output_size[1]:
            main = clip.resize(height=self.output_size[1]).set_position("center")

        return mpe.CompositeVideoClip([bg_blurred, main], size=self.output_size)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 256 |
+
# Split Vertical (top / bottom, seamless gradient blend)
|
| 257 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 258 |
|
| 259 |
class SplitVerticalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 × 1920) into top and bottom segments.

    Layout
    ──────
      • Top segment   : 58 % of canvas height → ~1114 px
      • Bottom segment: fills the rest        → ~926 px
      • Blend zone    : 120 px overlap where the two clips cross-fade via
        gradient masks — no hard dividing line visible.

    The gradient is very subtle (linear alpha), so it doesn't destroy
    content near the seam, it just dissolves one clip into the other.
    """

    SPLIT_RATIO: float = 0.58   # top segment fraction of total height
    BLEND_PX: int = 120         # overlap / blend zone height in pixels

    def apply(self, clip, playground_path=None, **kwargs):
        W, H = self.output_size                 # 1080 × 1920
        blend = self.BLEND_PX
        h_top_seg = int(H * self.SPLIT_RATIO)   # ~1114
        # bot segment carries the overlap, so bot_y + h_bot_seg == H exactly.
        h_bot_seg = H - h_top_seg + blend       # ~926 (includes overlap)

        # ── Prepare main clip for top segment ───────────────────────────────
        top_clip = _fit_to_width(clip, W)

        # Crop to the top portion we need (+ blend zone so gradient has room)
        top_h = min(top_clip.h, h_top_seg + blend // 2)
        top_clip = top_clip.crop(x1=0, y1=0, x2=W, y2=top_h).resize((W, h_top_seg))

        # Gradient: fade out the bottom `blend` rows → seamless merge
        top_mask = _make_vertical_mask(W, h_top_seg, blend_bottom=blend)
        top_clip = _apply_mask(top_clip, top_mask).set_position((0, 0))

        # ── Prepare playground / fallback clip for bottom segment ────────────
        if playground_path and os.path.exists(playground_path):
            # External b-roll: mute and match the foreground's duration.
            bot_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            # Fallback: mirror/tint of the same source
            bot_src = clip.set_opacity(0.85)

        bot_clip = _fit_to_width(bot_src, W)

        # We want the middle/lower portion of the source for the bottom panel
        if bot_clip.h > h_bot_seg:
            y_start = max(0, bot_clip.h - h_bot_seg)
            bot_clip = bot_clip.crop(x1=0, y1=y_start,
                                     x2=W, y2=bot_clip.h)

        bot_clip = bot_clip.resize((W, h_bot_seg))

        # Gradient: fade in the top `blend` rows → seamless merge
        bot_mask = _make_vertical_mask(W, h_bot_seg, blend_top=blend)
        bot_y = h_top_seg - blend               # overlaps by `blend` px
        bot_clip = _apply_mask(bot_clip, bot_mask).set_position((0, bot_y))

        # Top layer drawn last so its fading edge dissolves over the bottom.
        return mpe.CompositeVideoClip([bot_clip, top_clip], size=self.output_size)
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 322 |
+
# Split Horizontal (left / right, seamless gradient blend)
|
| 323 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 324 |
|
| 325 |
class SplitHorizontalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 × 1920) into left and right panels.

    Layout
    ──────
      • Each panel fills the full 1920 px height.
      • Left panel : 52 % of canvas width → ~562 px
      • Right panel: fills the rest       → ~518 px
      • Blend zone : 80 px overlap with cross-fade gradient masks.

    Both panels are individually cropped to portrait aspect ratio
    (each showing a 540-wide slice of a 1080-wide source),
    then blended at the seam — no visible dividing line.
    """

    SPLIT_RATIO: float = 0.52   # left panel fraction of total width
    BLEND_PX: int = 80          # horizontal overlap / blend zone

    def apply(self, clip, playground_path=None, **kwargs):
        W, H = self.output_size                 # 1080 × 1920
        blend = self.BLEND_PX
        w_left_seg = int(W * self.SPLIT_RATIO)  # ~562
        w_right_seg = W - w_left_seg + blend    # ~598 (includes overlap)

        # ── Left panel from main clip ────────────────────────────────────────
        left_src = _fit_to_height(clip, H)
        lw = left_src.w

        # Crop the left portion (slightly more than half for a natural look)
        crop_w_l = min(lw, w_left_seg + blend)
        left_clip = left_src.crop(x1=max(0, lw // 2 - crop_w_l),
                                  y1=0, x2=lw // 2, y2=H)
        left_clip = left_clip.resize((w_left_seg, H))

        # Gradient: fade out rightmost `blend` columns
        left_mask = _make_horizontal_mask(w_left_seg, H, blend_right=blend)
        left_clip = _apply_mask(left_clip, left_mask).set_position((0, 0))

        # ── Right panel from playground or fallback ───────────────────────────
        if playground_path and os.path.exists(playground_path):
            # External b-roll: mute and match the foreground's duration.
            right_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            right_src = clip.set_opacity(0.85)

        right_full = _fit_to_height(right_src, H)
        rw = right_full.w

        # Crop the right portion of the source
        # NOTE(review): x2 = rw // 2 + crop_w_r can exceed rw when the source
        # is narrow — confirm moviepy's crop clamps out-of-bounds coordinates.
        crop_w_r = min(rw, w_right_seg + blend)
        right_clip = right_full.crop(x1=rw // 2, y1=0,
                                     x2=rw // 2 + crop_w_r, y2=H)
        right_clip = right_clip.resize((w_right_seg, H))

        # Gradient: fade in leftmost `blend` columns
        right_mask = _make_horizontal_mask(w_right_seg, H, blend_left=blend)
        right_x = w_left_seg - blend            # overlaps by `blend` px
        right_clip = _apply_mask(right_clip, right_mask).set_position((right_x, 0))

        # Left panel drawn last so its fading edge dissolves over the right.
        return mpe.CompositeVideoClip([right_clip, left_clip], size=self.output_size)
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 390 |
+
# Vertical Full Style
|
| 391 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 392 |
|
| 393 |
class VerticalFullStyle(BaseStyle):
    """Fill the whole vertical frame, delegating framing decisions to the
    face-tracking cropper."""

    def apply(self, clip, **kwargs):
        return SmartFaceCropper(output_size=self.output_size).apply_to_clip(clip)
|
| 397 |
|
| 398 |
+
|
| 399 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 400 |
+
# Style Factory (unchanged API)
|
| 401 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 402 |
+
|
| 403 |
class StyleFactory:
    """Registry mapping style identifiers to their BaseStyle classes."""

    _styles = {
        "cinematic": CinematicStyle,
        "cinematic_blur": CinematicBlurStyle,
        "split_vertical": SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full": VerticalFullStyle,
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """Instantiate the style registered under `style_name`.

        Unknown names fall back to CinematicBlurStyle.
        """
        return StyleFactory._styles.get(style_name, CinematicBlurStyle)()
|
core/subtitle_manager.py
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import numpy as np
|
| 3 |
import urllib.request
|
|
@@ -10,307 +15,554 @@ from .logger import Logger
|
|
| 10 |
|
| 11 |
logger = Logger.get_logger(__name__)
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
class SubtitleManager:
|
|
|
|
|
|
|
| 14 |
@staticmethod
|
| 15 |
def ensure_font(language=None, style_font=None, text_content=None):
|
| 16 |
-
"""
|
| 17 |
-
|
| 18 |
-
# 1. Determine Font Name
|
| 19 |
font_name = Config.LANGUAGE_FONT_MAP.get("default", "Roboto-Bold.ttf")
|
| 20 |
-
|
| 21 |
-
# Priority 1: Explicit Style Font (if language supports it or it's Latin)
|
| 22 |
-
# However, if text is Arabic/CJK, style font (usually Latin) might break it.
|
| 23 |
-
# So we should check language compatibility first.
|
| 24 |
-
|
| 25 |
detected_lang = language
|
| 26 |
if not detected_lang and text_content:
|
| 27 |
-
# Simple script detection
|
| 28 |
if any("\u0600" <= c <= "\u06FF" for c in text_content):
|
| 29 |
detected_lang = "ar"
|
| 30 |
elif any("\u4E00" <= c <= "\u9FFF" for c in text_content):
|
| 31 |
detected_lang = "zh"
|
| 32 |
-
elif any("\u3040" <= c <= "\
|
| 33 |
detected_lang = "ja"
|
| 34 |
elif any("\u0900" <= c <= "\u097F" for c in text_content):
|
| 35 |
detected_lang = "hi"
|
| 36 |
elif any("\u0400" <= c <= "\u04FF" for c in text_content):
|
| 37 |
detected_lang = "ru"
|
| 38 |
|
| 39 |
-
# Priority 2: Language-specific font from Config Map
|
| 40 |
if detected_lang in Config.LANGUAGE_FONT_MAP:
|
| 41 |
-
|
| 42 |
-
elif style_font and not detected_lang:
|
| 43 |
-
|
| 44 |
-
font_name = style_font
|
| 45 |
|
| 46 |
-
# Fallback: if detected language is known but not in map (shouldn't happen with default keys)
|
| 47 |
if detected_lang and detected_lang not in Config.LANGUAGE_FONT_MAP:
|
| 48 |
-
|
| 49 |
-
|
| 50 |
font_path = os.path.join(Config.BASE_DIR, font_name)
|
| 51 |
-
|
| 52 |
if not os.path.exists(font_path):
|
| 53 |
-
logger.info(f"📥 Downloading font: {font_name}
|
| 54 |
-
# We might need to add more fonts to Config.FONTS or download dynamically
|
| 55 |
url = Config.FONTS.get(font_name)
|
| 56 |
if url:
|
| 57 |
try:
|
| 58 |
-
# Use Config's CSS downloader for Google Fonts
|
| 59 |
if "fonts.googleapis.com/css" in url:
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
raise Exception("CSS font download failed")
|
| 63 |
else:
|
| 64 |
-
# Fallback for direct links
|
| 65 |
urllib.request.urlretrieve(url, font_path)
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
logger.error(f"❌ Failed to download font: {e}")
|
| 70 |
return "Arial"
|
| 71 |
else:
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
if font_name == "Montserrat-Bold.ttf": # TikTok popular
|
| 75 |
-
# Add logic to download or use system font
|
| 76 |
-
pass
|
| 77 |
-
|
| 78 |
return font_path
|
| 79 |
|
|
|
|
| 80 |
@staticmethod
|
| 81 |
-
def create_pil_text_clip(text, fontsize, color, font_path,
|
| 82 |
-
|
|
|
|
| 83 |
try:
|
| 84 |
try:
|
| 85 |
font = ImageFont.truetype(font_path, fontsize)
|
| 86 |
-
except:
|
| 87 |
-
logger.warning(f"⚠️
|
| 88 |
font = ImageFont.load_default()
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
bbox
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
margin = int(stroke_width * 2) + padding
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
img = Image.new('RGBA', (int(img_width), int(img_height)), (0, 0, 0, 0))
|
| 101 |
draw = ImageDraw.Draw(img)
|
| 102 |
-
|
| 103 |
-
# Draw Background if requested
|
| 104 |
if bg_color:
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
draw.text(
|
| 115 |
-
(x, y),
|
| 116 |
-
text,
|
| 117 |
-
font=font,
|
| 118 |
-
fill=color,
|
| 119 |
-
stroke_width=stroke_width,
|
| 120 |
-
stroke_fill=stroke_color
|
| 121 |
-
)
|
| 122 |
-
|
| 123 |
return mpe.ImageClip(np.array(img))
|
| 124 |
-
|
| 125 |
-
except Exception as
|
| 126 |
-
logger.error(f"⚠️
|
| 127 |
return None
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
@staticmethod
|
| 130 |
def get_style_config(style_name):
|
| 131 |
-
"""Returns
|
| 132 |
-
|
| 133 |
-
"classic": {
|
| 134 |
-
"fontsize": 75,
|
| 135 |
-
"color": "white",
|
| 136 |
-
"stroke_color": "black",
|
| 137 |
-
"stroke_width": 2,
|
| 138 |
-
"font": None, # Default based on language
|
| 139 |
-
"bg_color": None,
|
| 140 |
-
"position": ("center", 1350)
|
| 141 |
-
},
|
| 142 |
-
"tiktok_bold": {
|
| 143 |
-
"fontsize": 85,
|
| 144 |
-
"color": "white",
|
| 145 |
-
"stroke_color": "black",
|
| 146 |
-
"stroke_width": 4,
|
| 147 |
-
"font": "Montserrat-Bold.ttf", # Popular on TikTok
|
| 148 |
-
"bg_color": None, # Shadow usually used instead of BG
|
| 149 |
-
"position": ("center", 1400)
|
| 150 |
-
},
|
| 151 |
-
"tiktok_neon": {
|
| 152 |
-
"fontsize": 80,
|
| 153 |
-
"color": "#00f2ea", # TikTok Cyan
|
| 154 |
-
"stroke_color": "#ff0050", # TikTok Red
|
| 155 |
-
"stroke_width": 3,
|
| 156 |
-
"font": "Roboto-Bold.ttf",
|
| 157 |
-
"bg_color": None,
|
| 158 |
-
"position": ("center", 1400)
|
| 159 |
-
},
|
| 160 |
-
"youtube_clean": {
|
| 161 |
-
"fontsize": 70,
|
| 162 |
-
"color": "yellow",
|
| 163 |
-
"stroke_color": "black",
|
| 164 |
-
"stroke_width": 3,
|
| 165 |
-
"font": "Roboto-Bold.ttf",
|
| 166 |
-
"bg_color": None,
|
| 167 |
-
"position": ("center", 1300)
|
| 168 |
-
},
|
| 169 |
-
"youtube_box": {
|
| 170 |
-
"fontsize": 65,
|
| 171 |
-
"color": "white",
|
| 172 |
-
"stroke_color": None,
|
| 173 |
-
"stroke_width": 0,
|
| 174 |
-
"font": "Roboto-Bold.ttf",
|
| 175 |
-
"bg_color": "red", # YouTube Red Box
|
| 176 |
-
"position": ("center", 1300)
|
| 177 |
-
}
|
| 178 |
-
}
|
| 179 |
-
return styles.get(style_name, styles["classic"])
|
| 180 |
|
|
|
|
| 181 |
@staticmethod
|
| 182 |
-
def create_caption_clips(transcript_data, size=(1080, 1920), language=None,
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
#
|
| 189 |
-
|
| 190 |
-
# For simplicity and consistency, let's check the first non-empty text.
|
| 191 |
-
|
| 192 |
sample_text = ""
|
| 193 |
-
segments = []
|
| 194 |
if isinstance(transcript_data, list):
|
| 195 |
-
|
| 196 |
-
segments = transcript_data[0][
|
| 197 |
-
|
| 198 |
segments = transcript_data
|
| 199 |
-
elif isinstance(transcript_data, dict) and
|
| 200 |
-
segments = transcript_data[
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
if not full_text:
|
| 217 |
continue
|
| 218 |
|
| 219 |
-
start_t =
|
| 220 |
-
end_t
|
| 221 |
-
|
| 222 |
if end_t <= start_t:
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
end_t =
|
| 226 |
else:
|
| 227 |
continue
|
| 228 |
|
| 229 |
-
words_list = full_text.split()
|
| 230 |
-
if not words_list:
|
| 231 |
-
continue
|
| 232 |
-
|
| 233 |
chunk_size = 1 if caption_mode == "word" else 4
|
| 234 |
-
chunks
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
chunk_group = valid_words[i:i + chunk_size]
|
| 244 |
-
|
| 245 |
-
chunk_text = " ".join([w['text'] for w in chunk_group])
|
| 246 |
-
chunk_start = chunk_group[0]['start']
|
| 247 |
-
chunk_end = chunk_group[-1]['end']
|
| 248 |
-
|
| 249 |
-
chunks.append({
|
| 250 |
-
"text": chunk_text,
|
| 251 |
-
"start": chunk_start,
|
| 252 |
-
"end": chunk_end
|
| 253 |
-
})
|
| 254 |
else:
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
chunk_text = " ".join(chunk_words)
|
| 263 |
-
|
| 264 |
-
chunk_duration = (end_t - start_t) * (len(chunk_words) / len(words_list))
|
| 265 |
-
chunk_start = start_t + (end_t - start_t) * (i / len(words_list))
|
| 266 |
-
chunk_end = chunk_start + chunk_duration
|
| 267 |
-
|
| 268 |
-
if chunk_end <= chunk_start:
|
| 269 |
-
chunk_end = chunk_start + 0.5
|
| 270 |
-
|
| 271 |
-
chunks.append({
|
| 272 |
-
"text": chunk_text,
|
| 273 |
-
"start": chunk_start,
|
| 274 |
-
"end": chunk_end
|
| 275 |
-
})
|
| 276 |
|
| 277 |
for chunk in chunks:
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
if is_arabic:
|
| 282 |
try:
|
| 283 |
-
|
| 284 |
-
except:
|
| 285 |
pass
|
| 286 |
else:
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
font_path=font_path,
|
| 299 |
-
stroke_color=style_config.get("stroke_color", "black"),
|
| 300 |
-
stroke_width=style_config.get("stroke_width", 2),
|
| 301 |
-
bg_color=style_config.get("bg_color")
|
| 302 |
)
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
|
| 310 |
-
return
|
| 311 |
|
|
|
|
| 312 |
@staticmethod
|
| 313 |
-
def create_captions(video_clip, transcript_data, size=(1080, 1920),
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SubtitleManager — Viral YouTube Shorts Caption Engine
|
| 3 |
+
Styles tuned for 2024-2025 Shorts/Reels/TikTok viral aesthetics.
|
| 4 |
+
All style names kept identical to the original for drop-in integration.
|
| 5 |
+
"""
|
| 6 |
import os
|
| 7 |
import numpy as np
|
| 8 |
import urllib.request
|
|
|
|
| 15 |
|
| 16 |
logger = Logger.get_logger(__name__)
|
| 17 |
|
| 18 |
+
|
| 19 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 20 |
+
# Style Registry (same 6 keys as original — drop-in compatible)
|
| 21 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 22 |
+
#
|
| 23 |
+
# Extra keys consumed only by highlight_word mode:
|
| 24 |
+
# highlight_color → text color for the active word
|
| 25 |
+
# highlight_bg → RGBA fill of the box behind active word
|
| 26 |
+
# highlight_bg_radius → corner radius of that box
|
| 27 |
+
# shadow_layers → list of (off_x, off_y, blur_steps, RGBA)
|
| 28 |
+
# drawn UNDER the highlight box for depth/glow
|
| 29 |
+
#
|
| 30 |
+
# Caption style registry — the same six keys as the original engine, so it is
# drop-in compatible.  Keys consumed only by highlight_word mode:
#   highlight_color     → text colour of the active word
#   highlight_bg        → RGBA fill of the box behind the active word
#   highlight_bg_radius → corner radius of that box
#   shadow_layers       → list of (off_x, off_y, blur_steps, RGBA) painted
#                         UNDER the highlight box for depth / glow
STYLES = {
    # 1. CLASSIC — clean, professional news/podcast look.
    "classic": {
        "fontsize": 72,
        "color": (255, 255, 255, 255),
        "stroke_color": (0, 0, 0, 200),
        "stroke_width": 3,
        "font": None,
        "bg_color": None,
        "position": ("center", 0.80),
        "highlight_color": (255, 255, 255, 255),
        "highlight_bg": (18, 18, 18, 220),
        "highlight_bg_radius": 20,
        "shadow_layers": [
            (0, 6, 8, (0, 0, 0, 160)),   # soft drop-shadow
        ],
    },

    # 2. MODERN GLOW — dark frosted bar, electric-blue glowing active pill.
    "modern_glow": {
        "fontsize": 78,
        "color": (200, 225, 255, 200),
        "stroke_color": (0, 10, 40, 255),
        "stroke_width": 2,
        "font": "Montserrat-Bold.ttf",
        "bg_color": (10, 10, 30, 160),        # dark frosted sentence bar
        "position": ("center", 0.83),
        "highlight_color": (130, 230, 255, 255),  # electric cyan text
        "highlight_bg": (0, 130, 255, 210),       # vivid blue pill
        "highlight_bg_radius": 22,
        "shadow_layers": [
            (0, 0, 16, (0, 160, 255, 110)),  # wide outer glow
            (0, 3, 6, (0, 60, 160, 180)),    # tight drop-shadow
        ],
    },

    # 3. TIKTOK BOLD — high-contrast yellow box, heavy stroke.
    "tiktok_bold": {
        "fontsize": 90,
        "color": (255, 255, 255, 255),
        "stroke_color": (0, 0, 0, 255),
        "stroke_width": 5,
        "font": "Montserrat-Bold.ttf",
        "bg_color": None,
        "position": ("center", 0.84),
        "highlight_color": (10, 10, 10, 255),   # near-black on yellow
        "highlight_bg": (255, 220, 0, 255),     # bold yellow
        "highlight_bg_radius": 12,
        "shadow_layers": [
            (4, 6, 0, (0, 0, 0, 230)),   # hard pixel-offset (punchy feel)
            (7, 10, 0, (0, 0, 0, 90)),
        ],
    },

    # 4. TIKTOK NEON — hot-pink pill with cyan text.
    "tiktok_neon": {
        "fontsize": 80,
        "color": (255, 255, 255, 230),
        "stroke_color": (100, 0, 60, 255),
        "stroke_width": 3,
        "font": "Roboto-Bold.ttf",
        "bg_color": None,
        "position": ("center", 0.85),
        "highlight_color": (0, 242, 234, 255),  # TikTok cyan
        "highlight_bg": (255, 0, 80, 235),      # TikTok pink-red
        "highlight_bg_radius": 22,
        "shadow_layers": [
            (0, 0, 20, (255, 0, 80, 120)),   # pink outer glow
            (0, 0, 8, (0, 242, 234, 80)),    # cyan inner glow
            (3, 5, 0, (80, 0, 40, 210)),     # hard dark offset
        ],
    },

    # 5. YOUTUBE CLEAN — minimal educator style, amber active box.
    "youtube_clean": {
        "fontsize": 70,
        "color": (240, 240, 240, 220),
        "stroke_color": (0, 0, 0, 160),
        "stroke_width": 2,
        "font": "Roboto-Bold.ttf",
        "bg_color": (0, 0, 0, 140),             # subtle sentence pill
        "position": ("center", 0.76),
        "highlight_color": (20, 20, 20, 255),   # dark text on amber
        "highlight_bg": (255, 200, 40, 248),    # warm amber
        "highlight_bg_radius": 16,
        "shadow_layers": [
            (0, 4, 10, (180, 130, 0, 170)),  # amber drop-shadow
        ],
    },

    # 6. YOUTUBE BOX — karaoke energy, solid bar + red active box.
    "youtube_box": {
        "fontsize": 68,
        "color": (255, 255, 255, 255),
        "stroke_color": (0, 0, 0, 255),
        "stroke_width": 2,
        "font": "Roboto-Bold.ttf",
        "bg_color": (15, 15, 15, 210),          # dark sentence bar
        "position": ("center", 0.77),
        "highlight_color": (255, 255, 255, 255),
        "highlight_bg": (200, 0, 0, 255),       # YouTube red
        "highlight_bg_radius": 8,
        "shadow_layers": [
            (0, 5, 0, (110, 0, 0, 230)),  # hard dark-red offset
            (0, 9, 0, (0, 0, 0, 130)),
        ],
    },
}
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 153 |
+
# Helpers
|
| 154 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 155 |
+
|
| 156 |
+
def _rgba(c):
|
| 157 |
+
"""Normalise any colour spec to an (R,G,B,A) tuple."""
|
| 158 |
+
if c is None:
|
| 159 |
+
return None
|
| 160 |
+
if isinstance(c, (tuple, list)):
|
| 161 |
+
return (*c[:3], c[3] if len(c) == 4 else 255)
|
| 162 |
+
tmp = Image.new("RGBA", (1, 1), c)
|
| 163 |
+
return tmp.getpixel((0, 0))
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def _draw_shadow_layers(draw, box, layers, base_radius):
    """Paint shadow / glow layers behind a rounded rectangle.

    Args:
        draw: a PIL ``ImageDraw`` object.
        box: (x1, y1, x2, y2) of the rectangle the shadows sit under.
        layers: iterable of (off_x, off_y, blur_steps, rgba) where
            ``blur_steps == 0`` draws a single hard-offset rectangle and
            ``blur_steps > 0`` draws concentric rectangles with fading
            alpha to approximate a soft glow.
        base_radius: corner radius of the un-blurred rectangle.
    """
    x1, y1, x2, y2 = box
    for off_x, off_y, blur, color in layers:
        rgba = _rgba(color)

        if blur == 0:
            # Hard offset: one opaque-ish rect shifted by the offset.
            draw.rounded_rectangle(
                [(x1 + off_x, y1 + off_y), (x2 + off_x, y2 + off_y)],
                radius=base_radius,
                fill=rgba,
            )
            continue

        # Soft glow: draw from the widest, faintest ring inwards.
        steps = max(blur // 2, 3)
        peak_alpha = rgba[3]
        for step in range(steps, 0, -1):
            grow = step * (blur / steps)
            ring_alpha = int(peak_alpha * (1 - step / (steps + 1)))
            draw.rounded_rectangle(
                [
                    (x1 + off_x - grow, y1 + off_y - grow),
                    (x2 + off_x + grow, y2 + off_y + grow),
                ],
                radius=int(base_radius + grow),
                fill=(*rgba[:3], ring_alpha),
            )
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 199 |
class SubtitleManager:
|
| 200 |
+
|
| 201 |
+
# ── Font management ───────────────────────────────────────────────────────
|
| 202 |
@staticmethod
|
| 203 |
def ensure_font(language=None, style_font=None, text_content=None):
|
| 204 |
+
"""Returns an absolute path to a valid font file."""
|
|
|
|
|
|
|
| 205 |
font_name = Config.LANGUAGE_FONT_MAP.get("default", "Roboto-Bold.ttf")
|
| 206 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
detected_lang = language
|
| 208 |
if not detected_lang and text_content:
|
|
|
|
| 209 |
if any("\u0600" <= c <= "\u06FF" for c in text_content):
|
| 210 |
detected_lang = "ar"
|
| 211 |
elif any("\u4E00" <= c <= "\u9FFF" for c in text_content):
|
| 212 |
detected_lang = "zh"
|
| 213 |
+
elif any("\u3040" <= c <= "\u30FF" for c in text_content):
|
| 214 |
detected_lang = "ja"
|
| 215 |
elif any("\u0900" <= c <= "\u097F" for c in text_content):
|
| 216 |
detected_lang = "hi"
|
| 217 |
elif any("\u0400" <= c <= "\u04FF" for c in text_content):
|
| 218 |
detected_lang = "ru"
|
| 219 |
|
|
|
|
| 220 |
if detected_lang in Config.LANGUAGE_FONT_MAP:
|
| 221 |
+
font_name = Config.LANGUAGE_FONT_MAP[detected_lang]
|
| 222 |
+
elif style_font and not detected_lang:
|
| 223 |
+
font_name = style_font
|
|
|
|
| 224 |
|
|
|
|
| 225 |
if detected_lang and detected_lang not in Config.LANGUAGE_FONT_MAP:
|
| 226 |
+
logger.warning(f"⚠️ Language '{detected_lang}' not in font map, using default.")
|
| 227 |
+
|
| 228 |
font_path = os.path.join(Config.BASE_DIR, font_name)
|
|
|
|
| 229 |
if not os.path.exists(font_path):
|
| 230 |
+
logger.info(f"📥 Downloading font: {font_name} …")
|
|
|
|
| 231 |
url = Config.FONTS.get(font_name)
|
| 232 |
if url:
|
| 233 |
try:
|
|
|
|
| 234 |
if "fonts.googleapis.com/css" in url:
|
| 235 |
+
if not Config.download_font_from_css(url, font_path):
|
| 236 |
+
raise RuntimeError("CSS font download failed")
|
|
|
|
| 237 |
else:
|
|
|
|
| 238 |
urllib.request.urlretrieve(url, font_path)
|
| 239 |
+
logger.info(f"✅ Font ready: {font_name}")
|
| 240 |
+
except Exception as exc:
|
| 241 |
+
logger.error(f"❌ Font download failed: {exc}")
|
|
|
|
| 242 |
return "Arial"
|
| 243 |
else:
|
| 244 |
+
logger.warning(f"⚠️ No URL configured for font: {font_name}")
|
| 245 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
return font_path
|
| 247 |
|
| 248 |
+
# ── Legacy single-text clip (sentence / word modes) ───────────────────────
|
| 249 |
@staticmethod
|
| 250 |
+
def create_pil_text_clip(text, fontsize, color, font_path,
|
| 251 |
+
stroke_color="black", stroke_width=2,
|
| 252 |
+
bg_color=None, padding=12, bg_radius=18):
|
| 253 |
try:
|
| 254 |
try:
|
| 255 |
font = ImageFont.truetype(font_path, fontsize)
|
| 256 |
+
except Exception:
|
| 257 |
+
logger.warning(f"⚠️ Could not load {font_path}, using default.")
|
| 258 |
font = ImageFont.load_default()
|
| 259 |
+
|
| 260 |
+
dummy = Image.new("RGBA", (1, 1))
|
| 261 |
+
d = ImageDraw.Draw(dummy)
|
| 262 |
+
bbox = d.textbbox((0, 0), text, font=font)
|
| 263 |
+
tw = bbox[2] - bbox[0]
|
| 264 |
+
th = bbox[3] - bbox[1]
|
| 265 |
+
|
| 266 |
margin = int(stroke_width * 2) + padding
|
| 267 |
+
iw, ih = tw + margin * 2, th + margin * 2
|
| 268 |
+
|
| 269 |
+
img = Image.new("RGBA", (int(iw), int(ih)), (0, 0, 0, 0))
|
|
|
|
| 270 |
draw = ImageDraw.Draw(img)
|
| 271 |
+
|
|
|
|
| 272 |
if bg_color:
|
| 273 |
+
draw.rounded_rectangle([(0, 0), (iw, ih)],
|
| 274 |
+
radius=bg_radius, fill=_rgba(bg_color))
|
| 275 |
+
|
| 276 |
+
x = (iw - tw) / 2 - bbox[0]
|
| 277 |
+
y = (ih - th) / 2 - bbox[1]
|
| 278 |
+
draw.text((x, y), text, font=font, fill=_rgba(color),
|
| 279 |
+
stroke_width=stroke_width, stroke_fill=_rgba(stroke_color))
|
| 280 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
return mpe.ImageClip(np.array(img))
|
| 282 |
+
|
| 283 |
+
except Exception as exc:
|
| 284 |
+
logger.error(f"⚠️ create_pil_text_clip: {exc}")
|
| 285 |
return None
|
| 286 |
|
| 287 |
+
# ── Highlight-word composite renderer ─────────────────────────────────────
|
| 288 |
+
@staticmethod
|
| 289 |
+
def create_sentence_highlight_clip(
|
| 290 |
+
sentence_words, active_word, font, fontsize, font_path,
|
| 291 |
+
style_config, is_arabic, padding=14, bg_radius=20,
|
| 292 |
+
):
|
| 293 |
+
"""
|
| 294 |
+
Renders the entire sentence as ONE image.
|
| 295 |
+
The active word gets a visible shadow-box underneath + highlight colour.
|
| 296 |
+
No floating clip artefacts — position is always correct.
|
| 297 |
+
"""
|
| 298 |
+
try:
|
| 299 |
+
dummy = Image.new("RGBA", (1, 1))
|
| 300 |
+
d = ImageDraw.Draw(dummy)
|
| 301 |
+
|
| 302 |
+
sp_w = max(d.textbbox((0, 0), " ", font=font)[2], 4)
|
| 303 |
+
|
| 304 |
+
# ── 1. Measure each word ─────────────────────────────────────────
|
| 305 |
+
ordered = list(reversed(sentence_words)) if is_arabic else sentence_words
|
| 306 |
+
word_metrics = []
|
| 307 |
+
cursor = 0
|
| 308 |
+
for w in ordered:
|
| 309 |
+
raw = w.get("text", "")
|
| 310 |
+
display = get_display(reshape(raw)) if is_arabic else raw.upper()
|
| 311 |
+
bbox = d.textbbox((0, 0), display, font=font)
|
| 312 |
+
ww = bbox[2] - bbox[0]
|
| 313 |
+
word_metrics.append({"id": id(w), "display": display,
|
| 314 |
+
"bbox": bbox, "x": cursor, "width": ww})
|
| 315 |
+
cursor += ww + sp_w
|
| 316 |
+
|
| 317 |
+
total_w = cursor - sp_w
|
| 318 |
+
ref_bbox = d.textbbox((0, 0), "Ag", font=font)
|
| 319 |
+
text_h = ref_bbox[3] - ref_bbox[1]
|
| 320 |
+
|
| 321 |
+
stroke_w = style_config.get("stroke_width", 2)
|
| 322 |
+
margin = int(stroke_w * 2) + padding
|
| 323 |
+
|
| 324 |
+
# Extra vertical bleed for shadow layers
|
| 325 |
+
bleed = 14
|
| 326 |
+
iw = int(total_w + margin * 2)
|
| 327 |
+
ih = int(text_h + margin * 2 + bleed)
|
| 328 |
+
|
| 329 |
+
img = Image.new("RGBA", (iw, ih), (0, 0, 0, 0))
|
| 330 |
+
draw = ImageDraw.Draw(img)
|
| 331 |
+
|
| 332 |
+
# ── 2. Optional full-sentence background bar ──────────────────────
|
| 333 |
+
sentence_bg = style_config.get("bg_color")
|
| 334 |
+
if sentence_bg:
|
| 335 |
+
draw.rounded_rectangle(
|
| 336 |
+
[(0, bleed // 2), (iw, ih - bleed // 2)],
|
| 337 |
+
radius=bg_radius, fill=_rgba(sentence_bg),
|
| 338 |
+
)
|
| 339 |
+
|
| 340 |
+
# ── 3. Shadow + highlight box for active word ─────────────────────
|
| 341 |
+
active_id = id(active_word)
|
| 342 |
+
hl_bg = _rgba(style_config.get("highlight_bg"))
|
| 343 |
+
hl_radius = style_config.get("highlight_bg_radius", bg_radius)
|
| 344 |
+
shadows = style_config.get("shadow_layers", [])
|
| 345 |
+
|
| 346 |
+
active_wm = next((wm for wm in word_metrics if wm["id"] == active_id), None)
|
| 347 |
+
|
| 348 |
+
if active_wm and hl_bg:
|
| 349 |
+
bx1 = margin + active_wm["x"] - active_wm["bbox"][0] - padding
|
| 350 |
+
by1 = bleed // 2
|
| 351 |
+
bx2 = bx1 + active_wm["width"] + padding * 2
|
| 352 |
+
by2 = ih - bleed // 2
|
| 353 |
+
|
| 354 |
+
# Shadow / glow layers first
|
| 355 |
+
_draw_shadow_layers(draw, (bx1, by1, bx2, by2), shadows, hl_radius)
|
| 356 |
+
|
| 357 |
+
# Main highlight box
|
| 358 |
+
draw.rounded_rectangle([(bx1, by1), (bx2, by2)],
|
| 359 |
+
radius=hl_radius, fill=hl_bg)
|
| 360 |
+
|
| 361 |
+
# ── 4. Draw text words ────────────────────────────────────────────
|
| 362 |
+
rest_c = _rgba(style_config.get("color", (255, 255, 255, 255)))
|
| 363 |
+
hl_c = _rgba(style_config.get("highlight_color", rest_c))
|
| 364 |
+
stk_c = _rgba(style_config.get("stroke_color", (0, 0, 0, 255)))
|
| 365 |
+
|
| 366 |
+
for wm in word_metrics:
|
| 367 |
+
col = hl_c if (wm["id"] == active_id) else rest_c
|
| 368 |
+
tx = margin + wm["x"] - wm["bbox"][0]
|
| 369 |
+
ty = margin + bleed // 2 - wm["bbox"][1]
|
| 370 |
+
draw.text((tx, ty), wm["display"], font=font, fill=col,
|
| 371 |
+
stroke_width=stroke_w, stroke_fill=stk_c)
|
| 372 |
+
|
| 373 |
+
return mpe.ImageClip(np.array(img))
|
| 374 |
+
|
| 375 |
+
except Exception as exc:
|
| 376 |
+
logger.error(f"⚠️ create_sentence_highlight_clip: {exc}")
|
| 377 |
+
return None
|
| 378 |
+
|
| 379 |
+
# ── Public style accessor ──────────────────────────────────────────────────
|
| 380 |
@staticmethod
|
| 381 |
def get_style_config(style_name):
|
| 382 |
+
"""Returns the style dict for the given name (falls back to 'classic')."""
|
| 383 |
+
return STYLES.get(style_name, STYLES["classic"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
|
| 385 |
+
# ── Main generator ─────────────────────────────────────────────────────────
|
| 386 |
@staticmethod
|
| 387 |
+
def create_caption_clips(transcript_data, size=(1080, 1920), language=None,
|
| 388 |
+
caption_mode="sentence", caption_style="classic"):
|
| 389 |
+
"""Generates all caption ImageClips ready for compositing."""
|
| 390 |
+
all_clips = []
|
| 391 |
+
style_cfg = SubtitleManager.get_style_config(caption_style)
|
| 392 |
+
|
| 393 |
+
# ── Parse transcript ─────────────────────────────────────────────────
|
| 394 |
+
segments = []
|
|
|
|
|
|
|
| 395 |
sample_text = ""
|
|
|
|
| 396 |
if isinstance(transcript_data, list):
|
| 397 |
+
if transcript_data and "segments" in transcript_data[0]:
|
| 398 |
+
segments = transcript_data[0]["segments"]
|
| 399 |
+
else:
|
| 400 |
segments = transcript_data
|
| 401 |
+
elif isinstance(transcript_data, dict) and "segments" in transcript_data:
|
| 402 |
+
segments = transcript_data["segments"]
|
| 403 |
+
|
| 404 |
+
for s in segments:
|
| 405 |
+
if s.get("text"):
|
| 406 |
+
sample_text = s["text"]
|
| 407 |
+
break
|
| 408 |
+
|
| 409 |
+
font_path = SubtitleManager.ensure_font(
|
| 410 |
+
language, style_cfg.get("font"), text_content=sample_text
|
| 411 |
+
)
|
| 412 |
+
pos_cfg = style_cfg.get("position", ("center", 0.80))
|
| 413 |
+
pos = (pos_cfg[0], int(pos_cfg[1] * size[1]))
|
| 414 |
+
|
| 415 |
+
# ════════════════════════════════════════════════════════════════════
|
| 416 |
+
# MODE: highlight_word
|
| 417 |
+
# ════════════════════════════════════════════════════════════════════
|
| 418 |
+
if caption_mode == "highlight_word":
|
| 419 |
+
all_words = []
|
| 420 |
+
for seg in segments:
|
| 421 |
+
if "words" in seg and seg["words"]:
|
| 422 |
+
all_words.extend(seg["words"])
|
| 423 |
+
|
| 424 |
+
if not all_words:
|
| 425 |
+
logger.warning("⚠️ highlight_word needs word-level timestamps — none found.")
|
| 426 |
+
return []
|
| 427 |
+
|
| 428 |
+
fontsize = style_cfg.get("fontsize", 75)
|
| 429 |
+
try:
|
| 430 |
+
font = ImageFont.truetype(font_path, fontsize)
|
| 431 |
+
except Exception:
|
| 432 |
+
logger.warning("⚠️ TrueType load failed — falling back to default font.")
|
| 433 |
+
font = ImageFont.load_default()
|
| 434 |
+
|
| 435 |
+
# Group words into sentences (gap > 0.7 s = new sentence)
|
| 436 |
+
sentences, cur = [], []
|
| 437 |
+
for i, word in enumerate(all_words):
|
| 438 |
+
if not word.get("text", "").strip():
|
| 439 |
+
continue
|
| 440 |
+
cur.append(word)
|
| 441 |
+
is_last = (i == len(all_words) - 1)
|
| 442 |
+
pause = (all_words[i + 1]["start"] - word["end"]) if not is_last else 1.0
|
| 443 |
+
if pause > 0.7 or is_last:
|
| 444 |
+
sentences.append(cur)
|
| 445 |
+
cur = []
|
| 446 |
+
|
| 447 |
+
for sw in sentences:
|
| 448 |
+
sent_text = " ".join(w["text"] for w in sw)
|
| 449 |
+
sent_start = sw[0]["start"]
|
| 450 |
+
sent_end = sw[-1]["end"]
|
| 451 |
+
is_ar = language == "ar" or any("\u0600" <= c <= "\u06FF" for c in sent_text)
|
| 452 |
+
|
| 453 |
+
# One frame per word (active highlight moves)
|
| 454 |
+
for active in sw:
|
| 455 |
+
clip = SubtitleManager.create_sentence_highlight_clip(
|
| 456 |
+
sentence_words=sw, active_word=active,
|
| 457 |
+
font=font, fontsize=fontsize, font_path=font_path,
|
| 458 |
+
style_config=style_cfg, is_arabic=is_ar,
|
| 459 |
+
padding=style_cfg.get("padding", 14),
|
| 460 |
+
bg_radius=style_cfg.get("highlight_bg_radius", 20),
|
| 461 |
+
)
|
| 462 |
+
if clip:
|
| 463 |
+
all_clips.append(
|
| 464 |
+
clip.set_start(active["start"])
|
| 465 |
+
.set_end(active["end"])
|
| 466 |
+
.set_position(pos)
|
| 467 |
+
)
|
| 468 |
+
|
| 469 |
+
# Fill inter-word gaps (no active word) with plain sentence
|
| 470 |
+
covered = [(w["start"], w["end"]) for w in sw]
|
| 471 |
+
gaps = []
|
| 472 |
+
if sent_start < covered[0][0]:
|
| 473 |
+
gaps.append((sent_start, covered[0][0]))
|
| 474 |
+
for j in range(len(covered) - 1):
|
| 475 |
+
if covered[j][1] < covered[j + 1][0]:
|
| 476 |
+
gaps.append((covered[j][1], covered[j + 1][0]))
|
| 477 |
+
if covered[-1][1] < sent_end:
|
| 478 |
+
gaps.append((covered[-1][1], sent_end))
|
| 479 |
+
|
| 480 |
+
for gs, ge in gaps:
|
| 481 |
+
plain_cfg = {**style_cfg, "highlight_bg": None, "shadow_layers": []}
|
| 482 |
+
dummy_w = {"text": "", "start": gs, "end": ge}
|
| 483 |
+
gc = SubtitleManager.create_sentence_highlight_clip(
|
| 484 |
+
sentence_words=sw, active_word=dummy_w,
|
| 485 |
+
font=font, fontsize=fontsize, font_path=font_path,
|
| 486 |
+
style_config=plain_cfg, is_arabic=is_ar,
|
| 487 |
+
)
|
| 488 |
+
if gc:
|
| 489 |
+
all_clips.append(gc.set_start(gs).set_end(ge).set_position(pos))
|
| 490 |
+
|
| 491 |
+
return all_clips
|
| 492 |
+
|
| 493 |
+
# ════════════════════════════════════════════════════════════════════
|
| 494 |
+
# LEGACY MODES: sentence / word
|
| 495 |
+
# ════════════════════════════════════════════════════════════════════
|
| 496 |
+
for seg in segments:
|
| 497 |
+
full_text = seg.get("text", "").strip() or " ".join(
|
| 498 |
+
w["text"] for w in seg.get("words", [])
|
| 499 |
+
)
|
| 500 |
if not full_text:
|
| 501 |
continue
|
| 502 |
|
| 503 |
+
start_t = seg.get("start", 0)
|
| 504 |
+
end_t = seg.get("end", 0)
|
|
|
|
| 505 |
if end_t <= start_t:
|
| 506 |
+
ws = seg.get("words", [])
|
| 507 |
+
if ws:
|
| 508 |
+
start_t, end_t = ws[0]["start"], ws[-1]["end"]
|
| 509 |
else:
|
| 510 |
continue
|
| 511 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
chunk_size = 1 if caption_mode == "word" else 4
|
| 513 |
+
chunks = []
|
| 514 |
+
stt_words = seg.get("words")
|
| 515 |
+
|
| 516 |
+
if stt_words:
|
| 517 |
+
valid = [w for w in stt_words if w.get("text", "").strip()]
|
| 518 |
+
for i in range(0, len(valid), chunk_size):
|
| 519 |
+
grp = valid[i:i + chunk_size]
|
| 520 |
+
chunks.append({"text": " ".join(w["text"] for w in grp),
|
| 521 |
+
"start": grp[0]["start"], "end": grp[-1]["end"]})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
else:
|
| 523 |
+
wl = full_text.split()
|
| 524 |
+
for i in range(0, len(wl), chunk_size):
|
| 525 |
+
cw = wl[i:i + chunk_size]
|
| 526 |
+
cs = start_t + (end_t - start_t) * (i / len(wl))
|
| 527 |
+
ce = cs + (end_t - start_t) * (len(cw) / len(wl))
|
| 528 |
+
chunks.append({"text": " ".join(cw),
|
| 529 |
+
"start": cs, "end": max(ce, cs + 0.1)})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
|
| 531 |
for chunk in chunks:
|
| 532 |
+
disp = chunk["text"]
|
| 533 |
+
is_ar = language == "ar" or any("\u0600" <= c <= "\u06FF" for c in disp)
|
| 534 |
+
if is_ar:
|
|
|
|
| 535 |
try:
|
| 536 |
+
disp = get_display(reshape(disp))
|
| 537 |
+
except Exception:
|
| 538 |
pass
|
| 539 |
else:
|
| 540 |
+
disp = disp.upper()
|
| 541 |
+
|
| 542 |
+
clip = SubtitleManager.create_pil_text_clip(
|
| 543 |
+
disp,
|
| 544 |
+
fontsize = style_cfg.get("fontsize", 72),
|
| 545 |
+
color = style_cfg.get("color", (255, 255, 255, 255)),
|
| 546 |
+
font_path = font_path,
|
| 547 |
+
stroke_color = style_cfg.get("stroke_color", (0, 0, 0, 200)),
|
| 548 |
+
stroke_width = style_cfg.get("stroke_width", 2),
|
| 549 |
+
bg_color = style_cfg.get("bg_color"),
|
| 550 |
+
bg_radius = style_cfg.get("highlight_bg_radius", 18),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
)
|
| 552 |
+
if clip:
|
| 553 |
+
all_clips.append(
|
| 554 |
+
clip.set_start(chunk["start"])
|
| 555 |
+
.set_end(chunk["end"])
|
| 556 |
+
.set_position(pos)
|
| 557 |
+
)
|
| 558 |
|
| 559 |
+
return all_clips
|
| 560 |
|
| 561 |
+
# ── Convenience compositor ─────────────────────────────────────────────────
|
| 562 |
@staticmethod
|
| 563 |
+
def create_captions(video_clip, transcript_data, size=(1080, 1920),
|
| 564 |
+
language=None, caption_mode="sentence"):
|
| 565 |
+
clips = SubtitleManager.create_caption_clips(
|
| 566 |
+
transcript_data, size, language, caption_mode
|
| 567 |
+
)
|
| 568 |
+
return mpe.CompositeVideoClip([video_clip] + clips, size=size)
|
firebase_key.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
  "type": "service_account",
  "project_id": "clippingcommunity-caf5a",
  "private_key_id": "REDACTED-ROTATE-IMMEDIATELY",
  "private_key": "REDACTED: a live Google service-account private key was committed to this repository. Revoke/rotate it in the Google Cloud console NOW and purge it from git history; load credentials at runtime from an environment variable or secret store (e.g. GOOGLE_APPLICATION_CREDENTIALS) instead of committing firebase_key.json.",
  "client_email": "firebase-adminsdk-fbsvc@clippingcommunity-caf5a.iam.gserviceaccount.com",
  "client_id": "110147267785886278722",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-fbsvc%40clippingcommunity-caf5a.iam.gserviceaccount.com",
  "universe_domain": "googleapis.com"
}
|
main.py
CHANGED
|
@@ -12,13 +12,26 @@ from processor import VideoProcessor
|
|
| 12 |
from core.config import Config
|
| 13 |
from core.logger import Logger
|
| 14 |
from core.task_queue import TaskManager
|
|
|
|
|
|
|
| 15 |
|
| 16 |
logger = Logger.get_logger(__name__)
|
| 17 |
task_manager = TaskManager()
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
# Ensure directories exist
|
| 20 |
Config.setup_dirs()
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
class VideoStyle(str, Enum):
|
| 23 |
cinematic = "cinematic"
|
| 24 |
cinematic_blur = "cinematic_blur"
|
|
@@ -29,9 +42,11 @@ class VideoStyle(str, Enum):
|
|
| 29 |
class CaptionMode(str, Enum):
|
| 30 |
word = "word"
|
| 31 |
sentence = "sentence"
|
|
|
|
| 32 |
|
| 33 |
class CaptionStyle(str, Enum):
|
| 34 |
classic = "classic"
|
|
|
|
| 35 |
tiktok_bold = "tiktok_bold"
|
| 36 |
tiktok_neon = "tiktok_neon"
|
| 37 |
youtube_clean = "youtube_clean"
|
|
@@ -52,6 +67,37 @@ class Language(str, Enum):
|
|
| 52 |
app = FastAPI(title="Auto-Clipping API")
|
| 53 |
clipper = VideoProcessor()
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
def process_video_task(
|
| 56 |
task_id: str,
|
| 57 |
video_path: str,
|
|
@@ -69,9 +115,6 @@ def process_video_task(
|
|
| 69 |
from moviepy.editor import VideoFileClip
|
| 70 |
full_video_clip = None
|
| 71 |
try:
|
| 72 |
-
# Optimization: Open video once
|
| 73 |
-
full_video_clip = VideoFileClip(video_path)
|
| 74 |
-
|
| 75 |
# Helper for progress updates
|
| 76 |
def update_progress(progress, message):
|
| 77 |
task_manager.update_task_progress(task_id, progress, message)
|
|
@@ -79,11 +122,15 @@ def process_video_task(
|
|
| 79 |
update_progress(1, "Starting video analysis...")
|
| 80 |
|
| 81 |
# 1. Analyze video
|
| 82 |
-
|
|
|
|
| 83 |
scored_segments, total_duration, llm_moments = clipper.analyze_impact(
|
| 84 |
video_path,
|
| 85 |
-
video_clip
|
| 86 |
-
language
|
|
|
|
|
|
|
|
|
|
| 87 |
timestamp_mode=timestamp_mode,
|
| 88 |
progress_callback=update_progress
|
| 89 |
)
|
|
@@ -101,8 +148,8 @@ def process_video_task(
|
|
| 101 |
llm_moments,
|
| 102 |
style=style,
|
| 103 |
task_id=task_id,
|
| 104 |
-
language=language,
|
| 105 |
-
video_clip
|
| 106 |
playground_path=playground_path,
|
| 107 |
audio_path=audio_path,
|
| 108 |
bg_music_volume=bg_music_volume,
|
|
@@ -135,8 +182,7 @@ def process_video_task(
|
|
| 135 |
"traceback": traceback.format_exc()
|
| 136 |
}
|
| 137 |
finally:
|
| 138 |
-
|
| 139 |
-
full_video_clip.close()
|
| 140 |
|
| 141 |
# Send webhook
|
| 142 |
if webhook_url and webhook_url.strip() and webhook_url.startswith(('http://', 'https://')):
|
|
|
|
| 12 |
from core.config import Config
|
| 13 |
from core.logger import Logger
|
| 14 |
from core.task_queue import TaskManager
|
| 15 |
+
from core.database import DatabaseManager
|
| 16 |
+
from pydantic import BaseModel
|
| 17 |
|
| 18 |
logger = Logger.get_logger(__name__)
|
| 19 |
task_manager = TaskManager()
|
| 20 |
|
| 21 |
+
# Initialize Database Manager (Try Firebase, fallback to Local)
|
| 22 |
+
try:
|
| 23 |
+
db_manager = DatabaseManager(use_firebase=True)
|
| 24 |
+
except Exception:
|
| 25 |
+
db_manager = DatabaseManager(use_firebase=False)
|
| 26 |
+
|
| 27 |
# Ensure directories exist
|
| 28 |
Config.setup_dirs()
|
| 29 |
|
| 30 |
+
class APIKeyInput(BaseModel):
    """Request body for POST /api/keys."""
    service: str                 # service identifier, e.g. "openrouter", "openai"
    key: str                     # the raw API key value to store
    use_firebase: bool = False   # True -> shared Firebase DB, False -> local SQLite
|
| 34 |
+
|
| 35 |
class VideoStyle(str, Enum):
|
| 36 |
cinematic = "cinematic"
|
| 37 |
cinematic_blur = "cinematic_blur"
|
|
|
|
| 42 |
class CaptionMode(str, Enum):
|
| 43 |
word = "word"
|
| 44 |
sentence = "sentence"
|
| 45 |
+
highlight_word = "highlight_word"
|
| 46 |
|
| 47 |
class CaptionStyle(str, Enum):
|
| 48 |
classic = "classic"
|
| 49 |
+
modern_glow = "modern_glow"
|
| 50 |
tiktok_bold = "tiktok_bold"
|
| 51 |
tiktok_neon = "tiktok_neon"
|
| 52 |
youtube_clean = "youtube_clean"
|
|
|
|
| 67 |
app = FastAPI(title="Auto-Clipping API")
|
| 68 |
clipper = VideoProcessor()
|
| 69 |
|
| 70 |
+
@app.post("/api/keys")
async def add_api_key(input_data: APIKeyInput):
    """
    Securely adds an API key to the database.
    - service: Service name (e.g., 'openrouter', 'openai')
    - key: The API key string
    - use_firebase: If true, saves to community database (Firebase). If false, saves to local SQLite.
    """
    try:
        target_db = db_manager
        # Honour an explicit Firebase request even when the global manager
        # fell back to SQLite at startup.
        if input_data.use_firebase and not db_manager.use_firebase:
            try:
                target_db = DatabaseManager(use_firebase=True)
            except Exception as e:
                return JSONResponse(
                    status_code=400,
                    content={"error": f"Firebase not configured: {str(e)}"},
                )

        target_db.save_key(input_data.service, input_data.key)

        dest = "Firebase (Community)" if input_data.use_firebase else "Local SQLite"
        return {"message": f"API Key for {input_data.service} saved securely to {dest}."}

    except Exception as e:
        logger.error(f"Error saving API key: {e}")
        return JSONResponse(status_code=500, content={"error": str(e)})
|
| 100 |
+
|
| 101 |
def process_video_task(
|
| 102 |
task_id: str,
|
| 103 |
video_path: str,
|
|
|
|
| 115 |
from moviepy.editor import VideoFileClip
|
| 116 |
full_video_clip = None
|
| 117 |
try:
|
|
|
|
|
|
|
|
|
|
| 118 |
# Helper for progress updates
|
| 119 |
def update_progress(progress, message):
|
| 120 |
task_manager.update_task_progress(task_id, progress, message)
|
|
|
|
| 122 |
update_progress(1, "Starting video analysis...")
|
| 123 |
|
| 124 |
# 1. Analyze video
|
| 125 |
+
# Fix: Ensure 'words' mode is used for highlight_word too
|
| 126 |
+
timestamp_mode = "words" if caption_mode in (CaptionMode.word, CaptionMode.highlight_word) else "segments"
|
| 127 |
scored_segments, total_duration, llm_moments = clipper.analyze_impact(
|
| 128 |
video_path,
|
| 129 |
+
# video_clip removed as it's not supported
|
| 130 |
+
# language passed as target_language if needed, or source?
|
| 131 |
+
# In processor.py: source_language=None (auto), target_language=...
|
| 132 |
+
# main.py seems to treat 'language' as the output/target language
|
| 133 |
+
target_language=language,
|
| 134 |
timestamp_mode=timestamp_mode,
|
| 135 |
progress_callback=update_progress
|
| 136 |
)
|
|
|
|
| 148 |
llm_moments,
|
| 149 |
style=style,
|
| 150 |
task_id=task_id,
|
| 151 |
+
language=language, # target language
|
| 152 |
+
# video_clip removed
|
| 153 |
playground_path=playground_path,
|
| 154 |
audio_path=audio_path,
|
| 155 |
bg_music_volume=bg_music_volume,
|
|
|
|
| 182 |
"traceback": traceback.format_exc()
|
| 183 |
}
|
| 184 |
finally:
|
| 185 |
+
pass
|
|
|
|
| 186 |
|
| 187 |
# Send webhook
|
| 188 |
if webhook_url and webhook_url.strip() and webhook_url.startswith(('http://', 'https://')):
|
processor.py
CHANGED
|
@@ -1,9 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import json
|
| 3 |
import traceback
|
| 4 |
-
from datetime import datetime
|
| 5 |
import moviepy.editor as mpe
|
| 6 |
-
import
|
|
|
|
|
|
|
| 7 |
from core.config import Config
|
| 8 |
from core.logger import Logger
|
| 9 |
from core.stt import STT
|
|
@@ -11,455 +25,448 @@ from core.analyze import analyze_transcript_gemini
|
|
| 11 |
from core.styles import StyleFactory
|
| 12 |
from core.subtitle_manager import SubtitleManager
|
| 13 |
from core.free_translator import FreeTranslator
|
| 14 |
-
import json_repair
|
| 15 |
|
| 16 |
logger = Logger.get_logger(__name__)
|
| 17 |
|
|
|
|
|
|
|
| 18 |
class VideoProcessor:
|
|
|
|
| 19 |
def __init__(self, model_size="base"):
|
| 20 |
self.stt = STT(model_size=model_size)
|
| 21 |
Config.setup_dirs()
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
def _clean_json_response(self, content):
|
| 24 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 25 |
if not isinstance(content, str):
|
| 26 |
return content
|
| 27 |
-
|
| 28 |
-
# Remove markdown blocks if present
|
| 29 |
content = content.strip()
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
content = content[3:]
|
| 34 |
if content.endswith("```"):
|
| 35 |
content = content[:-3]
|
| 36 |
-
|
| 37 |
content = content.strip()
|
| 38 |
-
|
| 39 |
-
# Use json_repair to fix truncated or malformed JSON
|
| 40 |
try:
|
| 41 |
-
|
| 42 |
-
return json.dumps(
|
| 43 |
except Exception as e:
|
| 44 |
-
logger.warning(f"⚠️ json_repair failed,
|
| 45 |
-
|
| 46 |
-
#
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
return content
|
| 55 |
|
| 56 |
def parse_ai_response(self, ai_res):
|
| 57 |
-
"""
|
| 58 |
-
Parses the JSON response from the AI and returns a list of segments.
|
| 59 |
-
Handles both string and dictionary responses, and various potential key names.
|
| 60 |
-
"""
|
| 61 |
if not isinstance(ai_res, dict):
|
| 62 |
-
logger.error(f"❌ Invalid AI response
|
| 63 |
return []
|
| 64 |
-
|
| 65 |
res_content = ai_res.get("content")
|
| 66 |
-
segments_data = {}
|
| 67 |
-
|
| 68 |
try:
|
| 69 |
if isinstance(res_content, str):
|
| 70 |
-
|
| 71 |
-
segments_data = json.loads(cleaned_content)
|
| 72 |
else:
|
| 73 |
segments_data = res_content
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
| 76 |
if isinstance(segments_data, dict):
|
| 77 |
-
for key in
|
| 78 |
if key in segments_data and isinstance(segments_data[key], list):
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
break
|
| 86 |
-
elif isinstance(segments_data, list):
|
| 87 |
-
chunk_segments = segments_data
|
| 88 |
-
|
| 89 |
-
return chunk_segments
|
| 90 |
-
|
| 91 |
except Exception as e:
|
| 92 |
logger.error(f"❌ Failed to parse AI response: {e}")
|
| 93 |
-
logger.error(f"Raw
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
def _clean_json_response(self, text):
|
| 97 |
-
"""Clean markdown and other noise from AI JSON response."""
|
| 98 |
-
text = text.strip()
|
| 99 |
-
if text.startswith("```json"):
|
| 100 |
-
text = text[7:]
|
| 101 |
-
if text.startswith("```"):
|
| 102 |
-
text = text[3:]
|
| 103 |
-
if text.endswith("```"):
|
| 104 |
-
text = text[:-3]
|
| 105 |
-
return text.strip()
|
| 106 |
-
|
| 107 |
-
def analyze_impact(self, video_path, video_clip=None, language=None, timestamp_mode="segments", progress_callback=None):
|
| 108 |
-
"""Analyzes video content and suggests viral clips using AI."""
|
| 109 |
-
if progress_callback: progress_callback(5, "Starting speech-to-text...")
|
| 110 |
-
logger.info("🎙️ Phase 1: Speech-to-Text (STT)...")
|
| 111 |
-
# Always transcribe in source language (auto detect)
|
| 112 |
full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
|
| 113 |
-
video_path,
|
| 114 |
-
language=
|
| 115 |
skip_ai=True,
|
| 116 |
-
timestamp_mode=timestamp_mode
|
| 117 |
)
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
if language:
|
| 122 |
-
if hasattr(language, 'value'):
|
| 123 |
-
target_lang = language.value
|
| 124 |
-
else:
|
| 125 |
-
target_lang = str(language)
|
| 126 |
-
|
| 127 |
-
|
| 128 |
data = {
|
| 129 |
-
"segments":
|
| 130 |
-
"detected_language": detected_lang,
|
| 131 |
-
"
|
|
|
|
| 132 |
}
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
max_time
|
| 142 |
-
|
| 143 |
current_start = 0
|
|
|
|
| 144 |
while current_start < max_time:
|
| 145 |
-
current_end
|
| 146 |
-
|
| 147 |
chunk_transcript = ""
|
|
|
|
| 148 |
for seg in full_segments:
|
| 149 |
if seg["start"] >= current_start and seg["start"] < current_end:
|
| 150 |
-
chunk_transcript +=
|
| 151 |
-
|
|
|
|
|
|
|
| 152 |
if chunk_transcript.strip():
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
logger.info(
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
| 162 |
ai_res = analyze_transcript_gemini(chunk_transcript)
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
logger.info(f"🤖 AI Response Type: {type(ai_res)}")
|
| 166 |
-
if isinstance(ai_res, dict) and "content" in ai_res:
|
| 167 |
-
logger.info(f"🤖 Raw AI Response (First 500 chars): {ai_res['content'][:500]}...")
|
| 168 |
-
else:
|
| 169 |
-
logger.info(f"🤖 Raw AI Response (Structure): {str(ai_res)[:500]}...")
|
| 170 |
-
|
| 171 |
try:
|
| 172 |
-
|
| 173 |
-
logger.info(f"✅
|
| 174 |
-
|
| 175 |
except Exception as e:
|
| 176 |
-
logger.error(f"❌
|
| 177 |
logger.error(traceback.format_exc())
|
| 178 |
-
|
| 179 |
-
current_start +=
|
| 180 |
-
if current_end >= max_time:
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
for s in
|
| 186 |
-
|
| 187 |
-
if
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
def get_best_segments(self, segments, video_duration=0):
|
| 194 |
-
"""
|
| 195 |
return sorted(segments, key=lambda x: x.get("viral_score", 0), reverse=True)
|
| 196 |
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
else:
|
| 209 |
-
|
| 210 |
-
with mpe.VideoFileClip(input_video_path) as temp_vid:
|
| 211 |
-
video_duration = temp_vid.duration
|
| 212 |
-
except Exception as e:
|
| 213 |
-
logger.error(f"❌ Failed to get video duration: {e}")
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
output_files = []
|
| 216 |
-
|
| 217 |
-
# Initialize Translator if needed
|
| 218 |
-
translator = None
|
| 219 |
-
target_language = None
|
| 220 |
-
if language:
|
| 221 |
-
target_language = language.value if hasattr(language, 'value') else language
|
| 222 |
-
|
| 223 |
-
detected_lang = data.get("detected_language", "en")
|
| 224 |
-
needs_translation = (target_language and
|
| 225 |
-
target_language != "auto" and
|
| 226 |
-
target_language != detected_lang)
|
| 227 |
-
|
| 228 |
-
if needs_translation:
|
| 229 |
-
logger.info(f"🌍 Translating from {detected_lang} to {target_language}...")
|
| 230 |
-
translator = FreeTranslator()
|
| 231 |
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
return []
|
| 236 |
-
|
| 237 |
-
logger.info(f"📊 Starting processing for {len(best_clips)} clips...")
|
| 238 |
-
|
| 239 |
-
for i, seg in enumerate(best_clips):
|
| 240 |
-
# Update progress
|
| 241 |
-
current_progress = 60 + int((i / len(best_clips)) * 35) # 60% to 95%
|
| 242 |
-
if progress_callback:
|
| 243 |
-
progress_callback(current_progress, f"Rendering clip {i+1} of {len(best_clips)}...")
|
| 244 |
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
continue
|
| 257 |
-
|
| 258 |
-
|
|
|
|
|
|
|
|
|
|
| 259 |
if needs_translation and translator:
|
|
|
|
| 260 |
try:
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
]
|
| 267 |
-
|
| 268 |
-
if matching_segs:
|
| 269 |
-
logger.info(f"🌍 Translating {len(matching_segs)} segments for Clip {i+1}...")
|
| 270 |
-
for match_s in matching_segs:
|
| 271 |
-
# Skip if already translated (heuristic check if needed, but safe to re-translate if simple)
|
| 272 |
-
# Or better, check if text is already in target language?
|
| 273 |
-
# Since we modify in place, subsequent clips covering same segment might re-translate.
|
| 274 |
-
# But clips usually don't overlap much.
|
| 275 |
-
|
| 276 |
-
tr_text, _ = translator.translate_text(match_s['text'], target_language)
|
| 277 |
-
match_s['text'] = tr_text
|
| 278 |
-
# Clear words to force interpolation since word-level timing is lost
|
| 279 |
-
if 'words' in match_s:
|
| 280 |
-
match_s['words'] = []
|
| 281 |
-
|
| 282 |
-
logger.info(f"✅ Translated clip {i+1} content to {target_language}")
|
| 283 |
-
|
| 284 |
-
except Exception as e:
|
| 285 |
-
logger.warning(f"⚠️ Translation failed for clip {i+1}: {e}")
|
| 286 |
-
|
| 287 |
-
logger.info(f"\n🎬 Processing Clip {i+1}/{len(best_clips)} ({start:.2f} - {end:.2f})...")
|
| 288 |
-
|
| 289 |
-
# Ensure style is a clean string
|
| 290 |
-
style_str = style.value if hasattr(style, "value") else str(style)
|
| 291 |
-
if "." in style_str:
|
| 292 |
-
style_str = style_str.split(".")[-1] # Handle VideoStyle.split_vertical
|
| 293 |
-
|
| 294 |
-
output_filename = f"viral_{i+1}_{style_str}.mp4"
|
| 295 |
-
# Add task_id to filename if provided to avoid collisions
|
| 296 |
-
task_id = kwargs.get("task_id")
|
| 297 |
-
if task_id:
|
| 298 |
-
output_filename = f"viral_{task_id}_{i+1}_{style_str}.mp4"
|
| 299 |
-
|
| 300 |
-
final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", output_filename)
|
| 301 |
-
os.makedirs(os.path.dirname(final_output), exist_ok=True)
|
| 302 |
-
|
| 303 |
-
if start >= video_duration:
|
| 304 |
-
logger.warning(f"⚠️ Clip start time {start} is beyond video duration {video_duration}, skipping.")
|
| 305 |
-
continue
|
| 306 |
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
if w["start"] < end and w["end"] > start:
|
| 351 |
-
nw = w.copy()
|
| 352 |
-
nw["start"] = max(0, w["start"] - start)
|
| 353 |
-
nw["end"] = min(end - start, w["end"] - start)
|
| 354 |
-
new_words.append(nw)
|
| 355 |
-
new_seg["words"] = new_words
|
| 356 |
-
|
| 357 |
-
segment_transcript["segments"].append(new_seg)
|
| 358 |
-
|
| 359 |
-
# 4. Use the optimized apply_with_captions method
|
| 360 |
-
final_clip = style_strategy.apply_with_captions(
|
| 361 |
-
clip,
|
| 362 |
-
transcript_data=segment_transcript,
|
| 363 |
-
language=target_language if needs_translation else detected_lang,
|
| 364 |
-
caption_mode=kwargs.get('caption_mode', 'sentence'),
|
| 365 |
-
caption_style=kwargs.get('caption_style', 'classic'),
|
| 366 |
-
background_path=kwargs.get("background_path"),
|
| 367 |
-
playground_path=kwargs.get("playground_path")
|
| 368 |
-
)
|
| 369 |
-
|
| 370 |
-
# 5. Write Output
|
| 371 |
-
# Automatically use all available CPU cores
|
| 372 |
-
# os.cpu_count() returns None if undetermined, so we default to 4 in that case
|
| 373 |
-
cpu_count = os.cpu_count() or 4
|
| 374 |
-
logger.info(f"⚙️ Using {cpu_count} threads for video rendering")
|
| 375 |
-
|
| 376 |
-
final_clip.write_videofile(
|
| 377 |
-
final_output,
|
| 378 |
-
codec="libx264",
|
| 379 |
-
audio_codec="aac",
|
| 380 |
-
threads=cpu_count,
|
| 381 |
-
logger=None
|
| 382 |
-
)
|
| 383 |
-
|
| 384 |
-
output_files.append(final_output)
|
| 385 |
-
|
| 386 |
-
except Exception as e:
|
| 387 |
-
logger.error(f"❌ Error processing clip {i+1}: {e}")
|
| 388 |
-
logger.error(traceback.format_exc())
|
| 389 |
-
finally:
|
| 390 |
-
# 🧹 Explicit Cleanup
|
| 391 |
-
if final_clip:
|
| 392 |
-
try: final_clip.close()
|
| 393 |
-
except: pass
|
| 394 |
-
if clip:
|
| 395 |
-
try: clip.close()
|
| 396 |
-
except: pass
|
| 397 |
-
if current_video_clip:
|
| 398 |
-
try: current_video_clip.close()
|
| 399 |
-
except: pass
|
| 400 |
-
|
| 401 |
-
# Force garbage collection
|
| 402 |
-
import gc
|
| 403 |
-
gc.collect()
|
| 404 |
-
|
| 405 |
-
except Exception as e:
|
| 406 |
-
logger.error(f"❌ Error in processing loop: {e}")
|
| 407 |
-
logger.error(traceback.format_exc())
|
| 408 |
-
|
| 409 |
-
# Note: We don't close passed video_clip here because we didn't open it (or we treated it as read-only for duration)
|
| 410 |
-
# The caller is responsible for closing video_clip if they passed it.
|
| 411 |
|
| 412 |
return output_files
|
| 413 |
|
| 414 |
-
|
| 415 |
-
#
|
| 416 |
-
# -
|
|
|
|
|
|
|
| 417 |
def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
|
| 418 |
"""
|
| 419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
"""
|
| 421 |
-
video_clip = None
|
| 422 |
try:
|
| 423 |
processor = VideoProcessor(model_size=model_size)
|
| 424 |
-
|
| 425 |
-
# 1. Open Video Clip ONCE
|
| 426 |
-
video_clip = mpe.VideoFileClip(video_path)
|
| 427 |
-
|
| 428 |
-
# 2. Analyze (Reuse video_clip)
|
| 429 |
caption_mode = kwargs.get("caption_mode", "sentence")
|
| 430 |
-
|
| 431 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
viral_segments, duration, stt_data = processor.analyze_impact(
|
| 433 |
-
video_path,
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
timestamp_mode=timestamp_mode
|
| 437 |
)
|
| 438 |
-
|
| 439 |
if not viral_segments:
|
| 440 |
logger.warning("⚠️ No viral segments found.")
|
| 441 |
return []
|
| 442 |
|
| 443 |
-
#
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
|
|
|
| 452 |
)
|
| 453 |
-
|
| 454 |
-
return output_files
|
| 455 |
|
| 456 |
except Exception as e:
|
| 457 |
logger.error(f"❌ Processing failed: {e}")
|
| 458 |
logger.error(traceback.format_exc())
|
| 459 |
return []
|
| 460 |
-
|
| 461 |
-
if video_clip:
|
| 462 |
-
video_clip.close()
|
| 463 |
|
| 464 |
if __name__ == "__main__":
|
| 465 |
import sys
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
VideoProcessor — Core pipeline for viral clip extraction.
|
| 3 |
+
|
| 4 |
+
Fixes applied:
|
| 5 |
+
- source_language (for Whisper) separated from target_language (for translation/captions)
|
| 6 |
+
- Removed duplicate _clean_json_response (json_repair version kept)
|
| 7 |
+
- Single translation pass only (no double-translate on data in-place)
|
| 8 |
+
- timestamp_mode handles highlight_word correctly
|
| 9 |
+
- style string normalised once
|
| 10 |
+
- get_best_segments wired into process_video
|
| 11 |
+
- detected_lang used correctly for captions
|
| 12 |
+
"""
|
| 13 |
import os
|
| 14 |
+
import gc
|
| 15 |
import json
|
| 16 |
import traceback
|
|
|
|
| 17 |
import moviepy.editor as mpe
|
| 18 |
+
import json_repair
|
| 19 |
+
|
| 20 |
+
import core # Applies monkey patches
|
| 21 |
from core.config import Config
|
| 22 |
from core.logger import Logger
|
| 23 |
from core.stt import STT
|
|
|
|
| 25 |
from core.styles import StyleFactory
|
| 26 |
from core.subtitle_manager import SubtitleManager
|
| 27 |
from core.free_translator import FreeTranslator
|
|
|
|
| 28 |
|
| 29 |
logger = Logger.get_logger(__name__)
|
| 30 |
|
| 31 |
+
|
| 32 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 33 |
class VideoProcessor:
|
| 34 |
+
|
| 35 |
def __init__(self, model_size="base"):
|
| 36 |
self.stt = STT(model_size=model_size)
|
| 37 |
Config.setup_dirs()
|
| 38 |
+
|
| 39 |
+
# ── JSON helpers ──────────────────────────────────────────────────────────
|
| 40 |
+
|
| 41 |
def _clean_json_response(self, content):
|
| 42 |
+
"""
|
| 43 |
+
Strips markdown fences then uses json_repair to fix malformed JSON.
|
| 44 |
+
Single definition — json_repair version only.
|
| 45 |
+
"""
|
| 46 |
if not isinstance(content, str):
|
| 47 |
return content
|
| 48 |
+
|
|
|
|
| 49 |
content = content.strip()
|
| 50 |
+
for fence in ("```json", "```"):
|
| 51 |
+
if content.startswith(fence):
|
| 52 |
+
content = content[len(fence):]
|
|
|
|
| 53 |
if content.endswith("```"):
|
| 54 |
content = content[:-3]
|
|
|
|
| 55 |
content = content.strip()
|
| 56 |
+
|
|
|
|
| 57 |
try:
|
| 58 |
+
repaired = json_repair.loads(content)
|
| 59 |
+
return json.dumps(repaired)
|
| 60 |
except Exception as e:
|
| 61 |
+
logger.warning(f"⚠️ json_repair failed, using raw content: {e}")
|
| 62 |
+
|
| 63 |
+
# Last-resort brace balancing
|
| 64 |
+
open_b = content.count("{")
|
| 65 |
+
close_b = content.count("}")
|
| 66 |
+
if open_b > close_b:
|
| 67 |
+
content += "}" * (open_b - close_b)
|
| 68 |
+
logger.info(f"🔧 Appended {open_b - close_b} closing brace(s)")
|
| 69 |
+
|
|
|
|
| 70 |
return content
|
| 71 |
|
| 72 |
def parse_ai_response(self, ai_res):
|
| 73 |
+
"""Parses AI JSON response → list of segment dicts."""
|
|
|
|
|
|
|
|
|
|
| 74 |
if not isinstance(ai_res, dict):
|
| 75 |
+
logger.error(f"❌ Invalid AI response type: {type(ai_res)}")
|
| 76 |
return []
|
| 77 |
+
|
| 78 |
res_content = ai_res.get("content")
|
|
|
|
|
|
|
| 79 |
try:
|
| 80 |
if isinstance(res_content, str):
|
| 81 |
+
segments_data = json.loads(self._clean_json_response(res_content))
|
|
|
|
| 82 |
else:
|
| 83 |
segments_data = res_content
|
| 84 |
+
|
| 85 |
+
if isinstance(segments_data, list):
|
| 86 |
+
return segments_data
|
| 87 |
+
|
| 88 |
if isinstance(segments_data, dict):
|
| 89 |
+
for key in ("segments", "clips", "moments"):
|
| 90 |
if key in segments_data and isinstance(segments_data[key], list):
|
| 91 |
+
return segments_data[key]
|
| 92 |
+
# Fallback: first list value found
|
| 93 |
+
for v in segments_data.values():
|
| 94 |
+
if isinstance(v, list):
|
| 95 |
+
return v
|
| 96 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
except Exception as e:
|
| 98 |
logger.error(f"❌ Failed to parse AI response: {e}")
|
| 99 |
+
logger.error(f"Raw content: {res_content}")
|
| 100 |
+
|
| 101 |
+
return []
|
| 102 |
+
|
| 103 |
+
# ── Analysis ──────────────────────────────────────────────────────────────
|
| 104 |
+
|
| 105 |
+
def analyze_impact(self,
|
| 106 |
+
video_path,
|
| 107 |
+
source_language=None, # ← لغة الفيديو الأصلي → بتيجي لـ Whisper
|
| 108 |
+
target_language=None, # ← لغة الـ output (ترجمة/كابشن)
|
| 109 |
+
timestamp_mode="segments",
|
| 110 |
+
progress_callback=None):
|
| 111 |
+
"""
|
| 112 |
+
STT + AI viral-moment detection.
|
| 113 |
+
|
| 114 |
+
source_language : بيتبعت لـ Whisper مباشرة.
|
| 115 |
+
لو None → Whisper يكتشف تلقائي (أبطأ لكن آمن).
|
| 116 |
+
target_language : بيتحفظ في data عشان process_clips يستخدمه للترجمة والكابشن.
|
| 117 |
+
|
| 118 |
+
Returns (unique_segments, duration, data)
|
| 119 |
+
"""
|
| 120 |
+
if progress_callback:
|
| 121 |
+
progress_callback(5, "Starting speech-to-text...")
|
| 122 |
+
|
| 123 |
+
logger.info(
|
| 124 |
+
f"🎙️ Phase 1: STT | source_language={source_language or 'auto-detect'}"
|
| 125 |
+
)
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
full_segments, full_text, duration, detected_lang = self.stt.get_transcript(
|
| 128 |
+
video_path,
|
| 129 |
+
language=source_language, # None = Whisper يكتشف تلقائي
|
| 130 |
skip_ai=True,
|
| 131 |
+
timestamp_mode=timestamp_mode,
|
| 132 |
)
|
| 133 |
+
|
| 134 |
+
logger.info(f"🔍 Whisper detected language: {detected_lang}")
|
| 135 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
data = {
|
| 137 |
+
"segments": full_segments,
|
| 138 |
+
"detected_language": detected_lang, # اللغة الفعلية اللي Whisper اكتشفها
|
| 139 |
+
"target_language": target_language, # اللغة المطلوبة للـ output
|
| 140 |
+
"duration": duration,
|
| 141 |
}
|
| 142 |
+
|
| 143 |
+
# ── AI Viral Analysis ─────────────────────────────────────────────────
|
| 144 |
+
logger.info("🤖 Phase 2: AI Viral Moment Analysis …")
|
| 145 |
+
if progress_callback:
|
| 146 |
+
progress_callback(20, "Analysing content for viral moments …")
|
| 147 |
+
|
| 148 |
+
chunk_size = Config.CHUNK_SIZE_SECONDS
|
| 149 |
+
overlap = Config.OVERLAP_SECONDS
|
| 150 |
+
max_time = full_segments[-1]["end"] if full_segments else 0
|
| 151 |
+
all_ai_segs = []
|
| 152 |
current_start = 0
|
| 153 |
+
|
| 154 |
while current_start < max_time:
|
| 155 |
+
current_end = current_start + chunk_size
|
|
|
|
| 156 |
chunk_transcript = ""
|
| 157 |
+
|
| 158 |
for seg in full_segments:
|
| 159 |
if seg["start"] >= current_start and seg["start"] < current_end:
|
| 160 |
+
chunk_transcript += (
|
| 161 |
+
f"[{seg['start']:.2f} - {seg['end']:.2f}] {seg['text']}\n"
|
| 162 |
+
)
|
| 163 |
+
|
| 164 |
if chunk_transcript.strip():
|
| 165 |
+
pct = 20 + int((current_start / max_time) * 40)
|
| 166 |
+
if progress_callback:
|
| 167 |
+
progress_callback(
|
| 168 |
+
pct,
|
| 169 |
+
f"Analysing {current_start/60:.1f}m – "
|
| 170 |
+
f"{min(current_end, max_time)/60:.1f}m",
|
| 171 |
+
)
|
| 172 |
+
logger.info(
|
| 173 |
+
f"🧠 Chunk {current_start/60:.1f}m → "
|
| 174 |
+
f"{min(current_end, max_time)/60:.1f}m …"
|
| 175 |
+
)
|
| 176 |
+
|
| 177 |
ai_res = analyze_transcript_gemini(chunk_transcript)
|
| 178 |
+
logger.info(f"🤖 AI response type: {type(ai_res)}")
|
| 179 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
try:
|
| 181 |
+
chunk_segs = self.parse_ai_response(ai_res)
|
| 182 |
+
logger.info(f"✅ {len(chunk_segs)} segments in chunk")
|
| 183 |
+
all_ai_segs.extend(chunk_segs)
|
| 184 |
except Exception as e:
|
| 185 |
+
logger.error(f"❌ Chunk processing error: {e}")
|
| 186 |
logger.error(traceback.format_exc())
|
| 187 |
+
|
| 188 |
+
current_start += chunk_size - overlap
|
| 189 |
+
if current_end >= max_time:
|
| 190 |
+
break
|
| 191 |
+
|
| 192 |
+
# Deduplicate by start_time
|
| 193 |
+
seen, unique = set(), []
|
| 194 |
+
for s in all_ai_segs:
|
| 195 |
+
st = s.get("start_time")
|
| 196 |
+
if st not in seen:
|
| 197 |
+
unique.append(s)
|
| 198 |
+
seen.add(st)
|
| 199 |
+
|
| 200 |
+
logger.info(f"📊 Total unique viral segments found: {len(unique)}")
|
| 201 |
+
return unique, duration, data
|
| 202 |
+
|
| 203 |
+
# ── Sorting ───────────────────────────────────────────────────────────────
|
| 204 |
|
| 205 |
def get_best_segments(self, segments, video_duration=0):
|
| 206 |
+
"""Sort segments by viral_score descending."""
|
| 207 |
return sorted(segments, key=lambda x: x.get("viral_score", 0), reverse=True)
|
| 208 |
|
| 209 |
+
# ── Processing ────────────────────────────────────────────────────────────
|
| 210 |
+
|
| 211 |
+
def process_clips(self,
                  input_video_path,
                  best_clips,
                  data,
                  style="cinematic",
                  progress_callback=None,
                  **kwargs):
    """
    Cuts, styles, captions, and exports each viral clip.

    Parameters
    ----------
    input_video_path : str
        Path of the full source video to cut clips from.
    best_clips : list[dict]
        Segments (with ``start_time`` / ``end_time``) to render, ranked order.
    data : dict
        STT result: must contain ``segments``; may contain ``duration``,
        ``detected_language`` and ``target_language`` (set by analyze_impact).
    style : str or Enum
        Visual style name; normalised to a plain string below.
    progress_callback : callable(pct:int, msg:str) or None
        Optional progress reporter (60–95% range during rendering).
    **kwargs
        task_id, caption_mode, caption_style, background_path, playground_path.

    Returns
    -------
    list[str]
        Paths of the exported clip files (possibly empty).

    Notes
    -----
    target_language comes from ``data["target_language"]`` (set by
    analyze_impact). Translation happens exactly once, inside the
    segment_transcript loop — nothing else mutates ``data``.
    """
    logger.info("🎨 Phase 3: Style & Captions …")
    if progress_callback:
        progress_callback(60, "Generating clips …")

    # ── Video duration ────────────────────────────────────────────────────
    # Prefer the duration already measured during STT; fall back to opening
    # the file once (best effort — on failure duration stays 0 and every
    # clip is skipped by the start/end guards below).
    video_duration = data.get("duration") or 0
    if not video_duration:
        try:
            with mpe.VideoFileClip(input_video_path) as tmp:
                video_duration = tmp.duration
        except Exception as e:
            logger.error(f"❌ Could not determine video duration: {e}")

    # ── Language resolution ───────────────────────────────────────────────
    #
    # detected_lang    = actual language of the video (from Whisper)
    # target_language  = requested output language (from the request)
    #
    # needs_translation = True → translate the transcript text
    # caption_lang      = language the captions are rendered in
    #
    detected_lang = data.get("detected_language", "en")
    target_language = data.get("target_language")  # set by analyze_impact

    # normalize: accept an Enum member as well as a plain string
    if hasattr(target_language, "value"):
        target_language = target_language.value

    needs_translation = bool(
        target_language
        and target_language != "auto"
        and target_language != detected_lang
    )

    # Captions use the output language when translating; otherwise the
    # video's original language.
    caption_lang = target_language if needs_translation else detected_lang

    translator = FreeTranslator() if needs_translation else None
    if needs_translation:
        logger.info(f"🌍 Will translate: {detected_lang} → {target_language}")
    else:
        logger.info(f"🗣️ No translation needed — captions in: {caption_lang}")

    # ── Normalise style string once ───────────────────────────────────────
    # Handles Enum members ("StyleEnum.cinematic" → "cinematic").
    style_str = style.value if hasattr(style, "value") else str(style)
    if "." in style_str:
        style_str = style_str.split(".")[-1]

    # ── Main loop ─────────────────────────────────────────────────────────
    output_files = []

    if not best_clips:
        logger.warning("⚠️ No clips to process.")
        return []

    logger.info(f"📊 Processing {len(best_clips)} clip(s) …")

    for i, seg in enumerate(best_clips):
        # Map clip index onto the 60–95% progress range.
        pct = 60 + int((i / len(best_clips)) * 35)
        if progress_callback:
            progress_callback(pct, f"Rendering clip {i+1}/{len(best_clips)} …")

        # Pre-declare so the finally-block can close whatever was opened,
        # even if an exception fires mid-way through the pipeline.
        clip = None
        final_clip = None
        current_video_clip = None

        try:
            # Clamp the requested window into [0, video_duration].
            start = max(0, seg.get("start_time", 0))
            end = min(video_duration, seg.get("end_time", 0))

            if end - start < 1.0:
                logger.warning(
                    f"⚠️ Clip {i+1} too short ({end-start:.2f}s), skipping."
                )
                continue

            if start >= video_duration:
                logger.warning(
                    f"⚠️ Clip {i+1} start {start}s ≥ duration {video_duration}s, skipping."
                )
                continue

            logger.info(f"\n🎬 Clip {i+1}/{len(best_clips)} ({start:.2f}s – {end:.2f}s)")

            # ── Output path ───────────────────────────────────────────────
            task_id = kwargs.get("task_id")
            prefix = f"viral_{task_id}_{i+1}" if task_id else f"viral_{i+1}"
            out_name = f"{prefix}_{style_str}.mp4"
            final_output = os.path.join(Config.OUTPUTS_DIR, "viral_clips", out_name)
            os.makedirs(os.path.dirname(final_output), exist_ok=True)

            # ── Cut clip (fresh VideoFileClip per iteration) ───────────────
            # A fresh reader per clip avoids moviepy state leaking between
            # iterations; it is closed in the finally-block below.
            current_video_clip = mpe.VideoFileClip(input_video_path)
            clip = current_video_clip.subclip(start, end)

            # ── Build segment_transcript ──────────────────────────────────
            # Translation happens HERE only — nothing else mutates `data`.
            # Timestamps are rebased so 0 = the start of this clip.
            segment_transcript = {"segments": []}

            for s in data["segments"]:
                # Skip transcript segments that do not overlap the clip.
                if s["start"] >= end or s["end"] <= start:
                    continue

                new_seg = s.copy()
                new_seg["start"] = max(0, s["start"] - start)
                new_seg["end"] = min(end - start, s["end"] - start)

                if needs_translation and translator:
                    # Translate the text, then distribute timestamps evenly
                    # over the translated words (original word timings no
                    # longer apply after translation).
                    try:
                        translated_text, _ = translator.translate_text(
                            s["text"], target_language
                        )
                    except Exception as te:
                        # Best effort: keep the original text on failure.
                        logger.warning(f"⚠️ Translation error: {te}")
                        translated_text = s["text"]

                    new_seg["text"] = translated_text
                    words = translated_text.split()
                    seg_dur = new_seg["end"] - new_seg["start"]
                    word_dur = seg_dur / len(words) if words else seg_dur
                    new_seg["words"] = [
                        {
                            "text": w,
                            "start": new_seg["start"] + idx * word_dur,
                            "end": new_seg["start"] + (idx + 1) * word_dur,
                        }
                        for idx, w in enumerate(words)
                    ]
                else:
                    # No translation: shift the existing word timestamps
                    # into clip-relative time, dropping out-of-window words.
                    if "words" in s:
                        new_seg["words"] = [
                            {
                                **w,
                                "start": max(0, w["start"] - start),
                                "end": min(end - start, w["end"] - start),
                            }
                            for w in s["words"]
                            if w["start"] < end and w["end"] > start
                        ]

                segment_transcript["segments"].append(new_seg)

            # ── Apply style + captions ────────────────────────────────────
            style_strategy = StyleFactory.get_style(style_str)
            logger.info(f"✨ Style: {style_str} | Caption lang: {caption_lang}")

            final_clip = style_strategy.apply_with_captions(
                clip,
                transcript_data = segment_transcript,
                language = caption_lang,
                caption_mode = kwargs.get("caption_mode", "sentence"),
                caption_style = kwargs.get("caption_style", "classic"),
                background_path = kwargs.get("background_path"),
                playground_path = kwargs.get("playground_path"),
            )

            # ── Export ────────────────────────────────────────────────────
            cpu_count = os.cpu_count() or 4
            logger.info(f"⚙️ Rendering with {cpu_count} thread(s) …")

            final_clip.write_videofile(
                final_output,
                codec = "libx264",
                audio_codec = "aac",
                threads = cpu_count,
                logger = None,  # silence moviepy's progress bar
            )

            output_files.append(final_output)
            logger.info(f"✅ Saved: {final_output}")

        except Exception as e:
            # One bad clip must not abort the whole batch.
            logger.error(f"❌ Clip {i+1} error: {e}")
            logger.error(traceback.format_exc())

        finally:
            # Close derived clips before the source reader; ignore errors
            # from objects that were never opened or are already closed.
            for obj in (final_clip, clip, current_video_clip):
                if obj:
                    try:
                        obj.close()
                    except Exception:
                        pass
            gc.collect()

    return output_files
|
| 411 |
|
| 412 |
+
|
| 413 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 414 |
+
# Module-level convenience wrapper
|
| 415 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 416 |
+
|
| 417 |
def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
    """
    End-to-end pipeline: STT → AI analysis → clip export.

    Relevant kwargs:
        source_language : language of the source video, forwarded to Whisper
                          (Whisper auto-detects when omitted)
        language        : requested output language (translation + captions);
                          no translation happens when it equals the source
        caption_mode    : sentence | word | highlight_word
        caption_style   : classic | modern_glow | tiktok_bold | ...

    Returns the list of exported clip paths, or [] on failure / no segments.
    """
    try:
        engine = VideoProcessor(model_size=model_size)

        # word and highlight_word both need word-level timestamps from Whisper
        mode = kwargs.get("caption_mode", "sentence")
        ts_mode = "words" if mode in ("word", "highlight_word") else "segments"

        # Phase 1 + 2: STT + AI analysis
        segments, total_duration, stt_data = engine.analyze_impact(
            video_path,
            source_language=kwargs.get("source_language"),  # for Whisper
            target_language=kwargs.get("language"),         # translation + captions
            timestamp_mode=ts_mode,
        )

        if not segments:
            logger.warning("⚠️ No viral segments found.")
            return []

        # Rank by viral score, then Phase 3: render
        ranked = engine.get_best_segments(segments, total_duration)
        return engine.process_clips(
            video_path,
            ranked,
            stt_data,
            style=style,
            **kwargs,
        )

    except Exception as e:
        # Top-level boundary: log and return an empty result instead of raising.
        logger.error(f"❌ Processing failed: {e}")
        logger.error(traceback.format_exc())
        return []
|
| 469 |
+
|
|
|
|
|
|
|
| 470 |
|
| 471 |
if __name__ == "__main__":
|
| 472 |
import sys
|
requirements.txt
CHANGED
|
@@ -15,3 +15,5 @@ imageio-ffmpeg==0.4.8
|
|
| 15 |
openai>=1.0.0
|
| 16 |
scipy
|
| 17 |
json_repair
|
|
|
|
|
|
|
|
|
| 15 |
openai>=1.0.0
|
| 16 |
scipy
|
| 17 |
json_repair
|
| 18 |
+
cryptography
|
| 19 |
+
firebase-admin
|