Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,9 +2,9 @@ from flask import Flask, render_template_string, request, jsonify, send_from_dir
|
|
| 2 |
import os
|
| 3 |
import uuid
|
| 4 |
import subprocess
|
| 5 |
-
import textwrap
|
| 6 |
from werkzeug.utils import secure_filename
|
| 7 |
from faster_whisper import WhisperModel
|
|
|
|
| 8 |
|
| 9 |
app = Flask(__name__)
|
| 10 |
|
|
@@ -18,6 +18,7 @@ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
|
| 18 |
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
|
| 19 |
os.makedirs(SUBTITLE_FOLDER, exist_ok=True)
|
| 20 |
|
|
|
|
| 21 |
model = WhisperModel(
|
| 22 |
"tiny",
|
| 23 |
device="cpu",
|
|
@@ -145,7 +146,6 @@ video{
|
|
| 145 |
</head>
|
| 146 |
|
| 147 |
<body>
|
| 148 |
-
|
| 149 |
<div class="container">
|
| 150 |
<h1>Photo + Audio → Video</h1>
|
| 151 |
|
|
@@ -223,7 +223,6 @@ form.addEventListener("submit", async (e)=>{
|
|
| 223 |
}
|
| 224 |
});
|
| 225 |
</script>
|
| 226 |
-
|
| 227 |
</body>
|
| 228 |
</html>
|
| 229 |
"""
|
|
@@ -251,26 +250,70 @@ def escape_ffmpeg_path(path: str) -> str:
|
|
| 251 |
.replace("'", r"\'")
|
| 252 |
)
|
| 253 |
|
| 254 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
text = " ".join(text.strip().split())
|
| 256 |
if not text:
|
| 257 |
return ""
|
| 258 |
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
else:
|
| 264 |
-
# split long words so they can wrap too
|
| 265 |
-
tokens.extend([word[i:i + max_width] for i in range(0, len(word), max_width)])
|
| 266 |
|
|
|
|
| 267 |
lines = []
|
| 268 |
current = ""
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
for token in tokens:
|
| 271 |
-
|
| 272 |
-
if
|
| 273 |
-
current =
|
| 274 |
else:
|
| 275 |
if current:
|
| 276 |
lines.append(current)
|
|
@@ -279,13 +322,20 @@ def hard_wrap_caption(text: str, max_width: int = 18, max_lines: int = 4) -> str
|
|
| 279 |
if current:
|
| 280 |
lines.append(current)
|
| 281 |
|
| 282 |
-
# keep it from becoming too tall
|
| 283 |
if len(lines) > max_lines:
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
return "\n".join(lines)
|
| 287 |
|
| 288 |
def make_ass_subtitles(segments, ass_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
header = """[Script Info]
|
| 290 |
ScriptType: v4.00+
|
| 291 |
PlayResX: 1080
|
|
@@ -296,7 +346,7 @@ WrapStyle: 2
|
|
| 296 |
[V4+ Styles]
|
| 297 |
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
| 298 |
|
| 299 |
-
Style: Default,Arial,
|
| 300 |
|
| 301 |
[Events]
|
| 302 |
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|
@@ -304,6 +354,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|
| 304 |
|
| 305 |
lines = [header]
|
| 306 |
|
|
|
|
|
|
|
|
|
|
| 307 |
for seg in segments:
|
| 308 |
text = seg["text"].strip()
|
| 309 |
if not text:
|
|
@@ -312,9 +365,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|
| 312 |
start = ass_time(seg["start"])
|
| 313 |
end = ass_time(seg["end"])
|
| 314 |
|
| 315 |
-
wrapped =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
wrapped = ass_escape(wrapped).replace("\n", r"\N")
|
| 317 |
|
|
|
|
| 318 |
dialogue = (
|
| 319 |
f"Dialogue: 0,{start},{end},Default,,0,0,0,,"
|
| 320 |
r"{\bord0\shad0\blur0\be0\1c&HFFFFFF&\3c&H000000&\4c&H000000&\3a&H00&\4a&H00}"
|
|
@@ -394,6 +455,7 @@ def generate():
|
|
| 394 |
make_ass_subtitles(transcript, ass_path)
|
| 395 |
safe_ass_path = escape_ffmpeg_path(os.path.abspath(ass_path))
|
| 396 |
|
|
|
|
| 397 |
vf = (
|
| 398 |
"scale=1080:1920:force_original_aspect_ratio=increase,"
|
| 399 |
"crop=1080:1920,"
|
|
|
|
| 2 |
import os
|
| 3 |
import uuid
|
| 4 |
import subprocess
|
|
|
|
| 5 |
from werkzeug.utils import secure_filename
|
| 6 |
from faster_whisper import WhisperModel
|
| 7 |
+
from PIL import ImageFont
|
| 8 |
|
| 9 |
app = Flask(__name__)
|
| 10 |
|
|
|
|
| 18 |
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
|
| 19 |
os.makedirs(SUBTITLE_FOLDER, exist_ok=True)
|
| 20 |
|
| 21 |
+
# Fast CPU model
|
| 22 |
model = WhisperModel(
|
| 23 |
"tiny",
|
| 24 |
device="cpu",
|
|
|
|
| 146 |
</head>
|
| 147 |
|
| 148 |
<body>
|
|
|
|
| 149 |
<div class="container">
|
| 150 |
<h1>Photo + Audio → Video</h1>
|
| 151 |
|
|
|
|
| 223 |
}
|
| 224 |
});
|
| 225 |
</script>
|
|
|
|
| 226 |
</body>
|
| 227 |
</html>
|
| 228 |
"""
|
|
|
|
| 250 |
.replace("'", r"\'")
|
| 251 |
)
|
| 252 |
|
| 253 |
+
def find_font_path():
|
| 254 |
+
candidates = [
|
| 255 |
+
"/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
|
| 256 |
+
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
|
| 257 |
+
"/usr/share/fonts/truetype/liberation2/LiberationSans-Bold.ttf",
|
| 258 |
+
"/usr/share/fonts/truetype/liberation2/LiberationSans-Regular.ttf",
|
| 259 |
+
"/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
|
| 260 |
+
"/usr/share/fonts/truetype/freefont/FreeSans.ttf",
|
| 261 |
+
]
|
| 262 |
+
for path in candidates:
|
| 263 |
+
if os.path.exists(path):
|
| 264 |
+
return path
|
| 265 |
+
return None
|
| 266 |
+
|
| 267 |
+
FONT_PATH = find_font_path()
|
| 268 |
+
|
| 269 |
+
def measure_text_width(font, text: str) -> int:
|
| 270 |
+
bbox = font.getbbox(text)
|
| 271 |
+
return bbox[2] - bbox[0]
|
| 272 |
+
|
| 273 |
+
def pixel_wrap_text(text: str, font_path: str, font_size: int, max_width_px: int, max_lines: int = 5) -> str:
|
| 274 |
+
"""
|
| 275 |
+
Wrap text based on actual pixel width, not character count.
|
| 276 |
+
Also splits long words if they exceed max_width_px.
|
| 277 |
+
"""
|
| 278 |
text = " ".join(text.strip().split())
|
| 279 |
if not text:
|
| 280 |
return ""
|
| 281 |
|
| 282 |
+
if font_path:
|
| 283 |
+
font = ImageFont.truetype(font_path, font_size)
|
| 284 |
+
else:
|
| 285 |
+
font = ImageFont.load_default()
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
+
words = text.split(" ")
|
| 288 |
lines = []
|
| 289 |
current = ""
|
| 290 |
|
| 291 |
+
def split_long_word(word: str):
|
| 292 |
+
if measure_text_width(font, word) <= max_width_px:
|
| 293 |
+
return [word]
|
| 294 |
+
|
| 295 |
+
parts = []
|
| 296 |
+
chunk = ""
|
| 297 |
+
for ch in word:
|
| 298 |
+
trial = chunk + ch
|
| 299 |
+
if measure_text_width(font, trial) <= max_width_px:
|
| 300 |
+
chunk = trial
|
| 301 |
+
else:
|
| 302 |
+
if chunk:
|
| 303 |
+
parts.append(chunk)
|
| 304 |
+
chunk = ch
|
| 305 |
+
if chunk:
|
| 306 |
+
parts.append(chunk)
|
| 307 |
+
return parts
|
| 308 |
+
|
| 309 |
+
tokens = []
|
| 310 |
+
for word in words:
|
| 311 |
+
tokens.extend(split_long_word(word))
|
| 312 |
+
|
| 313 |
for token in tokens:
|
| 314 |
+
trial = token if not current else f"{current} {token}"
|
| 315 |
+
if measure_text_width(font, trial) <= max_width_px:
|
| 316 |
+
current = trial
|
| 317 |
else:
|
| 318 |
if current:
|
| 319 |
lines.append(current)
|
|
|
|
| 322 |
if current:
|
| 323 |
lines.append(current)
|
| 324 |
|
|
|
|
| 325 |
if len(lines) > max_lines:
|
| 326 |
+
# last line gets the rest so text doesn't disappear
|
| 327 |
+
kept = lines[:max_lines - 1]
|
| 328 |
+
rest = " ".join(lines[max_lines - 1:])
|
| 329 |
+
kept.append(rest)
|
| 330 |
+
lines = kept
|
| 331 |
|
| 332 |
return "\n".join(lines)
|
| 333 |
|
| 334 |
def make_ass_subtitles(segments, ass_path):
|
| 335 |
+
"""
|
| 336 |
+
Solid black box behind white text.
|
| 337 |
+
Font size reduced and wrap based on pixel width.
|
| 338 |
+
"""
|
| 339 |
header = """[Script Info]
|
| 340 |
ScriptType: v4.00+
|
| 341 |
PlayResX: 1080
|
|
|
|
| 346 |
[V4+ Styles]
|
| 347 |
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
| 348 |
|
| 349 |
+
Style: Default,Arial,38,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,3,0,0,2,120,120,220,1
|
| 350 |
|
| 351 |
[Events]
|
| 352 |
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|
|
|
| 354 |
|
| 355 |
lines = [header]
|
| 356 |
|
| 357 |
+
# Available width inside 1080 frame with margins
|
| 358 |
+
max_width_px = 820
|
| 359 |
+
|
| 360 |
for seg in segments:
|
| 361 |
text = seg["text"].strip()
|
| 362 |
if not text:
|
|
|
|
| 365 |
start = ass_time(seg["start"])
|
| 366 |
end = ass_time(seg["end"])
|
| 367 |
|
| 368 |
+
wrapped = pixel_wrap_text(
|
| 369 |
+
text=text,
|
| 370 |
+
font_path=FONT_PATH,
|
| 371 |
+
font_size=38,
|
| 372 |
+
max_width_px=max_width_px,
|
| 373 |
+
max_lines=5
|
| 374 |
+
)
|
| 375 |
+
|
| 376 |
wrapped = ass_escape(wrapped).replace("\n", r"\N")
|
| 377 |
|
| 378 |
+
# BorderStyle=3 gives the opaque black box background
|
| 379 |
dialogue = (
|
| 380 |
f"Dialogue: 0,{start},{end},Default,,0,0,0,,"
|
| 381 |
r"{\bord0\shad0\blur0\be0\1c&HFFFFFF&\3c&H000000&\4c&H000000&\3a&H00&\4a&H00}"
|
|
|
|
| 455 |
make_ass_subtitles(transcript, ass_path)
|
| 456 |
safe_ass_path = escape_ffmpeg_path(os.path.abspath(ass_path))
|
| 457 |
|
| 458 |
+
# IMPORTANT: crop first, then burn subtitles
|
| 459 |
vf = (
|
| 460 |
"scale=1080:1920:force_original_aspect_ratio=increase,"
|
| 461 |
"crop=1080:1920,"
|