ulduldp committed on
Commit
4882d31
·
verified ·
1 Parent(s): 666181c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -18
app.py CHANGED
@@ -2,9 +2,9 @@ from flask import Flask, render_template_string, request, jsonify, send_from_dir
2
  import os
3
  import uuid
4
  import subprocess
5
- import textwrap
6
  from werkzeug.utils import secure_filename
7
  from faster_whisper import WhisperModel
 
8
 
9
  app = Flask(__name__)
10
 
@@ -18,6 +18,7 @@ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
18
  os.makedirs(OUTPUT_FOLDER, exist_ok=True)
19
  os.makedirs(SUBTITLE_FOLDER, exist_ok=True)
20
 
 
21
  model = WhisperModel(
22
  "tiny",
23
  device="cpu",
@@ -145,7 +146,6 @@ video{
145
  </head>
146
 
147
  <body>
148
-
149
  <div class="container">
150
  <h1>Photo + Audio → Video</h1>
151
 
@@ -223,7 +223,6 @@ form.addEventListener("submit", async (e)=>{
223
  }
224
  });
225
  </script>
226
-
227
  </body>
228
  </html>
229
  """
@@ -251,26 +250,70 @@ def escape_ffmpeg_path(path: str) -> str:
251
  .replace("'", r"\'")
252
  )
253
 
254
- def hard_wrap_caption(text: str, max_width: int = 18, max_lines: int = 4) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  text = " ".join(text.strip().split())
256
  if not text:
257
  return ""
258
 
259
- tokens = []
260
- for word in text.split(" "):
261
- if len(word) <= max_width:
262
- tokens.append(word)
263
- else:
264
- # split long words so they can wrap too
265
- tokens.extend([word[i:i + max_width] for i in range(0, len(word), max_width)])
266
 
 
267
  lines = []
268
  current = ""
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  for token in tokens:
271
- candidate = token if not current else f"{current} {token}"
272
- if len(candidate) <= max_width:
273
- current = candidate
274
  else:
275
  if current:
276
  lines.append(current)
@@ -279,13 +322,20 @@ def hard_wrap_caption(text: str, max_width: int = 18, max_lines: int = 4) -> str
279
  if current:
280
  lines.append(current)
281
 
282
- # keep it from becoming too tall
283
  if len(lines) > max_lines:
284
- lines = lines[:max_lines]
 
 
 
 
285
 
286
  return "\n".join(lines)
287
 
288
  def make_ass_subtitles(segments, ass_path):
 
 
 
 
289
  header = """[Script Info]
290
  ScriptType: v4.00+
291
  PlayResX: 1080
@@ -296,7 +346,7 @@ WrapStyle: 2
296
  [V4+ Styles]
297
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
298
 
299
- Style: Default,Arial,40,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,3,0,0,2,140,140,260,1
300
 
301
  [Events]
302
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
@@ -304,6 +354,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
304
 
305
  lines = [header]
306
 
 
 
 
307
  for seg in segments:
308
  text = seg["text"].strip()
309
  if not text:
@@ -312,9 +365,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
312
  start = ass_time(seg["start"])
313
  end = ass_time(seg["end"])
314
 
315
- wrapped = hard_wrap_caption(text, max_width=18, max_lines=4)
 
 
 
 
 
 
 
316
  wrapped = ass_escape(wrapped).replace("\n", r"\N")
317
 
 
318
  dialogue = (
319
  f"Dialogue: 0,{start},{end},Default,,0,0,0,,"
320
  r"{\bord0\shad0\blur0\be0\1c&HFFFFFF&\3c&H000000&\4c&H000000&\3a&H00&\4a&H00}"
@@ -394,6 +455,7 @@ def generate():
394
  make_ass_subtitles(transcript, ass_path)
395
  safe_ass_path = escape_ffmpeg_path(os.path.abspath(ass_path))
396
 
 
397
  vf = (
398
  "scale=1080:1920:force_original_aspect_ratio=increase,"
399
  "crop=1080:1920,"
 
2
  import os
3
  import uuid
4
  import subprocess
 
5
  from werkzeug.utils import secure_filename
6
  from faster_whisper import WhisperModel
7
+ from PIL import ImageFont
8
 
9
  app = Flask(__name__)
10
 
 
18
  os.makedirs(OUTPUT_FOLDER, exist_ok=True)
19
  os.makedirs(SUBTITLE_FOLDER, exist_ok=True)
20
 
21
+ # Fast CPU model
22
  model = WhisperModel(
23
  "tiny",
24
  device="cpu",
 
146
  </head>
147
 
148
  <body>
 
149
  <div class="container">
150
  <h1>Photo + Audio → Video</h1>
151
 
 
223
  }
224
  });
225
  </script>
 
226
  </body>
227
  </html>
228
  """
 
250
  .replace("'", r"\'")
251
  )
252
 
253
def find_font_path(candidates=None):
    """Return the first candidate font path that is an existing file.

    Args:
        candidates: Optional iterable of font file paths to probe, in
            priority order. When omitted, a built-in list of DejaVu,
            Liberation and FreeFont sans-serif fonts under
            /usr/share/fonts/truetype is used.

    Returns:
        The first candidate path that refers to an existing regular file,
        or None when no candidate qualifies.
    """
    if candidates is None:
        candidates = (
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "/usr/share/fonts/truetype/liberation2/LiberationSans-Bold.ttf",
            "/usr/share/fonts/truetype/liberation2/LiberationSans-Regular.ttf",
            "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
            "/usr/share/fonts/truetype/freefont/FreeSans.ttf",
        )

    # isfile rather than exists: a directory sitting at a candidate path
    # is not a loadable font and must not be returned.
    return next((path for path in candidates if os.path.isfile(path)), None)
266
+
267
+ FONT_PATH = find_font_path()
268
+
269
def measure_text_width(font, text: str) -> int:
    """Return the horizontal extent of *text* as reported by ``font.getbbox``.

    The width is the bounding box's third element minus its first
    (right edge minus left edge).
    """
    extents = font.getbbox(text)
    left_edge = extents[0]
    right_edge = extents[2]
    return right_edge - left_edge
272
+
273
+ def pixel_wrap_text(text: str, font_path: str, font_size: int, max_width_px: int, max_lines: int = 5) -> str:
274
+ """
275
+ Wrap text based on actual pixel width, not character count.
276
+ Also splits long words if they exceed max_width_px.
277
+ """
278
  text = " ".join(text.strip().split())
279
  if not text:
280
  return ""
281
 
282
+ if font_path:
283
+ font = ImageFont.truetype(font_path, font_size)
284
+ else:
285
+ font = ImageFont.load_default()
 
 
 
286
 
287
+ words = text.split(" ")
288
  lines = []
289
  current = ""
290
 
291
+ def split_long_word(word: str):
292
+ if measure_text_width(font, word) <= max_width_px:
293
+ return [word]
294
+
295
+ parts = []
296
+ chunk = ""
297
+ for ch in word:
298
+ trial = chunk + ch
299
+ if measure_text_width(font, trial) <= max_width_px:
300
+ chunk = trial
301
+ else:
302
+ if chunk:
303
+ parts.append(chunk)
304
+ chunk = ch
305
+ if chunk:
306
+ parts.append(chunk)
307
+ return parts
308
+
309
+ tokens = []
310
+ for word in words:
311
+ tokens.extend(split_long_word(word))
312
+
313
  for token in tokens:
314
+ trial = token if not current else f"{current} {token}"
315
+ if measure_text_width(font, trial) <= max_width_px:
316
+ current = trial
317
  else:
318
  if current:
319
  lines.append(current)
 
322
  if current:
323
  lines.append(current)
324
 
 
325
  if len(lines) > max_lines:
326
+ # last line gets the rest so text doesn't disappear
327
+ kept = lines[:max_lines - 1]
328
+ rest = " ".join(lines[max_lines - 1:])
329
+ kept.append(rest)
330
+ lines = kept
331
 
332
  return "\n".join(lines)
333
 
334
  def make_ass_subtitles(segments, ass_path):
335
+ """
336
+ Solid black box behind white text.
337
+ Font size reduced and wrap based on pixel width.
338
+ """
339
  header = """[Script Info]
340
  ScriptType: v4.00+
341
  PlayResX: 1080
 
346
  [V4+ Styles]
347
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
348
 
349
+ Style: Default,Arial,38,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,3,0,0,2,120,120,220,1
350
 
351
  [Events]
352
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
354
 
355
  lines = [header]
356
 
357
+ # Available width inside 1080 frame with margins
358
+ max_width_px = 820
359
+
360
  for seg in segments:
361
  text = seg["text"].strip()
362
  if not text:
 
365
  start = ass_time(seg["start"])
366
  end = ass_time(seg["end"])
367
 
368
+ wrapped = pixel_wrap_text(
369
+ text=text,
370
+ font_path=FONT_PATH,
371
+ font_size=38,
372
+ max_width_px=max_width_px,
373
+ max_lines=5
374
+ )
375
+
376
  wrapped = ass_escape(wrapped).replace("\n", r"\N")
377
 
378
+ # BorderStyle=3 gives the opaque black box background
379
  dialogue = (
380
  f"Dialogue: 0,{start},{end},Default,,0,0,0,,"
381
  r"{\bord0\shad0\blur0\be0\1c&HFFFFFF&\3c&H000000&\4c&H000000&\3a&H00&\4a&H00}"
 
455
  make_ass_subtitles(transcript, ass_path)
456
  safe_ass_path = escape_ffmpeg_path(os.path.abspath(ass_path))
457
 
458
+ # IMPORTANT: crop first, then burn subtitles
459
  vf = (
460
  "scale=1080:1920:force_original_aspect_ratio=increase,"
461
  "crop=1080:1920,"