Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
|
|
| 1 |
import subprocess
|
| 2 |
-
import base64
|
| 3 |
import os
|
| 4 |
import tempfile
|
| 5 |
import requests
|
|
@@ -7,17 +7,20 @@ import re
|
|
| 7 |
import textwrap
|
| 8 |
import shutil
|
| 9 |
import time
|
| 10 |
-
import
|
| 11 |
from datetime import datetime
|
| 12 |
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
| 13 |
from io import BytesIO
|
| 14 |
-
from
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
|
| 17 |
from fastapi.responses import FileResponse, JSONResponse
|
| 18 |
from fastapi.middleware.cors import CORSMiddleware
|
| 19 |
from pydantic import BaseModel, Field
|
| 20 |
-
|
| 21 |
# ========================================
|
| 22 |
# CONFIGURATION SECTION - CUSTOMIZE HERE
|
| 23 |
# ========================================
|
|
@@ -38,14 +41,13 @@ REDDIT_CONFIG = {
|
|
| 38 |
SUBTITLE_CONFIG = {
|
| 39 |
'font_file': 'LilitaOne-Regular.ttf',
|
| 40 |
'font_name': 'Lilita One',
|
| 41 |
-
'font_size_default':
|
| 42 |
'position_alignment': 5,
|
| 43 |
-
'margin_left':
|
| 44 |
-
'margin_right':
|
| 45 |
'margin_vertical': 20,
|
| 46 |
'line_spacing': 2
|
| 47 |
}
|
| 48 |
-
# go to line 462 if you want to increase/decrease CTA part's font size!!!
|
| 49 |
|
| 50 |
VIDEO_CONFIG = {
|
| 51 |
'reddit_scale_percent': 0.75,
|
|
@@ -55,52 +57,22 @@ VIDEO_CONFIG = {
|
|
| 55 |
'fade_color_rgb': (218, 207, 195),
|
| 56 |
}
|
| 57 |
|
| 58 |
-
|
| 59 |
# ========================================
|
| 60 |
# END CONFIGURATION SECTION
|
| 61 |
# ========================================
|
| 62 |
|
| 63 |
-
# =========================
|
| 64 |
-
#
|
| 65 |
-
# =========================
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
lines = block.strip().split('\n')
|
| 76 |
-
if len(lines) >= 3:
|
| 77 |
-
subtitle_text = ' '.join(lines[2:])
|
| 78 |
-
if book_title_lower in subtitle_text.lower():
|
| 79 |
-
# 1. Get the time the title is spoken
|
| 80 |
-
times = lines[1].split(' --> ')
|
| 81 |
-
title_time = srt_time_to_ms(times[0]) / 1000.0
|
| 82 |
-
|
| 83 |
-
cta_time = None
|
| 84 |
-
cta_text_parts = []
|
| 85 |
-
|
| 86 |
-
# 2. Get the time the ACTUAL CTA text starts
|
| 87 |
-
if i + 1 < len(blocks):
|
| 88 |
-
next_block_lines = blocks[i + 1].strip().split('\n')
|
| 89 |
-
if len(next_block_lines) >= 3:
|
| 90 |
-
cta_time = srt_time_to_ms(next_block_lines[1].split(' --> ')[0]) / 1000.0
|
| 91 |
-
|
| 92 |
-
# 3. Grab all remaining text for the CTA
|
| 93 |
-
for j in range(i + 1, len(blocks)):
|
| 94 |
-
next_lines = blocks[j].strip().split('\n')
|
| 95 |
-
if len(next_lines) >= 3:
|
| 96 |
-
cta_text_parts.append(' '.join(next_lines[2:]).strip())
|
| 97 |
-
|
| 98 |
-
cta_text = ' '.join(cta_text_parts) if cta_text_parts else None
|
| 99 |
-
return title_time, cta_time, cta_text
|
| 100 |
-
return None, None, None
|
| 101 |
-
except Exception as e:
|
| 102 |
-
print(f"Error finding title and CTA: {e}")
|
| 103 |
-
return None, None, None
|
| 104 |
|
| 105 |
def setup_custom_fonts_hf(temp_dir):
|
| 106 |
try:
|
|
@@ -200,22 +172,6 @@ def get_audio_duration(audio_path):
|
|
| 200 |
return float(result.stdout.strip())
|
| 201 |
except Exception as e: raise Exception(f"Failed to get audio duration: {str(e)}")
|
| 202 |
|
| 203 |
-
def extract_first_subtitle(srt_path):
|
| 204 |
-
try:
|
| 205 |
-
with open(srt_path, 'r', encoding='utf-8') as f: content = f.read()
|
| 206 |
-
blocks = re.split(r'\n\s*\n', content.strip())
|
| 207 |
-
if not blocks: return "No subtitle found", 0.0, 3.0
|
| 208 |
-
first_block = blocks[0].strip().split('\n')
|
| 209 |
-
if len(first_block) >= 3:
|
| 210 |
-
times = first_block[1].split(' --> ')
|
| 211 |
-
def time_to_sec(t):
|
| 212 |
-
h, m, s = t.split(':')
|
| 213 |
-
s, ms = s.split(',')
|
| 214 |
-
return int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 1000.0
|
| 215 |
-
return ' '.join(first_block[2:]).strip(), time_to_sec(times[0].strip()), time_to_sec(times[1].strip())
|
| 216 |
-
return "No subtitle found", 0.0, 3.0
|
| 217 |
-
except Exception as e: raise Exception(f"Failed to extract first subtitle: {str(e)}")
|
| 218 |
-
|
| 219 |
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
|
| 220 |
try:
|
| 221 |
template = Image.open(template_path).convert('RGBA')
|
|
@@ -267,68 +223,161 @@ def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
|
|
| 267 |
except Exception as e: return None, f"❌ Error downloading {file_type}: {str(e)}"
|
| 268 |
return None, f"❌ Unknown error"
|
| 269 |
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
def
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
ScriptType: v4.00+
|
| 298 |
[V4+ Styles]
|
| 299 |
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
| 300 |
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,1,3,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
|
| 301 |
[Events]
|
| 302 |
-
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
color_map = {
|
| 333 |
'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
|
| 334 |
'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
|
|
@@ -338,78 +387,61 @@ def create_cta_highlight_ass(srt_path, output_dir, start_sec, font_size, video_w
|
|
| 338 |
highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
|
| 339 |
margin_lr = int(video_width * 0.125) + 40
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
ass_header = f"""[Script Info]
|
| 345 |
-
Title: CTA
|
| 346 |
ScriptType: v4.00+
|
| 347 |
PlayResX: {video_width}
|
| 348 |
PlayResY: {video_height}
|
| 349 |
WrapStyle: 1
|
| 350 |
[V4+ Styles]
|
| 351 |
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
| 352 |
-
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,
|
| 353 |
[Events]
|
| 354 |
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
|
| 355 |
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
# 1. Flatten all CTA words into a single timed stream
|
| 359 |
all_cta_words = []
|
| 360 |
-
for
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
if
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
w_end = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)
|
| 376 |
-
all_cta_words.append({'word': word, 'start': w_start, 'end': w_end})
|
| 377 |
|
| 378 |
-
# 2. Group words into chunks with "Don't leave 1 or 2 words alone" logic
|
| 379 |
chunks = []
|
| 380 |
i = 0
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
remaining = total_words - i
|
| 385 |
-
if 10 < remaining <= 13:
|
| 386 |
-
take = remaining
|
| 387 |
-
else:
|
| 388 |
-
take = min(10, remaining)
|
| 389 |
-
|
| 390 |
chunks.append(all_cta_words[i : i + take])
|
| 391 |
i += take
|
| 392 |
|
| 393 |
-
# 3. Generate ASS Dialogue lines for each chunk
|
| 394 |
ass_events = []
|
| 395 |
for chunk in chunks:
|
| 396 |
chunk_text_only = [item['word'] for item in chunk]
|
| 397 |
-
|
| 398 |
for idx, info in enumerate(chunk):
|
| 399 |
w_start = info['start']
|
| 400 |
-
|
| 401 |
-
w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else info['end']
|
| 402 |
|
| 403 |
text_parts = []
|
| 404 |
for j, word_str in enumerate(chunk_text_only):
|
| 405 |
-
if j == idx:
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
styled_text = ' '.join(text_parts)
|
| 411 |
-
ass_events.append(f"Dialogue: 1,{ms_to_ass_time(w_start)},{ms_to_ass_time(w_end)},Default,,0,0,0,,{styled_text}")
|
| 412 |
-
|
| 413 |
with open(ass_path, 'w', encoding='utf-8') as f:
|
| 414 |
f.write(ass_header + '\n'.join(ass_events))
|
| 415 |
return ass_path
|
|
@@ -430,8 +462,30 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
|
|
| 430 |
subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
|
| 431 |
if s_err: return None, s_err
|
| 432 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
video_width, video_height, video_fps = get_video_info(video_path)
|
| 434 |
-
audio_duration = get_audio_duration(audio_path)
|
| 435 |
|
| 436 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 437 |
reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
|
|
@@ -448,34 +502,35 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
|
|
| 448 |
status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
|
| 449 |
has_reddit_template = False
|
| 450 |
|
| 451 |
-
# --- 1. Find
|
| 452 |
-
|
| 453 |
-
book_appears_at = title_timestamp if title_timestamp is not None else audio_duration * (1 - VIDEO_CONFIG['promo_percent'])
|
| 454 |
|
| 455 |
-
|
|
|
|
| 456 |
|
| 457 |
-
if
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
|
| 472 |
-
# --- 3. Process Main Subtitles ---
|
| 473 |
if enable_highlight:
|
| 474 |
-
status_msg += f"\n✨ Processing subtitles...\n"
|
| 475 |
-
|
|
|
|
| 476 |
subtitle_path, temp_dir, highlight_color, font_size,
|
| 477 |
-
|
| 478 |
-
|
| 479 |
)
|
| 480 |
else:
|
| 481 |
main_subtitle_path = subtitle_path
|
|
@@ -487,41 +542,55 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
|
|
| 487 |
has_book_cover = book_cover_path is not None
|
| 488 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 489 |
output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
|
| 490 |
-
|
| 491 |
-
fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
|
| 492 |
-
fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
|
| 493 |
-
fade_out_duration = fade_ends_at - fade_starts_at
|
| 494 |
-
promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
|
| 495 |
-
solid_color_duration = max(0, book_appears_at - fade_ends_at)
|
| 496 |
-
main_video_duration = fade_ends_at
|
| 497 |
-
cover_segment_duration = promo_duration
|
| 498 |
-
fade_color_hex = "#dacfc3"
|
| 499 |
|
| 500 |
if has_book_cover:
|
| 501 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
|
| 503 |
cmd_main = ["ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration), "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path]
|
| 504 |
subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)
|
| 505 |
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
|
|
|
|
|
|
|
|
|
| 509 |
|
|
|
|
| 510 |
cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
|
| 511 |
-
# Removed the fade-in effect here for a clean hard cut
|
| 512 |
cmd_cover = ["ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration), "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path]
|
| 513 |
subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)
|
| 514 |
|
|
|
|
| 515 |
concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
|
| 516 |
with open(concat_list_path, 'w') as f:
|
| 517 |
-
f.write(f"file '{main_segment_path}'\n")
|
|
|
|
|
|
|
|
|
|
| 518 |
|
| 519 |
-
|
| 520 |
input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
|
| 521 |
curr_idx = 1
|
| 522 |
curr_stream = "[0:v]"
|
| 523 |
|
| 524 |
-
# Layer 1: Reddit Card
|
| 525 |
if has_reddit_template:
|
| 526 |
input_cmd += ["-loop", "1", "-i", reddit_card_path]
|
| 527 |
filter_complex = f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
|
|
@@ -529,16 +598,17 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
|
|
| 529 |
else:
|
| 530 |
filter_complex = f"{curr_stream}copy[v1];"; curr_stream = "[v1]"
|
| 531 |
|
| 532 |
-
#
|
| 533 |
filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"; curr_stream = "[v2]"
|
| 534 |
|
| 535 |
-
#
|
| 536 |
-
if cta_ass_path:
|
| 537 |
-
|
| 538 |
-
else:
|
| 539 |
-
|
| 540 |
|
| 541 |
input_cmd += ["-i", audio_path]
|
|
|
|
| 542 |
cmd_final = input_cmd + [
|
| 543 |
"-filter_complex", filter_complex,
|
| 544 |
"-map", "[v_final]", "-map", f"{curr_idx}:a",
|
|
@@ -546,21 +616,16 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
|
|
| 546 |
"-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
|
| 547 |
]
|
| 548 |
|
| 549 |
-
status_msg += "🎬 Rendering final video...\n"
|
| 550 |
subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
except Exception as e: return None, f"❌ Error: {str(e)}"
|
| 554 |
-
else: return None, "❌ Book cover required."
|
| 555 |
|
| 556 |
if os.path.exists(output_path): return output_path, f"✅ Success!"
|
| 557 |
else: return None, "❌ Output not created"
|
| 558 |
except Exception as e: return None, f"❌ Error: {str(e)}"
|
| 559 |
|
| 560 |
-
|
| 561 |
-
# ========================================
|
| 562 |
-
# FastAPI app
|
| 563 |
-
# ========================================
|
| 564 |
app = FastAPI(title="Video Stitcher API")
|
| 565 |
|
| 566 |
app.add_middleware(
|
|
@@ -571,13 +636,11 @@ app.add_middleware(
|
|
| 571 |
allow_headers=["*"],
|
| 572 |
)
|
| 573 |
|
| 574 |
-
|
| 575 |
class StitchErrorResponse(BaseModel):
|
| 576 |
status: str = Field(..., example="failed")
|
| 577 |
message: str = Field(..., example="❌ FFmpeg error: ...")
|
| 578 |
run_time: str = Field(..., example="0m 5s")
|
| 579 |
|
| 580 |
-
|
| 581 |
def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
|
| 582 |
filename = os.path.basename(upload_file.filename)
|
| 583 |
dest_path = os.path.join(temp_dir, filename)
|
|
@@ -585,52 +648,36 @@ def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
|
|
| 585 |
f.write(upload_file.file.read())
|
| 586 |
return dest_path
|
| 587 |
|
| 588 |
-
|
| 589 |
-
@app.post(
|
| 590 |
-
'/video_stitch',
|
| 591 |
-
responses={400: {"model": StitchErrorResponse}, 500: {"model": StitchErrorResponse}},
|
| 592 |
-
summary="Stitch video, audio, and subtitles into a final MP4",
|
| 593 |
-
description=(
|
| 594 |
-
"Synchronous endpoint — holds the connection open until encoding is complete, "
|
| 595 |
-
"then returns the finished MP4 directly. Designed for n8n HTTP Request nodes."
|
| 596 |
-
),
|
| 597 |
-
)
|
| 598 |
async def stitch_upload(
|
| 599 |
request: Request,
|
| 600 |
-
# Video
|
| 601 |
video_file: Optional[UploadFile] = File(None),
|
| 602 |
video_url: Optional[str] = Form(None),
|
| 603 |
-
# Audio
|
| 604 |
audio_file: Optional[UploadFile] = File(None),
|
| 605 |
audio_url: Optional[str] = Form(None),
|
| 606 |
-
# Subtitle
|
| 607 |
subtitle_file: Optional[UploadFile] = File(None),
|
| 608 |
subtitle_url: Optional[str] = Form(None),
|
| 609 |
-
# Book Cover (use exactly ONE)
|
| 610 |
book_cover_file: Optional[UploadFile] = File(None),
|
| 611 |
book_cover_url: Optional[str] = Form(None),
|
| 612 |
book_cover_base64: Optional[str] = Form(None),
|
| 613 |
book_id: Optional[str] = Form(None),
|
| 614 |
-
# Book Title (used to detect CTA split point in subtitle)
|
| 615 |
book_title: Optional[str] = Form(None),
|
| 616 |
-
# Settings
|
| 617 |
enable_highlight: bool = Form(True),
|
| 618 |
highlight_color: str = Form('yellow'),
|
| 619 |
font_size: int = Form(10),
|
| 620 |
crf_quality: int = Form(23),
|
| 621 |
):
|
| 622 |
-
temp_dir = tempfile.mkdtemp()
|
| 623 |
-
|
| 624 |
# Format validation
|
| 625 |
-
if
|
| 626 |
-
raise HTTPException(status_code=422, detail=
|
| 627 |
-
if
|
|
|
|
|
|
|
| 628 |
raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
|
| 629 |
-
if subtitle_file and not (subtitle_file.filename.endswith('.srt') or subtitle_file.filename.endswith('.json')):
|
| 630 |
-
raise HTTPException(status_code=422, detail="❌ Subtitle must be a .srt or .json file")
|
| 631 |
if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
|
| 632 |
raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
|
| 633 |
|
|
|
|
| 634 |
payload = {
|
| 635 |
'video_file': None, 'video_url': video_url,
|
| 636 |
'audio_file': None, 'audio_url': audio_url,
|
|
@@ -655,8 +702,6 @@ async def stitch_upload(
|
|
| 655 |
payload['book_cover_file'] = _save_upload_to_temp(book_cover_file, temp_dir)
|
| 656 |
|
| 657 |
start_time = time.time()
|
| 658 |
-
|
| 659 |
-
# Run blocking FFmpeg work in a thread so the event loop stays healthy
|
| 660 |
loop = asyncio.get_event_loop()
|
| 661 |
result_path, message = await loop.run_in_executor(
|
| 662 |
None,
|
|
@@ -687,7 +732,6 @@ async def stitch_upload(
|
|
| 687 |
"X-Status": "completed",
|
| 688 |
"X-Run-Time": run_time_fmt,
|
| 689 |
"X-File-Size-MB": f"{file_size_mb:.2f}",
|
| 690 |
-
"X-Message": "Video created successfully",
|
| 691 |
}
|
| 692 |
)
|
| 693 |
else:
|
|
@@ -695,11 +739,9 @@ async def stitch_upload(
|
|
| 695 |
{'status': 'failed', 'message': message, 'run_time': run_time_fmt},
|
| 696 |
status_code=400
|
| 697 |
)
|
| 698 |
-
|
| 699 |
except Exception as e:
|
| 700 |
raise HTTPException(status_code=500, detail=str(e))
|
| 701 |
|
| 702 |
-
|
| 703 |
-
@app.get('/health', summary="Health check")
|
| 704 |
async def health():
|
| 705 |
return {"status": "ok"}
|
|
|
|
| 1 |
+
|
| 2 |
import subprocess
|
|
|
|
| 3 |
import os
|
| 4 |
import tempfile
|
| 5 |
import requests
|
|
|
|
| 7 |
import textwrap
|
| 8 |
import shutil
|
| 9 |
import time
|
| 10 |
+
import json
|
| 11 |
from datetime import datetime
|
| 12 |
from PIL import Image, ImageDraw, ImageFont
|
| 13 |
+
import base64
|
| 14 |
from io import BytesIO
|
| 15 |
+
from thefuzz import fuzz
|
| 16 |
|
| 17 |
+
import asyncio
|
| 18 |
+
from io import BytesIO
|
| 19 |
+
from typing import Optional
|
| 20 |
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
|
| 21 |
from fastapi.responses import FileResponse, JSONResponse
|
| 22 |
from fastapi.middleware.cors import CORSMiddleware
|
| 23 |
from pydantic import BaseModel, Field
|
|
|
|
| 24 |
# ========================================
|
| 25 |
# CONFIGURATION SECTION - CUSTOMIZE HERE
|
| 26 |
# ========================================
|
|
|
|
| 41 |
SUBTITLE_CONFIG = {
|
| 42 |
'font_file': 'LilitaOne-Regular.ttf',
|
| 43 |
'font_name': 'Lilita One',
|
| 44 |
+
'font_size_default': 11,
|
| 45 |
'position_alignment': 5,
|
| 46 |
+
'margin_left': 70,
|
| 47 |
+
'margin_right': 80,
|
| 48 |
'margin_vertical': 20,
|
| 49 |
'line_spacing': 2
|
| 50 |
}
|
|
|
|
| 51 |
|
| 52 |
VIDEO_CONFIG = {
|
| 53 |
'reddit_scale_percent': 0.75,
|
|
|
|
| 57 |
'fade_color_rgb': (218, 207, 195),
|
| 58 |
}
|
| 59 |
|
|
|
|
| 60 |
# ========================================
|
| 61 |
# END CONFIGURATION SECTION
|
| 62 |
# ========================================
|
| 63 |
|
| 64 |
+
# =========================
|
| 65 |
+
# HELPER FUNCTIONS
|
| 66 |
+
# =========================
|
| 67 |
+
|
| 68 |
+
def sec_to_ass_time(seconds):
|
| 69 |
+
"""Converts seconds (e.g. 1.219) to ASS time format (H:MM:SS.cs)"""
|
| 70 |
+
ms = int(seconds * 1000)
|
| 71 |
+
h, ms = divmod(ms, 3600000)
|
| 72 |
+
m, ms = divmod(ms, 60000)
|
| 73 |
+
s, ms = divmod(ms, 1000)
|
| 74 |
+
cs = ms // 10
|
| 75 |
+
return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
def setup_custom_fonts_hf(temp_dir):
|
| 78 |
try:
|
|
|
|
| 172 |
return float(result.stdout.strip())
|
| 173 |
except Exception as e: raise Exception(f"Failed to get audio duration: {str(e)}")
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
|
| 176 |
try:
|
| 177 |
template = Image.open(template_path).convert('RGBA')
|
|
|
|
| 223 |
except Exception as e: return None, f"❌ Error downloading {file_type}: {str(e)}"
|
| 224 |
return None, f"❌ Unknown error"
|
| 225 |
|
| 226 |
+
|
| 227 |
+
# ============================================
|
| 228 |
+
# JSON LOGIC: PARSERS & SUBTITLE GENERATORS
|
| 229 |
+
# ============================================
|
| 230 |
+
|
| 231 |
+
def extract_first_subtitle(json_path):
|
| 232 |
+
"""Gets the first full sentence up to a period for the Reddit Card."""
|
| 233 |
+
try:
|
| 234 |
+
with open(json_path, 'r', encoding='utf-8') as f:
|
| 235 |
+
data = json.load(f)
|
| 236 |
+
|
| 237 |
+
title_words = []
|
| 238 |
+
start_time = None
|
| 239 |
+
end_time = 3.0
|
| 240 |
+
|
| 241 |
+
for segment in data.get('segments', []):
|
| 242 |
+
for word_data in segment.get('words', []):
|
| 243 |
+
word_text = word_data.get('text', '').strip()
|
| 244 |
+
if not word_text: continue
|
| 245 |
+
|
| 246 |
+
if start_time is None:
|
| 247 |
+
start_time = word_data.get('start_time', 0.0)
|
| 248 |
+
|
| 249 |
+
title_words.append(word_text)
|
| 250 |
+
|
| 251 |
+
# Check if this word ends with sentence-ending punctuation
|
| 252 |
+
if re.search(r'[.!?]$', word_text):
|
| 253 |
+
end_time = word_data.get('end_time', 3.0)
|
| 254 |
+
return " ".join(title_words), start_time, end_time
|
| 255 |
+
|
| 256 |
+
# Fallback just in case there is literally no punctuation
|
| 257 |
+
if title_words:
|
| 258 |
+
return " ".join(title_words), start_time, end_time
|
| 259 |
+
return "No subtitle found", 0.0, 3.0
|
| 260 |
+
except Exception as e:
|
| 261 |
+
print(f"Error extracting first subtitle: {e}")
|
| 262 |
+
return "No subtitle found", 0.0, 3.0
|
| 263 |
+
|
| 264 |
+
# ============================================
|
| 265 |
+
# FINDS BOOK TITLE WORD'S EXACT TIMINGS
|
| 266 |
+
# ============================================
|
| 267 |
+
def find_title_and_cta(json_path, book_title):
|
| 268 |
+
"""Uses a sliding window to find the exact start and end millisecond of the book title."""
|
| 269 |
+
try:
|
| 270 |
+
if not book_title or not book_title.strip(): return None, None
|
| 271 |
+
|
| 272 |
+
with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f)
|
| 273 |
+
|
| 274 |
+
book_title_lower = book_title.lower()
|
| 275 |
+
title_clean = re.sub(r'[^\w\s]', '', book_title_lower).strip()
|
| 276 |
+
book_title_words = title_clean.split()
|
| 277 |
+
window_size = len(book_title_words)
|
| 278 |
+
|
| 279 |
+
# Flatten all words with their timings
|
| 280 |
+
all_words = []
|
| 281 |
+
for segment in data.get('segments', []):
|
| 282 |
+
for word_data in segment.get('words', []):
|
| 283 |
+
word_text = word_data.get('text', '').strip()
|
| 284 |
+
if word_text:
|
| 285 |
+
all_words.append({
|
| 286 |
+
'text': word_text,
|
| 287 |
+
'start': word_data.get('start_time', 0.0),
|
| 288 |
+
'end': word_data.get('end_time', 0.0)
|
| 289 |
+
})
|
| 290 |
+
|
| 291 |
+
best_score = 0
|
| 292 |
+
best_start = None
|
| 293 |
+
best_end = None
|
| 294 |
+
|
| 295 |
+
# Sliding Window: Checks 2, 3, and 4 word groups to catch fuzzy/bad transcriptions
|
| 296 |
+
for w_size in [window_size, window_size + 1, window_size - 1]:
|
| 297 |
+
if w_size <= 0: continue
|
| 298 |
+
for i in range(len(all_words) - w_size + 1):
|
| 299 |
+
window_text = " ".join([w['text'] for w in all_words[i : i + w_size]]).lower()
|
| 300 |
+
window_text_clean = re.sub(r'[^\w\s]', '', window_text).strip()
|
| 301 |
+
|
| 302 |
+
score = fuzz.ratio(title_clean, window_text_clean)
|
| 303 |
+
if score > best_score:
|
| 304 |
+
best_score = score
|
| 305 |
+
best_start = all_words[i]['start']
|
| 306 |
+
best_end = all_words[i + w_size - 1]['end']
|
| 307 |
+
|
| 308 |
+
# If it's a strong match, return exact start and end times
|
| 309 |
+
if best_score >= 85:
|
| 310 |
+
return best_start, best_end
|
| 311 |
+
|
| 312 |
+
return None, None
|
| 313 |
+
except Exception as e:
|
| 314 |
+
print(f"Error finding title: {e}")
|
| 315 |
+
return None, None
|
| 316 |
+
|
| 317 |
+
def create_body_ass_from_json(json_path, output_dir, highlight_color='yellow',
|
| 318 |
+
font_size=None, start_time_sec=0.0, config=SUBTITLE_CONFIG,
|
| 319 |
+
stop_time_sec=None):
|
| 320 |
+
"""Creates dynamic body subtitles starting at 1 word and increasing by 2 up to 50."""
|
| 321 |
+
if font_size is None: font_size = config['font_size_default']
|
| 322 |
+
color_map = {'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'), 'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'), 'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'), 'blue': ('&H00FF0000', '&H00FFFFFF')}
|
| 323 |
+
highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
|
| 324 |
+
|
| 325 |
+
ass_path = os.path.join(output_dir, 'body_subtitles.ass')
|
| 326 |
+
ass_header = f"""[Script Info]
|
| 327 |
+
Title: Body JSON Subtitles
|
| 328 |
ScriptType: v4.00+
|
| 329 |
[V4+ Styles]
|
| 330 |
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
| 331 |
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,1,3,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
|
| 332 |
[Events]
|
| 333 |
+
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
|
| 334 |
+
|
| 335 |
+
with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f)
|
| 336 |
+
|
| 337 |
+
all_words = []
|
| 338 |
+
for segment in data.get('segments', []):
|
| 339 |
+
for word_data in segment.get('words', []):
|
| 340 |
+
word_text = word_data.get('text', '').strip()
|
| 341 |
+
start_ms = word_data.get('start_time', 0)
|
| 342 |
+
if start_ms < start_time_sec - 0.1: continue
|
| 343 |
+
if stop_time_sec is not None and start_ms >= stop_time_sec - 0.1: continue
|
| 344 |
+
if word_text:
|
| 345 |
+
all_words.append({'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})
|
| 346 |
+
|
| 347 |
+
chunks = []
|
| 348 |
+
i = 0
|
| 349 |
+
current_chunk_size = 1
|
| 350 |
+
max_chunk_size = 50
|
| 351 |
+
|
| 352 |
+
while i < len(all_words):
|
| 353 |
+
remaining = len(all_words) - i
|
| 354 |
+
take = min(current_chunk_size, remaining)
|
| 355 |
+
chunks.append(all_words[i : i + take])
|
| 356 |
+
i += take
|
| 357 |
+
if current_chunk_size < max_chunk_size:
|
| 358 |
+
current_chunk_size = min(current_chunk_size + 4, max_chunk_size)
|
| 359 |
+
|
| 360 |
+
ass_events = []
|
| 361 |
+
for chunk in chunks:
|
| 362 |
+
chunk_text_only = [item['word'] for item in chunk]
|
| 363 |
+
frame_end = chunk[-1]['end']
|
| 364 |
+
for idx, info in enumerate(chunk):
|
| 365 |
+
w_start = info['start']
|
| 366 |
+
w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else frame_end
|
| 367 |
+
|
| 368 |
+
text_parts = []
|
| 369 |
+
for j, word_str in enumerate(chunk_text_only):
|
| 370 |
+
if j == idx: text_parts.append(f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{word_str}{{\\r}}")
|
| 371 |
+
else: text_parts.append(word_str)
|
| 372 |
+
ass_events.append(f"Dialogue: 0,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
|
| 373 |
+
|
| 374 |
+
with open(ass_path, 'w', encoding='utf-8') as f:
|
| 375 |
+
f.write(ass_header + '\n'.join(ass_events))
|
| 376 |
+
return ass_path
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def create_cta_ass_from_json(json_path, output_dir, start_sec, font_size, video_width, video_height, highlight_color='yellow', config=SUBTITLE_CONFIG, words_per_frame=10):
    """Create the chunky, Instagram-style "box" subtitles for the CTA segment.

    Reads word-level timings from a transcription JSON and writes an .ass
    subtitle file that shows the CTA text in centered multi-word frames,
    highlighting each word as it is spoken.

    Args:
        json_path: Path to the transcription JSON ({'segments': [{'words': [...]}]}).
        output_dir: Directory where 'cta_subtitles.ass' is written.
        start_sec: Words starting before this time (minus a 0.1s tolerance)
            are excluded — the CTA begins here.
        font_size: Fontsize for the ASS Default style.
        video_width / video_height: Used for PlayRes and side margins.
        highlight_color: Key into the color map below; unknown values fall
            back to the yellow scheme.
        config: Subtitle config dict providing 'font_name'.
        words_per_frame: Target number of words shown per subtitle frame.

    Returns:
        Path to the written .ass file.
    """
    # (highlight_bg, highlight_text) pairs in ASS &HAABBGGRR notation.
    color_map = {
        'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
        'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
        # NOTE(review): the original file defines a few more entries here that
        # were collapsed out of the diff view — unknown keys safely fall back
        # to the yellow scheme via .get() below; restore them if known.
    }
    highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
    # 12.5% of the width per side, plus a fixed 40px breathing room.
    margin_lr = int(video_width * 0.125) + 40

    ass_path = os.path.join(output_dir, 'cta_subtitles.ass')
    # Style logic: WrapStyle=1, BorderStyle=3, Outline=10 (tight Instagram box),
    # Alignment=5 (dead center), ScaleY=85 for a slightly squashed look.
    ass_header = f"""[Script Info]
Title: CTA JSON Subtitles
ScriptType: v4.00+
PlayResX: {video_width}
PlayResY: {video_height}
WrapStyle: 1
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,10,0,5,{margin_lr},{margin_lr},0,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""

    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # --- Collect CTA words (everything spoken from start_sec onward) ---
    all_cta_words = []
    brand_prefix = 'access'  # second half of the brand name "BookXcess"
    for segment in data.get('segments', []):
        for word_data in segment.get('words', []):
            word_text = word_data.get('text', '').strip()
            start_ms = word_data.get('start_time', 0)
            if start_ms < start_sec - 0.1:
                continue  # Skip words before the CTA starts
            if not word_text:
                continue
            # Merge the transcriber's "Book" + "Access" into the brand "BookXcess".
            if word_text.lower().startswith(brand_prefix) and all_cta_words and all_cta_words[-1]['word'].lower() == 'book':
                # Keep any trailing punctuation (like commas or periods) from "Access".
                punctuation = word_text[len(brand_prefix):]
                all_cta_words[-1]['word'] = 'BookXcess' + punctuation
                # Extend the highlight time to cover both merged words.
                all_cta_words[-1]['end'] = word_data.get('end_time', 0)
                continue  # "Access" is absorbed, not added separately
            all_cta_words.append({'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})

    # --- Chunk words into frames of ~words_per_frame words ---
    # If the remainder is only 1-2 words over the target, absorb it into one
    # final frame instead of leaving a tiny orphan frame.
    chunks = []
    i = 0
    while i < len(all_cta_words):
        remaining = len(all_cta_words) - i
        take = remaining if words_per_frame < remaining <= words_per_frame + 2 else min(words_per_frame, remaining)
        chunks.append(all_cta_words[i: i + take])
        i += take

    # --- Emit one Dialogue line per spoken word (layer 1, above body subs) ---
    ass_events = []
    for chunk in chunks:
        chunk_text_only = [item['word'] for item in chunk]
        frame_end = chunk[-1]['end']
        for idx, info in enumerate(chunk):
            w_start = info['start']
            # Each event ends when the next word starts; the last word in the
            # frame holds until the frame's end time.
            w_end = chunk[idx + 1]['start'] if idx + 1 < len(chunk) else frame_end

            text_parts = []
            for j, word_str in enumerate(chunk_text_only):
                if j == idx:
                    # Recolor only the currently-spoken word; \r resets the style.
                    text_parts.append(f"{{\\c{highlight_text}}}{word_str}{{\\r}}")
                else:
                    text_parts.append(word_str)
            ass_events.append(f"Dialogue: 1,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")

    with open(ass_path, 'w', encoding='utf-8') as f:
        f.write(ass_header + '\n'.join(ass_events))
    return ass_path
|
|
|
|
| 462 |
subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
|
| 463 |
if s_err: return None, s_err
|
| 464 |
|
| 465 |
+
# ✨ PRE-PROCESS SPEED HACK ✨
|
| 466 |
+
speed_factor = 1.3
|
| 467 |
+
|
| 468 |
+
# 1. Physically speed up the audio file
|
| 469 |
+
fast_audio = os.path.join(temp_dir, f"fast_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3")
|
| 470 |
+
subprocess.run(["ffmpeg", "-v", "error", "-y", "-i", audio_path, "-filter:a", f"atempo={speed_factor}", fast_audio], check=True)
|
| 471 |
+
audio_path = fast_audio # Trick the script into using the fast audio!
|
| 472 |
+
|
| 473 |
+
# 2. Physically shrink the JSON timestamps
|
| 474 |
+
fast_json = os.path.join(temp_dir, f"fast_subs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
|
| 475 |
+
with open(subtitle_path, 'r', encoding='utf-8') as f: json_data = json.load(f)
|
| 476 |
+
|
| 477 |
+
for segment in json_data.get('segments', []):
|
| 478 |
+
segment['start_time'] = segment.get('start_time', 0) / speed_factor
|
| 479 |
+
segment['end_time'] = segment.get('end_time', 0) / speed_factor
|
| 480 |
+
for word in segment.get('words', []):
|
| 481 |
+
word['start_time'] = word.get('start_time', 0) / speed_factor
|
| 482 |
+
word['end_time'] = word.get('end_time', 0) / speed_factor
|
| 483 |
+
|
| 484 |
+
with open(fast_json, 'w', encoding='utf-8') as f: json.dump(json_data, f)
|
| 485 |
+
subtitle_path = fast_json # Trick the script into using the fast subtitles!
|
| 486 |
+
|
| 487 |
video_width, video_height, video_fps = get_video_info(video_path)
|
| 488 |
+
audio_duration = get_audio_duration(audio_path) # Now gets the new 1:18 duration natively!
|
| 489 |
|
| 490 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 491 |
reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
|
|
|
|
| 502 |
status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
|
| 503 |
has_reddit_template = False
|
| 504 |
|
| 505 |
+
# --- 1. Find Title Exact Word Timings ---
|
| 506 |
+
title_start, title_end = find_title_and_cta(subtitle_path, book_title)
|
|
|
|
| 507 |
|
| 508 |
+
book_appears_at = title_start if title_start is not None else audio_duration * (1 - VIDEO_CONFIG['promo_percent'])
|
| 509 |
+
box_appears_at = title_end if title_end is not None else book_appears_at + 1.5
|
| 510 |
|
| 511 |
+
if title_start is not None:
|
| 512 |
+
status_msg += f"\n📖 Hard cut to Book Cover at {title_start:.2f}s\n"
|
| 513 |
+
status_msg += f"🤫 Book title silenced in subtitles.\n"
|
| 514 |
+
status_msg += f"🖤 CTA text starts exactly at {title_end:.2f}s\n"
|
| 515 |
+
|
| 516 |
+
# --- 2. Prepare Dynamic CTA Text (JSON) ---
|
| 517 |
+
status_msg += "🖤 Generating Instagram-style dynamic CTA...\n"
|
| 518 |
+
cta_font_size = int(video_width * 0.060)
|
| 519 |
+
|
| 520 |
+
cta_ass_path = create_cta_ass_from_json(
|
| 521 |
+
subtitle_path, temp_dir, box_appears_at,
|
| 522 |
+
cta_font_size, video_width, video_height, highlight_color
|
| 523 |
+
)
|
| 524 |
+
cta_sub_escaped = cta_ass_path.replace('\\', '/').replace(':', '\\:')
|
| 525 |
|
| 526 |
+
# --- 3. Process Main Subtitles (JSON) ---
|
| 527 |
if enable_highlight:
|
| 528 |
+
status_msg += f"\n✨ Processing JSON subtitles...\n"
|
| 529 |
+
body_start_time = first_sub_end if has_reddit_template else 0.0
|
| 530 |
+
main_subtitle_path = create_body_ass_from_json(
|
| 531 |
subtitle_path, temp_dir, highlight_color, font_size,
|
| 532 |
+
start_time_sec=body_start_time, config=SUBTITLE_CONFIG,
|
| 533 |
+
stop_time_sec=book_appears_at # Stops EXACTLY before the title is spoken
|
| 534 |
)
|
| 535 |
else:
|
| 536 |
main_subtitle_path = subtitle_path
|
|
|
|
| 542 |
has_book_cover = book_cover_path is not None
|
| 543 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 544 |
output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
|
| 546 |
if has_book_cover:
|
| 547 |
try:
|
| 548 |
+
fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
|
| 549 |
+
fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
|
| 550 |
+
|
| 551 |
+
# Safety net: If the book title is spoken BEFORE the fade is supposed to end,
|
| 552 |
+
# we shorten the fade so it doesn't overlap the book cover cut.
|
| 553 |
+
if fade_ends_at > book_appears_at:
|
| 554 |
+
fade_ends_at = book_appears_at
|
| 555 |
+
fade_starts_at = min(fade_starts_at, fade_ends_at - 1.0)
|
| 556 |
+
|
| 557 |
+
fade_out_duration = fade_ends_at - fade_starts_at
|
| 558 |
+
solid_color_duration = max(0, book_appears_at - fade_ends_at)
|
| 559 |
+
|
| 560 |
+
main_video_duration = fade_ends_at
|
| 561 |
+
cover_segment_duration = audio_duration - book_appears_at
|
| 562 |
+
fade_color_hex = "#dacfc3" # Book page type color
|
| 563 |
+
|
| 564 |
+
# 1. Main Segment (background video fading into sandal color)
|
| 565 |
main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
|
| 566 |
cmd_main = ["ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration), "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path]
|
| 567 |
subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)
|
| 568 |
|
| 569 |
+
# 2. Solid Color Segment (Holds the sandal color until the hard cut)
|
| 570 |
+
solid_color_path = None
|
| 571 |
+
if solid_color_duration > 0:
|
| 572 |
+
solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
|
| 573 |
+
cmd_solid = ["ffmpeg", "-f", "lavfi", "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path]
|
| 574 |
+
subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)
|
| 575 |
|
| 576 |
+
# 3. Book Cover Segment (Hard cut triggered exactly when title is spoken)
|
| 577 |
cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
|
|
|
|
| 578 |
cmd_cover = ["ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration), "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path]
|
| 579 |
subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)
|
| 580 |
|
| 581 |
+
# 4. Stitch them all together
|
| 582 |
concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
|
| 583 |
with open(concat_list_path, 'w') as f:
|
| 584 |
+
f.write(f"file '{main_segment_path}'\n")
|
| 585 |
+
if solid_color_path:
|
| 586 |
+
f.write(f"file '{solid_color_path}'\n")
|
| 587 |
+
f.write(f"file '{cover_segment_path}'\n")
|
| 588 |
|
| 589 |
+
#--- 5. Build the Filter Graph (Subtitles, Overlays & SPEEDUP) ---
|
| 590 |
input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
|
| 591 |
curr_idx = 1
|
| 592 |
curr_stream = "[0:v]"
|
| 593 |
|
|
|
|
| 594 |
if has_reddit_template:
|
| 595 |
input_cmd += ["-loop", "1", "-i", reddit_card_path]
|
| 596 |
filter_complex = f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
|
|
|
|
| 598 |
else:
|
| 599 |
filter_complex = f"{curr_stream}copy[v1];"; curr_stream = "[v1]"
|
| 600 |
|
| 601 |
+
# 1. Burn in Main Subtitles
|
| 602 |
filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"; curr_stream = "[v2]"
|
| 603 |
|
| 604 |
+
# 2. Burn in CTA Subtitles (Straight to v_final - NO DUPLICATES)
|
| 605 |
+
if cta_ass_path:
|
| 606 |
+
filter_complex += f"{curr_stream}ass={cta_sub_escaped}[v_final]"
|
| 607 |
+
else:
|
| 608 |
+
filter_complex += f"{curr_stream}copy[v_final]"
|
| 609 |
|
| 610 |
input_cmd += ["-i", audio_path]
|
| 611 |
+
|
| 612 |
cmd_final = input_cmd + [
|
| 613 |
"-filter_complex", filter_complex,
|
| 614 |
"-map", "[v_final]", "-map", f"{curr_idx}:a",
|
|
|
|
| 616 |
"-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
|
| 617 |
]
|
| 618 |
|
| 619 |
+
status_msg += "🎬 Rendering final synchronized video...\n"
|
| 620 |
subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)
|
| 621 |
+
except Exception as e:
|
| 622 |
+
return None, f"❌ Book cover processing error: {str(e)}"
|
|
|
|
|
|
|
| 623 |
|
| 624 |
if os.path.exists(output_path): return output_path, f"✅ Success!"
|
| 625 |
else: return None, "❌ Output not created"
|
| 626 |
except Exception as e: return None, f"❌ Error: {str(e)}"
|
| 627 |
|
| 628 |
+
|
|
|
|
|
|
|
|
|
|
| 629 |
app = FastAPI(title="Video Stitcher API")
|
| 630 |
|
| 631 |
app.add_middleware(
|
|
|
|
| 636 |
allow_headers=["*"],
|
| 637 |
)
|
| 638 |
|
|
|
|
| 639 |
class StitchErrorResponse(BaseModel):
    """Error payload returned by /video_stitch on failure (HTTP 400/500)."""
    # Always "failed" on this response model (success returns the video file instead).
    status: str = Field(..., example="failed")
    # Human-readable failure reason, e.g. an FFmpeg or download error message.
    message: str = Field(..., example="❌ FFmpeg error: ...")
    # Elapsed processing time, pre-formatted as "XmYs".
    run_time: str = Field(..., example="0m 5s")
|
| 643 |
|
|
|
|
| 644 |
def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
    """Persist an uploaded file into *temp_dir* and return the saved path.

    Only the basename of the client-supplied filename is used, so a name
    like ``../../etc/passwd`` cannot escape the temp directory.

    Args:
        upload_file: The FastAPI upload whose stream is copied to disk.
        temp_dir: Existing directory to write into.

    Returns:
        Absolute/joined path of the file written inside *temp_dir*.
    """
    filename = os.path.basename(upload_file.filename)
    dest_path = os.path.join(temp_dir, filename)
    # Stream the upload to disk in chunks instead of upload_file.file.read(),
    # which buffered the entire (possibly large video) upload in memory.
    with open(dest_path, 'wb') as f:
        shutil.copyfileobj(upload_file.file, f)
    return dest_path
|
| 650 |
|
| 651 |
+
@app.post('/video_stitch', responses={400: {"model": StitchErrorResponse}, 500: {"model": StitchErrorResponse}})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 652 |
async def stitch_upload(
|
| 653 |
request: Request,
|
|
|
|
| 654 |
video_file: Optional[UploadFile] = File(None),
|
| 655 |
video_url: Optional[str] = Form(None),
|
|
|
|
| 656 |
audio_file: Optional[UploadFile] = File(None),
|
| 657 |
audio_url: Optional[str] = Form(None),
|
|
|
|
| 658 |
subtitle_file: Optional[UploadFile] = File(None),
|
| 659 |
subtitle_url: Optional[str] = Form(None),
|
|
|
|
| 660 |
book_cover_file: Optional[UploadFile] = File(None),
|
| 661 |
book_cover_url: Optional[str] = Form(None),
|
| 662 |
book_cover_base64: Optional[str] = Form(None),
|
| 663 |
book_id: Optional[str] = Form(None),
|
|
|
|
| 664 |
book_title: Optional[str] = Form(None),
|
|
|
|
| 665 |
enable_highlight: bool = Form(True),
|
| 666 |
highlight_color: str = Form('yellow'),
|
| 667 |
font_size: int = Form(10),
|
| 668 |
crf_quality: int = Form(23),
|
| 669 |
):
|
|
|
|
|
|
|
| 670 |
# Format validation
|
| 671 |
+
if subtitle_file and not subtitle_file.filename.endswith('.json'):
|
| 672 |
+
raise HTTPException(status_code=422, detail="❌ Subtitle must be a .json file")
|
| 673 |
+
if subtitle_url and not subtitle_url.strip().split('?')[0].endswith('.json'):
|
| 674 |
+
raise HTTPException(status_code=422, detail="❌ Subtitle URL must point to a .json file")
|
| 675 |
+
if audio_file and audio_file.content_type not in {"audio/mpeg", "audio/mp3", "audio/wav", "audio/x-wav", "audio/aac", "audio/mp4", "audio/x-m4a"}:
|
| 676 |
raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
|
|
|
|
|
|
|
| 677 |
if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
|
| 678 |
raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
|
| 679 |
|
| 680 |
+
temp_dir = tempfile.mkdtemp()
|
| 681 |
payload = {
|
| 682 |
'video_file': None, 'video_url': video_url,
|
| 683 |
'audio_file': None, 'audio_url': audio_url,
|
|
|
|
| 702 |
payload['book_cover_file'] = _save_upload_to_temp(book_cover_file, temp_dir)
|
| 703 |
|
| 704 |
start_time = time.time()
|
|
|
|
|
|
|
| 705 |
loop = asyncio.get_event_loop()
|
| 706 |
result_path, message = await loop.run_in_executor(
|
| 707 |
None,
|
|
|
|
| 732 |
"X-Status": "completed",
|
| 733 |
"X-Run-Time": run_time_fmt,
|
| 734 |
"X-File-Size-MB": f"{file_size_mb:.2f}",
|
|
|
|
| 735 |
}
|
| 736 |
)
|
| 737 |
else:
|
|
|
|
| 739 |
{'status': 'failed', 'message': message, 'run_time': run_time_fmt},
|
| 740 |
status_code=400
|
| 741 |
)
|
|
|
|
| 742 |
except Exception as e:
|
| 743 |
raise HTTPException(status_code=500, detail=str(e))
|
| 744 |
|
| 745 |
+
@app.get('/health')
async def health():
    """Liveness probe — always reports the service as up."""
    payload = {"status": "ok"}
    return payload
|