Nav3005 commited on
Commit
2343e20
·
verified ·
1 Parent(s): 886ecfa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +298 -256
app.py CHANGED
@@ -1,5 +1,5 @@
 
1
  import subprocess
2
- import base64
3
  import os
4
  import tempfile
5
  import requests
@@ -7,17 +7,20 @@ import re
7
  import textwrap
8
  import shutil
9
  import time
10
- import asyncio
11
  from datetime import datetime
12
  from PIL import Image, ImageDraw, ImageFont
 
13
  from io import BytesIO
14
- from typing import Optional
15
 
 
 
 
16
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
17
  from fastapi.responses import FileResponse, JSONResponse
18
  from fastapi.middleware.cors import CORSMiddleware
19
  from pydantic import BaseModel, Field
20
-
21
  # ========================================
22
  # CONFIGURATION SECTION - CUSTOMIZE HERE
23
  # ========================================
@@ -38,14 +41,13 @@ REDDIT_CONFIG = {
38
  SUBTITLE_CONFIG = {
39
  'font_file': 'LilitaOne-Regular.ttf',
40
  'font_name': 'Lilita One',
41
- 'font_size_default': 10,
42
  'position_alignment': 5,
43
- 'margin_left': 50,
44
- 'margin_right': 70,
45
  'margin_vertical': 20,
46
  'line_spacing': 2
47
  }
48
- # go to line 462 if you want to increase/decrease CTA part's font size!!!
49
 
50
  VIDEO_CONFIG = {
51
  'reddit_scale_percent': 0.75,
@@ -55,52 +57,22 @@ VIDEO_CONFIG = {
55
  'fade_color_rgb': (218, 207, 195),
56
  }
57
 
58
-
59
  # ========================================
60
  # END CONFIGURATION SECTION
61
  # ========================================
62
 
63
# ============================================
# FINDS BOOK TITLE TO SPLIT CTA AND BODY SCRIPT
# ============================================
def find_title_and_cta(srt_path, book_title):
    """Locate the SRT block mentioning *book_title* and split out the CTA.

    Returns a 3-tuple:
      title_time -- seconds at which the title block starts
      cta_time   -- seconds at which the next block starts (None if absent)
      cta_text   -- all subtitle text after the title block (None if absent)
    (None, None, None) when the title is blank/missing or on any error.
    """
    try:
        if not book_title or not book_title.strip():
            return None, None, None

        with open(srt_path, 'r', encoding='utf-8') as handle:
            blocks = re.split(r'\n\s*\n', handle.read().strip())

        needle = book_title.lower()
        for index, raw_block in enumerate(blocks):
            parts = raw_block.strip().split('\n')
            if len(parts) < 3:
                continue
            if needle not in ' '.join(parts[2:]).lower():
                continue

            # Title found: its start time comes from the block's timing line.
            title_time = srt_time_to_ms(parts[1].split(' --> ')[0]) / 1000.0

            # The CTA is assumed to begin with the very next block.
            cta_time = None
            if index + 1 < len(blocks):
                follower = blocks[index + 1].strip().split('\n')
                if len(follower) >= 3:
                    cta_time = srt_time_to_ms(follower[1].split(' --> ')[0]) / 1000.0

            # Everything after the title block is collected as CTA text.
            trailing = [
                ' '.join(blocks[k].strip().split('\n')[2:]).strip()
                for k in range(index + 1, len(blocks))
                if len(blocks[k].strip().split('\n')) >= 3
            ]
            return title_time, cta_time, ' '.join(trailing) if trailing else None

        return None, None, None
    except Exception as e:
        print(f"Error finding title and CTA: {e}")
        return None, None, None
104
 
105
  def setup_custom_fonts_hf(temp_dir):
106
  try:
@@ -200,22 +172,6 @@ def get_audio_duration(audio_path):
200
  return float(result.stdout.strip())
201
  except Exception as e: raise Exception(f"Failed to get audio duration: {str(e)}")
202
 
203
def extract_first_subtitle(srt_path):
    """Read an SRT file and return (text, start_sec, end_sec) of its first cue.

    Falls back to ("No subtitle found", 0.0, 3.0) for empty or malformed
    content; any other failure is re-raised as a generic Exception.
    """
    try:
        def _to_seconds(stamp):
            # 'HH:MM:SS,mmm' -> float seconds
            hh, mm, rest = stamp.split(':')
            ss, millis = rest.split(',')
            return int(hh) * 3600 + int(mm) * 60 + int(ss) + int(millis) / 1000.0

        with open(srt_path, 'r', encoding='utf-8') as handle:
            cues = re.split(r'\n\s*\n', handle.read().strip())

        if not cues:
            return "No subtitle found", 0.0, 3.0

        first = cues[0].strip().split('\n')
        if len(first) < 3:
            return "No subtitle found", 0.0, 3.0

        timing = first[1].split(' --> ')
        return (' '.join(first[2:]).strip(),
                _to_seconds(timing[0].strip()),
                _to_seconds(timing[1].strip()))
    except Exception as e:
        raise Exception(f"Failed to extract first subtitle: {str(e)}")
218
-
219
  def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
220
  try:
221
  template = Image.open(template_path).convert('RGBA')
@@ -267,68 +223,161 @@ def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
267
  except Exception as e: return None, f"❌ Error downloading {file_type}: {str(e)}"
268
  return None, f"❌ Unknown error"
269
 
270
def srt_time_to_ms(time_str):
    """Convert an SRT timestamp 'HH:MM:SS,mmm' to integer milliseconds."""
    clock, _, millis = time_str.strip().partition(',')
    hours, minutes, seconds = (int(part) for part in clock.split(':'))
    return ((hours * 60 + minutes) * 60 + seconds) * 1000 + int(millis)
274
-
275
def ms_to_ass_time(ms):
    """Render milliseconds as an ASS timestamp 'H:MM:SS.cs' (centiseconds)."""
    total_cs = ms // 10  # ASS only carries centisecond precision
    s, cs = divmod(total_cs, 100)
    m, s = divmod(s, 60)
    h, m = divmod(m, 60)
    return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
281
-
282
- #-----------------------
283
- # BODY SCRIPT HIGHLIGHTS ASS
284
- #-----------------------
285
- def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
286
- font_size=None, skip_first=False, config=SUBTITLE_CONFIG,
287
- cta_start_time_sec=None):
288
- """Convert SRT to ASS. Stops before cta_start_time_sec."""
289
- if font_size is None: font_size = config['font_size_default']
290
- color_map = {'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'), 'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'), 'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'), 'blue': ('&H00FF0000', '&H00FFFFFF')}
291
- highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
292
-
293
- with open(srt_path, 'r', encoding='utf-8') as f: srt_content = f.read()
294
- ass_path = os.path.join(output_dir, 'word_highlight_subtitles.ass')
295
- ass_header = f"""[Script Info]
296
- Title: Word-by-Word Highlight Subtitles
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  ScriptType: v4.00+
298
  [V4+ Styles]
299
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
300
  Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,1,3,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
301
  [Events]
302
- Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
303
- """
304
- srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
305
- ass_events = []
306
- start_index = 1 if skip_first else 0
307
- for block in srt_blocks[start_index:]:
308
- lines = block.strip().split('\n')
309
- if len(lines) >= 3:
310
- times = lines[1].split(' --> ')
311
- if len(times) == 2:
312
- start_ms = srt_time_to_ms(times[0])
313
- if cta_start_time_sec is not None and (start_ms / 1000.0) >= cta_start_time_sec - 0.1: break
314
- end_ms = srt_time_to_ms(times[1])
315
- words = ' '.join(lines[2:]).split()
316
- if not words: continue
317
- time_per_word = (end_ms - start_ms) / len(words)
318
- for i, word in enumerate(words):
319
- word_start = start_ms + int(i * time_per_word)
320
- word_end = start_ms + int((i + 1) * time_per_word)
321
- if i == len(words) - 1: word_end = end_ms
322
- text_parts = [f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}" if j == i else w for j, w in enumerate(words)]
323
- ass_events.append(f"Dialogue: 0,{ms_to_ass_time(word_start)},{ms_to_ass_time(word_end)},Default,,0,0,0,,{' '.join(text_parts)}")
324
- with open(ass_path, 'w', encoding='utf-8') as f: f.write(ass_header); f.write('\n'.join(ass_events))
325
- return ass_path
326
-
327
- #-----------------------
328
- # CTA HIGHLIGHTS ASS
329
- #-----------------------
330
- def create_cta_highlight_ass(srt_path, output_dir, start_sec, font_size, video_width, video_height, highlight_color='yellow', config=SUBTITLE_CONFIG):
331
- """Groups CTA words into frames of max 10, but merges leftovers if they are < 3 words."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  color_map = {
333
  'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
334
  'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
@@ -338,78 +387,61 @@ def create_cta_highlight_ass(srt_path, output_dir, start_sec, font_size, video_w
338
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
339
  margin_lr = int(video_width * 0.125) + 40
340
 
341
- with open(srt_path, 'r', encoding='utf-8') as f: srt_content = f.read()
342
- ass_path = os.path.join(output_dir, 'cta_animated_subtitles.ass')
343
-
344
  ass_header = f"""[Script Info]
345
- Title: CTA Animated Subtitles
346
  ScriptType: v4.00+
347
  PlayResX: {video_width}
348
  PlayResY: {video_height}
349
  WrapStyle: 1
350
  [V4+ Styles]
351
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
352
- Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,15,0,5,{margin_lr},{margin_lr},0,1
353
  [Events]
354
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
355
 
356
- srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
357
-
358
- # 1. Flatten all CTA words into a single timed stream
359
  all_cta_words = []
360
- for block in srt_blocks:
361
- lines = block.strip().split('\n')
362
- if len(lines) >= 3:
363
- times = lines[1].split(' --> ')
364
- if len(times) == 2:
365
- start_ms = srt_time_to_ms(times[0])
366
- if (start_ms / 1000.0) < start_sec - 0.1: continue
367
-
368
- end_ms = srt_time_to_ms(times[1])
369
- words = ' '.join(lines[2:]).split()
370
- if not words: continue
371
-
372
- time_per_word = (end_ms - start_ms) / len(words)
373
- for i, word in enumerate(words):
374
- w_start = start_ms + int(i * time_per_word)
375
- w_end = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)
376
- all_cta_words.append({'word': word, 'start': w_start, 'end': w_end})
377
 
378
- # 2. Group words into chunks with "Don't leave 1 or 2 words alone" logic
379
  chunks = []
380
  i = 0
381
- total_words = len(all_cta_words)
382
-
383
- while i < total_words:
384
- remaining = total_words - i
385
- if 10 < remaining <= 13:
386
- take = remaining
387
- else:
388
- take = min(10, remaining)
389
-
390
  chunks.append(all_cta_words[i : i + take])
391
  i += take
392
 
393
- # 3. Generate ASS Dialogue lines for each chunk
394
  ass_events = []
395
  for chunk in chunks:
396
  chunk_text_only = [item['word'] for item in chunk]
397
-
398
  for idx, info in enumerate(chunk):
399
  w_start = info['start']
400
- # Match the start of the next word to avoid background box flickering
401
- w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else info['end']
402
 
403
  text_parts = []
404
  for j, word_str in enumerate(chunk_text_only):
405
- if j == idx:
406
- text_parts.append(f"{{\\c{highlight_text}}}{word_str}{{\\r}}")
407
- else:
408
- text_parts.append(word_str)
409
-
410
- styled_text = ' '.join(text_parts)
411
- ass_events.append(f"Dialogue: 1,{ms_to_ass_time(w_start)},{ms_to_ass_time(w_end)},Default,,0,0,0,,{styled_text}")
412
-
413
  with open(ass_path, 'w', encoding='utf-8') as f:
414
  f.write(ass_header + '\n'.join(ass_events))
415
  return ass_path
@@ -430,8 +462,30 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
430
  subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
431
  if s_err: return None, s_err
432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  video_width, video_height, video_fps = get_video_info(video_path)
434
- audio_duration = get_audio_duration(audio_path)
435
 
436
  script_dir = os.path.dirname(os.path.abspath(__file__))
437
  reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
@@ -448,34 +502,35 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
448
  status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
449
  has_reddit_template = False
450
 
451
- # --- 1. Find CTA Info ---
452
- title_timestamp, cta_timestamp, cta_text_raw = find_title_and_cta(subtitle_path, book_title)
453
- book_appears_at = title_timestamp if title_timestamp is not None else audio_duration * (1 - VIDEO_CONFIG['promo_percent'])
454
 
455
- box_appears_at = cta_timestamp if cta_timestamp is not None else book_appears_at + 1.5
 
456
 
457
- if title_timestamp: status_msg += f"\n📖 Book title at {title_timestamp:.2f}s\n"
458
- if cta_timestamp: status_msg += f"🖤 CTA text starts at {cta_timestamp:.2f}s\n"
459
-
460
- # --- 2. Prepare Dynamic CTA Text ---
461
- cta_ass_path = None
462
- if cta_text_raw:
463
- status_msg += "🖤 Generating Instagram-style dynamic CTA...\n"
464
- cta_font_size = int(video_width * 0.060) #INCREASE / DECREASE CTA FONT SIZE HERE
465
-
466
- cta_ass_path = create_cta_highlight_ass(
467
- subtitle_path, temp_dir, box_appears_at,
468
- cta_font_size, video_width, video_height, highlight_color
469
- )
470
- cta_sub_escaped = cta_ass_path.replace('\\', '/').replace(':', '\\:')
471
 
472
- # --- 3. Process Main Subtitles ---
473
  if enable_highlight:
474
- status_msg += f"\n✨ Processing subtitles...\n"
475
- main_subtitle_path = create_word_by_word_highlight_ass(
 
476
  subtitle_path, temp_dir, highlight_color, font_size,
477
- skip_first=has_reddit_template, config=SUBTITLE_CONFIG,
478
- cta_start_time_sec=title_timestamp
479
  )
480
  else:
481
  main_subtitle_path = subtitle_path
@@ -487,41 +542,55 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
487
  has_book_cover = book_cover_path is not None
488
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
489
  output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
490
-
491
- fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
492
- fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
493
- fade_out_duration = fade_ends_at - fade_starts_at
494
- promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
495
- solid_color_duration = max(0, book_appears_at - fade_ends_at)
496
- main_video_duration = fade_ends_at
497
- cover_segment_duration = promo_duration
498
- fade_color_hex = "#dacfc3"
499
 
500
  if has_book_cover:
501
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
503
  cmd_main = ["ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration), "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path]
504
  subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)
505
 
506
- solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
507
- cmd_solid = ["ffmpeg", "-f", "lavfi", "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path]
508
- subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)
 
 
 
509
 
 
510
  cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
511
- # Removed the fade-in effect here for a clean hard cut
512
  cmd_cover = ["ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration), "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path]
513
  subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)
514
 
 
515
  concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
516
  with open(concat_list_path, 'w') as f:
517
- f.write(f"file '{main_segment_path}'\n"); f.write(f"file '{solid_color_path}'\n"); f.write(f"file '{cover_segment_path}'\n")
 
 
 
518
 
519
- #--- 4. Build the Filter Graph ---
520
  input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
521
  curr_idx = 1
522
  curr_stream = "[0:v]"
523
 
524
- # Layer 1: Reddit Card
525
  if has_reddit_template:
526
  input_cmd += ["-loop", "1", "-i", reddit_card_path]
527
  filter_complex = f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
@@ -529,16 +598,17 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
529
  else:
530
  filter_complex = f"{curr_stream}copy[v1];"; curr_stream = "[v1]"
531
 
532
- # Layer 2: Main Subtitles (Auto-stops right before CTA)
533
  filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"; curr_stream = "[v2]"
534
 
535
- # Layer 3: Animated CTA Subtitles Overlay (Dynamic Box is built-in!)
536
- if cta_ass_path:
537
- filter_complex += f"{curr_stream}ass={cta_sub_escaped}[v_final]"
538
- else:
539
- filter_complex += f"{curr_stream}copy[v_final]"
540
 
541
  input_cmd += ["-i", audio_path]
 
542
  cmd_final = input_cmd + [
543
  "-filter_complex", filter_complex,
544
  "-map", "[v_final]", "-map", f"{curr_idx}:a",
@@ -546,21 +616,16 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
546
  "-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
547
  ]
548
 
549
- status_msg += "🎬 Rendering final video...\n"
550
  subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)
551
-
552
- except subprocess.CalledProcessError as e: return None, f"❌ FFmpeg error:\n{e.stderr[-1000:] if e.stderr else str(e)}"
553
- except Exception as e: return None, f"❌ Error: {str(e)}"
554
- else: return None, "❌ Book cover required."
555
 
556
  if os.path.exists(output_path): return output_path, f"✅ Success!"
557
  else: return None, "❌ Output not created"
558
  except Exception as e: return None, f"❌ Error: {str(e)}"
559
 
560
-
561
- # ========================================
562
- # FastAPI app
563
- # ========================================
564
  app = FastAPI(title="Video Stitcher API")
565
 
566
  app.add_middleware(
@@ -571,13 +636,11 @@ app.add_middleware(
571
  allow_headers=["*"],
572
  )
573
 
574
-
575
class StitchErrorResponse(BaseModel):
    # Error payload documented for /video_stitch 400/500 responses.
    status: str = Field(..., example="failed")  # e.g. "failed" (see the 400 branch of stitch_upload)
    message: str = Field(..., example="❌ FFmpeg error: ...")  # human-readable failure reason
    run_time: str = Field(..., example="0m 5s")  # elapsed processing time, "XmYs" format
579
 
580
-
581
  def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
582
  filename = os.path.basename(upload_file.filename)
583
  dest_path = os.path.join(temp_dir, filename)
@@ -585,52 +648,36 @@ def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
585
  f.write(upload_file.file.read())
586
  return dest_path
587
 
588
-
589
- @app.post(
590
- '/video_stitch',
591
- responses={400: {"model": StitchErrorResponse}, 500: {"model": StitchErrorResponse}},
592
- summary="Stitch video, audio, and subtitles into a final MP4",
593
- description=(
594
- "Synchronous endpoint — holds the connection open until encoding is complete, "
595
- "then returns the finished MP4 directly. Designed for n8n HTTP Request nodes."
596
- ),
597
- )
598
  async def stitch_upload(
599
  request: Request,
600
- # Video
601
  video_file: Optional[UploadFile] = File(None),
602
  video_url: Optional[str] = Form(None),
603
- # Audio
604
  audio_file: Optional[UploadFile] = File(None),
605
  audio_url: Optional[str] = Form(None),
606
- # Subtitle
607
  subtitle_file: Optional[UploadFile] = File(None),
608
  subtitle_url: Optional[str] = Form(None),
609
- # Book Cover (use exactly ONE)
610
  book_cover_file: Optional[UploadFile] = File(None),
611
  book_cover_url: Optional[str] = Form(None),
612
  book_cover_base64: Optional[str] = Form(None),
613
  book_id: Optional[str] = Form(None),
614
- # Book Title (used to detect CTA split point in subtitle)
615
  book_title: Optional[str] = Form(None),
616
- # Settings
617
  enable_highlight: bool = Form(True),
618
  highlight_color: str = Form('yellow'),
619
  font_size: int = Form(10),
620
  crf_quality: int = Form(23),
621
  ):
622
- temp_dir = tempfile.mkdtemp()
623
-
624
  # Format validation
625
- if video_file and video_file.content_type not in {"video/mp4", "video/quicktime", "video/x-msvideo", "video/x-matroska"}:
626
- raise HTTPException(status_code=422, detail=f"❌ Invalid video format: {video_file.content_type}")
627
- if audio_file and audio_file.content_type not in {"audio/mpeg", "audio/wav", "audio/x-wav", "audio/aac", "audio/mp4", "audio/x-m4a"}:
 
 
628
  raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
629
- if subtitle_file and not (subtitle_file.filename.endswith('.srt') or subtitle_file.filename.endswith('.json')):
630
- raise HTTPException(status_code=422, detail="❌ Subtitle must be a .srt or .json file")
631
  if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
632
  raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
633
 
 
634
  payload = {
635
  'video_file': None, 'video_url': video_url,
636
  'audio_file': None, 'audio_url': audio_url,
@@ -655,8 +702,6 @@ async def stitch_upload(
655
  payload['book_cover_file'] = _save_upload_to_temp(book_cover_file, temp_dir)
656
 
657
  start_time = time.time()
658
-
659
- # Run blocking FFmpeg work in a thread so the event loop stays healthy
660
  loop = asyncio.get_event_loop()
661
  result_path, message = await loop.run_in_executor(
662
  None,
@@ -687,7 +732,6 @@ async def stitch_upload(
687
  "X-Status": "completed",
688
  "X-Run-Time": run_time_fmt,
689
  "X-File-Size-MB": f"{file_size_mb:.2f}",
690
- "X-Message": "Video created successfully",
691
  }
692
  )
693
  else:
@@ -695,11 +739,9 @@ async def stitch_upload(
695
  {'status': 'failed', 'message': message, 'run_time': run_time_fmt},
696
  status_code=400
697
  )
698
-
699
  except Exception as e:
700
  raise HTTPException(status_code=500, detail=str(e))
701
 
702
-
703
- @app.get('/health', summary="Health check")
704
async def health():
    """Liveness probe — always reports OK."""
    return {"status": "ok"}
 
1
+
2
  import subprocess
 
3
  import os
4
  import tempfile
5
  import requests
 
7
  import textwrap
8
  import shutil
9
  import time
10
+ import json
11
  from datetime import datetime
12
  from PIL import Image, ImageDraw, ImageFont
13
+ import base64
14
  from io import BytesIO
15
+ from thefuzz import fuzz
16
 
17
+ import asyncio
18
+ from io import BytesIO
19
+ from typing import Optional
20
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
21
  from fastapi.responses import FileResponse, JSONResponse
22
  from fastapi.middleware.cors import CORSMiddleware
23
  from pydantic import BaseModel, Field
 
24
  # ========================================
25
  # CONFIGURATION SECTION - CUSTOMIZE HERE
26
  # ========================================
 
41
  SUBTITLE_CONFIG = {
42
  'font_file': 'LilitaOne-Regular.ttf',
43
  'font_name': 'Lilita One',
44
+ 'font_size_default': 11,
45
  'position_alignment': 5,
46
+ 'margin_left': 70,
47
+ 'margin_right': 80,
48
  'margin_vertical': 20,
49
  'line_spacing': 2
50
  }
 
51
 
52
  VIDEO_CONFIG = {
53
  'reddit_scale_percent': 0.75,
 
57
  'fade_color_rgb': (218, 207, 195),
58
  }
59
 
 
60
  # ========================================
61
  # END CONFIGURATION SECTION
62
  # ========================================
63
 
64
+ # =========================
65
+ # HELPER FUNCTIONS
66
+ # =========================
67
+
68
def sec_to_ass_time(seconds):
    """Convert float seconds (e.g. 1.219) to ASS time format 'H:MM:SS.cs'."""
    total_cs = int(seconds * 1000) // 10  # truncate to centiseconds
    minutes, rem_cs = divmod(total_cs, 6000)
    hours, minutes = divmod(minutes, 60)
    secs, cs = divmod(rem_cs, 100)
    return f"{hours}:{minutes:02d}:{secs:02d}.{cs:02d}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  def setup_custom_fonts_hf(temp_dir):
78
  try:
 
172
  return float(result.stdout.strip())
173
  except Exception as e: raise Exception(f"Failed to get audio duration: {str(e)}")
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
176
  try:
177
  template = Image.open(template_path).convert('RGBA')
 
223
  except Exception as e: return None, f"❌ Error downloading {file_type}: {str(e)}"
224
  return None, f"❌ Unknown error"
225
 
226
+
227
+ # ============================================
228
+ # JSON LOGIC: PARSERS & SUBTITLE GENERATORS
229
+ # ============================================
230
+
231
def extract_first_subtitle(json_path):
    """Pull the opening sentence out of a word-level transcript JSON.

    Reads words from data['segments'][*]['words'] and accumulates them up to
    the first word carrying terminal punctuation (., !, ?). Used for the
    Reddit card text.

    Returns (sentence_text, start_sec, end_sec); falls back to
    ("No subtitle found", 0.0, 3.0) when nothing usable is present or on error.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as fh:
            transcript = json.load(fh)

        collected = []
        first_start = None

        for seg in transcript.get('segments', []):
            for token in seg.get('words', []):
                text = token.get('text', '').strip()
                if not text:
                    continue
                if first_start is None:
                    first_start = token.get('start_time', 0.0)
                collected.append(text)
                # Sentence boundary: the word itself carries the punctuation.
                if re.search(r'[.!?]$', text):
                    return ' '.join(collected), first_start, token.get('end_time', 3.0)

        # No terminal punctuation anywhere — return whatever was gathered.
        if collected:
            return ' '.join(collected), first_start, 3.0
        return "No subtitle found", 0.0, 3.0
    except Exception as e:
        print(f"Error extracting first subtitle: {e}")
        return "No subtitle found", 0.0, 3.0
263
+
264
+ # ============================================
265
+ # FINDS BOOK TITLE WORD'S EXACT TIMINGS
266
+ # ============================================
267
def find_title_and_cta(json_path, book_title):
    """Locate the spoken book title inside a word-level transcript JSON.

    Fuzzy-matches sliding windows of transcript words (title length, then
    +1, then -1 words, to tolerate split/merged transcriptions) against
    *book_title* and returns (start_sec, end_sec) of the best-scoring window.
    Returns (None, None) when the match is weak (fuzz ratio < 85), the title
    is blank, or on any error.
    """
    try:
        if not book_title or not book_title.strip():
            return None, None

        with open(json_path, 'r', encoding='utf-8') as fh:
            transcript = json.load(fh)

        target = re.sub(r'[^\w\s]', '', book_title.lower()).strip()
        target_len = len(target.split())

        # Flatten every timed, non-empty word in reading order.
        timeline = []
        for seg in transcript.get('segments', []):
            for tok in seg.get('words', []):
                text = tok.get('text', '').strip()
                if text:
                    timeline.append({
                        'text': text,
                        'start': tok.get('start_time', 0.0),
                        'end': tok.get('end_time', 0.0),
                    })

        best = (0, None, None)  # (score, start_sec, end_sec)
        for span in (target_len, target_len + 1, target_len - 1):
            if span <= 0:
                continue
            for offset in range(len(timeline) - span + 1):
                window = timeline[offset : offset + span]
                candidate = ' '.join(w['text'] for w in window).lower()
                candidate = re.sub(r'[^\w\s]', '', candidate).strip()
                score = fuzz.ratio(target, candidate)
                if score > best[0]:
                    best = (score, window[0]['start'], window[-1]['end'])

        # Only trust a strong match.
        if best[0] >= 85:
            return best[1], best[2]
        return None, None
    except Exception as e:
        print(f"Error finding title: {e}")
        return None, None
316
+
317
+ def create_body_ass_from_json(json_path, output_dir, highlight_color='yellow',
318
+ font_size=None, start_time_sec=0.0, config=SUBTITLE_CONFIG,
319
+ stop_time_sec=None):
320
+ """Creates dynamic body subtitles starting at 1 word and increasing by 2 up to 50."""
321
+ if font_size is None: font_size = config['font_size_default']
322
+ color_map = {'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'), 'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'), 'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'), 'blue': ('&H00FF0000', '&H00FFFFFF')}
323
+ highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
324
+
325
+ ass_path = os.path.join(output_dir, 'body_subtitles.ass')
326
+ ass_header = f"""[Script Info]
327
+ Title: Body JSON Subtitles
328
  ScriptType: v4.00+
329
  [V4+ Styles]
330
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
331
  Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,1,3,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
332
  [Events]
333
+ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
334
+
335
+ with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f)
336
+
337
+ all_words = []
338
+ for segment in data.get('segments', []):
339
+ for word_data in segment.get('words', []):
340
+ word_text = word_data.get('text', '').strip()
341
+ start_ms = word_data.get('start_time', 0)
342
+ if start_ms < start_time_sec - 0.1: continue
343
+ if stop_time_sec is not None and start_ms >= stop_time_sec - 0.1: continue
344
+ if word_text:
345
+ all_words.append({'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})
346
+
347
+ chunks = []
348
+ i = 0
349
+ current_chunk_size = 1
350
+ max_chunk_size = 50
351
+
352
+ while i < len(all_words):
353
+ remaining = len(all_words) - i
354
+ take = min(current_chunk_size, remaining)
355
+ chunks.append(all_words[i : i + take])
356
+ i += take
357
+ if current_chunk_size < max_chunk_size:
358
+ current_chunk_size = min(current_chunk_size + 4, max_chunk_size)
359
+
360
+ ass_events = []
361
+ for chunk in chunks:
362
+ chunk_text_only = [item['word'] for item in chunk]
363
+ frame_end = chunk[-1]['end']
364
+ for idx, info in enumerate(chunk):
365
+ w_start = info['start']
366
+ w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else frame_end
367
+
368
+ text_parts = []
369
+ for j, word_str in enumerate(chunk_text_only):
370
+ if j == idx: text_parts.append(f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{word_str}{{\\r}}")
371
+ else: text_parts.append(word_str)
372
+ ass_events.append(f"Dialogue: 0,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
373
+
374
+ with open(ass_path, 'w', encoding='utf-8') as f:
375
+ f.write(ass_header + '\n'.join(ass_events))
376
+ return ass_path
377
+
378
+
379
+ def create_cta_ass_from_json(json_path, output_dir, start_sec, font_size, video_width, video_height, highlight_color='yellow', config=SUBTITLE_CONFIG, words_per_frame=10):
380
+ """Creates the chunky, Instagram-style box subtitles for the CTA."""
381
  color_map = {
382
  'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
383
  'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
 
387
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
388
  margin_lr = int(video_width * 0.125) + 40
389
 
390
+ ass_path = os.path.join(output_dir, 'cta_subtitles.ass')
391
+ # Style logic: WrapStyle=1, BorderStyle=3, Outline=10 (Tight Instagram Box)
 
392
  ass_header = f"""[Script Info]
393
+ Title: CTA JSON Subtitles
394
  ScriptType: v4.00+
395
  PlayResX: {video_width}
396
  PlayResY: {video_height}
397
  WrapStyle: 1
398
  [V4+ Styles]
399
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
400
+ Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,10,0,5,{margin_lr},{margin_lr},0,1
401
  [Events]
402
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
403
 
404
+ with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f)
405
+
 
406
  all_cta_words = []
407
+ for segment in data.get('segments', []):
408
+ for word_data in segment.get('words', []):
409
+ word_text = word_data.get('text', '').strip()
410
+ start_ms = word_data.get('start_time', 0)
411
+ if start_ms < start_sec - 0.1: continue # Skip words before the CTA starts
412
+ if word_text:
413
+ # Merge "Book" and "Access" into "BookXcess"
414
+ if word_text.lower().startswith('access') and len(all_cta_words) > 0 and all_cta_words[-1]['word'].lower() == 'book':
415
+ # Keep any trailing punctuation (like commas or periods) from "Access"
416
+ punctuation = word_text[6:]
417
+ all_cta_words[-1]['word'] = 'BookXcess' + punctuation
418
+ # Extend the highlight time to cover both words
419
+ all_cta_words[-1]['end'] = word_data.get('end_time', 0)
420
+ continue # Skip adding "Access" as a separate word
421
+ all_cta_words.append({'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})
 
 
422
 
 
423
  chunks = []
424
  i = 0
425
+ while i < len(all_cta_words):
426
+ remaining = len(all_cta_words) - i
427
+ take = remaining if words_per_frame < remaining <= words_per_frame + 2 else min(words_per_frame, remaining)
 
 
 
 
 
 
428
  chunks.append(all_cta_words[i : i + take])
429
  i += take
430
 
 
431
  ass_events = []
432
  for chunk in chunks:
433
  chunk_text_only = [item['word'] for item in chunk]
434
+ frame_end = chunk[-1]['end']
435
  for idx, info in enumerate(chunk):
436
  w_start = info['start']
437
+ w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else frame_end
 
438
 
439
  text_parts = []
440
  for j, word_str in enumerate(chunk_text_only):
441
+ if j == idx: text_parts.append(f"{{\\c{highlight_text}}}{word_str}{{\\r}}")
442
+ else: text_parts.append(word_str)
443
+ ass_events.append(f"Dialogue: 1,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
444
+
 
 
 
 
445
  with open(ass_path, 'w', encoding='utf-8') as f:
446
  f.write(ass_header + '\n'.join(ass_events))
447
  return ass_path
 
462
  subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
463
  if s_err: return None, s_err
464
 
465
+ # ✨ PRE-PROCESS SPEED HACK ✨
466
+ speed_factor = 1.3
467
+
468
+ # 1. Physically speed up the audio file
469
+ fast_audio = os.path.join(temp_dir, f"fast_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3")
470
+ subprocess.run(["ffmpeg", "-v", "error", "-y", "-i", audio_path, "-filter:a", f"atempo={speed_factor}", fast_audio], check=True)
471
+ audio_path = fast_audio # Trick the script into using the fast audio!
472
+
473
+ # 2. Physically shrink the JSON timestamps
474
+ fast_json = os.path.join(temp_dir, f"fast_subs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
475
+ with open(subtitle_path, 'r', encoding='utf-8') as f: json_data = json.load(f)
476
+
477
+ for segment in json_data.get('segments', []):
478
+ segment['start_time'] = segment.get('start_time', 0) / speed_factor
479
+ segment['end_time'] = segment.get('end_time', 0) / speed_factor
480
+ for word in segment.get('words', []):
481
+ word['start_time'] = word.get('start_time', 0) / speed_factor
482
+ word['end_time'] = word.get('end_time', 0) / speed_factor
483
+
484
+ with open(fast_json, 'w', encoding='utf-8') as f: json.dump(json_data, f)
485
+ subtitle_path = fast_json # Trick the script into using the fast subtitles!
486
+
487
  video_width, video_height, video_fps = get_video_info(video_path)
488
+ audio_duration = get_audio_duration(audio_path) # Now gets the new 1:18 duration natively!
489
 
490
  script_dir = os.path.dirname(os.path.abspath(__file__))
491
  reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
 
502
  status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
503
  has_reddit_template = False
504
 
505
+ # --- 1. Find Title Exact Word Timings ---
506
+ title_start, title_end = find_title_and_cta(subtitle_path, book_title)
 
507
 
508
+ book_appears_at = title_start if title_start is not None else audio_duration * (1 - VIDEO_CONFIG['promo_percent'])
509
+ box_appears_at = title_end if title_end is not None else book_appears_at + 1.5
510
 
511
+ if title_start is not None:
512
+ status_msg += f"\n📖 Hard cut to Book Cover at {title_start:.2f}s\n"
513
+ status_msg += f"🤫 Book title silenced in subtitles.\n"
514
+ status_msg += f"🖤 CTA text starts exactly at {title_end:.2f}s\n"
515
+
516
+ # --- 2. Prepare Dynamic CTA Text (JSON) ---
517
+ status_msg += "🖤 Generating Instagram-style dynamic CTA...\n"
518
+ cta_font_size = int(video_width * 0.060)
519
+
520
+ cta_ass_path = create_cta_ass_from_json(
521
+ subtitle_path, temp_dir, box_appears_at,
522
+ cta_font_size, video_width, video_height, highlight_color
523
+ )
524
+ cta_sub_escaped = cta_ass_path.replace('\\', '/').replace(':', '\\:')
525
 
526
+ # --- 3. Process Main Subtitles (JSON) ---
527
  if enable_highlight:
528
+ status_msg += f"\n✨ Processing JSON subtitles...\n"
529
+ body_start_time = first_sub_end if has_reddit_template else 0.0
530
+ main_subtitle_path = create_body_ass_from_json(
531
  subtitle_path, temp_dir, highlight_color, font_size,
532
+ start_time_sec=body_start_time, config=SUBTITLE_CONFIG,
533
+ stop_time_sec=book_appears_at # Stops EXACTLY before the title is spoken
534
  )
535
  else:
536
  main_subtitle_path = subtitle_path
 
542
  has_book_cover = book_cover_path is not None
543
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
544
  output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
 
 
 
 
 
 
 
 
 
545
 
546
  if has_book_cover:
547
  try:
548
+ fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
549
+ fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
550
+
551
+ # Safety net: If the book title is spoken BEFORE the fade is supposed to end,
552
+ # we shorten the fade so it doesn't overlap the book cover cut.
553
+ if fade_ends_at > book_appears_at:
554
+ fade_ends_at = book_appears_at
555
+ fade_starts_at = min(fade_starts_at, fade_ends_at - 1.0)
556
+
557
+ fade_out_duration = fade_ends_at - fade_starts_at
558
+ solid_color_duration = max(0, book_appears_at - fade_ends_at)
559
+
560
+ main_video_duration = fade_ends_at
561
+ cover_segment_duration = audio_duration - book_appears_at
562
+ fade_color_hex = "#dacfc3" # Book page type color
563
+
564
+ # 1. Main Segment (background video fading into sandal color)
565
  main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
566
  cmd_main = ["ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration), "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path]
567
  subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)
568
 
569
+ # 2. Solid Color Segment (Holds the sandal color until the hard cut)
570
+ solid_color_path = None
571
+ if solid_color_duration > 0:
572
+ solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
573
+ cmd_solid = ["ffmpeg", "-f", "lavfi", "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path]
574
+ subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)
575
 
576
+ # 3. Book Cover Segment (Hard cut triggered exactly when title is spoken)
577
  cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
 
578
  cmd_cover = ["ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration), "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path]
579
  subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)
580
 
581
+ # 4. Stitch them all together
582
  concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
583
  with open(concat_list_path, 'w') as f:
584
+ f.write(f"file '{main_segment_path}'\n")
585
+ if solid_color_path:
586
+ f.write(f"file '{solid_color_path}'\n")
587
+ f.write(f"file '{cover_segment_path}'\n")
588
 
589
+ #--- 5. Build the Filter Graph (Subtitles, Overlays & SPEEDUP) ---
590
  input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
591
  curr_idx = 1
592
  curr_stream = "[0:v]"
593
 
 
594
  if has_reddit_template:
595
  input_cmd += ["-loop", "1", "-i", reddit_card_path]
596
  filter_complex = f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
 
598
  else:
599
  filter_complex = f"{curr_stream}copy[v1];"; curr_stream = "[v1]"
600
 
601
+ # 1. Burn in Main Subtitles
602
  filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"; curr_stream = "[v2]"
603
 
604
+ # 2. Burn in CTA Subtitles (Straight to v_final - NO DUPLICATES)
605
+ if cta_ass_path:
606
+ filter_complex += f"{curr_stream}ass={cta_sub_escaped}[v_final]"
607
+ else:
608
+ filter_complex += f"{curr_stream}copy[v_final]"
609
 
610
  input_cmd += ["-i", audio_path]
611
+
612
  cmd_final = input_cmd + [
613
  "-filter_complex", filter_complex,
614
  "-map", "[v_final]", "-map", f"{curr_idx}:a",
 
616
  "-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
617
  ]
618
 
619
+ status_msg += "🎬 Rendering final synchronized video...\n"
620
  subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)
621
+ except Exception as e:
622
+ return None, f"❌ Book cover processing error: {str(e)}"
 
 
623
 
624
  if os.path.exists(output_path): return output_path, f"✅ Success!"
625
  else: return None, "❌ Output not created"
626
  except Exception as e: return None, f"❌ Error: {str(e)}"
627
 
628
+
 
 
 
629
  app = FastAPI(title="Video Stitcher API")
630
 
631
  app.add_middleware(
 
636
  allow_headers=["*"],
637
  )
638
 
 
639
class StitchErrorResponse(BaseModel):
    """Error payload for the /video_stitch endpoint.

    Declared as the response model for the 400 and 500 status codes in the
    endpoint's OpenAPI `responses` mapping, and mirrored by the JSON body
    returned when stitching fails.
    """
    # Outcome flag; this schema is only used for failures, so "failed".
    status: str = Field(..., example="failed")
    # Human-readable error description (e.g. an FFmpeg failure message).
    message: str = Field(..., example="❌ FFmpeg error: ...")
    # Elapsed wall-clock processing time, formatted like "0m 5s".
    run_time: str = Field(..., example="0m 5s")
643
 
 
644
  def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
645
  filename = os.path.basename(upload_file.filename)
646
  dest_path = os.path.join(temp_dir, filename)
 
648
  f.write(upload_file.file.read())
649
  return dest_path
650
 
651
+ @app.post('/video_stitch', responses={400: {"model": StitchErrorResponse}, 500: {"model": StitchErrorResponse}})
 
 
 
 
 
 
 
 
 
652
  async def stitch_upload(
653
  request: Request,
 
654
  video_file: Optional[UploadFile] = File(None),
655
  video_url: Optional[str] = Form(None),
 
656
  audio_file: Optional[UploadFile] = File(None),
657
  audio_url: Optional[str] = Form(None),
 
658
  subtitle_file: Optional[UploadFile] = File(None),
659
  subtitle_url: Optional[str] = Form(None),
 
660
  book_cover_file: Optional[UploadFile] = File(None),
661
  book_cover_url: Optional[str] = Form(None),
662
  book_cover_base64: Optional[str] = Form(None),
663
  book_id: Optional[str] = Form(None),
 
664
  book_title: Optional[str] = Form(None),
 
665
  enable_highlight: bool = Form(True),
666
  highlight_color: str = Form('yellow'),
667
  font_size: int = Form(10),
668
  crf_quality: int = Form(23),
669
  ):
 
 
670
  # Format validation
671
+ if subtitle_file and not subtitle_file.filename.endswith('.json'):
672
+ raise HTTPException(status_code=422, detail="❌ Subtitle must be a .json file")
673
+ if subtitle_url and not subtitle_url.strip().split('?')[0].endswith('.json'):
674
+ raise HTTPException(status_code=422, detail="❌ Subtitle URL must point to a .json file")
675
+ if audio_file and audio_file.content_type not in {"audio/mpeg", "audio/mp3", "audio/wav", "audio/x-wav", "audio/aac", "audio/mp4", "audio/x-m4a"}:
676
  raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
 
 
677
  if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
678
  raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
679
 
680
+ temp_dir = tempfile.mkdtemp()
681
  payload = {
682
  'video_file': None, 'video_url': video_url,
683
  'audio_file': None, 'audio_url': audio_url,
 
702
  payload['book_cover_file'] = _save_upload_to_temp(book_cover_file, temp_dir)
703
 
704
  start_time = time.time()
 
 
705
  loop = asyncio.get_event_loop()
706
  result_path, message = await loop.run_in_executor(
707
  None,
 
732
  "X-Status": "completed",
733
  "X-Run-Time": run_time_fmt,
734
  "X-File-Size-MB": f"{file_size_mb:.2f}",
 
735
  }
736
  )
737
  else:
 
739
  {'status': 'failed', 'message': message, 'run_time': run_time_fmt},
740
  status_code=400
741
  )
 
742
  except Exception as e:
743
  raise HTTPException(status_code=500, detail=str(e))
744
 
745
@app.get('/health')
async def health():
    """Liveness probe: confirm the service is running."""
    probe_response = {"status": "ok"}
    return probe_response