Nav3005 commited on
Commit
2343e20
·
verified ·
1 Parent(s): 886ecfa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +298 -256
app.py CHANGED
@@ -1,5 +1,5 @@
 
1
  import subprocess
2
- import base64
3
  import os
4
  import tempfile
5
  import requests
@@ -7,17 +7,20 @@ import re
7
  import textwrap
8
  import shutil
9
  import time
10
- import asyncio
11
  from datetime import datetime
12
  from PIL import Image, ImageDraw, ImageFont
 
13
  from io import BytesIO
14
- from typing import Optional
15
 
 
 
 
16
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
17
  from fastapi.responses import FileResponse, JSONResponse
18
  from fastapi.middleware.cors import CORSMiddleware
19
  from pydantic import BaseModel, Field
20
-
21
  # ========================================
22
  # CONFIGURATION SECTION - CUSTOMIZE HERE
23
  # ========================================
@@ -38,14 +41,13 @@ REDDIT_CONFIG = {
38
  SUBTITLE_CONFIG = {
39
  'font_file': 'LilitaOne-Regular.ttf',
40
  'font_name': 'Lilita One',
41
- 'font_size_default': 10,
42
  'position_alignment': 5,
43
- 'margin_left': 50,
44
- 'margin_right': 70,
45
  'margin_vertical': 20,
46
  'line_spacing': 2
47
  }
48
- # go to line 462 if you want to increase/decrease CTA part's font size!!!
49
 
50
  VIDEO_CONFIG = {
51
  'reddit_scale_percent': 0.75,
@@ -55,52 +57,22 @@ VIDEO_CONFIG = {
55
  'fade_color_rgb': (218, 207, 195),
56
  }
57
 
58
-
59
  # ========================================
60
  # END CONFIGURATION SECTION
61
  # ========================================
62
 
63
# ============================================
# FINDS BOOK TITLE TO SPLIT CTA AND BODY SCRIPT
# ============================================
def find_title_and_cta(srt_path, book_title):
    """Locate the SRT block mentioning *book_title* and split out the CTA.

    Returns a 3-tuple:
      title_time -- seconds at which the title block starts
      cta_time   -- seconds at which the next block starts (None if absent)
      cta_text   -- all subtitle text after the title block (None if absent)
    (None, None, None) when the title is blank/missing or on any error.
    """
    try:
        if not book_title or not book_title.strip():
            return None, None, None

        with open(srt_path, 'r', encoding='utf-8') as handle:
            blocks = re.split(r'\n\s*\n', handle.read().strip())

        needle = book_title.lower()
        for index, raw_block in enumerate(blocks):
            parts = raw_block.strip().split('\n')
            if len(parts) < 3:
                continue
            if needle not in ' '.join(parts[2:]).lower():
                continue

            # Title found: its start time comes from the block's timing line.
            title_time = srt_time_to_ms(parts[1].split(' --> ')[0]) / 1000.0

            # The CTA is assumed to begin with the very next block.
            cta_time = None
            if index + 1 < len(blocks):
                follower = blocks[index + 1].strip().split('\n')
                if len(follower) >= 3:
                    cta_time = srt_time_to_ms(follower[1].split(' --> ')[0]) / 1000.0

            # Everything after the title block is collected as CTA text.
            trailing = [
                ' '.join(blocks[k].strip().split('\n')[2:]).strip()
                for k in range(index + 1, len(blocks))
                if len(blocks[k].strip().split('\n')) >= 3
            ]
            return title_time, cta_time, ' '.join(trailing) if trailing else None

        return None, None, None
    except Exception as e:
        print(f"Error finding title and CTA: {e}")
        return None, None, None
104
 
105
  def setup_custom_fonts_hf(temp_dir):
106
  try:
@@ -200,22 +172,6 @@ def get_audio_duration(audio_path):
200
  return float(result.stdout.strip())
201
  except Exception as e: raise Exception(f"Failed to get audio duration: {str(e)}")
202
 
203
def extract_first_subtitle(srt_path):
    """Read an SRT file and return (text, start_sec, end_sec) of its first cue.

    Falls back to ("No subtitle found", 0.0, 3.0) for empty or malformed
    content; any other failure is re-raised as a generic Exception.
    """
    try:
        def _to_seconds(stamp):
            # 'HH:MM:SS,mmm' -> float seconds
            hh, mm, rest = stamp.split(':')
            ss, millis = rest.split(',')
            return int(hh) * 3600 + int(mm) * 60 + int(ss) + int(millis) / 1000.0

        with open(srt_path, 'r', encoding='utf-8') as handle:
            cues = re.split(r'\n\s*\n', handle.read().strip())

        if not cues:
            return "No subtitle found", 0.0, 3.0

        first = cues[0].strip().split('\n')
        if len(first) < 3:
            return "No subtitle found", 0.0, 3.0

        timing = first[1].split(' --> ')
        return (' '.join(first[2:]).strip(),
                _to_seconds(timing[0].strip()),
                _to_seconds(timing[1].strip()))
    except Exception as e:
        raise Exception(f"Failed to extract first subtitle: {str(e)}")
218
-
219
  def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
220
  try:
221
  template = Image.open(template_path).convert('RGBA')
@@ -267,68 +223,161 @@ def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
267
  except Exception as e: return None, f"❌ Error downloading {file_type}: {str(e)}"
268
  return None, f"❌ Unknown error"
269
 
270
def srt_time_to_ms(time_str):
    """Convert an SRT timestamp 'HH:MM:SS,mmm' to integer milliseconds."""
    clock, _, millis = time_str.strip().partition(',')
    hours, minutes, seconds = (int(part) for part in clock.split(':'))
    return ((hours * 60 + minutes) * 60 + seconds) * 1000 + int(millis)
274
-
275
def ms_to_ass_time(ms):
    """Render milliseconds as an ASS timestamp 'H:MM:SS.cs' (centiseconds)."""
    total_cs = ms // 10  # ASS only carries centisecond precision
    s, cs = divmod(total_cs, 100)
    m, s = divmod(s, 60)
    h, m = divmod(m, 60)
    return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
281
-
282
- #-----------------------
283
- # BODY SCRIPT HIGHLIGHTS ASS
284
- #-----------------------
285
- def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
286
- font_size=None, skip_first=False, config=SUBTITLE_CONFIG,
287
- cta_start_time_sec=None):
288
- """Convert SRT to ASS. Stops before cta_start_time_sec."""
289
- if font_size is None: font_size = config['font_size_default']
290
- color_map = {'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'), 'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'), 'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'), 'blue': ('&H00FF0000', '&H00FFFFFF')}
291
- highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
292
-
293
- with open(srt_path, 'r', encoding='utf-8') as f: srt_content = f.read()
294
- ass_path = os.path.join(output_dir, 'word_highlight_subtitles.ass')
295
- ass_header = f"""[Script Info]
296
- Title: Word-by-Word Highlight Subtitles
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  ScriptType: v4.00+
298
  [V4+ Styles]
299
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
300
  Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,1,3,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
301
  [Events]
302
- Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
303
- """
304
- srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
305
- ass_events = []
306
- start_index = 1 if skip_first else 0
307
- for block in srt_blocks[start_index:]:
308
- lines = block.strip().split('\n')
309
- if len(lines) >= 3:
310
- times = lines[1].split(' --> ')
311
- if len(times) == 2:
312
- start_ms = srt_time_to_ms(times[0])
313
- if cta_start_time_sec is not None and (start_ms / 1000.0) >= cta_start_time_sec - 0.1: break
314
- end_ms = srt_time_to_ms(times[1])
315
- words = ' '.join(lines[2:]).split()
316
- if not words: continue
317
- time_per_word = (end_ms - start_ms) / len(words)
318
- for i, word in enumerate(words):
319
- word_start = start_ms + int(i * time_per_word)
320
- word_end = start_ms + int((i + 1) * time_per_word)
321
- if i == len(words) - 1: word_end = end_ms
322
- text_parts = [f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}" if j == i else w for j, w in enumerate(words)]
323
- ass_events.append(f"Dialogue: 0,{ms_to_ass_time(word_start)},{ms_to_ass_time(word_end)},Default,,0,0,0,,{' '.join(text_parts)}")
324
- with open(ass_path, 'w', encoding='utf-8') as f: f.write(ass_header); f.write('\n'.join(ass_events))
325
- return ass_path
326
-
327
- #-----------------------
328
- # CTA HIGHLIGHTS ASS
329
- #-----------------------
330
- def create_cta_highlight_ass(srt_path, output_dir, start_sec, font_size, video_width, video_height, highlight_color='yellow', config=SUBTITLE_CONFIG):
331
- """Groups CTA words into frames of max 10, but merges leftovers if they are < 3 words."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  color_map = {
333
  'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
334
  'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
@@ -338,78 +387,61 @@ def create_cta_highlight_ass(srt_path, output_dir, start_sec, font_size, video_w
338
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
339
  margin_lr = int(video_width * 0.125) + 40
340
 
341
- with open(srt_path, 'r', encoding='utf-8') as f: srt_content = f.read()
342
- ass_path = os.path.join(output_dir, 'cta_animated_subtitles.ass')
343
-
344
  ass_header = f"""[Script Info]
345
- Title: CTA Animated Subtitles
346
  ScriptType: v4.00+
347
  PlayResX: {video_width}
348
  PlayResY: {video_height}
349
  WrapStyle: 1
350
  [V4+ Styles]
351
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
352
- Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,15,0,5,{margin_lr},{margin_lr},0,1
353
  [Events]
354
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
355
 
356
- srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
357
-
358
- # 1. Flatten all CTA words into a single timed stream
359
  all_cta_words = []
360
- for block in srt_blocks:
361
- lines = block.strip().split('\n')
362
- if len(lines) >= 3:
363
- times = lines[1].split(' --> ')
364
- if len(times) == 2:
365
- start_ms = srt_time_to_ms(times[0])
366
- if (start_ms / 1000.0) < start_sec - 0.1: continue
367
-
368
- end_ms = srt_time_to_ms(times[1])
369
- words = ' '.join(lines[2:]).split()
370
- if not words: continue
371
-
372
- time_per_word = (end_ms - start_ms) / len(words)
373
- for i, word in enumerate(words):
374
- w_start = start_ms + int(i * time_per_word)
375
- w_end = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)
376
- all_cta_words.append({'word': word, 'start': w_start, 'end': w_end})
377
 
378
- # 2. Group words into chunks with "Don't leave 1 or 2 words alone" logic
379
  chunks = []
380
  i = 0
381
- total_words = len(all_cta_words)
382
-
383
- while i < total_words:
384
- remaining = total_words - i
385
- if 10 < remaining <= 13:
386
- take = remaining
387
- else:
388
- take = min(10, remaining)
389
-
390
  chunks.append(all_cta_words[i : i + take])
391
  i += take
392
 
393
- # 3. Generate ASS Dialogue lines for each chunk
394
  ass_events = []
395
  for chunk in chunks:
396
  chunk_text_only = [item['word'] for item in chunk]
397
-
398
  for idx, info in enumerate(chunk):
399
  w_start = info['start']
400
- # Match the start of the next word to avoid background box flickering
401
- w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else info['end']
402
 
403
  text_parts = []
404
  for j, word_str in enumerate(chunk_text_only):
405
- if j == idx:
406
- text_parts.append(f"{{\\c{highlight_text}}}{word_str}{{\\r}}")
407
- else:
408
- text_parts.append(word_str)
409
-
410
- styled_text = ' '.join(text_parts)
411
- ass_events.append(f"Dialogue: 1,{ms_to_ass_time(w_start)},{ms_to_ass_time(w_end)},Default,,0,0,0,,{styled_text}")
412
-
413
  with open(ass_path, 'w', encoding='utf-8') as f:
414
  f.write(ass_header + '\n'.join(ass_events))
415
  return ass_path
@@ -430,8 +462,30 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
430
  subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
431
  if s_err: return None, s_err
432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  video_width, video_height, video_fps = get_video_info(video_path)
434
- audio_duration = get_audio_duration(audio_path)
435
 
436
  script_dir = os.path.dirname(os.path.abspath(__file__))
437
  reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
@@ -448,34 +502,35 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
448
  status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
449
  has_reddit_template = False
450
 
451
- # --- 1. Find CTA Info ---
452
- title_timestamp, cta_timestamp, cta_text_raw = find_title_and_cta(subtitle_path, book_title)
453
- book_appears_at = title_timestamp if title_timestamp is not None else audio_duration * (1 - VIDEO_CONFIG['promo_percent'])
454
 
455
- box_appears_at = cta_timestamp if cta_timestamp is not None else book_appears_at + 1.5
 
456
 
457
- if title_timestamp: status_msg += f"\n📖 Book title at {title_timestamp:.2f}s\n"
458
- if cta_timestamp: status_msg += f"🖤 CTA text starts at {cta_timestamp:.2f}s\n"
459
-
460
- # --- 2. Prepare Dynamic CTA Text ---
461
- cta_ass_path = None
462
- if cta_text_raw:
463
- status_msg += "🖤 Generating Instagram-style dynamic CTA...\n"
464
- cta_font_size = int(video_width * 0.060) #INCREASE / DECREASE CTA FONT SIZE HERE
465
-
466
- cta_ass_path = create_cta_highlight_ass(
467
- subtitle_path, temp_dir, box_appears_at,
468
- cta_font_size, video_width, video_height, highlight_color
469
- )
470
- cta_sub_escaped = cta_ass_path.replace('\\', '/').replace(':', '\\:')
471
 
472
- # --- 3. Process Main Subtitles ---
473
  if enable_highlight:
474
- status_msg += f"\n✨ Processing subtitles...\n"
475
- main_subtitle_path = create_word_by_word_highlight_ass(
 
476
  subtitle_path, temp_dir, highlight_color, font_size,
477
- skip_first=has_reddit_template, config=SUBTITLE_CONFIG,
478
- cta_start_time_sec=title_timestamp
479
  )
480
  else:
481
  main_subtitle_path = subtitle_path
@@ -487,41 +542,55 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
487
  has_book_cover = book_cover_path is not None
488
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
489
  output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
490
-
491
- fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
492
- fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
493
- fade_out_duration = fade_ends_at - fade_starts_at
494
- promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
495
- solid_color_duration = max(0, book_appears_at - fade_ends_at)
496
- main_video_duration = fade_ends_at
497
- cover_segment_duration = promo_duration
498
- fade_color_hex = "#dacfc3"
499
 
500
  if has_book_cover:
501
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
503
  cmd_main = ["ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration), "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path]
504
  subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)
505
 
506
- solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
507
- cmd_solid = ["ffmpeg", "-f", "lavfi", "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path]
508
- subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)
 
 
 
509
 
 
510
  cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
511
- # Removed the fade-in effect here for a clean hard cut
512
  cmd_cover = ["ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration), "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path]
513
  subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)
514
 
 
515
  concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
516
  with open(concat_list_path, 'w') as f:
517
- f.write(f"file '{main_segment_path}'\n"); f.write(f"file '{solid_color_path}'\n"); f.write(f"file '{cover_segment_path}'\n")
 
 
 
518
 
519
- #--- 4. Build the Filter Graph ---
520
  input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
521
  curr_idx = 1
522
  curr_stream = "[0:v]"
523
 
524
- # Layer 1: Reddit Card
525
  if has_reddit_template:
526
  input_cmd += ["-loop", "1", "-i", reddit_card_path]
527
  filter_complex = f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
@@ -529,16 +598,17 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
529
  else:
530
  filter_complex = f"{curr_stream}copy[v1];"; curr_stream = "[v1]"
531
 
532
- # Layer 2: Main Subtitles (Auto-stops right before CTA)
533
  filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"; curr_stream = "[v2]"
534
 
535
- # Layer 3: Animated CTA Subtitles Overlay (Dynamic Box is built-in!)
536
- if cta_ass_path:
537
- filter_complex += f"{curr_stream}ass={cta_sub_escaped}[v_final]"
538
- else:
539
- filter_complex += f"{curr_stream}copy[v_final]"
540
 
541
  input_cmd += ["-i", audio_path]
 
542
  cmd_final = input_cmd + [
543
  "-filter_complex", filter_complex,
544
  "-map", "[v_final]", "-map", f"{curr_idx}:a",
@@ -546,21 +616,16 @@ def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, su
546
  "-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
547
  ]
548
 
549
- status_msg += "🎬 Rendering final video...\n"
550
  subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)
551
-
552
- except subprocess.CalledProcessError as e: return None, f"❌ FFmpeg error:\n{e.stderr[-1000:] if e.stderr else str(e)}"
553
- except Exception as e: return None, f"❌ Error: {str(e)}"
554
- else: return None, "❌ Book cover required."
555
 
556
  if os.path.exists(output_path): return output_path, f"✅ Success!"
557
  else: return None, "❌ Output not created"
558
  except Exception as e: return None, f"❌ Error: {str(e)}"
559
 
560
-
561
- # ========================================
562
- # FastAPI app
563
- # ========================================
564
  app = FastAPI(title="Video Stitcher API")
565
 
566
  app.add_middleware(
@@ -571,13 +636,11 @@ app.add_middleware(
571
  allow_headers=["*"],
572
  )
573
 
574
-
575
class StitchErrorResponse(BaseModel):
    # Error payload documented for /video_stitch 400/500 responses.
    status: str = Field(..., example="failed")  # e.g. "failed" (see the 400 branch of stitch_upload)
    message: str = Field(..., example="❌ FFmpeg error: ...")  # human-readable failure reason
    run_time: str = Field(..., example="0m 5s")  # elapsed processing time, "XmYs" format
579
 
580
-
581
  def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
582
  filename = os.path.basename(upload_file.filename)
583
  dest_path = os.path.join(temp_dir, filename)
@@ -585,52 +648,36 @@ def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
585
  f.write(upload_file.file.read())
586
  return dest_path
587
 
588
-
589
- @app.post(
590
- '/video_stitch',
591
- responses={400: {"model": StitchErrorResponse}, 500: {"model": StitchErrorResponse}},
592
- summary="Stitch video, audio, and subtitles into a final MP4",
593
- description=(
594
- "Synchronous endpoint — holds the connection open until encoding is complete, "
595
- "then returns the finished MP4 directly. Designed for n8n HTTP Request nodes."
596
- ),
597
- )
598
  async def stitch_upload(
599
  request: Request,
600
- # Video
601
  video_file: Optional[UploadFile] = File(None),
602
  video_url: Optional[str] = Form(None),
603
- # Audio
604
  audio_file: Optional[UploadFile] = File(None),
605
  audio_url: Optional[str] = Form(None),
606
- # Subtitle
607
  subtitle_file: Optional[UploadFile] = File(None),
608
  subtitle_url: Optional[str] = Form(None),
609
- # Book Cover (use exactly ONE)
610
  book_cover_file: Optional[UploadFile] = File(None),
611
  book_cover_url: Optional[str] = Form(None),
612
  book_cover_base64: Optional[str] = Form(None),
613
  book_id: Optional[str] = Form(None),
614
- # Book Title (used to detect CTA split point in subtitle)
615
  book_title: Optional[str] = Form(None),
616
- # Settings
617
  enable_highlight: bool = Form(True),
618
  highlight_color: str = Form('yellow'),
619
  font_size: int = Form(10),
620
  crf_quality: int = Form(23),
621
  ):
622
- temp_dir = tempfile.mkdtemp()
623
-
624
  # Format validation
625
- if video_file and video_file.content_type not in {"video/mp4", "video/quicktime", "video/x-msvideo", "video/x-matroska"}:
626
- raise HTTPException(status_code=422, detail=f"❌ Invalid video format: {video_file.content_type}")
627
- if audio_file and audio_file.content_type not in {"audio/mpeg", "audio/wav", "audio/x-wav", "audio/aac", "audio/mp4", "audio/x-m4a"}:
 
 
628
  raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
629
- if subtitle_file and not (subtitle_file.filename.endswith('.srt') or subtitle_file.filename.endswith('.json')):
630
- raise HTTPException(status_code=422, detail="❌ Subtitle must be a .srt or .json file")
631
  if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
632
  raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
633
 
 
634
  payload = {
635
  'video_file': None, 'video_url': video_url,
636
  'audio_file': None, 'audio_url': audio_url,
@@ -655,8 +702,6 @@ async def stitch_upload(
655
  payload['book_cover_file'] = _save_upload_to_temp(book_cover_file, temp_dir)
656
 
657
  start_time = time.time()
658
-
659
- # Run blocking FFmpeg work in a thread so the event loop stays healthy
660
  loop = asyncio.get_event_loop()
661
  result_path, message = await loop.run_in_executor(
662
  None,
@@ -687,7 +732,6 @@ async def stitch_upload(
687
  "X-Status": "completed",
688
  "X-Run-Time": run_time_fmt,
689
  "X-File-Size-MB": f"{file_size_mb:.2f}",
690
- "X-Message": "Video created successfully",
691
  }
692
  )
693
  else:
@@ -695,11 +739,9 @@ async def stitch_upload(
695
  {'status': 'failed', 'message': message, 'run_time': run_time_fmt},
696
  status_code=400
697
  )
698
-
699
  except Exception as e:
700
  raise HTTPException(status_code=500, detail=str(e))
701
 
702
-
703
- @app.get('/health', summary="Health check")
704
async def health():
    """Liveness probe — always reports OK."""
    return {"status": "ok"}
 
1
+
2
  import subprocess
 
3
  import os
4
  import tempfile
5
  import requests
 
7
  import textwrap
8
  import shutil
9
  import time
10
+ import json
11
  from datetime import datetime
12
  from PIL import Image, ImageDraw, ImageFont
13
+ import base64
14
  from io import BytesIO
15
+ from thefuzz import fuzz
16
 
17
+ import asyncio
18
+ from io import BytesIO
19
+ from typing import Optional
20
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
21
  from fastapi.responses import FileResponse, JSONResponse
22
  from fastapi.middleware.cors import CORSMiddleware
23
  from pydantic import BaseModel, Field
 
24
  # ========================================
25
  # CONFIGURATION SECTION - CUSTOMIZE HERE
26
  # ========================================
 
41
  SUBTITLE_CONFIG = {
42
  'font_file': 'LilitaOne-Regular.ttf',
43
  'font_name': 'Lilita One',
44
+ 'font_size_default': 11,
45
  'position_alignment': 5,
46
+ 'margin_left': 70,
47
+ 'margin_right': 80,
48
  'margin_vertical': 20,
49
  'line_spacing': 2
50
  }
 
51
 
52
  VIDEO_CONFIG = {
53
  'reddit_scale_percent': 0.75,
 
57
  'fade_color_rgb': (218, 207, 195),
58
  }
59
 
 
60
  # ========================================
61
  # END CONFIGURATION SECTION
62
  # ========================================
63
 
64
+ # =========================
65
+ # HELPER FUNCTIONS
66
+ # =========================
67
+
68
def sec_to_ass_time(seconds):
    """Convert float seconds (e.g. 1.219) to ASS time format 'H:MM:SS.cs'."""
    total_cs = int(seconds * 1000) // 10  # truncate to centiseconds
    minutes, rem_cs = divmod(total_cs, 6000)
    hours, minutes = divmod(minutes, 60)
    secs, cs = divmod(rem_cs, 100)
    return f"{hours}:{minutes:02d}:{secs:02d}.{cs:02d}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  def setup_custom_fonts_hf(temp_dir):
78
  try:
 
172
  return float(result.stdout.strip())
173
  except Exception as e: raise Exception(f"Failed to get audio duration: {str(e)}")
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
176
  try:
177
  template = Image.open(template_path).convert('RGBA')
 
223
  except Exception as e: return None, f"❌ Error downloading {file_type}: {str(e)}"
224
  return None, f"❌ Unknown error"
225
 
226
+
227
+ # ============================================
228
+ # JSON LOGIC: PARSERS & SUBTITLE GENERATORS
229
+ # ============================================
230
+
231
def extract_first_subtitle(json_path):
    """Pull the opening sentence out of a word-level transcript JSON.

    Reads words from data['segments'][*]['words'] and accumulates them up to
    the first word carrying terminal punctuation (., !, ?). Used for the
    Reddit card text.

    Returns (sentence_text, start_sec, end_sec); falls back to
    ("No subtitle found", 0.0, 3.0) when nothing usable is present or on error.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as fh:
            transcript = json.load(fh)

        collected = []
        first_start = None

        for seg in transcript.get('segments', []):
            for token in seg.get('words', []):
                text = token.get('text', '').strip()
                if not text:
                    continue
                if first_start is None:
                    first_start = token.get('start_time', 0.0)
                collected.append(text)
                # Sentence boundary: the word itself carries the punctuation.
                if re.search(r'[.!?]$', text):
                    return ' '.join(collected), first_start, token.get('end_time', 3.0)

        # No terminal punctuation anywhere — return whatever was gathered.
        if collected:
            return ' '.join(collected), first_start, 3.0
        return "No subtitle found", 0.0, 3.0
    except Exception as e:
        print(f"Error extracting first subtitle: {e}")
        return "No subtitle found", 0.0, 3.0
263
+
264
+ # ============================================
265
+ # FINDS BOOK TITLE WORD'S EXACT TIMINGS
266
+ # ============================================
267
def find_title_and_cta(json_path, book_title):
    """Locate the spoken book title inside a word-level transcript JSON.

    Fuzzy-matches sliding windows of transcript words (title length, then
    +1, then -1 words, to tolerate split/merged transcriptions) against
    *book_title* and returns (start_sec, end_sec) of the best-scoring window.
    Returns (None, None) when the match is weak (fuzz ratio < 85), the title
    is blank, or on any error.
    """
    try:
        if not book_title or not book_title.strip():
            return None, None

        with open(json_path, 'r', encoding='utf-8') as fh:
            transcript = json.load(fh)

        target = re.sub(r'[^\w\s]', '', book_title.lower()).strip()
        target_len = len(target.split())

        # Flatten every timed, non-empty word in reading order.
        timeline = []
        for seg in transcript.get('segments', []):
            for tok in seg.get('words', []):
                text = tok.get('text', '').strip()
                if text:
                    timeline.append({
                        'text': text,
                        'start': tok.get('start_time', 0.0),
                        'end': tok.get('end_time', 0.0),
                    })

        best = (0, None, None)  # (score, start_sec, end_sec)
        for span in (target_len, target_len + 1, target_len - 1):
            if span <= 0:
                continue
            for offset in range(len(timeline) - span + 1):
                window = timeline[offset : offset + span]
                candidate = ' '.join(w['text'] for w in window).lower()
                candidate = re.sub(r'[^\w\s]', '', candidate).strip()
                score = fuzz.ratio(target, candidate)
                if score > best[0]:
                    best = (score, window[0]['start'], window[-1]['end'])

        # Only trust a strong match.
        if best[0] >= 85:
            return best[1], best[2]
        return None, None
    except Exception as e:
        print(f"Error finding title: {e}")
        return None, None
316
+
317
+ def create_body_ass_from_json(json_path, output_dir, highlight_color='yellow',
318
+ font_size=None, start_time_sec=0.0, config=SUBTITLE_CONFIG,
319
+ stop_time_sec=None):
320
+ """Creates dynamic body subtitles starting at 1 word and increasing by 2 up to 50."""
321
+ if font_size is None: font_size = config['font_size_default']
322
+ color_map = {'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'), 'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'), 'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'), 'blue': ('&H00FF0000', '&H00FFFFFF')}
323
+ highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
324
+
325
+ ass_path = os.path.join(output_dir, 'body_subtitles.ass')
326
+ ass_header = f"""[Script Info]
327
+ Title: Body JSON Subtitles
328
  ScriptType: v4.00+
329
  [V4+ Styles]
330
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
331
  Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,1,3,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
332
  [Events]
333
+ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
334
+
335
+ with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f)
336
+
337
+ all_words = []
338
+ for segment in data.get('segments', []):
339
+ for word_data in segment.get('words', []):
340
+ word_text = word_data.get('text', '').strip()
341
+ start_ms = word_data.get('start_time', 0)
342
+ if start_ms < start_time_sec - 0.1: continue
343
+ if stop_time_sec is not None and start_ms >= stop_time_sec - 0.1: continue
344
+ if word_text:
345
+ all_words.append({'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})
346
+
347
+ chunks = []
348
+ i = 0
349
+ current_chunk_size = 1
350
+ max_chunk_size = 50
351
+
352
+ while i < len(all_words):
353
+ remaining = len(all_words) - i
354
+ take = min(current_chunk_size, remaining)
355
+ chunks.append(all_words[i : i + take])
356
+ i += take
357
+ if current_chunk_size < max_chunk_size:
358
+ current_chunk_size = min(current_chunk_size + 4, max_chunk_size)
359
+
360
+ ass_events = []
361
+ for chunk in chunks:
362
+ chunk_text_only = [item['word'] for item in chunk]
363
+ frame_end = chunk[-1]['end']
364
+ for idx, info in enumerate(chunk):
365
+ w_start = info['start']
366
+ w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else frame_end
367
+
368
+ text_parts = []
369
+ for j, word_str in enumerate(chunk_text_only):
370
+ if j == idx: text_parts.append(f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{word_str}{{\\r}}")
371
+ else: text_parts.append(word_str)
372
+ ass_events.append(f"Dialogue: 0,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
373
+
374
+ with open(ass_path, 'w', encoding='utf-8') as f:
375
+ f.write(ass_header + '\n'.join(ass_events))
376
+ return ass_path
377
+
378
+
379
+ def create_cta_ass_from_json(json_path, output_dir, start_sec, font_size, video_width, video_height, highlight_color='yellow', config=SUBTITLE_CONFIG, words_per_frame=10):
380
+ """Creates the chunky, Instagram-style box subtitles for the CTA."""
381
  color_map = {
382
  'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
383
  'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
 
387
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
388
  margin_lr = int(video_width * 0.125) + 40
389
 
390
+ ass_path = os.path.join(output_dir, 'cta_subtitles.ass')
391
+ # Style logic: WrapStyle=1, BorderStyle=3, Outline=10 (Tight Instagram Box)
 
392
  ass_header = f"""[Script Info]
393
+ Title: CTA JSON Subtitles
394
  ScriptType: v4.00+
395
  PlayResX: {video_width}
396
  PlayResY: {video_height}
397
  WrapStyle: 1
398
  [V4+ Styles]
399
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
400
+ Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,10,0,5,{margin_lr},{margin_lr},0,1
401
  [Events]
402
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
403
 
404
+ with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f)
405
+
 
406
  all_cta_words = []
407
+ for segment in data.get('segments', []):
408
+ for word_data in segment.get('words', []):
409
+ word_text = word_data.get('text', '').strip()
410
+ start_ms = word_data.get('start_time', 0)
411
+ if start_ms < start_sec - 0.1: continue # Skip words before the CTA starts
412
+ if word_text:
413
+ # Merge "Book" and "Access" into "BookXcess"
414
+ if word_text.lower().startswith('access') and len(all_cta_words) > 0 and all_cta_words[-1]['word'].lower() == 'book':
415
+ # Keep any trailing punctuation (like commas or periods) from "Access"
416
+ punctuation = word_text[6:]
417
+ all_cta_words[-1]['word'] = 'BookXcess' + punctuation
418
+ # Extend the highlight time to cover both words
419
+ all_cta_words[-1]['end'] = word_data.get('end_time', 0)
420
+ continue # Skip adding "Access" as a separate word
421
+ all_cta_words.append({'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})
 
 
422
 
 
423
  chunks = []
424
  i = 0
425
+ while i < len(all_cta_words):
426
+ remaining = len(all_cta_words) - i
427
+ take = remaining if words_per_frame < remaining <= words_per_frame + 2 else min(words_per_frame, remaining)
 
 
 
 
 
 
428
  chunks.append(all_cta_words[i : i + take])
429
  i += take
430
 
 
431
  ass_events = []
432
  for chunk in chunks:
433
  chunk_text_only = [item['word'] for item in chunk]
434
+ frame_end = chunk[-1]['end']
435
  for idx, info in enumerate(chunk):
436
  w_start = info['start']
437
+ w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else frame_end
 
438
 
439
  text_parts = []
440
  for j, word_str in enumerate(chunk_text_only):
441
+ if j == idx: text_parts.append(f"{{\\c{highlight_text}}}{word_str}{{\\r}}")
442
+ else: text_parts.append(word_str)
443
+ ass_events.append(f"Dialogue: 1,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
444
+
 
 
 
 
445
  with open(ass_path, 'w', encoding='utf-8') as f:
446
  f.write(ass_header + '\n'.join(ass_events))
447
  return ass_path
 
462
  subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
463
  if s_err: return None, s_err
464
 
465
+ # ✨ PRE-PROCESS SPEED HACK ✨
466
+ speed_factor = 1.3
467
+
468
+ # 1. Physically speed up the audio file
469
+ fast_audio = os.path.join(temp_dir, f"fast_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3")
470
+ subprocess.run(["ffmpeg", "-v", "error", "-y", "-i", audio_path, "-filter:a", f"atempo={speed_factor}", fast_audio], check=True)
471
+ audio_path = fast_audio # Trick the script into using the fast audio!
472
+
473
+ # 2. Physically shrink the JSON timestamps
474
+ fast_json = os.path.join(temp_dir, f"fast_subs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
475
+ with open(subtitle_path, 'r', encoding='utf-8') as f: json_data = json.load(f)
476
+
477
+ for segment in json_data.get('segments', []):
478
+ segment['start_time'] = segment.get('start_time', 0) / speed_factor
479
+ segment['end_time'] = segment.get('end_time', 0) / speed_factor
480
+ for word in segment.get('words', []):
481
+ word['start_time'] = word.get('start_time', 0) / speed_factor
482
+ word['end_time'] = word.get('end_time', 0) / speed_factor
483
+
484
+ with open(fast_json, 'w', encoding='utf-8') as f: json.dump(json_data, f)
485
+ subtitle_path = fast_json # Trick the script into using the fast subtitles!
486
+
487
  video_width, video_height, video_fps = get_video_info(video_path)
488
+ audio_duration = get_audio_duration(audio_path) # Now gets the new 1:18 duration natively!
489
 
490
  script_dir = os.path.dirname(os.path.abspath(__file__))
491
  reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
 
502
  status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
503
  has_reddit_template = False
504
 
505
+ # --- 1. Find Title Exact Word Timings ---
506
+ title_start, title_end = find_title_and_cta(subtitle_path, book_title)
 
507
 
508
+ book_appears_at = title_start if title_start is not None else audio_duration * (1 - VIDEO_CONFIG['promo_percent'])
509
+ box_appears_at = title_end if title_end is not None else book_appears_at + 1.5
510
 
511
+ if title_start is not None:
512
+ status_msg += f"\n📖 Hard cut to Book Cover at {title_start:.2f}s\n"
513
+ status_msg += f"🤫 Book title silenced in subtitles.\n"
514
+ status_msg += f"🖤 CTA text starts exactly at {title_end:.2f}s\n"
515
+
516
+ # --- 2. Prepare Dynamic CTA Text (JSON) ---
517
+ status_msg += "🖤 Generating Instagram-style dynamic CTA...\n"
518
+ cta_font_size = int(video_width * 0.060)
519
+
520
+ cta_ass_path = create_cta_ass_from_json(
521
+ subtitle_path, temp_dir, box_appears_at,
522
+ cta_font_size, video_width, video_height, highlight_color
523
+ )
524
+ cta_sub_escaped = cta_ass_path.replace('\\', '/').replace(':', '\\:')
525
 
526
+ # --- 3. Process Main Subtitles (JSON) ---
527
  if enable_highlight:
528
+ status_msg += f"\n✨ Processing JSON subtitles...\n"
529
+ body_start_time = first_sub_end if has_reddit_template else 0.0
530
+ main_subtitle_path = create_body_ass_from_json(
531
  subtitle_path, temp_dir, highlight_color, font_size,
532
+ start_time_sec=body_start_time, config=SUBTITLE_CONFIG,
533
+ stop_time_sec=book_appears_at # Stops EXACTLY before the title is spoken
534
  )
535
  else:
536
  main_subtitle_path = subtitle_path
 
542
  has_book_cover = book_cover_path is not None
543
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
544
  output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
 
 
 
 
 
 
 
 
 
545
 
546
  if has_book_cover:
547
  try:
548
+ fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
549
+ fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
550
+
551
+ # Safety net: If the book title is spoken BEFORE the fade is supposed to end,
552
+ # we shorten the fade so it doesn't overlap the book cover cut.
553
+ if fade_ends_at > book_appears_at:
554
+ fade_ends_at = book_appears_at
555
+ fade_starts_at = min(fade_starts_at, fade_ends_at - 1.0)
556
+
557
+ fade_out_duration = fade_ends_at - fade_starts_at
558
+ solid_color_duration = max(0, book_appears_at - fade_ends_at)
559
+
560
+ main_video_duration = fade_ends_at
561
+ cover_segment_duration = audio_duration - book_appears_at
562
+ fade_color_hex = "#dacfc3" # Book page type color
563
+
564
+ # 1. Main Segment (background video fading into sandal color)
565
  main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
566
  cmd_main = ["ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration), "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path]
567
  subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)
568
 
569
+ # 2. Solid Color Segment (Holds the sandal color until the hard cut)
570
+ solid_color_path = None
571
+ if solid_color_duration > 0:
572
+ solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
573
+ cmd_solid = ["ffmpeg", "-f", "lavfi", "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path]
574
+ subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)
575
 
576
+ # 3. Book Cover Segment (Hard cut triggered exactly when title is spoken)
577
  cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
 
578
  cmd_cover = ["ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration), "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path]
579
  subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)
580
 
581
+ # 4. Stitch them all together
582
  concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
583
  with open(concat_list_path, 'w') as f:
584
+ f.write(f"file '{main_segment_path}'\n")
585
+ if solid_color_path:
586
+ f.write(f"file '{solid_color_path}'\n")
587
+ f.write(f"file '{cover_segment_path}'\n")
588
 
589
+ #--- 5. Build the Filter Graph (Subtitles, Overlays & SPEEDUP) ---
590
  input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
591
  curr_idx = 1
592
  curr_stream = "[0:v]"
593
 
 
594
  if has_reddit_template:
595
  input_cmd += ["-loop", "1", "-i", reddit_card_path]
596
  filter_complex = f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
 
598
  else:
599
  filter_complex = f"{curr_stream}copy[v1];"; curr_stream = "[v1]"
600
 
601
+ # 1. Burn in Main Subtitles
602
  filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"; curr_stream = "[v2]"
603
 
604
+ # 2. Burn in CTA Subtitles (Straight to v_final - NO DUPLICATES)
605
+ if cta_ass_path:
606
+ filter_complex += f"{curr_stream}ass={cta_sub_escaped}[v_final]"
607
+ else:
608
+ filter_complex += f"{curr_stream}copy[v_final]"
609
 
610
  input_cmd += ["-i", audio_path]
611
+
612
  cmd_final = input_cmd + [
613
  "-filter_complex", filter_complex,
614
  "-map", "[v_final]", "-map", f"{curr_idx}:a",
 
616
  "-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
617
  ]
618
 
619
+ status_msg += "🎬 Rendering final synchronized video...\n"
620
  subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)
621
+ except Exception as e:
622
+ return None, f"❌ Book cover processing error: {str(e)}"
 
 
623
 
624
  if os.path.exists(output_path): return output_path, f"✅ Success!"
625
  else: return None, "❌ Output not created"
626
  except Exception as e: return None, f"❌ Error: {str(e)}"
627
 
628
+
 
 
 
629
  app = FastAPI(title="Video Stitcher API")
630
 
631
  app.add_middleware(
 
636
  allow_headers=["*"],
637
  )
638
 
 
639
class StitchErrorResponse(BaseModel):
    """Error payload for the /video_stitch endpoint.

    Declared as the response model for the 400 and 500 status codes in the
    endpoint's OpenAPI `responses` mapping, and mirrored by the JSON body
    returned when stitching fails.
    """
    # Outcome flag; this schema is only used for failures, so "failed".
    status: str = Field(..., example="failed")
    # Human-readable error description (e.g. an FFmpeg failure message).
    message: str = Field(..., example="❌ FFmpeg error: ...")
    # Elapsed wall-clock processing time, formatted like "0m 5s".
    run_time: str = Field(..., example="0m 5s")
643
 
 
644
  def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
645
  filename = os.path.basename(upload_file.filename)
646
  dest_path = os.path.join(temp_dir, filename)
 
648
  f.write(upload_file.file.read())
649
  return dest_path
650
 
651
+ @app.post('/video_stitch', responses={400: {"model": StitchErrorResponse}, 500: {"model": StitchErrorResponse}})
 
 
 
 
 
 
 
 
 
652
  async def stitch_upload(
653
  request: Request,
 
654
  video_file: Optional[UploadFile] = File(None),
655
  video_url: Optional[str] = Form(None),
 
656
  audio_file: Optional[UploadFile] = File(None),
657
  audio_url: Optional[str] = Form(None),
 
658
  subtitle_file: Optional[UploadFile] = File(None),
659
  subtitle_url: Optional[str] = Form(None),
 
660
  book_cover_file: Optional[UploadFile] = File(None),
661
  book_cover_url: Optional[str] = Form(None),
662
  book_cover_base64: Optional[str] = Form(None),
663
  book_id: Optional[str] = Form(None),
 
664
  book_title: Optional[str] = Form(None),
 
665
  enable_highlight: bool = Form(True),
666
  highlight_color: str = Form('yellow'),
667
  font_size: int = Form(10),
668
  crf_quality: int = Form(23),
669
  ):
 
 
670
  # Format validation
671
+ if subtitle_file and not subtitle_file.filename.endswith('.json'):
672
+ raise HTTPException(status_code=422, detail="❌ Subtitle must be a .json file")
673
+ if subtitle_url and not subtitle_url.strip().split('?')[0].endswith('.json'):
674
+ raise HTTPException(status_code=422, detail="❌ Subtitle URL must point to a .json file")
675
+ if audio_file and audio_file.content_type not in {"audio/mpeg", "audio/mp3", "audio/wav", "audio/x-wav", "audio/aac", "audio/mp4", "audio/x-m4a"}:
676
  raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
 
 
677
  if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
678
  raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
679
 
680
+ temp_dir = tempfile.mkdtemp()
681
  payload = {
682
  'video_file': None, 'video_url': video_url,
683
  'audio_file': None, 'audio_url': audio_url,
 
702
  payload['book_cover_file'] = _save_upload_to_temp(book_cover_file, temp_dir)
703
 
704
  start_time = time.time()
 
 
705
  loop = asyncio.get_event_loop()
706
  result_path, message = await loop.run_in_executor(
707
  None,
 
732
  "X-Status": "completed",
733
  "X-Run-Time": run_time_fmt,
734
  "X-File-Size-MB": f"{file_size_mb:.2f}",
 
735
  }
736
  )
737
  else:
 
739
  {'status': 'failed', 'message': message, 'run_time': run_time_fmt},
740
  status_code=400
741
  )
 
742
  except Exception as e:
743
  raise HTTPException(status_code=500, detail=str(e))
744
 
745
@app.get('/health')
async def health():
    """Liveness probe: confirm the service is running."""
    probe_response = {"status": "ok"}
    return probe_response