Nav3005 commited on
Commit
fa6f8e0
Β·
verified Β·
1 Parent(s): db6ca8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +401 -469
app.py CHANGED
@@ -18,47 +18,50 @@ from fastapi.responses import FileResponse, JSONResponse
18
  from fastapi.middleware.cors import CORSMiddleware
19
  from pydantic import BaseModel, Field
20
 
21
-
22
  # ========================================
23
  # CONFIGURATION SECTION - CUSTOMIZE HERE
24
  # ========================================
25
 
26
  REDDIT_CONFIG = {
27
- 'template_file': 'reddit_template.png',
28
- 'font_file': 'RFDewi-Bold.ttf',
29
- 'font_size_max': 180,
30
- 'font_size_min': 16,
31
- 'text_wrap_width': 35,
32
- 'text_color': 'black',
33
- 'line_spacing': 10,
34
- 'text_box_width_percent': 0.85,
35
- 'text_box_height_percent': 0.65,
36
- 'y_offset': 20,
37
  }
38
 
39
  SUBTITLE_CONFIG = {
40
- 'font_file': 'LilitaOne-Regular.ttf',
41
- 'font_name': 'Lilita One',
42
- 'font_size_default': 10,
43
- 'position_alignment': 5,
44
- 'margin_left': 50,
45
- 'margin_right': 70,
46
- 'margin_vertical': 20,
47
- 'line_spacing': 2,
48
  }
 
49
 
50
  VIDEO_CONFIG = {
51
- 'reddit_scale_percent': 0.75,
52
- 'fade_start_percent': 0.70,
53
- 'fade_end_percent': 0.85,
54
- 'promo_percent': 0.094,
55
- 'fade_color_rgb': (218, 207, 195),
56
  }
57
 
 
58
  # ========================================
59
  # END CONFIGURATION SECTION
60
  # ========================================
61
 
 
 
62
 
63
  # ============================================
64
  # FINDS BOOK TITLE TO SPLIT CTA AND BODY SCRIPT
@@ -76,18 +79,25 @@ def find_title_and_cta(srt_path, book_title):
76
  if len(lines) >= 3:
77
  subtitle_text = ' '.join(lines[2:])
78
  if book_title_lower in subtitle_text.lower():
 
79
  times = lines[1].split(' --> ')
80
  title_time = srt_time_to_ms(times[0]) / 1000.0
 
81
  cta_time = None
82
  cta_text_parts = []
 
 
83
  if i + 1 < len(blocks):
84
  next_block_lines = blocks[i + 1].strip().split('\n')
85
  if len(next_block_lines) >= 3:
86
  cta_time = srt_time_to_ms(next_block_lines[1].split(' --> ')[0]) / 1000.0
 
 
87
  for j in range(i + 1, len(blocks)):
88
  next_lines = blocks[j].strip().split('\n')
89
  if len(next_lines) >= 3:
90
  cta_text_parts.append(' '.join(next_lines[2:]).strip())
 
91
  cta_text = ' '.join(cta_text_parts) if cta_text_parts else None
92
  return title_time, cta_time, cta_text
93
  return None, None, None
@@ -95,248 +105,197 @@ def find_title_and_cta(srt_path, book_title):
95
  print(f"Error finding title and CTA: {e}")
96
  return None, None, None
97
 
98
-
99
  def setup_custom_fonts_hf(temp_dir):
100
- try:
101
- fonts_dir = os.path.join(temp_dir, 'fonts')
102
- os.makedirs(fonts_dir, exist_ok=True)
103
- script_dir = os.path.dirname(os.path.abspath(__file__))
104
- repo_fonts_dir = os.path.join(script_dir, 'fonts')
105
- fonts_to_copy = []
106
- if os.path.exists(repo_fonts_dir):
107
- for font_file in os.listdir(repo_fonts_dir):
108
- if font_file.endswith(('.ttf', '.otf', '.TTF', '.OTF')):
109
- fonts_to_copy.append(os.path.join(repo_fonts_dir, font_file))
110
- for item in [REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']]:
111
- font_path = os.path.join(script_dir, item)
112
- if os.path.exists(font_path) and font_path not in fonts_to_copy:
113
- fonts_to_copy.append(font_path)
114
- for src in fonts_to_copy:
115
- dst = os.path.join(fonts_dir, os.path.basename(src))
116
- shutil.copy(src, dst)
117
- if fonts_to_copy:
118
- fonts_conf = f"""<?xml version="1.0"?>
119
  <fontconfig><dir>{fonts_dir}</dir><cachedir>{temp_dir}/cache</cachedir></fontconfig>"""
120
- conf_path = os.path.join(temp_dir, 'fonts.conf')
121
- with open(conf_path, 'w') as f:
122
- f.write(fonts_conf)
123
- env = os.environ.copy()
124
- env['FONTCONFIG_FILE'] = conf_path
125
- env['FONTCONFIG_PATH'] = temp_dir
126
- return env
127
- return os.environ.copy()
128
- except Exception:
129
- return os.environ.copy()
130
-
131
 
132
  def download_file_from_url(url, output_dir, filename):
133
- try:
134
- response = requests.get(url, stream=True, timeout=30)
135
- response.raise_for_status()
136
- file_path = os.path.join(output_dir, filename)
137
- with open(file_path, 'wb') as f:
138
- for chunk in response.iter_content(chunk_size=8192):
139
- f.write(chunk)
140
- return file_path
141
- except Exception as e:
142
- raise Exception(f"Failed to download file: {str(e)}")
143
-
144
 
145
  def download_book_cover(book_id, output_dir):
146
- try:
147
- image_url = f"https://books.google.com/books/publisher/content/images/frontcover/{book_id}"
148
- response = requests.get(image_url, timeout=30)
149
- response.raise_for_status()
150
- image_path = os.path.join(output_dir, 'book_cover.png')
151
- with open(image_path, 'wb') as f:
152
- f.write(response.content)
153
- Image.open(image_path).verify()
154
- return image_path
155
- except Exception as e:
156
- raise Exception(f"Failed to download book cover: {str(e)}")
157
-
158
 
159
  def decode_base64_image(base64_string, output_dir):
160
- try:
161
- if ',' in base64_string and 'base64' in base64_string:
162
- base64_string = base64_string.split(',', 1)[1]
163
- image_data = base64.b64decode(base64_string.strip())
164
- Image.open(BytesIO(image_data)).verify()
165
- output_path = os.path.join(output_dir, f"book_cover_b64_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png")
166
- Image.open(BytesIO(image_data)).save(output_path, 'PNG')
167
- return output_path
168
- except Exception as e:
169
- raise Exception(f"Base64 decode failed: {str(e)}")
170
-
171
 
172
  def validate_book_cover_input(book_cover_file, book_cover_url, book_cover_base64, book_id, temp_dir):
173
- has_file = book_cover_file is not None
174
- has_url = bool(book_cover_url and book_cover_url.strip())
175
- has_base64 = bool(book_cover_base64 and book_cover_base64.strip())
176
- has_id = bool(book_id and book_id.strip())
177
- methods_count = sum([has_file, has_url, has_base64, has_id])
178
- if methods_count == 0:
179
- return None, None
180
- if methods_count > 1:
181
- return None, "❌ Book Cover: Use only ONE method (file, url, base64, or book_id)"
182
- try:
183
- if has_file:
184
- return str(book_cover_file.name if hasattr(book_cover_file, 'name') else book_cover_file), None
185
- if has_url:
186
- return download_file_from_url(book_cover_url.strip(), temp_dir, f"book_cover_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"), None
187
- if has_base64:
188
- return decode_base64_image(book_cover_base64.strip(), temp_dir), None
189
- if has_id:
190
- return download_book_cover(book_id.strip(), temp_dir), None
191
- except Exception as e:
192
- return None, f"❌ Book cover error: {str(e)}"
193
- return None, None
194
-
195
 
196
  def get_video_info(video_path):
197
- try:
198
- cmd_res = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "stream=width,height", "-of", "csv=s=x:p=0", video_path]
199
- result = subprocess.run(cmd_res, capture_output=True, text=True, check=True)
200
- width, height = result.stdout.strip().split('x')
201
- cmd_fps = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "stream=r_frame_rate", "-of", "default=noprint_wrappers=1:nokey=1", video_path]
202
- result = subprocess.run(cmd_fps, capture_output=True, text=True, check=True)
203
- fps_str = result.stdout.strip()
204
- fps = float(fps_str.split('/')[0]) / float(fps_str.split('/')[1]) if '/' in fps_str else float(fps_str)
205
- return int(width), int(height), fps
206
- except Exception as e:
207
- raise Exception(f"Failed to get video info: {str(e)}")
208
-
209
 
210
  def get_audio_duration(audio_path):
211
- try:
212
- cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", audio_path]
213
- result = subprocess.run(cmd, capture_output=True, text=True, check=True)
214
- return float(result.stdout.strip())
215
- except Exception as e:
216
- raise Exception(f"Failed to get audio duration: {str(e)}")
217
-
218
 
219
  def extract_first_subtitle(srt_path):
220
- try:
221
- with open(srt_path, 'r', encoding='utf-8') as f:
222
- content = f.read()
223
- blocks = re.split(r'\n\s*\n', content.strip())
224
- if not blocks:
225
- return "No subtitle found", 0.0, 3.0
226
- first_block = blocks[0].strip().split('\n')
227
- if len(first_block) >= 3:
228
- times = first_block[1].split(' --> ')
229
- def time_to_sec(t):
230
- h, m, s = t.split(':')
231
- s, ms = s.split(',')
232
- return int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 1000.0
233
- return ' '.join(first_block[2:]).strip(), time_to_sec(times[0].strip()), time_to_sec(times[1].strip())
234
- return "No subtitle found", 0.0, 3.0
235
- except Exception as e:
236
- raise Exception(f"Failed to extract first subtitle: {str(e)}")
237
-
238
 
239
  def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
240
- try:
241
- template = Image.open(template_path).convert('RGBA')
242
- temp_w, temp_h = template.size
243
- box_w = int(temp_w * config['text_box_width_percent'])
244
- box_h = int(temp_h * config['text_box_height_percent'])
245
- script_dir = os.path.dirname(os.path.abspath(__file__))
246
- font_paths = [os.path.join(script_dir, 'fonts', config['font_file']), os.path.join(script_dir, config['font_file'])]
247
- best_font_size = config['font_size_max']
248
- best_wrapped_text = hook_text
249
- for font_size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
250
- font = None
251
- for fp in font_paths:
252
- if os.path.exists(fp):
253
- try:
254
- font = ImageFont.truetype(fp, font_size)
255
- break
256
- except:
257
- pass
258
- if font is None:
259
- font = ImageFont.load_default()
260
- wrapped = textwrap.fill(hook_text, width=config['text_wrap_width'])
261
- draw = ImageDraw.Draw(template)
262
- bbox = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=config['line_spacing'])
263
- if (bbox[2] - bbox[0] <= box_w and bbox[3] - bbox[1] <= box_h):
264
- best_font_size = font_size
265
- best_wrapped_text = wrapped
266
- break
267
- font = None
268
- for fp in font_paths:
269
- if os.path.exists(fp):
270
- try:
271
- font = ImageFont.truetype(fp, best_font_size)
272
- break
273
- except:
274
- pass
275
- if font is None:
276
- font = ImageFont.load_default()
277
- draw = ImageDraw.Draw(template)
278
- bbox = draw.multiline_textbbox((0, 0), best_wrapped_text, font=font, spacing=config['line_spacing'])
279
- x = (temp_w - (bbox[2] - bbox[0])) / 2
280
- y = (temp_h - (bbox[3] - bbox[1])) / 2 + config['y_offset']
281
- draw.multiline_text((x, y), best_wrapped_text, fill=config['text_color'], font=font, spacing=config['line_spacing'], align='left')
282
- output_path = os.path.join(output_dir, 'reddit_card_composite.png')
283
- template.save(output_path, 'PNG')
284
- return output_path
285
- except Exception as e:
286
- raise Exception(f"Failed to create Reddit card: {str(e)}")
287
-
288
 
289
  def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
290
- has_upload = uploaded_file is not None
291
- has_url = url_string and url_string.strip()
292
- if not has_upload and not has_url:
293
- return None, f"❌ Please provide {file_type}"
294
- if has_upload and has_url:
295
- return None, f"❌ Use only ONE method for {file_type}"
296
- if has_upload:
297
- return str(uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file), None
298
- if has_url:
299
- try:
300
- fname = f"{file_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{url_string.split('/')[-1] or 'file'}"
301
- return download_file_from_url(url_string.strip(), temp_dir, fname), None
302
- except Exception as e:
303
- return None, f"❌ Error downloading {file_type}: {str(e)}"
304
- return None, "❌ Unknown error"
305
-
306
 
307
  def srt_time_to_ms(time_str):
308
- h, m, s = time_str.strip().split(':')
309
- s, ms = s.split(',')
310
- return int(h) * 3600000 + int(m) * 60000 + int(s) * 1000 + int(ms)
311
-
312
 
313
  def ms_to_ass_time(ms):
314
- h, ms = divmod(ms, 3600000)
315
- m, ms = divmod(ms, 60000)
316
- s, ms = divmod(ms, 1000)
317
- cs = ms // 10
318
- return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
319
 
320
-
321
- # -----------------------
322
  # BODY SCRIPT HIGHLIGHTS ASS
323
- # -----------------------
324
- def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
325
- font_size=None, skip_first=False, config=SUBTITLE_CONFIG,
326
- cta_start_time_sec=None):
327
- if font_size is None:
328
- font_size = config['font_size_default']
329
- color_map = {
330
- 'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
331
- 'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
332
- 'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'),
333
- 'blue': ('&H00FF0000', '&H00FFFFFF'),
334
- }
335
- highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
336
- with open(srt_path, 'r', encoding='utf-8') as f:
337
- srt_content = f.read()
338
- ass_path = os.path.join(output_dir, 'word_highlight_subtitles.ass')
339
- ass_header = f"""[Script Info]
340
  Title: Word-by-Word Highlight Subtitles
341
  ScriptType: v4.00+
342
  [V4+ Styles]
@@ -345,54 +304,46 @@ Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H0000000
345
  [Events]
346
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
347
  """
348
- srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
349
- ass_events = []
350
- start_index = 1 if skip_first else 0
351
- for block in srt_blocks[start_index:]:
352
- lines = block.strip().split('\n')
353
- if len(lines) >= 3:
354
- times = lines[1].split(' --> ')
355
- if len(times) == 2:
356
- start_ms = srt_time_to_ms(times[0])
357
- if cta_start_time_sec is not None and (start_ms / 1000.0) >= cta_start_time_sec - 0.1:
358
- break
359
- end_ms = srt_time_to_ms(times[1])
360
- words = ' '.join(lines[2:]).split()
361
- if not words:
362
- continue
363
- time_per_word = (end_ms - start_ms) / len(words)
364
- for i, word in enumerate(words):
365
- word_start = start_ms + int(i * time_per_word)
366
- word_end = start_ms + int((i + 1) * time_per_word)
367
- if i == len(words) - 1:
368
- word_end = end_ms
369
- text_parts = [
370
- f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}" if j == i else w
371
- for j, w in enumerate(words)
372
- ]
373
- ass_events.append(f"Dialogue: 0,{ms_to_ass_time(word_start)},{ms_to_ass_time(word_end)},Default,,0,0,0,,{' '.join(text_parts)}")
374
- with open(ass_path, 'w', encoding='utf-8') as f:
375
- f.write(ass_header)
376
- f.write('\n'.join(ass_events))
377
- return ass_path
378
-
379
-
380
- # -----------------------
381
  # CTA HIGHLIGHTS ASS
382
- # -----------------------
383
  def create_cta_highlight_ass(srt_path, output_dir, start_sec, font_size, video_width, video_height, highlight_color='yellow', config=SUBTITLE_CONFIG):
 
384
  color_map = {
385
  'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
386
  'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
387
  'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'),
388
- 'blue': ('&H00FF0000', '&H00FFFFFF'),
389
  }
390
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
391
- margin_lr = int(video_width * 0.125) + 40
392
 
393
- with open(srt_path, 'r', encoding='utf-8') as f:
394
- srt_content = f.read()
395
  ass_path = os.path.join(output_dir, 'cta_animated_subtitles.ass')
 
396
  ass_header = f"""[Script Info]
397
  Title: CTA Animated Subtitles
398
  ScriptType: v4.00+
@@ -403,9 +354,11 @@ WrapStyle: 1
403
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
404
  Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,15,0,5,{margin_lr},{margin_lr},0,1
405
  [Events]
406
- Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
407
- """
408
  srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
 
 
409
  all_cta_words = []
410
  for block in srt_blocks:
411
  lines = block.strip().split('\n')
@@ -413,220 +366,199 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
413
  times = lines[1].split(' --> ')
414
  if len(times) == 2:
415
  start_ms = srt_time_to_ms(times[0])
416
- if (start_ms / 1000.0) < start_sec - 0.1:
417
- continue
418
  end_ms = srt_time_to_ms(times[1])
419
  words = ' '.join(lines[2:]).split()
420
- if not words:
421
- continue
422
  time_per_word = (end_ms - start_ms) / len(words)
423
  for i, word in enumerate(words):
424
  w_start = start_ms + int(i * time_per_word)
425
  w_end = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)
426
  all_cta_words.append({'word': word, 'start': w_start, 'end': w_end})
427
 
 
428
  chunks = []
429
  i = 0
430
  total_words = len(all_cta_words)
 
431
  while i < total_words:
432
  remaining = total_words - i
433
- take = remaining if 10 < remaining <= 13 else min(10, remaining)
434
- chunks.append(all_cta_words[i: i + take])
 
 
 
 
435
  i += take
436
 
 
437
  ass_events = []
438
  for chunk in chunks:
439
  chunk_text_only = [item['word'] for item in chunk]
 
440
  for idx, info in enumerate(chunk):
441
  w_start = info['start']
442
- w_end = chunk[idx + 1]['start'] if idx + 1 < len(chunk) else info['end']
443
- text_parts = [
444
- f"{{\\c{highlight_text}}}{word_str}{{\\r}}" if j == idx else word_str
445
- for j, word_str in enumerate(chunk_text_only)
446
- ]
447
- ass_events.append(f"Dialogue: 1,{ms_to_ass_time(w_start)},{ms_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
448
-
449
- with open(ass_path, 'w', encoding='utf-8') as f:
 
 
 
 
 
 
450
  f.write(ass_header + '\n'.join(ass_events))
451
  return ass_path
452
 
453
-
454
  # =========================
455
  # MAIN STITCH FUNCTION
456
  # =========================
457
- def stitch_media(
458
- video_file, video_url,
459
- audio_file, audio_url,
460
- subtitle_file, subtitle_url,
461
- book_cover_file, book_cover_url, book_cover_base64, book_id,
462
- book_title,
463
- enable_highlight, highlight_color, font_size,
464
- crf_quality=23
465
- ):
466
- temp_dir = tempfile.mkdtemp()
467
- status_msg = "πŸš€ Starting video stitching...\n"
468
- try:
469
- ffmpeg_env = setup_custom_fonts_hf(temp_dir)
470
-
471
- video_path, v_err = validate_and_get_file(video_file, video_url, 'video', temp_dir)
472
- if v_err: return None, v_err
473
-
474
- audio_path, a_err = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
475
- if a_err: return None, a_err
476
-
477
- subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
478
- if s_err: return None, s_err
479
-
480
- video_width, video_height, video_fps = get_video_info(video_path)
481
- audio_duration = get_audio_duration(audio_path)
482
-
483
- script_dir = os.path.dirname(os.path.abspath(__file__))
484
- reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
485
- has_reddit_template = os.path.exists(reddit_template_path)
486
-
487
- first_sub_start = 0
488
- first_sub_end = 0
489
- reddit_card_path = None
490
- if has_reddit_template:
491
- try:
492
- first_sub_text, first_sub_start, first_sub_end = extract_first_subtitle(subtitle_path)
493
- status_msg += f"\nπŸ“± Reddit Overlay: '{first_sub_text[:30]}...'\n"
494
- reddit_card_path = create_reddit_card_with_text(reddit_template_path, first_sub_text, temp_dir, REDDIT_CONFIG)
495
- except Exception as e:
496
- status_msg += f" β€’ ⚠️ Reddit card failed: {str(e)}\n"
497
- has_reddit_template = False
498
-
499
- # --- 1. Find CTA Info ---
500
- title_timestamp, cta_timestamp, cta_text_raw = find_title_and_cta(subtitle_path, book_title)
501
- book_appears_at = title_timestamp if title_timestamp is not None else audio_duration * (1 - VIDEO_CONFIG['promo_percent'])
502
- box_appears_at = cta_timestamp if cta_timestamp is not None else book_appears_at + 1.5
503
-
504
- if title_timestamp: status_msg += f"\nπŸ“– Book title at {title_timestamp:.2f}s\n"
505
- if cta_timestamp: status_msg += f"πŸ–€ CTA text starts at {cta_timestamp:.2f}s\n"
506
-
507
- # --- 2. Prepare Dynamic CTA ---
508
- cta_ass_path = None
509
- cta_sub_escaped = None
510
- if cta_text_raw:
511
- status_msg += "πŸ–€ Generating Instagram-style dynamic CTA...\n"
512
- cta_font_size = int(video_width * 0.060)
513
- cta_ass_path = create_cta_highlight_ass(
514
- subtitle_path, temp_dir, box_appears_at,
515
- cta_font_size, video_width, video_height, highlight_color
516
- )
517
- cta_sub_escaped = cta_ass_path.replace('\\', '/').replace(':', '\\:')
518
-
519
- # --- 3. Process Main Subtitles ---
520
- if enable_highlight:
521
- status_msg += f"\n✨ Processing subtitles...\n"
522
- main_subtitle_path = create_word_by_word_highlight_ass(
523
- subtitle_path, temp_dir, highlight_color, font_size,
524
- skip_first=has_reddit_template, config=SUBTITLE_CONFIG,
525
- cta_start_time_sec=title_timestamp
526
- )
527
- else:
528
- main_subtitle_path = subtitle_path
529
 
530
- main_sub_escaped = main_subtitle_path.replace('\\', '/').replace(':', '\\:')
531
-
532
- # --- 4. Book Cover ---
533
- book_cover_path, book_error = validate_book_cover_input(
534
- book_cover_file, book_cover_url, book_cover_base64, book_id, temp_dir
535
- )
536
- if book_error: return None, book_error
537
- if book_cover_path is None: return None, "❌ Book cover required."
538
-
539
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
540
- output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
541
-
542
- fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
543
- fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
544
- fade_out_duration = fade_ends_at - fade_starts_at
545
- promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
546
- solid_color_duration = max(0, book_appears_at - fade_ends_at)
547
- main_video_duration = fade_ends_at
548
- cover_segment_duration = promo_duration
549
- fade_color_hex = "#dacfc3"
550
-
551
- try:
552
- # Step 1: Main video with fade-out
553
- main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
554
- subprocess.run([
555
- "ffmpeg", "-stream_loop", "-1", "-i", video_path,
556
- "-t", str(main_video_duration),
557
- "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}",
558
- "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path
559
- ], check=True, capture_output=True, text=True, env=ffmpeg_env)
560
-
561
- # Step 2: Solid color hold
562
- solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
563
- subprocess.run([
564
- "ffmpeg", "-f", "lavfi",
565
- "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}",
566
- "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path
567
- ], check=True, capture_output=True, text=True, env=ffmpeg_env)
568
-
569
- # Step 3: Book cover segment
570
- cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
571
- subprocess.run([
572
- "ffmpeg", "-loop", "1", "-i", book_cover_path,
573
- "-t", str(cover_segment_duration),
574
- "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps}",
575
- "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path
576
- ], check=True, capture_output=True, text=True, env=ffmpeg_env)
577
-
578
- # Step 4: Concat list
579
- concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
580
- with open(concat_list_path, 'w') as f:
581
- f.write(f"file '{main_segment_path}'\n")
582
- f.write(f"file '{solid_color_path}'\n")
583
- f.write(f"file '{cover_segment_path}'\n")
584
-
585
- # Step 5: Build filter graph
586
- input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
587
- curr_idx = 1
588
- curr_stream = "[0:v]"
589
- filter_complex = ""
590
-
591
- if has_reddit_template:
592
- input_cmd += ["-loop", "1", "-i", reddit_card_path]
593
- filter_complex += f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
594
- curr_stream = "[v1]"
595
- curr_idx += 1
596
- else:
597
- filter_complex += f"{curr_stream}copy[v1];"
598
- curr_stream = "[v1]"
599
-
600
- filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"
601
- curr_stream = "[v2]"
602
-
603
- if cta_ass_path:
604
- filter_complex += f"{curr_stream}ass={cta_sub_escaped}[v_final]"
605
- else:
606
- filter_complex += f"{curr_stream}copy[v_final]"
607
-
608
- input_cmd += ["-i", audio_path]
609
- cmd_final = input_cmd + [
610
- "-filter_complex", filter_complex,
611
- "-map", "[v_final]", "-map", f"{curr_idx}:a",
612
- "-c:v", "libx264", "-crf", str(crf_quality),
613
- "-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
614
- ]
615
-
616
- status_msg += "🎬 Rendering final video...\n"
617
- subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)
618
-
619
- except subprocess.CalledProcessError as e:
620
- return None, f"❌ FFmpeg error:\n{e.stderr[-1000:] if e.stderr else str(e)}"
621
- except Exception as e:
622
- return None, f"❌ Error: {str(e)}"
623
-
624
- if os.path.exists(output_path):
625
- return output_path, f"βœ… Success!\n\n{status_msg}"
626
- return None, "❌ Output not created"
627
-
628
- except Exception as e:
629
- return None, f"❌ Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
 
631
 
632
  # ========================================
@@ -697,8 +629,8 @@ async def stitch_upload(
697
  raise HTTPException(status_code=422, detail=f"❌ Invalid video format: {video_file.content_type}")
698
  if audio_file and audio_file.content_type not in {"audio/mpeg", "audio/wav", "audio/x-wav", "audio/aac", "audio/mp4", "audio/x-m4a"}:
699
  raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
700
- if subtitle_file and not subtitle_file.filename.endswith('.srt'):
701
- raise HTTPException(status_code=422, detail="❌ Subtitle must be a .srt file")
702
  if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
703
  raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
704
 
 
18
  from fastapi.middleware.cors import CORSMiddleware
19
  from pydantic import BaseModel, Field
20
 
 
21
  # ========================================
22
  # CONFIGURATION SECTION - CUSTOMIZE HERE
23
  # ========================================
24
 
25
# Reddit hook-card rendering settings (template image + auto-fit text layout).
REDDIT_CONFIG = {
    'template_file': 'reddit_template.png',  # PNG the hook text is drawn onto
    'font_file': 'RFDewi-Bold.ttf',
    'font_size_max': 180,                    # auto-fit starts here...
    'font_size_min': 16,                     # ...and shrinks down to this floor
    'text_wrap_width': 35,                   # characters per wrapped line
    'text_color': 'black',
    'line_spacing': 10,
    'text_box_width_percent': 0.85,          # usable text area as fraction of template width
    'text_box_height_percent': 0.65,         # ...and of template height
    'y_offset': 20,                          # vertical nudge of the centered text block (px)
}

# Burned-in subtitle (ASS) styling defaults used by the highlight generators.
SUBTITLE_CONFIG = {
    'font_file': 'LilitaOne-Regular.ttf',
    'font_name': 'Lilita One',
    'font_size_default': 10,
    'position_alignment': 5,   # ASS alignment 5 = middle-center
    'margin_left': 50,
    'margin_right': 70,
    'margin_vertical': 20,
    'line_spacing': 2
}
# NOTE: the CTA segment's font size is derived in stitch_media()
# (int(video_width * 0.060)), not from SUBTITLE_CONFIG.

# Timeline layout for the assembled video (fractions of the audio duration).
VIDEO_CONFIG = {
    'reddit_scale_percent': 0.75,   # Reddit card width relative to video width
    'fade_start_percent': 0.70,     # fade-out begins at 70% of audio duration
    'fade_end_percent': 0.85,       # fully faded to the solid color by 85%
    'promo_percent': 0.094,         # trailing book-cover promo share of total duration
    'fade_color_rgb': (218, 207, 195),  # beige fade/solid color (hex #dacfc3)
}
57
 
58
+
59
  # ========================================
60
  # END CONFIGURATION SECTION
61
  # ========================================
62
 
63
# Put the bundled static ffmpeg/ffprobe binaries on PATH; the subprocess
# calls below invoke "ffmpeg"/"ffprobe" by name and rely on this.
static_ffmpeg.add_paths()
65
 
66
  # ============================================
67
  # FINDS BOOK TITLE TO SPLIT CTA AND BODY SCRIPT
 
79
  if len(lines) >= 3:
80
  subtitle_text = ' '.join(lines[2:])
81
  if book_title_lower in subtitle_text.lower():
82
+ # 1. Get the time the title is spoken
83
  times = lines[1].split(' --> ')
84
  title_time = srt_time_to_ms(times[0]) / 1000.0
85
+
86
  cta_time = None
87
  cta_text_parts = []
88
+
89
+ # 2. Get the time the ACTUAL CTA text starts
90
  if i + 1 < len(blocks):
91
  next_block_lines = blocks[i + 1].strip().split('\n')
92
  if len(next_block_lines) >= 3:
93
  cta_time = srt_time_to_ms(next_block_lines[1].split(' --> ')[0]) / 1000.0
94
+
95
+ # 3. Grab all remaining text for the CTA
96
  for j in range(i + 1, len(blocks)):
97
  next_lines = blocks[j].strip().split('\n')
98
  if len(next_lines) >= 3:
99
  cta_text_parts.append(' '.join(next_lines[2:]).strip())
100
+
101
  cta_text = ' '.join(cta_text_parts) if cta_text_parts else None
102
  return title_time, cta_time, cta_text
103
  return None, None, None
 
105
  print(f"Error finding title and CTA: {e}")
106
  return None, None, None
107
 
 
108
def setup_custom_fonts_hf(temp_dir):
    """Stage the bundled fonts into ``temp_dir`` and point fontconfig at them.

    Copies every .ttf/.otf from the repo's ``fonts/`` directory (plus the
    fonts named in REDDIT_CONFIG / SUBTITLE_CONFIG) into ``temp_dir/fonts``,
    writes a minimal fonts.conf, and returns a copy of ``os.environ`` with
    FONTCONFIG_FILE / FONTCONFIG_PATH set.  On any error, or when no fonts
    were found, returns the plain environment copy unchanged.
    """
    try:
        staging_dir = os.path.join(temp_dir, 'fonts')
        os.makedirs(staging_dir, exist_ok=True)

        base_dir = os.path.dirname(os.path.abspath(__file__))
        bundled_dir = os.path.join(base_dir, 'fonts')

        candidates = []
        if os.path.exists(bundled_dir):
            candidates.extend(
                os.path.join(bundled_dir, name)
                for name in os.listdir(bundled_dir)
                if name.endswith(('.ttf', '.otf', '.TTF', '.OTF'))
            )
        # Also pick up the configured fonts when they sit next to the script.
        for configured in (REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']):
            candidate = os.path.join(base_dir, configured)
            if os.path.exists(candidate) and candidate not in candidates:
                candidates.append(candidate)

        for source in candidates:
            shutil.copy(source, os.path.join(staging_dir, os.path.basename(source)))

        if not candidates:
            return os.environ.copy()

        conf_text = f"""<?xml version="1.0"?>
<fontconfig><dir>{staging_dir}</dir><cachedir>{temp_dir}/cache</cachedir></fontconfig>"""
        conf_path = os.path.join(temp_dir, 'fonts.conf')
        with open(conf_path, 'w') as handle:
            handle.write(conf_text)

        env = os.environ.copy()
        env['FONTCONFIG_FILE'] = conf_path
        env['FONTCONFIG_PATH'] = temp_dir
        return env
    except Exception:
        return os.environ.copy()
 
 
138
 
139
def download_file_from_url(url, output_dir, filename):
    """Stream ``url`` to ``output_dir/filename`` and return the local path.

    Raises a generic Exception with a descriptive message on any failure
    (network error, HTTP error status, or write failure).
    """
    try:
        resp = requests.get(url, stream=True, timeout=30)
        resp.raise_for_status()
        destination = os.path.join(output_dir, filename)
        with open(destination, 'wb') as sink:
            for piece in resp.iter_content(chunk_size=8192):
                sink.write(piece)
        return destination
    except Exception as e:
        raise Exception(f"Failed to download file: {str(e)}")
 
 
 
148
 
149
def download_book_cover(book_id, output_dir):
    """Fetch the Google Books front cover for ``book_id``; return the PNG path.

    The downloaded bytes are verified with Pillow so a non-image response
    (e.g. an HTML error page) fails loudly instead of producing a bad file.
    """
    try:
        cover_url = f"https://books.google.com/books/publisher/content/images/frontcover/{book_id}"
        resp = requests.get(cover_url, timeout=30)
        resp.raise_for_status()
        saved_path = os.path.join(output_dir, 'book_cover.png')
        with open(saved_path, 'wb') as sink:
            sink.write(resp.content)
        # verify() raises if the payload is not a decodable image.
        Image.open(saved_path).verify()
        return saved_path
    except Exception as e:
        raise Exception(f"Failed to download book cover: {str(e)}")
 
 
 
159
 
160
def decode_base64_image(base64_string, output_dir):
    """Decode a base64 (optionally data-URI) image and save it as a PNG.

    Returns the path of the saved file; raises on invalid base64 or
    non-image payloads.
    """
    try:
        # Strip a "data:image/...;base64," prefix when present.
        if ',' in base64_string and 'base64' in base64_string:
            base64_string = base64_string.split(',', 1)[1]
        raw = base64.b64decode(base64_string.strip())
        # verify() consumes the Image object, so reopen the bytes to save.
        Image.open(BytesIO(raw)).verify()
        out_path = os.path.join(
            output_dir,
            f"book_cover_b64_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png",
        )
        Image.open(BytesIO(raw)).save(out_path, 'PNG')
        return out_path
    except Exception as e:
        raise Exception(f"Base64 decode failed: {str(e)}")
 
 
170
 
171
def validate_book_cover_input(book_cover_file, book_cover_url, book_cover_base64, book_id, temp_dir):
    """Resolve the book cover from exactly one of four optional sources.

    Returns ``(path, error)``: ``(None, None)`` when no cover was supplied,
    ``(None, message)`` when the input is ambiguous or retrieval fails, and
    ``(local_path, None)`` on success.
    """
    provided_file = book_cover_file is not None
    provided_url = bool(book_cover_url and book_cover_url.strip())
    provided_b64 = bool(book_cover_base64 and book_cover_base64.strip())
    provided_id = bool(book_id and book_id.strip())

    supplied = sum([provided_file, provided_url, provided_b64, provided_id])
    if supplied == 0:
        return None, None
    if supplied > 1:
        return None, "❌ Book Cover: Use only ONE method"

    try:
        if provided_file:
            # Gradio-style upload objects expose .name; plain paths pass through.
            return str(book_cover_file.name if hasattr(book_cover_file, 'name') else book_cover_file), None
        if provided_url:
            stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            return download_file_from_url(book_cover_url.strip(), temp_dir, f"book_cover_{stamp}.png"), None
        if provided_b64:
            return decode_base64_image(book_cover_base64.strip(), temp_dir), None
        if provided_id:
            return download_book_cover(book_id.strip(), temp_dir), None
    except Exception as e:
        return None, f"❌ Book cover error: {str(e)}"
    return None, None
 
 
 
 
 
 
 
 
186
 
187
def get_video_info(video_path):
    """Probe ``video_path`` with ffprobe; return ``(width, height, fps)``.

    fps is computed from ffprobe's rational r_frame_rate (e.g. "30000/1001").
    Raises a generic Exception with context on any probe failure.
    """
    try:
        probe_dims = [
            "ffprobe", "-v", "error", "-select_streams", "v:0",
            "-show_entries", "stream=width,height", "-of", "csv=s=x:p=0", video_path,
        ]
        dims = subprocess.run(probe_dims, capture_output=True, text=True, check=True)
        width, height = dims.stdout.strip().split('x')

        probe_rate = [
            "ffprobe", "-v", "error", "-select_streams", "v:0",
            "-show_entries", "stream=r_frame_rate", "-of",
            "default=noprint_wrappers=1:nokey=1", video_path,
        ]
        rate = subprocess.run(probe_rate, capture_output=True, text=True, check=True)
        raw_fps = rate.stdout.strip()
        if '/' in raw_fps:
            numerator, denominator = raw_fps.split('/')
            fps = float(numerator) / float(denominator)
        else:
            fps = float(raw_fps)
        return int(width), int(height), fps
    except Exception as e:
        raise Exception(f"Failed to get video info: {str(e)}")
 
 
198
 
199
def get_audio_duration(audio_path):
    """Return the container duration of ``audio_path`` in seconds (via ffprobe)."""
    try:
        probe = [
            "ffprobe", "-v", "error", "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1", audio_path,
        ]
        out = subprocess.run(probe, capture_output=True, text=True, check=True)
        return float(out.stdout.strip())
    except Exception as e:
        raise Exception(f"Failed to get audio duration: {str(e)}")
 
 
205
 
206
def extract_first_subtitle(srt_path):
    """Return ``(text, start_sec, end_sec)`` of the first cue in an SRT file.

    Falls back to ``("No subtitle found", 0.0, 3.0)`` when the file has no
    well-formed first block; raises on read/parse errors.
    """
    try:
        with open(srt_path, 'r', encoding='utf-8') as handle:
            content = handle.read()
        blocks = re.split(r'\n\s*\n', content.strip())
        if not blocks:
            return "No subtitle found", 0.0, 3.0

        lines = blocks[0].strip().split('\n')
        if len(lines) < 3:
            return "No subtitle found", 0.0, 3.0

        def to_seconds(stamp):
            # SRT timestamps look like "HH:MM:SS,mmm".
            hh, mm, rest = stamp.split(':')
            ss, millis = rest.split(',')
            return int(hh) * 3600 + int(mm) * 60 + int(ss) + int(millis) / 1000.0

        begin, finish = lines[1].split(' --> ')
        return ' '.join(lines[2:]).strip(), to_seconds(begin.strip()), to_seconds(finish.strip())
    except Exception as e:
        raise Exception(f"Failed to extract first subtitle: {str(e)}")
 
 
 
 
221
 
222
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
    """Render hook_text onto the Reddit card template, auto-fitting the font size.

    Tries sizes from config['font_size_max'] down to config['font_size_min']
    (step 2) until the wrapped text's bounding box fits the configured text
    box, then draws the text centered (nudged by config['y_offset']) and
    saves the composite as 'reddit_card_composite.png' in output_dir.

    Returns the path of the saved PNG; raises on any failure.
    """
    try:
        template = Image.open(template_path).convert('RGBA')
        temp_w, temp_h = template.size
        # Usable text area, as fractions of the template dimensions.
        box_w = int(temp_w * config['text_box_width_percent'])
        box_h = int(temp_h * config['text_box_height_percent'])
        script_dir = os.path.dirname(os.path.abspath(__file__))
        # Prefer the fonts/ subdirectory; fall back to the script directory.
        font_paths = [os.path.join(script_dir, 'fonts', config['font_file']), os.path.join(script_dir, config['font_file'])]
        best_font_size = config['font_size_max']
        best_wrapped_text = hook_text
        # Shrink the font until the wrapped text fits inside the text box.
        for font_size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
            font = None
            for fp in font_paths:
                if os.path.exists(fp):
                    try: font = ImageFont.truetype(fp, font_size); break
                    except: pass
            if font is None: font = ImageFont.load_default()
            # NOTE(review): wrap width is fixed in characters, independent of
            # font size — confirm this is intentional.
            wrapped = textwrap.fill(hook_text, width=config['text_wrap_width'])
            draw = ImageDraw.Draw(template)
            bbox = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=config['line_spacing'])
            if (bbox[2]-bbox[0] <= box_w and bbox[3]-bbox[1] <= box_h):
                best_font_size = font_size; best_wrapped_text = wrapped; break
        # Reload the font at the chosen size for the final draw.
        font = None
        for fp in font_paths:
            if os.path.exists(fp):
                try: font = ImageFont.truetype(fp, best_font_size); break
                except: pass
        if font is None: font = ImageFont.load_default()
        draw = ImageDraw.Draw(template)
        bbox = draw.multiline_textbbox((0, 0), best_wrapped_text, font=font, spacing=config['line_spacing'])
        # Center the text block; y_offset nudges it vertically.
        x = (temp_w - (bbox[2]-bbox[0])) / 2
        y = (temp_h - (bbox[3]-bbox[1])) / 2 + config['y_offset']
        draw.multiline_text((x, y), best_wrapped_text, fill=config['text_color'], font=font, spacing=config['line_spacing'], align='left')
        output_path = os.path.join(output_dir, 'reddit_card_composite.png')
        template.save(output_path, 'PNG')
        return output_path
    except Exception as e: raise Exception(f"Failed to create Reddit card: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
    """Resolve a media input supplied either as an upload OR as a URL.

    Exactly one of ``uploaded_file`` / ``url_string`` must be provided.

    Returns:
        (path, error): local file path and None on success, or None and a
        user-facing error message on failure.
    """
    has_upload = uploaded_file is not None
    has_url = bool(url_string and url_string.strip())
    if not has_upload and not has_url:
        return None, f"❌ Please provide {file_type}"
    if has_upload and has_url:
        return None, f"❌ Use only ONE method for {file_type}"
    if has_upload:
        # Gradio-style upload objects expose .name; plain paths pass through.
        return str(uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file), None
    # Only the URL branch remains at this point (the original had an
    # unreachable "Unknown error" fallthrough here).
    try:
        # Compute the URL tail once; URLs ending in '/' fall back to 'file'.
        url_tail = url_string.split('/')[-1]
        fname = f"{file_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{url_tail if url_tail else 'file'}"
        return download_file_from_url(url_string.strip(), temp_dir, fname), None
    except Exception as e:
        return None, f"❌ Error downloading {file_type}: {str(e)}"
 
 
 
 
 
272
 
273
def srt_time_to_ms(time_str):
    """Convert an SRT timestamp ('HH:MM:SS,mmm') to integer milliseconds."""
    hours, minutes, rest = time_str.strip().split(':')
    seconds, millis = rest.split(',')
    whole_seconds = (int(hours) * 60 + int(minutes)) * 60 + int(seconds)
    return whole_seconds * 1000 + int(millis)
 
277
 
278
def ms_to_ass_time(ms):
    """Convert milliseconds to the ASS time format 'H:MM:SS.cc' (centiseconds)."""
    total_cs = ms // 10          # ASS has centisecond resolution
    cs = total_cs % 100
    total_s = total_cs // 100
    s = total_s % 60
    total_m = total_s // 60
    m = total_m % 60
    h = total_m // 60
    return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
284
 
285
+ #-----------------------
 
286
  # BODY SCRIPT HIGHLIGHTS ASS
287
+ #-----------------------
288
+ def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
289
+ font_size=None, skip_first=False, config=SUBTITLE_CONFIG,
290
+ cta_start_time_sec=None):
291
+ """Convert SRT to ASS. Stops before cta_start_time_sec."""
292
+ if font_size is None: font_size = config['font_size_default']
293
+ color_map = {'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'), 'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'), 'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'), 'blue': ('&H00FF0000', '&H00FFFFFF')}
294
+ highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
295
+
296
+ with open(srt_path, 'r', encoding='utf-8') as f: srt_content = f.read()
297
+ ass_path = os.path.join(output_dir, 'word_highlight_subtitles.ass')
298
+ ass_header = f"""[Script Info]
 
 
 
 
 
299
  Title: Word-by-Word Highlight Subtitles
300
  ScriptType: v4.00+
301
  [V4+ Styles]
 
304
  [Events]
305
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
306
  """
307
+ srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
308
+ ass_events = []
309
+ start_index = 1 if skip_first else 0
310
+ for block in srt_blocks[start_index:]:
311
+ lines = block.strip().split('\n')
312
+ if len(lines) >= 3:
313
+ times = lines[1].split(' --> ')
314
+ if len(times) == 2:
315
+ start_ms = srt_time_to_ms(times[0])
316
+ if cta_start_time_sec is not None and (start_ms / 1000.0) >= cta_start_time_sec - 0.1: break
317
+ end_ms = srt_time_to_ms(times[1])
318
+ words = ' '.join(lines[2:]).split()
319
+ if not words: continue
320
+ time_per_word = (end_ms - start_ms) / len(words)
321
+ for i, word in enumerate(words):
322
+ word_start = start_ms + int(i * time_per_word)
323
+ word_end = start_ms + int((i + 1) * time_per_word)
324
+ if i == len(words) - 1: word_end = end_ms
325
+ text_parts = [f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}" if j == i else w for j, w in enumerate(words)]
326
+ ass_events.append(f"Dialogue: 0,{ms_to_ass_time(word_start)},{ms_to_ass_time(word_end)},Default,,0,0,0,,{' '.join(text_parts)}")
327
+ with open(ass_path, 'w', encoding='utf-8') as f: f.write(ass_header); f.write('\n'.join(ass_events))
328
+ return ass_path
329
+
330
+ #-----------------------
 
 
 
 
 
 
 
 
 
331
  # CTA HIGHLIGHTS ASS
332
+ #-----------------------
333
  def create_cta_highlight_ass(srt_path, output_dir, start_sec, font_size, video_width, video_height, highlight_color='yellow', config=SUBTITLE_CONFIG):
334
+ """Groups CTA words into frames of max 10, but merges leftovers if they are < 3 words."""
335
  color_map = {
336
  'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
337
  'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
338
  'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'),
339
+ 'blue': ('&H00FF0000', '&H00FFFFFF')
340
  }
341
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
342
+ margin_lr = int(video_width * 0.125) + 40
343
 
344
+ with open(srt_path, 'r', encoding='utf-8') as f: srt_content = f.read()
 
345
  ass_path = os.path.join(output_dir, 'cta_animated_subtitles.ass')
346
+
347
  ass_header = f"""[Script Info]
348
  Title: CTA Animated Subtitles
349
  ScriptType: v4.00+
 
354
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
355
  Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,15,0,5,{margin_lr},{margin_lr},0,1
356
  [Events]
357
+ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
358
+
359
  srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
360
+
361
+ # 1. Flatten all CTA words into a single timed stream
362
  all_cta_words = []
363
  for block in srt_blocks:
364
  lines = block.strip().split('\n')
 
366
  times = lines[1].split(' --> ')
367
  if len(times) == 2:
368
  start_ms = srt_time_to_ms(times[0])
369
+ if (start_ms / 1000.0) < start_sec - 0.1: continue
370
+
371
  end_ms = srt_time_to_ms(times[1])
372
  words = ' '.join(lines[2:]).split()
373
+ if not words: continue
374
+
375
  time_per_word = (end_ms - start_ms) / len(words)
376
  for i, word in enumerate(words):
377
  w_start = start_ms + int(i * time_per_word)
378
  w_end = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)
379
  all_cta_words.append({'word': word, 'start': w_start, 'end': w_end})
380
 
381
+ # 2. Group words into chunks with "Don't leave 1 or 2 words alone" logic
382
  chunks = []
383
  i = 0
384
  total_words = len(all_cta_words)
385
+
386
  while i < total_words:
387
  remaining = total_words - i
388
+ if 10 < remaining <= 13:
389
+ take = remaining
390
+ else:
391
+ take = min(10, remaining)
392
+
393
+ chunks.append(all_cta_words[i : i + take])
394
  i += take
395
 
396
+ # 3. Generate ASS Dialogue lines for each chunk
397
  ass_events = []
398
  for chunk in chunks:
399
  chunk_text_only = [item['word'] for item in chunk]
400
+
401
  for idx, info in enumerate(chunk):
402
  w_start = info['start']
403
+ # Match the start of the next word to avoid background box flickering
404
+ w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else info['end']
405
+
406
+ text_parts = []
407
+ for j, word_str in enumerate(chunk_text_only):
408
+ if j == idx:
409
+ text_parts.append(f"{{\\c{highlight_text}}}{word_str}{{\\r}}")
410
+ else:
411
+ text_parts.append(word_str)
412
+
413
+ styled_text = ' '.join(text_parts)
414
+ ass_events.append(f"Dialogue: 1,{ms_to_ass_time(w_start)},{ms_to_ass_time(w_end)},Default,,0,0,0,,{styled_text}")
415
+
416
+ with open(ass_path, 'w', encoding='utf-8') as f:
417
  f.write(ass_header + '\n'.join(ass_events))
418
  return ass_path
419
 
 
420
  # =========================
421
  # MAIN STITCH FUNCTION
422
  # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, subtitle_url, book_cover_file, book_cover_url, book_cover_base64, book_id, book_title, enable_highlight, highlight_color, font_size, crf_quality=23):
    """Assemble the final promo video: looped background + fade to a solid
    color + book-cover promo tail, with Reddit-card overlay, word-highlight
    subtitles, and an animated CTA burned in via ffmpeg.

    Each media input may come as an upload or a URL (one method each).
    Returns (output_path, status_message) on success or (None, error_message)
    on any failure; never raises.
    """
    temp_dir = tempfile.mkdtemp()
    status_msg = "πŸš€ Starting video stitching...\n"
    try:
        # Stage fonts so libass/fontconfig can resolve the custom fonts.
        ffmpeg_env = setup_custom_fonts_hf(temp_dir)
        video_path, v_err = validate_and_get_file(video_file, video_url, 'video', temp_dir)
        if v_err: return None, v_err
        audio_path, a_err = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
        if a_err: return None, a_err
        subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
        if s_err: return None, s_err

        video_width, video_height, video_fps = get_video_info(video_path)
        audio_duration = get_audio_duration(audio_path)

        script_dir = os.path.dirname(os.path.abspath(__file__))
        reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
        has_reddit_template = os.path.exists(reddit_template_path)

        # Window during which the Reddit card overlay is shown (first cue).
        first_sub_start = 0
        first_sub_end = 0
        if has_reddit_template:
            try:
                first_sub_text, first_sub_start, first_sub_end = extract_first_subtitle(subtitle_path)
                status_msg += f"\nπŸ“± Reddit Overlay: '{first_sub_text[:30]}...'\n"
                reddit_card_path = create_reddit_card_with_text(reddit_template_path, first_sub_text, temp_dir, REDDIT_CONFIG)
            except Exception as e:
                # Card rendering is best-effort: fall back to no overlay.
                status_msg += f" β€’ ⚠️ Reddit card failed: {str(e)}\n"
                has_reddit_template = False

        # --- 1. Find CTA Info ---
        # Locate when the book title is spoken and when the CTA text begins.
        title_timestamp, cta_timestamp, cta_text_raw = find_title_and_cta(subtitle_path, book_title)
        # Fall back to the configured promo share of the runtime if not found.
        book_appears_at = title_timestamp if title_timestamp is not None else audio_duration * (1 - VIDEO_CONFIG['promo_percent'])

        box_appears_at = cta_timestamp if cta_timestamp is not None else book_appears_at + 1.5

        # NOTE(review): truthiness check — a title at exactly 0.0s would be
        # skipped here; `is not None` was probably intended.
        if title_timestamp: status_msg += f"\nπŸ“– Book title at {title_timestamp:.2f}s\n"
        if cta_timestamp: status_msg += f"πŸ–€ CTA text starts at {cta_timestamp:.2f}s\n"

        # --- 2. Prepare Dynamic CTA Text ---
        cta_ass_path = None
        if cta_text_raw:
            status_msg += "πŸ–€ Generating Instagram-style dynamic CTA...\n"
            cta_font_size = int(video_width * 0.060) #INCREASE / DECREASE CTA FONT SIZE HERE

            cta_ass_path = create_cta_highlight_ass(
                subtitle_path, temp_dir, box_appears_at,
                cta_font_size, video_width, video_height, highlight_color
            )
            # Escape for use inside an ffmpeg filter-graph argument.
            cta_sub_escaped = cta_ass_path.replace('\\', '/').replace(':', '\\:')

        # --- 3. Process Main Subtitles ---
        if enable_highlight:
            status_msg += f"\n✨ Processing subtitles...\n"
            # Word-by-word highlight track; stops right before the CTA and
            # skips the first cue when the Reddit card covers it.
            main_subtitle_path = create_word_by_word_highlight_ass(
                subtitle_path, temp_dir, highlight_color, font_size,
                skip_first=has_reddit_template, config=SUBTITLE_CONFIG,
                cta_start_time_sec=title_timestamp
            )
        else:
            main_subtitle_path = subtitle_path

        main_sub_escaped = main_subtitle_path.replace('\\', '/').replace(':', '\\:')

        book_cover_path, book_error = validate_book_cover_input(book_cover_file, book_cover_url, book_cover_base64, book_id, temp_dir)
        if book_error: return None, book_error
        has_book_cover = book_cover_path is not None
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")

        # Timeline: [looped video, fading out] -> [solid color] -> [book cover].
        fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
        fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
        fade_out_duration = fade_ends_at - fade_starts_at
        promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
        solid_color_duration = max(0, book_appears_at - fade_ends_at)
        main_video_duration = fade_ends_at
        cover_segment_duration = promo_duration
        fade_color_hex = "#dacfc3"

        if has_book_cover:
            try:
                # Segment 1: background video looped to length, fading to beige.
                main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
                cmd_main = ["ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration), "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path]
                subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # Segment 2: solid color gap until the book title is spoken.
                solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
                cmd_solid = ["ffmpeg", "-f", "lavfi", "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path]
                subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # Segment 3: still book cover for the promo tail.
                cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
                # Removed the fade-in effect here for a clean hard cut
                cmd_cover = ["ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration), "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path]
                subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # Concat demuxer list joining the three segments in order.
                concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
                with open(concat_list_path, 'w') as f:
                    f.write(f"file '{main_segment_path}'\n"); f.write(f"file '{solid_color_path}'\n"); f.write(f"file '{cover_segment_path}'\n")

                #--- 4. Build the Filter Graph ---
                input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
                curr_idx = 1
                curr_stream = "[0:v]"

                # Layer 1: Reddit Card (shown only during the first cue's window)
                if has_reddit_template:
                    input_cmd += ["-loop", "1", "-i", reddit_card_path]
                    filter_complex = f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
                    curr_stream, curr_idx = "[v1]", curr_idx + 1
                else:
                    filter_complex = f"{curr_stream}copy[v1];"; curr_stream = "[v1]"

                # Layer 2: Main Subtitles (Auto-stops right before CTA)
                filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"; curr_stream = "[v2]"

                # Layer 3: Animated CTA Subtitles Overlay (Dynamic Box is built-in!)
                if cta_ass_path:
                    filter_complex += f"{curr_stream}ass={cta_sub_escaped}[v_final]"
                else:
                    filter_complex += f"{curr_stream}copy[v_final]"

                # Audio is always the last input, so curr_idx points at it.
                input_cmd += ["-i", audio_path]
                cmd_final = input_cmd + [
                    "-filter_complex", filter_complex,
                    "-map", "[v_final]", "-map", f"{curr_idx}:a",
                    "-c:v", "libx264", "-crf", str(crf_quality),
                    "-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
                ]

                status_msg += "🎬 Rendering final video...\n"
                subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)

            except subprocess.CalledProcessError as e: return None, f"❌ FFmpeg error:\n{e.stderr[-1000:] if e.stderr else str(e)}"
            except Exception as e: return None, f"❌ Error: {str(e)}"
        else: return None, "❌ Book cover required."

        if os.path.exists(output_path): return output_path, f"βœ… Success!"
        else: return None, "❌ Output not created"
    except Exception as e: return None, f"❌ Error: {str(e)}"
562
 
563
 
564
  # ========================================
 
629
  raise HTTPException(status_code=422, detail=f"❌ Invalid video format: {video_file.content_type}")
630
  if audio_file and audio_file.content_type not in {"audio/mpeg", "audio/wav", "audio/x-wav", "audio/aac", "audio/mp4", "audio/x-m4a"}:
631
  raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
632
+ if subtitle_file and not (subtitle_file.filename.endswith('.srt') or subtitle_file.filename.endswith('.json')):
633
+ raise HTTPException(status_code=422, detail="❌ Subtitle must be a .srt or .json file")
634
  if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
635
  raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
636